cortexdb-airflow 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortexdb_airflow/__init__.py +29 -0
- cortexdb_airflow/hooks.py +124 -0
- cortexdb_airflow/operators.py +142 -0
- cortexdb_airflow-0.1.0.dist-info/METADATA +13 -0
- cortexdb_airflow-0.1.0.dist-info/RECORD +7 -0
- cortexdb_airflow-0.1.0.dist-info/WHEEL +4 -0
- cortexdb_airflow-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Apache Airflow provider for CortexDB.
|
|
2
|
+
|
|
3
|
+
Provides a Hook for connection management and an Operator for executing
|
|
4
|
+
CortexDB operations (experience, recall, forget, search) within Airflow DAGs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from cortexdb_airflow.hooks import CortexDBHook
|
|
8
|
+
from cortexdb_airflow.operators import CortexDBOperator
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"CortexDBHook",
|
|
12
|
+
"CortexDBOperator",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_provider_info() -> dict:
    """Return the Airflow provider metadata for this package.

    Returns:
        A dictionary holding the package name, display name, description,
        the ``cortexdb`` connection type (mapped to the dotted path of
        ``CortexDBHook``), and the list of provider versions.
    """
    # Single connection type exposed by this provider.
    cortexdb_connection = {
        "connection-type": "cortexdb",
        "hook-class-name": "cortexdb_airflow.hooks.CortexDBHook",
    }

    provider_info = {
        "package-name": "cortexdb-airflow",
        "name": "CortexDB",
        "description": "Apache Airflow provider for CortexDB long-term memory.",
        "connection-types": [cortexdb_connection],
        "versions": ["0.1.0"],
    }
    return provider_info
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Airflow Hook for CortexDB connections.
|
|
2
|
+
|
|
3
|
+
Manages CortexDB client lifecycle through Airflow's connection management
|
|
4
|
+
system, enabling operators and tasks to share connection configuration.
|
|
5
|
+
|
|
6
|
+
Example::
|
|
7
|
+
|
|
8
|
+
from cortexdb_airflow import CortexDBHook
|
|
9
|
+
|
|
10
|
+
hook = CortexDBHook(cortexdb_conn_id="cortexdb_default")
|
|
11
|
+
client = hook.get_client()
|
|
12
|
+
result = client.recall("user:default", query="recent events")
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from typing import Any, Optional
|
|
18
|
+
|
|
19
|
+
from airflow.hooks.base import BaseHook
|
|
20
|
+
|
|
21
|
+
from cortexdb import Cortex
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CortexDBHook(BaseHook):
    """Airflow Hook for managing CortexDB connections.

    Reads connection parameters from Airflow's connection store and creates
    a configured :class:`cortexdb.Cortex` client instance.

    The Airflow connection should be configured as:
    - **Host**: CortexDB server hostname (optional, defaults to ``localhost``)
    - **Port**: CortexDB server port (optional, defaults to ``3141``)
    - **Password**: PASETO bearer token (optional)
    - **Login**: Actor id (optional, defaults to ``"user:default"``)
    - **Schema**: Default scope path (optional, defaults to ``"user:default"``)
    - **Extra**: JSON with ``timeout`` (optional, defaults to ``30.0``) and
      ``scheme`` (optional, defaults to ``"http"``)

    Args:
        cortexdb_conn_id: The Airflow connection ID. Defaults to
            ``"cortexdb_default"``.
    """

    conn_name_attr = "cortexdb_conn_id"
    default_conn_name = "cortexdb_default"
    conn_type = "cortexdb"
    hook_name = "CortexDB"

    def __init__(
        self,
        # Reuse the class constant so the default lives in one place.
        cortexdb_conn_id: str = default_conn_name,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.cortexdb_conn_id = cortexdb_conn_id
        # Built on first use by get_client() and reused afterwards.
        self._client: Cortex | None = None

    def get_client(self) -> Cortex:
        """Create or return a cached CortexDB client.

        The client is built once per hook instance from the Airflow
        connection named by ``cortexdb_conn_id`` and then reused.

        Returns:
            An initialized :class:`cortexdb.Cortex` client using the Airflow
            connection configuration.

        Raises:
            ValueError: If the ``timeout`` extra cannot be converted to a
                float (raised by ``float()``).
        """
        if self._client is not None:
            return self._client

        conn = self.get_connection(self.cortexdb_conn_id)
        extra = conn.extra_dejson if conn.extra else {}

        # ``or`` (rather than a .get() default) also covers extras that are
        # present but null/empty, e.g. {"scheme": null}.
        scheme = extra.get("scheme") or "http"
        host = conn.host or "localhost"
        port = conn.port or 3141
        api_url = f"{scheme}://{host}:{port}"

        # An explicit {"timeout": null} must fall back to the default instead
        # of crashing in float(None); 0 is kept as a caller-supplied value.
        raw_timeout = extra.get("timeout")
        timeout = 30.0 if raw_timeout is None else float(raw_timeout)

        self._client = Cortex(
            api_url,
            actor=conn.login or "user:default",
            bearer=conn.password or None,
            timeout=timeout,
        )
        return self._client

    def get_scope(self) -> str:
        """Return the default scope path from the connection schema.

        Returns:
            The scope path, defaulting to ``"user:default"`` if not configured.
        """
        conn = self.get_connection(self.cortexdb_conn_id)
        return conn.schema or "user:default"

    def test_connection(self) -> tuple[bool, str]:
        """Test the CortexDB connection.

        Builds (or reuses) the client and calls ``whoami()`` on it.

        Returns:
            A tuple of (success, message). On failure the message carries the
            text of the exception that was raised.
        """
        # The broad except is deliberate: every failure is reported through
        # the return value instead of propagating to the caller.
        try:
            client = self.get_client()
            who = client.whoami()
        except Exception as exc:
            return False, f"Connection failed: {exc}"
        return True, f"Connection successful: {who}"

    @classmethod
    def get_ui_field_behaviour(cls) -> dict[str, Any]:
        """Return custom UI field configuration for the Airflow connection form."""
        return {
            "relabeling": {
                "login": "Actor",
                "password": "Bearer Token",
                "schema": "Default Scope",
                "host": "CortexDB Host",
                "port": "CortexDB Port",
            },
            "placeholders": {
                "host": "localhost",
                "port": "3141",
                "login": "user:default",
                "password": "v4.public... (optional)",
                "schema": "user:default",
                "extra": '{"timeout": 30, "scheme": "http"}',
            },
        }
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Airflow Operator for CortexDB operations.
|
|
2
|
+
|
|
3
|
+
Provides a single operator that can execute experience (write), recall,
|
|
4
|
+
forget, and search operations against CortexDB within Airflow DAGs.
|
|
5
|
+
|
|
6
|
+
Example::
|
|
7
|
+
|
|
8
|
+
from airflow.decorators import dag
|
|
9
|
+
from cortexdb_airflow import CortexDBOperator
|
|
10
|
+
|
|
11
|
+
@dag(schedule=None)
|
|
12
|
+
def memory_dag():
|
|
13
|
+
store = CortexDBOperator(
|
|
14
|
+
task_id="store_result",
|
|
15
|
+
operation="experience",
|
|
16
|
+
content="Pipeline completed successfully with 1,000 records.",
|
|
17
|
+
scope="org:data-team",
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
recall = CortexDBOperator(
|
|
21
|
+
task_id="recall_context",
|
|
22
|
+
operation="recall",
|
|
23
|
+
query="recent pipeline results",
|
|
24
|
+
scope="org:data-team",
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
store >> recall
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
from typing import Any, Optional, Sequence
|
|
33
|
+
|
|
34
|
+
from airflow.models import BaseOperator
|
|
35
|
+
from airflow.utils.context import Context
|
|
36
|
+
|
|
37
|
+
from cortexdb_airflow.hooks import CortexDBHook
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CortexDBOperator(BaseOperator):
    """Airflow Operator for executing CortexDB operations.

    Supports four operations: ``experience``, ``recall``, ``forget``, and
    ``search``. The operation result is returned from :meth:`execute` so it
    can be pushed to XCom for downstream task consumption.

    Args:
        operation: The CortexDB operation to execute. One of
            ``"experience"``, ``"recall"``, ``"forget"``, ``"search"``.
        cortexdb_conn_id: Airflow connection ID. Defaults to
            ``"cortexdb_default"``.
        scope: Scope path. If ``None`` (or empty), uses the connection's
            default scope.
        content: Content to store (for ``experience`` operation).
        query: Query string (for ``recall``, ``search``).
        labels: Optional labels for the stored experience.
        reason: Reason for forgetting (for ``forget``).
        view: Recall view (for ``recall``/``search``). Defaults to ``holistic``.

    Raises:
        ValueError: If ``operation`` is not one of the supported names.
    """

    # Attributes rendered by Airflow's templating before execute() runs.
    template_fields: Sequence[str] = (
        "content",
        "query",
        "scope",
        "reason",
    )

    # Single source of truth for the operation names accepted by __init__
    # and re-checked at execution time.
    _VALID_OPERATIONS = ("experience", "recall", "forget", "search")

    def __init__(
        self,
        *,
        operation: str,
        cortexdb_conn_id: str = "cortexdb_default",
        scope: Optional[str] = None,
        content: Optional[str] = None,
        query: Optional[str] = None,
        labels: Optional[Sequence[str]] = None,
        reason: str = "",
        view: str = "holistic",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)

        valid_operations = self._VALID_OPERATIONS
        if operation not in valid_operations:
            raise ValueError(
                f"Invalid operation '{operation}'. "
                f"Must be one of {valid_operations}."
            )

        self.operation = operation
        self.cortexdb_conn_id = cortexdb_conn_id
        self.scope = scope
        self.content = content
        self.query = query
        self.labels = labels
        self.reason = reason
        self.view = view

    def _validate_inputs(self) -> None:
        """Check that the arguments required by ``self.operation`` are set.

        Raises:
            ValueError: If the operation is unsupported, or if ``content``
                (for ``experience``) or ``query`` (for ``recall``/``search``)
                is missing or empty.
        """
        # ``operation`` is a plain attribute and may have been reassigned
        # after __init__, so it is checked again here.
        if self.operation not in self._VALID_OPERATIONS:
            raise ValueError(f"Unsupported operation: {self.operation}")
        if self.operation == "experience" and not self.content:
            raise ValueError("'content' is required for the experience operation.")
        if self.operation in ("recall", "search") and not self.query:
            raise ValueError(f"'query' is required for the {self.operation} operation.")

    def execute(self, context: Context) -> Any:
        """Execute the configured CortexDB operation.

        Arguments are validated before the Airflow connection is looked up,
        so a mis-configured task fails with its argument error without first
        building a client.

        Args:
            context: The Airflow task execution context.

        Returns:
            The result of the CortexDB operation, pushed to XCom.

        Raises:
            ValueError: If a required argument for the operation is missing
                or the operation is unsupported.
        """
        self._validate_inputs()

        hook = CortexDBHook(cortexdb_conn_id=self.cortexdb_conn_id)
        client = hook.get_client()
        # An unset/empty scope falls back to the connection's default scope.
        scope = self.scope or hook.get_scope()

        if self.operation == "experience":
            result = client.experience(
                scope,
                text=self.content,
                labels=self.labels,
            )
        elif self.operation in ("recall", "search"):
            # "search" is served by the same client.recall() call as "recall".
            result = client.recall(
                scope,
                query=self.query,
                view=self.view,
            )
        else:
            # Only "forget" remains after validation.
            # NOTE(review): this passes confirm_all=True for a scope that may
            # be the connection default; presumably this forgets everything
            # in that scope - confirm against the Cortex.forget() contract.
            result = client.forget(
                scope,
                confirm_all=True,
                cascade="redact_events",
                reason=self.reason,
            )

        self.log.info("CortexDB %s operation completed.", self.operation)
        return result
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cortexdb-airflow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Apache Airflow provider for CortexDB — long-term memory for data orchestration
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: apache-airflow>=2.8
|
|
8
|
+
Requires-Dist: cortexdbai>=0.1.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# cortexdb-airflow
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
cortexdb_airflow/__init__.py,sha256=rsp3oDKSx9ZIe5Mbh-xJb5dE5_zQDRpdwcqBFV_ppPA,880
|
|
2
|
+
cortexdb_airflow/hooks.py,sha256=q3GJi0_o48Mmgr_PZGxon-9EtakVTMbpLFjJ_A0P79o,4101
|
|
3
|
+
cortexdb_airflow/operators.py,sha256=m7zEZm8Nfg6l4c4p03eDfTUtw_d-i02xY5RDOssZGIo,4577
|
|
4
|
+
cortexdb_airflow-0.1.0.dist-info/METADATA,sha256=qk9CC3JfgNEZAmbTQs1KqoTQoIknY1zWjBMht24cyIE,396
|
|
5
|
+
cortexdb_airflow-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
6
|
+
cortexdb_airflow-0.1.0.dist-info/entry_points.txt,sha256=E5H0b6TawCb3ykL9hk-IbR1Cdiu6TfdxkmyDKY9H3qk,75
|
|
7
|
+
cortexdb_airflow-0.1.0.dist-info/RECORD,,
|