cortexdb-airflow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ """Apache Airflow provider for CortexDB.
2
+
3
+ Provides a Hook for connection management and an Operator for executing
4
+ CortexDB operations (experience, recall, forget, search) within Airflow DAGs.
5
+ """
6
+
7
+ from cortexdb_airflow.hooks import CortexDBHook
8
+ from cortexdb_airflow.operators import CortexDBOperator
9
+
10
+ __all__ = [
11
+ "CortexDBHook",
12
+ "CortexDBOperator",
13
+ ]
14
+
15
+
16
def get_provider_info() -> dict:
    """Return Airflow provider metadata for auto-discovery.

    The ``versions`` entry is read from the installed distribution's
    metadata so it cannot drift from the released package version. When the
    distribution metadata is not available (for example when the code is
    imported without being installed), it falls back to ``"0.1.0"``.

    Returns:
        A dict holding the package name, display name, description, the
        registered connection types and the version list.
    """
    # Function-scope import: keeps the module's top-level imports untouched.
    from importlib import metadata

    package_name = "cortexdb-airflow"
    try:
        version = metadata.version(package_name)
    except metadata.PackageNotFoundError:
        version = "0.1.0"

    return {
        "package-name": package_name,
        "name": "CortexDB",
        "description": "Apache Airflow provider for CortexDB long-term memory.",
        "connection-types": [
            {
                "connection-type": "cortexdb",
                "hook-class-name": "cortexdb_airflow.hooks.CortexDBHook",
            }
        ],
        "versions": [version],
    }
@@ -0,0 +1,124 @@
1
+ """Airflow Hook for CortexDB connections.
2
+
3
+ Manages CortexDB client lifecycle through Airflow's connection management
4
+ system, enabling operators and tasks to share connection configuration.
5
+
6
+ Example::
7
+
8
+ from cortexdb_airflow import CortexDBHook
9
+
10
+ hook = CortexDBHook(cortexdb_conn_id="cortexdb_default")
11
+ client = hook.get_client()
12
+ result = client.recall("user:default", query="recent events")
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from typing import Any, Optional
18
+
19
+ from airflow.hooks.base import BaseHook
20
+
21
+ from cortexdb import Cortex
22
+
23
+
24
class CortexDBHook(BaseHook):
    """Airflow Hook for managing CortexDB connections.

    Reads connection parameters from Airflow's connection store and creates
    a configured :class:`cortexdb.Cortex` client instance.

    The Airflow connection should be configured as:
        - **Host**: CortexDB server hostname (e.g., ``localhost``). A value
          that already carries a scheme (``https://db.example.com``) is
          accepted; that scheme then takes precedence over ``Extra.scheme``.
          The port must still go in the Port field.
        - **Port**: CortexDB server port (defaults to ``3141``)
        - **Password**: PASETO bearer token (optional)
        - **Login**: Actor id (optional, defaults to ``"user:default"``)
        - **Schema**: Default scope path (optional, defaults to ``"user:default"``)
        - **Extra**: JSON with ``timeout`` (optional, defaults to ``30``)
          and ``scheme`` (optional, defaults to ``"http"``)

    Args:
        cortexdb_conn_id: The Airflow connection ID. Defaults to
            ``"cortexdb_default"``.
    """

    conn_name_attr = "cortexdb_conn_id"
    default_conn_name = "cortexdb_default"
    conn_type = "cortexdb"
    hook_name = "CortexDB"

    def __init__(
        self,
        cortexdb_conn_id: str = "cortexdb_default",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.cortexdb_conn_id = cortexdb_conn_id
        # Built on first use by get_client() and reused afterwards.
        self._client: Cortex | None = None

    @staticmethod
    def _build_api_url(host: str | None, port: int | None, scheme: str | None) -> str:
        """Assemble the base URL passed to the Cortex client.

        Args:
            host: Value of the connection's Host field; may be empty and may
                already include a ``scheme://`` prefix.
            port: Value of the connection's Port field; may be empty.
            scheme: Scheme taken from the connection extras; may be empty.

        Returns:
            A URL of the form ``scheme://host:port`` with ``http``,
            ``localhost`` and ``3141`` substituted for missing parts.
        """
        scheme = scheme or "http"
        host = host or "localhost"
        # Users often paste a full URL into the Host field; without this the
        # result would be a malformed "http://https://host:port".
        if "://" in host:
            scheme, _, host = host.partition("://")
        host = host.rstrip("/") or "localhost"
        return f"{scheme}://{host}:{port or 3141}"

    def get_client(self) -> Cortex:
        """Create or return a cached CortexDB client.

        Returns:
            An initialized :class:`cortexdb.Cortex` client using the Airflow
            connection configuration.
        """
        if self._client is not None:
            return self._client

        conn = self.get_connection(self.cortexdb_conn_id)
        extra = conn.extra_dejson if conn.extra else {}

        api_url = self._build_api_url(conn.host, conn.port, extra.get("scheme"))

        self._client = Cortex(
            api_url,
            actor=conn.login or "user:default",
            bearer=conn.password or None,
            timeout=float(extra.get("timeout", 30.0)),
        )
        return self._client

    def get_scope(self) -> str:
        """Return the default scope path from the connection schema.

        Returns:
            The scope path, defaulting to ``"user:default"`` if not configured.
        """
        conn = self.get_connection(self.cortexdb_conn_id)
        return conn.schema or "user:default"

    def test_connection(self) -> tuple[bool, str]:
        """Test the CortexDB connection.

        Any exception raised while building the client or calling
        ``whoami()`` is reported in the returned message rather than
        propagated.

        Returns:
            A tuple of (success, message).
        """
        try:
            client = self.get_client()
            who = client.whoami()
            return True, f"Connection successful: {who}"
        except Exception as exc:
            # Deliberately broad: the failure is surfaced as a status message.
            return False, f"Connection failed: {exc}"

    @classmethod
    def get_ui_field_behaviour(cls) -> dict[str, Any]:
        """Return custom UI field configuration for the Airflow connection form."""
        return {
            "relabeling": {
                "login": "Actor",
                "password": "Bearer Token",
                "schema": "Default Scope",
                "host": "CortexDB Host",
                "port": "CortexDB Port",
            },
            "placeholders": {
                "host": "localhost",
                "port": "3141",
                "login": "user:default",
                "password": "v4.public... (optional)",
                "schema": "user:default",
                "extra": '{"timeout": 30, "scheme": "http"}',
            },
        }
@@ -0,0 +1,142 @@
1
+ """Airflow Operator for CortexDB operations.
2
+
3
+ Provides a single operator that can execute experience (write), recall,
4
+ forget, and search operations against CortexDB within Airflow DAGs.
5
+
6
+ Example::
7
+
8
+ from airflow.decorators import dag
9
+ from cortexdb_airflow import CortexDBOperator
10
+
11
+ @dag(schedule=None)
12
+ def memory_dag():
13
+ store = CortexDBOperator(
14
+ task_id="store_result",
15
+ operation="experience",
16
+ content="Pipeline completed successfully with 1,000 records.",
17
+ scope="org:data-team",
18
+ )
19
+
20
+ recall = CortexDBOperator(
21
+ task_id="recall_context",
22
+ operation="recall",
23
+ query="recent pipeline results",
24
+ scope="org:data-team",
25
+ )
26
+
27
+ store >> recall
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ from typing import Any, Optional, Sequence
33
+
34
+ from airflow.models import BaseOperator
35
+ from airflow.utils.context import Context
36
+
37
+ from cortexdb_airflow.hooks import CortexDBHook
38
+
39
+
40
class CortexDBOperator(BaseOperator):
    """Run a single CortexDB operation as an Airflow task.

    Four operation names are accepted: ``experience``, ``recall``,
    ``forget`` and ``search``. ``recall`` and ``search`` both call the
    client's ``recall`` method. ``forget`` is issued for the resolved scope
    with ``confirm_all=True`` and ``cascade="redact_events"``. The value
    returned by :meth:`execute` is the operation result, which is pushed to
    XCom for downstream tasks.

    Args:
        operation: Name of the CortexDB operation. One of
            ``"experience"``, ``"recall"``, ``"forget"``, ``"search"``.
        cortexdb_conn_id: Airflow connection ID. Defaults to
            ``"cortexdb_default"``.
        scope: Scope path. When empty or ``None``, the default scope of
            the connection is used.
        content: Text to store (``experience`` only).
        query: Query string (``recall`` and ``search``).
        labels: Optional labels attached to the stored experience.
        reason: Reason passed along with a ``forget`` request.
        view: Recall view (``recall``/``search``). Defaults to ``holistic``.

    Raises:
        ValueError: If ``operation`` is not one of the accepted names.
    """

    template_fields: Sequence[str] = ("content", "query", "scope", "reason")

    def __init__(
        self,
        *,
        operation: str,
        cortexdb_conn_id: str = "cortexdb_default",
        scope: Optional[str] = None,
        content: Optional[str] = None,
        query: Optional[str] = None,
        labels: Optional[Sequence[str]] = None,
        reason: str = "",
        view: str = "holistic",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)

        allowed = ("experience", "recall", "forget", "search")
        if operation not in allowed:
            message = f"Invalid operation '{operation}'. " f"Must be one of {allowed}."
            raise ValueError(message)

        self.operation = operation
        self.cortexdb_conn_id = cortexdb_conn_id
        self.scope = scope
        self.content = content
        self.query = query
        self.labels = labels
        self.reason = reason
        self.view = view

    def execute(self, context: Context) -> Any:
        """Run the configured operation against CortexDB.

        Args:
            context: The Airflow task execution context.

        Returns:
            Whatever the underlying client call returned.

        Raises:
            ValueError: If ``content`` is empty for ``experience``, if
                ``query`` is empty for ``recall``/``search``, or if the
                stored operation name is not supported.
        """
        hook = CortexDBHook(cortexdb_conn_id=self.cortexdb_conn_id)
        client = hook.get_client()
        # An explicit scope wins; otherwise use the connection's default.
        target_scope = self.scope if self.scope else hook.get_scope()

        op = self.operation
        if op == "experience":
            if not self.content:
                raise ValueError("'content' is required for the experience operation.")
            outcome = client.experience(target_scope, text=self.content, labels=self.labels)
        elif op == "recall" or op == "search":
            if not self.query:
                raise ValueError(f"'query' is required for the {self.operation} operation.")
            outcome = client.recall(target_scope, query=self.query, view=self.view)
        elif op == "forget":
            outcome = client.forget(
                target_scope,
                confirm_all=True,
                cascade="redact_events",
                reason=self.reason,
            )
        else:
            raise ValueError(f"Unsupported operation: {self.operation}")

        self.log.info("CortexDB %s operation completed.", self.operation)
        return outcome
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.4
2
+ Name: cortexdb-airflow
3
+ Version: 0.1.0
4
+ Summary: Apache Airflow provider for CortexDB — long-term memory for data orchestration
5
+ License-Expression: Apache-2.0
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: apache-airflow>=2.8
8
+ Requires-Dist: cortexdbai>=0.1.0
9
+ Provides-Extra: dev
10
+ Requires-Dist: pytest>=7.0; extra == 'dev'
11
+ Description-Content-Type: text/markdown
12
+
13
+ # cortexdb-airflow
@@ -0,0 +1,7 @@
1
+ cortexdb_airflow/__init__.py,sha256=rsp3oDKSx9ZIe5Mbh-xJb5dE5_zQDRpdwcqBFV_ppPA,880
2
+ cortexdb_airflow/hooks.py,sha256=q3GJi0_o48Mmgr_PZGxon-9EtakVTMbpLFjJ_A0P79o,4101
3
+ cortexdb_airflow/operators.py,sha256=m7zEZm8Nfg6l4c4p03eDfTUtw_d-i02xY5RDOssZGIo,4577
4
+ cortexdb_airflow-0.1.0.dist-info/METADATA,sha256=qk9CC3JfgNEZAmbTQs1KqoTQoIknY1zWjBMht24cyIE,396
5
+ cortexdb_airflow-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
6
+ cortexdb_airflow-0.1.0.dist-info/entry_points.txt,sha256=E5H0b6TawCb3ykL9hk-IbR1Cdiu6TfdxkmyDKY9H3qk,75
7
+ cortexdb_airflow-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
# Airflow discovers provider metadata through the "apache_airflow_provider" entry-point group.
[apache_airflow_provider]
cortexdb = cortexdb_airflow:get_provider_info