cortexdb-airflow 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortexdb_airflow-0.1.0/.gitignore +79 -0
- cortexdb_airflow-0.1.0/PKG-INFO +13 -0
- cortexdb_airflow-0.1.0/README.md +1 -0
- cortexdb_airflow-0.1.0/cortexdb_airflow/__init__.py +29 -0
- cortexdb_airflow-0.1.0/cortexdb_airflow/hooks.py +124 -0
- cortexdb_airflow-0.1.0/cortexdb_airflow/operators.py +142 -0
- cortexdb_airflow-0.1.0/pyproject.toml +23 -0
- cortexdb_airflow-0.1.0/tests/__init__.py +0 -0
- cortexdb_airflow-0.1.0/tests/test_hooks.py +148 -0
- cortexdb_airflow-0.1.0/tests/test_operators.py +166 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Rust
|
|
2
|
+
/target
|
|
3
|
+
**/*.rs.bk
|
|
4
|
+
|
|
5
|
+
# Environment / secrets
|
|
6
|
+
.env
|
|
7
|
+
.env.local
|
|
8
|
+
.env*.local
|
|
9
|
+
*.pem
|
|
10
|
+
*.key
|
|
11
|
+
.npmrc
|
|
12
|
+
|
|
13
|
+
# SQLite database
|
|
14
|
+
*.sqlite
|
|
15
|
+
*.sqlite-wal
|
|
16
|
+
*.sqlite-shm
|
|
17
|
+
|
|
18
|
+
# OS
|
|
19
|
+
.DS_Store
|
|
20
|
+
Thumbs.db
|
|
21
|
+
desktop.ini
|
|
22
|
+
|
|
23
|
+
# IDE
|
|
24
|
+
.idea/
|
|
25
|
+
.vscode/
|
|
26
|
+
*.swp
|
|
27
|
+
*.swo
|
|
28
|
+
|
|
29
|
+
# Data directories
|
|
30
|
+
cortexdb_data*/
|
|
31
|
+
/data/
|
|
32
|
+
# Per-bench tenant stores (RocksDB + Tantivy + HNSW state; regeneratable per run)
|
|
33
|
+
/data_*/
|
|
34
|
+
# Experimental per-branch stores (not tracked on this branch but left gitignored
|
|
35
|
+
# so checkout from other branches doesn't surface them in git status)
|
|
36
|
+
/event_memory_store/
|
|
37
|
+
/llm_cache/
|
|
38
|
+
|
|
39
|
+
# Benchmark inputs and per-run outputs (kept local, regenerated each run)
|
|
40
|
+
benchmarks/longmemeval/data/
|
|
41
|
+
benchmarks/longmemeval/server_results/
|
|
42
|
+
benchmarks/longmemeval/fast_results/
|
|
43
|
+
benchmarks/longmemeval/micro_results/
|
|
44
|
+
benchmarks/longmemeval/server_logs/
|
|
45
|
+
benchmarks/longmemeval/*.log
|
|
46
|
+
benchmarks/locomo/locomo_results*.json
|
|
47
|
+
benchmarks/locomo/server_results/
|
|
48
|
+
benchmarks/locomo/*.log
|
|
49
|
+
/answer_out.json
|
|
50
|
+
|
|
51
|
+
# Local Claude Code state
|
|
52
|
+
.claude/
|
|
53
|
+
.tmp/
|
|
54
|
+
|
|
55
|
+
# Python
|
|
56
|
+
__pycache__/
|
|
57
|
+
*.pyc
|
|
58
|
+
.venv/
|
|
59
|
+
venv/
|
|
60
|
+
|
|
61
|
+
# Node
|
|
62
|
+
node_modules/
|
|
63
|
+
dist/
|
|
64
|
+
.next/
|
|
65
|
+
|
|
66
|
+
# Egg info
|
|
67
|
+
*.egg-info/
|
|
68
|
+
|
|
69
|
+
# Scratch/debug text files at root
|
|
70
|
+
/*.txt
|
|
71
|
+
/*.log
|
|
72
|
+
|
|
73
|
+
# Local debug / marketing / private content (not for repo)
|
|
74
|
+
harness/.reports/
|
|
75
|
+
harness_data_*/
|
|
76
|
+
blog/
|
|
77
|
+
sales/
|
|
78
|
+
videos/
|
|
79
|
+
local-instance/
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cortexdb-airflow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Apache Airflow provider for CortexDB — long-term memory for data orchestration
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: apache-airflow>=2.8
|
|
8
|
+
Requires-Dist: cortexdbai>=0.1.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# cortexdb-airflow
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# cortexdb-airflow
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Apache Airflow provider for CortexDB.
|
|
2
|
+
|
|
3
|
+
Provides a Hook for connection management and an Operator for executing
|
|
4
|
+
CortexDB operations (experience, recall, forget, search) within Airflow DAGs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from cortexdb_airflow.hooks import CortexDBHook
|
|
8
|
+
from cortexdb_airflow.operators import CortexDBOperator
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"CortexDBHook",
|
|
12
|
+
"CortexDBOperator",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_provider_info() -> dict:
    """Describe this provider package for Airflow's provider discovery.

    Returns:
        A freshly built mapping holding the package name, display name,
        description, the ``cortexdb`` connection type (bound to
        ``cortexdb_airflow.hooks.CortexDBHook``) and the released versions.
    """
    # The single connection type this package contributes.
    cortexdb_connection = {
        "connection-type": "cortexdb",
        "hook-class-name": "cortexdb_airflow.hooks.CortexDBHook",
    }

    provider_info: dict = {}
    provider_info["package-name"] = "cortexdb-airflow"
    provider_info["name"] = "CortexDB"
    provider_info["description"] = (
        "Apache Airflow provider for CortexDB long-term memory."
    )
    provider_info["connection-types"] = [cortexdb_connection]
    provider_info["versions"] = ["0.1.0"]
    return provider_info
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Airflow Hook for CortexDB connections.
|
|
2
|
+
|
|
3
|
+
Manages CortexDB client lifecycle through Airflow's connection management
|
|
4
|
+
system, enabling operators and tasks to share connection configuration.
|
|
5
|
+
|
|
6
|
+
Example::
|
|
7
|
+
|
|
8
|
+
from cortexdb_airflow import CortexDBHook
|
|
9
|
+
|
|
10
|
+
hook = CortexDBHook(cortexdb_conn_id="cortexdb_default")
|
|
11
|
+
client = hook.get_client()
|
|
12
|
+
result = client.recall("user:default", query="recent events")
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from typing import Any, Optional
|
|
18
|
+
|
|
19
|
+
from airflow.hooks.base import BaseHook
|
|
20
|
+
|
|
21
|
+
from cortexdb import Cortex
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CortexDBHook(BaseHook):
    """Airflow Hook for managing CortexDB connections.

    Reads connection parameters from Airflow's connection store and creates
    a configured :class:`cortexdb.Cortex` client instance.

    The Airflow connection should be configured as:
        - **Host**: CortexDB server hostname (optional, defaults to
          ``localhost``)
        - **Port**: CortexDB server port (optional, defaults to ``3141``)
        - **Password**: PASETO bearer token (optional)
        - **Login**: Actor id (optional, defaults to ``"user:default"``)
        - **Schema**: Default scope path (optional, defaults to
          ``"user:default"``)
        - **Extra**: JSON object with ``timeout`` (optional, defaults to
          ``30``; passed to the client as a float) and ``scheme``
          (optional, defaults to ``"http"``)

    Args:
        cortexdb_conn_id: The Airflow connection ID. Defaults to
            ``"cortexdb_default"``.
    """

    conn_name_attr = "cortexdb_conn_id"
    default_conn_name = "cortexdb_default"
    conn_type = "cortexdb"
    hook_name = "CortexDB"

    def __init__(
        self,
        cortexdb_conn_id: str = default_conn_name,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.cortexdb_conn_id = cortexdb_conn_id
        # Built lazily by get_client() and then reused by this hook instance.
        self._client: Cortex | None = None

    def get_client(self) -> Cortex:
        """Create or return a cached CortexDB client.

        The connection is looked up on the first call only; later calls
        return the client that was built then.

        Returns:
            An initialized :class:`cortexdb.Cortex` client using the Airflow
            connection configuration.

        Raises:
            ValueError: If the ``timeout`` extra cannot be converted to a
                float.
        """
        if self._client is not None:
            return self._client

        conn = self.get_connection(self.cortexdb_conn_id)
        extra = conn.extra_dejson if conn.extra else {}

        # Resolve extras without dict defaults so that an explicit JSON
        # ``null`` falls back to the default too, instead of producing a
        # "None://..." URL or a TypeError from float(None).
        scheme = extra.get("scheme") or "http"
        raw_timeout = extra.get("timeout")
        timeout = 30.0 if raw_timeout is None else float(raw_timeout)

        host = conn.host or "localhost"
        port = conn.port or 3141
        api_url = f"{scheme}://{host}:{port}"

        self._client = Cortex(
            api_url,
            actor=conn.login or "user:default",
            bearer=conn.password or None,
            timeout=timeout,
        )
        return self._client

    def get_scope(self) -> str:
        """Return the default scope path from the connection schema.

        Returns:
            The scope path, defaulting to ``"user:default"`` if not configured.
        """
        conn = self.get_connection(self.cortexdb_conn_id)
        return conn.schema or "user:default"

    def test_connection(self) -> tuple[bool, str]:
        """Test the CortexDB connection.

        Builds (or reuses) the client and calls its ``whoami()`` method.

        Returns:
            A tuple of (success, message). Failures are reported through the
            tuple rather than raised.
        """
        try:
            client = self.get_client()
            who = client.whoami()
            return True, f"Connection successful: {who}"
        except Exception as exc:
            # Deliberately broad: any failure is turned into a
            # (False, message) result for the caller to display.
            return False, f"Connection failed: {exc}"

    @classmethod
    def get_ui_field_behaviour(cls) -> dict[str, Any]:
        """Return custom UI field configuration for the Airflow connection form."""
        return {
            # No standard field is hidden; the key is still supplied so the
            # mapping carries all three field-behaviour sections.
            "hidden_fields": [],
            "relabeling": {
                "login": "Actor",
                "password": "Bearer Token",
                "schema": "Default Scope",
                "host": "CortexDB Host",
                "port": "CortexDB Port",
            },
            "placeholders": {
                "host": "localhost",
                "port": "3141",
                "login": "user:default",
                "password": "v4.public... (optional)",
                "schema": "user:default",
                "extra": '{"timeout": 30, "scheme": "http"}',
            },
        }
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Airflow Operator for CortexDB operations.
|
|
2
|
+
|
|
3
|
+
Provides a single operator that can execute experience (write), recall,
|
|
4
|
+
forget, and search operations against CortexDB within Airflow DAGs.
|
|
5
|
+
|
|
6
|
+
Example::
|
|
7
|
+
|
|
8
|
+
from airflow.decorators import dag
|
|
9
|
+
from cortexdb_airflow import CortexDBOperator
|
|
10
|
+
|
|
11
|
+
@dag(schedule=None)
|
|
12
|
+
def memory_dag():
|
|
13
|
+
store = CortexDBOperator(
|
|
14
|
+
task_id="store_result",
|
|
15
|
+
operation="experience",
|
|
16
|
+
content="Pipeline completed successfully with 1,000 records.",
|
|
17
|
+
scope="org:data-team",
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
recall = CortexDBOperator(
|
|
21
|
+
task_id="recall_context",
|
|
22
|
+
operation="recall",
|
|
23
|
+
query="recent pipeline results",
|
|
24
|
+
scope="org:data-team",
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
store >> recall
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
from typing import Any, Optional, Sequence
|
|
33
|
+
|
|
34
|
+
from airflow.models import BaseOperator
|
|
35
|
+
from airflow.utils.context import Context
|
|
36
|
+
|
|
37
|
+
from cortexdb_airflow.hooks import CortexDBHook
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CortexDBOperator(BaseOperator):
    """Run one CortexDB operation as an Airflow task.

    Four operation names are accepted: ``experience``, ``recall``,
    ``forget`` and ``search``. ``search`` is routed to the same client call
    as ``recall``. Whatever the client returns is returned from
    :meth:`execute`, which makes it available to downstream tasks via XCom.

    Args:
        operation: The CortexDB operation to execute. One of
            ``"experience"``, ``"recall"``, ``"forget"``, ``"search"``.
        cortexdb_conn_id: Airflow connection ID. Defaults to
            ``"cortexdb_default"``.
        scope: Scope path. If ``None`` (or empty), the connection's default
            scope is used.
        content: Content to store (for ``experience``).
        query: Query string (for ``recall`` and ``search``).
        labels: Optional labels for the stored experience.
        reason: Reason for forgetting (for ``forget``).
        view: Recall view (for ``recall``/``search``). Defaults to
            ``holistic``.

    Raises:
        ValueError: If ``operation`` is not one of the accepted names.
    """

    template_fields: Sequence[str] = ("content", "query", "scope", "reason")

    def __init__(
        self,
        *,
        operation: str,
        cortexdb_conn_id: str = "cortexdb_default",
        scope: Optional[str] = None,
        content: Optional[str] = None,
        query: Optional[str] = None,
        labels: Optional[Sequence[str]] = None,
        reason: str = "",
        view: str = "holistic",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)

        # Reject unknown operation names at construction time.
        allowed = ("experience", "recall", "forget", "search")
        if operation not in allowed:
            message = f"Invalid operation '{operation}'. Must be one of {allowed}."
            raise ValueError(message)

        self.operation = operation
        self.cortexdb_conn_id = cortexdb_conn_id

        # Templated fields (see ``template_fields``).
        self.scope = scope
        self.content = content
        self.query = query
        self.reason = reason

        # Passed through to the client unchanged.
        self.labels = labels
        self.view = view

    def execute(self, context: Context) -> Any:
        """Execute the configured CortexDB operation.

        The hook, client and scope are resolved first; argument validation
        for the chosen operation happens afterwards.

        Args:
            context: The Airflow task execution context.

        Returns:
            The result of the CortexDB client call.

        Raises:
            ValueError: If ``content`` is empty for ``experience``, if
                ``query`` is empty for ``recall``/``search``, or if the
                stored operation name is not supported.
        """
        hook = CortexDBHook(cortexdb_conn_id=self.cortexdb_conn_id)
        client = hook.get_client()
        # Only ask the hook for its default scope when none was given.
        target_scope = self.scope if self.scope else hook.get_scope()

        op_name = self.operation
        if op_name == "forget":
            outcome = client.forget(
                target_scope,
                confirm_all=True,
                cascade="redact_events",
                reason=self.reason,
            )
        elif op_name == "experience":
            if not self.content:
                raise ValueError("'content' is required for the experience operation.")
            outcome = client.experience(
                target_scope,
                text=self.content,
                labels=self.labels,
            )
        elif op_name in ("recall", "search"):
            if not self.query:
                raise ValueError(f"'query' is required for the {self.operation} operation.")
            outcome = client.recall(
                target_scope,
                query=self.query,
                view=self.view,
            )
        else:
            raise ValueError(f"Unsupported operation: {self.operation}")

        self.log.info("CortexDB %s operation completed.", self.operation)
        return outcome
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cortexdb-airflow"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Apache Airflow provider for CortexDB — long-term memory for data orchestration"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"cortexdbai>=0.1.0",
|
|
14
|
+
"apache-airflow>=2.8",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.optional-dependencies]
|
|
18
|
+
dev = [
|
|
19
|
+
"pytest>=7.0",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.entry-points."airflow.providers"]
|
|
23
|
+
cortexdb = "cortexdb_airflow.__init__:get_provider_info"
|
|
File without changes
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Tests for CortexDB Airflow Hook."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import MagicMock, patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from cortexdb_airflow.hooks import CortexDBHook
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TestCortexDBHook:
    """Tests for the CortexDBHook class."""

    @staticmethod
    def _connection(**fields: object) -> MagicMock:
        """Build a stand-in Airflow connection.

        Any of ``host``, ``port``, ``login``, ``password``, ``schema`` and
        ``extra`` that is not supplied is set to ``None``; ``extra_dejson``
        defaults to an empty dict.
        """
        conn = MagicMock()
        for attr in ("host", "port", "login", "password", "schema", "extra"):
            setattr(conn, attr, fields.get(attr))
        conn.extra_dejson = fields.get("extra_dejson", {})
        return conn

    def test_default_conn_id(self) -> None:
        """Hook should use the default connection ID."""
        hook = CortexDBHook()
        assert hook.cortexdb_conn_id == "cortexdb_default"

    def test_custom_conn_id(self) -> None:
        """Hook should accept a custom connection ID."""
        hook = CortexDBHook(cortexdb_conn_id="my_cortex")
        assert hook.cortexdb_conn_id == "my_cortex"

    @patch("cortexdb_airflow.hooks.Cortex")
    @patch.object(CortexDBHook, "get_connection")
    def test_get_client(
        self, mock_get_conn: MagicMock, mock_cortex_cls: MagicMock
    ) -> None:
        """get_client should create a Cortex client from Airflow connection."""
        mock_get_conn.return_value = self._connection(
            host="cortex.example.com",
            port=3141,
            login="user:svc",
            password="v4.public.token",
            extra='{"timeout": 15}',
            extra_dejson={"timeout": 15},
        )

        client = CortexDBHook().get_client()

        mock_cortex_cls.assert_called_once_with(
            "http://cortex.example.com:3141",
            actor="user:svc",
            bearer="v4.public.token",
            timeout=15.0,
        )
        # The hook must hand back exactly the object it constructed.
        assert client is mock_cortex_cls.return_value

    @patch("cortexdb_airflow.hooks.Cortex")
    @patch.object(CortexDBHook, "get_connection")
    def test_get_client_defaults(
        self, mock_get_conn: MagicMock, mock_cortex_cls: MagicMock
    ) -> None:
        """get_client should use defaults when connection fields are empty."""
        mock_get_conn.return_value = self._connection()

        client = CortexDBHook().get_client()

        mock_cortex_cls.assert_called_once_with(
            "http://localhost:3141",
            actor="user:default",
            bearer=None,
            timeout=30.0,
        )
        assert client is mock_cortex_cls.return_value

    @patch("cortexdb_airflow.hooks.Cortex")
    @patch.object(CortexDBHook, "get_connection")
    def test_get_client_custom_scheme(
        self, mock_get_conn: MagicMock, mock_cortex_cls: MagicMock
    ) -> None:
        """get_client should honour the ``scheme`` key of the Extra JSON."""
        mock_get_conn.return_value = self._connection(
            host="cortex.example.com",
            port=443,
            extra='{"scheme": "https"}',
            extra_dejson={"scheme": "https"},
        )

        CortexDBHook().get_client()

        mock_cortex_cls.assert_called_once_with(
            "https://cortex.example.com:443",
            actor="user:default",
            bearer=None,
            timeout=30.0,
        )

    @patch.object(CortexDBHook, "get_connection")
    def test_get_scope(self, mock_get_conn: MagicMock) -> None:
        """get_scope should return schema or default."""
        mock_get_conn.return_value = self._connection(schema="org:production")

        hook = CortexDBHook()
        assert hook.get_scope() == "org:production"

    @patch.object(CortexDBHook, "get_connection")
    def test_get_scope_default(self, mock_get_conn: MagicMock) -> None:
        """get_scope should return 'user:default' when schema is empty."""
        mock_get_conn.return_value = self._connection(schema=None)

        hook = CortexDBHook()
        assert hook.get_scope() == "user:default"

    @patch.object(CortexDBHook, "get_client")
    def test_test_connection_success(
        self, mock_get_client: MagicMock
    ) -> None:
        """test_connection should return success on healthy client."""
        mock_client = MagicMock()
        mock_client.whoami.return_value = {"actor": "user:svc"}
        mock_get_client.return_value = mock_client

        hook = CortexDBHook()
        success, msg = hook.test_connection()

        assert success is True
        assert "successful" in msg

    @patch.object(CortexDBHook, "get_client")
    def test_test_connection_failure(
        self, mock_get_client: MagicMock
    ) -> None:
        """test_connection should return failure on error."""
        mock_get_client.side_effect = ConnectionError("refused")

        hook = CortexDBHook()
        success, msg = hook.test_connection()

        assert success is False
        assert "failed" in msg.lower()

    def test_ui_field_behaviour(self) -> None:
        """UI field behaviour should define expected fields."""
        behaviour = CortexDBHook.get_ui_field_behaviour()
        assert "relabeling" in behaviour
        assert "placeholders" in behaviour

    @patch("cortexdb_airflow.hooks.Cortex")
    @patch.object(CortexDBHook, "get_connection")
    def test_client_caching(
        self, mock_get_conn: MagicMock, mock_cortex_cls: MagicMock
    ) -> None:
        """get_client should cache and reuse the client instance."""
        mock_get_conn.return_value = self._connection(host="localhost", port=3141)

        hook = CortexDBHook()
        client1 = hook.get_client()
        client2 = hook.get_client()

        assert client1 is client2
        assert mock_cortex_cls.call_count == 1
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""Tests for CortexDB Airflow Operator (v1 surface)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import MagicMock, patch
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from cortexdb_airflow.operators import CortexDBOperator
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.fixture
def mock_hook():
    """Patch ``CortexDBHook`` inside the operators module.

    Yields:
        A ``(client, hook_instance)`` pair of mocks. The hook instance
        returns the client from ``get_client()`` and ``"user:default"``
        from ``get_scope()``; the client has canned return values for
        ``experience``, ``recall`` and ``forget``.
    """
    client = MagicMock()
    client.experience.return_value = {"status": "ok"}
    client.recall.return_value = {"context_block": ""}
    client.forget.return_value = {"deleted": 0}

    hook_instance = MagicMock()
    hook_instance.get_client.return_value = client
    hook_instance.get_scope.return_value = "user:default"

    # Every CortexDBHook(...) call made by the operator returns hook_instance.
    with patch(
        "cortexdb_airflow.operators.CortexDBHook",
        return_value=hook_instance,
    ):
        yield client, hook_instance
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TestCortexDBOperator:
    """Behavioural checks for CortexDBOperator using a mocked hook."""

    def test_invalid_operation(self) -> None:
        """An unknown operation name is rejected at construction time."""
        with pytest.raises(ValueError, match="Invalid operation"):
            CortexDBOperator(task_id="test", operation="invalid")

    def test_experience(self, mock_hook) -> None:
        """``experience`` forwards scope, text and labels to the client."""
        client = mock_hook[0]
        task = CortexDBOperator(
            task_id="test_experience",
            operation="experience",
            content="Test content",
            scope="org:test",
            labels=["a", "b"],
        )

        returned = task.execute(context=MagicMock())

        client.experience.assert_called_once_with(
            "org:test", text="Test content", labels=["a", "b"]
        )
        assert returned == {"status": "ok"}

    def test_experience_missing_content(self, mock_hook) -> None:
        """``experience`` without content fails when executed."""
        task = CortexDBOperator(task_id="test_experience", operation="experience")
        with pytest.raises(ValueError, match="content"):
            task.execute(context=MagicMock())

    def test_recall(self, mock_hook) -> None:
        """``recall`` forwards scope, query and the default view."""
        client = mock_hook[0]
        task = CortexDBOperator(
            task_id="test_recall",
            operation="recall",
            query="What happened?",
            scope="org:test",
        )

        task.execute(context=MagicMock())

        client.recall.assert_called_once_with(
            "org:test", query="What happened?", view="holistic"
        )

    def test_recall_missing_query(self, mock_hook) -> None:
        """``recall`` without a query fails when executed."""
        task = CortexDBOperator(task_id="test_recall", operation="recall")
        with pytest.raises(ValueError, match="query"):
            task.execute(context=MagicMock())

    def test_forget(self, mock_hook) -> None:
        """``forget`` forwards scope, reason and the fixed cascade flags."""
        client = mock_hook[0]
        task = CortexDBOperator(
            task_id="test_forget",
            operation="forget",
            reason="cleanup",
            scope="org:test",
        )

        task.execute(context=MagicMock())

        client.forget.assert_called_once_with(
            "org:test",
            confirm_all=True,
            cascade="redact_events",
            reason="cleanup",
        )

    def test_search(self, mock_hook) -> None:
        """``search`` is served by the client's ``recall`` method."""
        client = mock_hook[0]
        task = CortexDBOperator(
            task_id="test_search",
            operation="search",
            query="pipeline events",
            scope="org:test",
        )

        task.execute(context=MagicMock())

        client.recall.assert_called_once_with(
            "org:test", query="pipeline events", view="holistic"
        )

    def test_search_missing_query(self, mock_hook) -> None:
        """``search`` without a query fails when executed."""
        task = CortexDBOperator(task_id="test_search", operation="search")
        with pytest.raises(ValueError, match="query"):
            task.execute(context=MagicMock())

    def test_scope_fallback_to_hook(self, mock_hook) -> None:
        """With no explicit scope, the hook's default scope is used."""
        client, hook_instance = mock_hook
        hook_instance.get_scope.return_value = "user:hook"
        task = CortexDBOperator(
            task_id="test_fallback",
            operation="experience",
            content="test",
        )

        task.execute(context=MagicMock())

        client.experience.assert_called_once()
        # A mock call unpacks to (positional args, keyword args).
        positional, _ = client.experience.call_args
        assert positional[0] == "user:hook"

    def test_template_fields(self) -> None:
        """The operator declares the expected templated fields."""
        declared = CortexDBOperator.template_fields
        assert "content" in declared
        assert "query" in declared
        assert "scope" in declared
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
if __name__ == "__main__":
    # Running the file directly executes just this module's tests and exits
    # with pytest's return code.
    raise SystemExit(pytest.main([__file__]))
|