aethergraph-0.1.0a3-py3-none-any.whl → aethergraph-0.1.0a4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/api/v1/artifacts.py +23 -4
- aethergraph/api/v1/schemas.py +7 -0
- aethergraph/api/v1/session.py +123 -4
- aethergraph/config/config.py +2 -0
- aethergraph/config/search.py +49 -0
- aethergraph/contracts/services/channel.py +18 -1
- aethergraph/contracts/services/execution.py +58 -0
- aethergraph/contracts/services/llm.py +26 -0
- aethergraph/contracts/services/memory.py +10 -4
- aethergraph/contracts/services/planning.py +53 -0
- aethergraph/contracts/storage/event_log.py +8 -0
- aethergraph/contracts/storage/search_backend.py +47 -0
- aethergraph/contracts/storage/vector_index.py +73 -0
- aethergraph/core/graph/action_spec.py +76 -0
- aethergraph/core/graph/graph_fn.py +75 -2
- aethergraph/core/graph/graphify.py +74 -2
- aethergraph/core/runtime/graph_runner.py +2 -1
- aethergraph/core/runtime/node_context.py +66 -3
- aethergraph/core/runtime/node_services.py +8 -0
- aethergraph/core/runtime/run_manager.py +263 -271
- aethergraph/core/runtime/run_types.py +54 -1
- aethergraph/core/runtime/runtime_env.py +35 -14
- aethergraph/core/runtime/runtime_services.py +308 -18
- aethergraph/plugins/agents/default_chat_agent.py +266 -74
- aethergraph/plugins/agents/default_chat_agent_v2.py +487 -0
- aethergraph/plugins/channel/adapters/webui.py +69 -21
- aethergraph/plugins/channel/routes/webui_routes.py +8 -48
- aethergraph/runtime/__init__.py +12 -0
- aethergraph/server/app_factory.py +3 -0
- aethergraph/server/ui_static/assets/index-CFktGdbW.js +4913 -0
- aethergraph/server/ui_static/assets/index-DcfkFlTA.css +1 -0
- aethergraph/server/ui_static/index.html +2 -2
- aethergraph/services/artifacts/facade.py +157 -21
- aethergraph/services/artifacts/types.py +35 -0
- aethergraph/services/artifacts/utils.py +42 -0
- aethergraph/services/channel/channel_bus.py +3 -1
- aethergraph/services/channel/event_hub copy.py +55 -0
- aethergraph/services/channel/event_hub.py +81 -0
- aethergraph/services/channel/factory.py +3 -2
- aethergraph/services/channel/session.py +709 -74
- aethergraph/services/container/default_container.py +69 -7
- aethergraph/services/execution/__init__.py +0 -0
- aethergraph/services/execution/local_python.py +118 -0
- aethergraph/services/indices/__init__.py +0 -0
- aethergraph/services/indices/global_indices.py +21 -0
- aethergraph/services/indices/scoped_indices.py +292 -0
- aethergraph/services/llm/generic_client.py +342 -46
- aethergraph/services/llm/generic_embed_client.py +359 -0
- aethergraph/services/llm/types.py +3 -1
- aethergraph/services/memory/distillers/llm_long_term.py +60 -109
- aethergraph/services/memory/distillers/llm_long_term_v1.py +180 -0
- aethergraph/services/memory/distillers/llm_meta_summary.py +57 -266
- aethergraph/services/memory/distillers/llm_meta_summary_v1.py +342 -0
- aethergraph/services/memory/distillers/long_term.py +48 -131
- aethergraph/services/memory/distillers/long_term_v1.py +170 -0
- aethergraph/services/memory/facade/chat.py +18 -8
- aethergraph/services/memory/facade/core.py +159 -19
- aethergraph/services/memory/facade/distillation.py +86 -31
- aethergraph/services/memory/facade/retrieval.py +100 -1
- aethergraph/services/memory/factory.py +4 -1
- aethergraph/services/planning/__init__.py +0 -0
- aethergraph/services/planning/action_catalog.py +271 -0
- aethergraph/services/planning/bindings.py +56 -0
- aethergraph/services/planning/dependency_index.py +65 -0
- aethergraph/services/planning/flow_validator.py +263 -0
- aethergraph/services/planning/graph_io_adapter.py +150 -0
- aethergraph/services/planning/input_parser.py +312 -0
- aethergraph/services/planning/missing_inputs.py +28 -0
- aethergraph/services/planning/node_planner.py +613 -0
- aethergraph/services/planning/orchestrator.py +112 -0
- aethergraph/services/planning/plan_executor.py +506 -0
- aethergraph/services/planning/plan_types.py +321 -0
- aethergraph/services/planning/planner.py +617 -0
- aethergraph/services/planning/planner_service.py +369 -0
- aethergraph/services/planning/planning_context_builder.py +43 -0
- aethergraph/services/planning/quick_actions.py +29 -0
- aethergraph/services/planning/routers/__init__.py +0 -0
- aethergraph/services/planning/routers/simple_router.py +26 -0
- aethergraph/services/rag/facade.py +0 -3
- aethergraph/services/scope/scope.py +30 -30
- aethergraph/services/scope/scope_factory.py +15 -7
- aethergraph/services/skills/__init__.py +0 -0
- aethergraph/services/skills/skill_registry.py +465 -0
- aethergraph/services/skills/skills.py +220 -0
- aethergraph/services/skills/utils.py +194 -0
- aethergraph/storage/artifacts/artifact_index_jsonl.py +16 -10
- aethergraph/storage/artifacts/artifact_index_sqlite.py +12 -2
- aethergraph/storage/docstore/sqlite_doc_sync.py +1 -1
- aethergraph/storage/memory/event_persist.py +42 -2
- aethergraph/storage/memory/fs_persist.py +32 -2
- aethergraph/storage/search_backend/__init__.py +0 -0
- aethergraph/storage/search_backend/generic_vector_backend.py +230 -0
- aethergraph/storage/search_backend/null_backend.py +34 -0
- aethergraph/storage/search_backend/sqlite_lexical_backend.py +387 -0
- aethergraph/storage/search_backend/utils.py +31 -0
- aethergraph/storage/search_factory.py +75 -0
- aethergraph/storage/vector_index/faiss_index.py +72 -4
- aethergraph/storage/vector_index/sqlite_index.py +521 -52
- aethergraph/storage/vector_index/sqlite_index_vanila.py +311 -0
- aethergraph/storage/vector_index/utils.py +22 -0
- {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/METADATA +1 -1
- {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/RECORD +107 -63
- {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/WHEEL +1 -1
- aethergraph/plugins/agents/default_chat_agent copy.py +0 -90
- aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +0 -1
- aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +0 -400
- aethergraph/services/eventhub/event_hub.py +0 -76
- aethergraph/services/llm/generic_client copy.py +0 -691
- aethergraph/services/prompts/file_store.py +0 -41
- {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/entry_points.txt +0 -0
- {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
- {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/NOTICE +0 -0
- {aethergraph-0.1.0a3.dist-info → aethergraph-0.1.0a4.dist-info}/top_level.txt +0 -0

aethergraph/services/skills/utils.py (new file)
@@ -0,0 +1,194 @@
+from pathlib import Path
+import re
+from typing import Any
+
+from .skills import Skill
+
+try:
+    import yaml  # type: ignore
+except ImportError:  # pragma: no cover
+    yaml = None  # TODO: enforce PyYAML as a dependency?
+
+_FRONT_MATTER_DELIM = re.compile(r"^---\s*$")
+# Only treat H2 (##) as section delimiters, per spec.
+_SECTION_HEADING_RE = re.compile(r"^(##)\s+(.*)$")
+
+
+def _split_front_matter(text: str) -> tuple[dict[str, Any], str]:
+    """
+    Split YAML front matter from the rest of the markdown body.
+
+    Expects:
+        ---
+        yaml: here
+        ---
+        # Markdown starts here
+
+    Returns: (meta_dict, body_markdown)
+    """
+    lines = text.splitlines()
+    if not lines or not _FRONT_MATTER_DELIM.match(lines[0].strip()):
+        # No front matter block
+        return {}, text
+
+    # Find closing '---'
+    end_idx = None
+    for i in range(1, len(lines)):
+        if _FRONT_MATTER_DELIM.match(lines[i].strip()):
+            end_idx = i
+            break
+
+    if end_idx is None:
+        # Malformed front matter; treat entire file as body
+        return {}, text
+
+    fm_lines = lines[1:end_idx]
+    body_lines = lines[end_idx + 1 :]
+
+    fm_str = "\n".join(fm_lines)
+    body = "\n".join(body_lines)
+
+    if yaml is None:
+        # If PyYAML is not installed, return empty meta.
+        return {}, body
+
+    try:
+        meta = yaml.safe_load(fm_str) or {}
+    except Exception as exc:
+        # Surface a clear error – this is almost always a YAML indentation / syntax issue
+        raise ValueError(
+            f"Failed to parse YAML front matter: {exc!r}\n" f"Front matter was:\n{fm_str}"
+        ) from exc
+
+    if not isinstance(meta, dict):
+        raise ValueError(
+            f"YAML front matter must be a mapping (dict), got {type(meta)} instead.\n"
+            f"Front matter was:\n{fm_str}"
+        )
+
+    return meta, body
+
+
+def _normalize_section_key(heading: str) -> str:
+    """
+    Normalize a heading text into a section key.
+
+    Rules:
+    - If the heading already contains a dot (e.g. "chat.system"), keep as-is.
+    - Else, lowercase and replace spaces with underscores, e.g. "Chat System" -> "chat_system".
+    """
+    raw = heading.strip()
+    if "." in raw:
+        return raw.strip()
+    return raw.lower().replace(" ", "_")
+
+
+def _split_sections_from_body(body: str) -> dict[str, str]:
+    """
+    Split markdown body into sections keyed by normalized heading.
+
+    - Intro text before any heading -> section "body"
+    - Only H2 headings (`## something`) start new sections.
+    - H3+ (`### ...`) are treated as content.
+    """
+    sections: dict[str, list[str]] = {}
+    current_key: str | None = None
+    buffer: list[str] = []
+    preface: list[str] = []
+
+    lines = body.splitlines()
+
+    for line in lines:
+        m = _SECTION_HEADING_RE.match(line)
+        if m:
+            # Flush previous section or preface
+            if current_key is None:
+                if buffer:
+                    preface.extend(buffer)
+            else:
+                sections[current_key] = sections.get(current_key, []) + buffer
+
+            buffer = []
+            heading = m.group(2).strip()
+            current_key = _normalize_section_key(heading)
+        else:
+            buffer.append(line)
+
+    # Flush last buffer
+    if current_key is None:
+        if buffer:
+            preface.extend(buffer)
+    else:
+        sections[current_key] = sections.get(current_key, []) + buffer
+
+    out: dict[str, str] = {}
+    if preface:
+        out["body"] = "\n".join(preface).strip()
+
+    for k, lines_ in sections.items():
+        text = "\n".join(lines_).strip()
+        if text:
+            out[k] = text
+
+    return out
+
+
+def parse_skill_markdown(text: str, path: Path | None = None) -> Skill:
+    """
+    Parse a single markdown file into a Skill.
+
+    The file must have YAML front matter with at least:
+    - id: string
+    - title: string
+
+    Sections are defined by `## section.key` headings.
+    """
+    meta, body = _split_front_matter(text)
+    sections = _split_sections_from_body(body)
+
+    location = str(path) if path is not None else "<string>"
+
+    # ---- Basic validation of YAML meta ----
+    if not meta:
+        raise ValueError(
+            f"Skill file {location} has no YAML front matter. "
+            "Expected at least:\n"
+            "---\n"
+            "id: some.id\n"
+            "title: Some title\n"
+            "---"
+        )
+
+    skill_id = meta.get("id")
+    title = meta.get("title")
+
+    if not isinstance(skill_id, str) or not skill_id.strip():
+        raise ValueError(
+            f"Skill file {location} is missing a valid 'id' in front matter. "
+            f"Got id={skill_id!r} in:\n{meta}"
+        )
+
+    if not isinstance(title, str) or not title.strip():
+        raise ValueError(
+            f"Skill file {location} is missing a valid 'title' in front matter. "
+            f"Got title={title!r} in:\n{meta}"
+        )
+
+    try:
+        skill = Skill.from_dict(
+            meta=meta,
+            sections=sections,
+            raw_markdown=text,
+            path=path,
+        )
+    except Exception as e:
+        raise ValueError(f"Failed to construct Skill from {location}: {e}") from e
+
+    # Extra guard – if Skill.from_dict ever returns None, fail loudly.
+    if skill is None:
+        raise ValueError(
+            f"Skill.from_dict returned None for skill file {location}. "
+            f"Front matter: {meta}, sections: {list(sections.keys())}"
+        )
+
+    return skill
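
For orientation, a minimal usage sketch of the new parser. This is a sketch, not package documentation: the module path is taken from the file list above, the skill file content is invented, and Skill's exact fields are internals not shown in this diff.

    import textwrap

    from aethergraph.services.skills.utils import parse_skill_markdown

    # A made-up skill file; dedent keeps the markdown flush-left so the
    # front-matter delimiters and H2 headings match at column 0.
    doc = textwrap.dedent("""\
        ---
        id: demo.skill
        title: Demo skill
        ---
        Intro text before any heading lands in the "body" section.

        ## chat.system
        Dotted headings are kept verbatim as section keys.

        ## Usage Notes
        Dotless headings are normalized: "Usage Notes" -> "usage_notes".
        """)

    skill = parse_skill_markdown(doc)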

aethergraph/storage/artifacts/artifact_index_jsonl.py
@@ -75,20 +75,26 @@ class JsonlArtifactIndexSync:
         if labels:
             for k, v in labels.items():
                 if k in TENANT_KEYS:
-
-                    rows = [r for r in rows if r.get(k) == v]
-                    continue
-
-                # Normal label filters
-                if isinstance(v, list):
+                    sv = str(v)
                     rows = [
                         r
                         for r in rows
-                        if
-
+                        if str(r.get(k) or "") == sv
+                        or str((r.get("labels") or {}).get(k) or "") == sv
                     ]
-
-
+                    continue
+
+                # Normal label filters
+                if k == "tags":
+                    want = v if isinstance(v, list) else [v]
+                    want = [str(t).strip() for t in want if str(t).strip()]
+                    if want:
+                        rows = [
+                            r
+                            for r in rows
+                            if any(t in (r.get("labels", {}).get("tags") or []) for t in want)
+                        ]
+                    continue
 
         if metric and mode:
             rows = [r for r in rows if metric in r.get("metrics", {})]
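
The effect of the rewritten JSONL filter, shown standalone on sample rows. This is an illustration only; treating "org_id" as a tenant key is an assumption, since the TENANT_KEYS mapping sits outside this hunk.

    # Tenant-key filters now compare as strings and look in both the row's
    # top-level field and its "labels" dict:
    rows = [
        {"org_id": "acme", "labels": {"tags": ["draft"]}},
        {"labels": {"org_id": "acme", "tags": ["final", "chat"]}},
    ]
    sv = str("acme")
    hits = [
        r for r in rows
        if str(r.get("org_id") or "") == sv
        or str((r.get("labels") or {}).get("org_id") or "") == sv
    ]
    assert len(hits) == 2  # both rows match, regardless of where org_id lives

    # "tags" filters now match if any requested tag intersects the row's tags:
    want = ["chat"]
    tagged = [
        r for r in rows
        if any(t in (r.get("labels", {}).get("tags") or []) for t in want)
    ]
    assert len(tagged) == 1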

aethergraph/storage/artifacts/artifact_index_sqlite.py
@@ -273,9 +273,19 @@ class SqliteArtifactIndexSync:
                where.append("(" + " OR ".join(ors) + ")")
                continue
 
+            # if k in TENANT_KEYS:
+            #     where.append(f"{TENANT_KEYS[k]} = ?")
+            #     params.append(v)
+            #     continue
+
            if k in TENANT_KEYS:
-
-
+                col = TENANT_KEYS[k]
+                sv = str(v)
+
+                # column OR labels_json fallback
+                where.append(f"({col} = ? OR labels_json LIKE ?)")
+                params.append(sv)
+                params.append(f'%"{k}": "{sv}"%')
                continue
 
            where.append("labels_json LIKE ?")
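
The SQLite side gains the same leniency: a tenant filter now matches either the promoted column or a serialized copy under labels_json. A standalone sketch of what the new branch emits, with example values (the TENANT_KEYS mapping is not shown in this hunk, so the column name here is assumed):

    k, sv = "org_id", "acme"   # example key/value
    col = "org_id"             # assumed TENANT_KEYS[k] -> column name
    clause = f"({col} = ? OR labels_json LIKE ?)"
    params = [sv, f'%"{k}": "{sv}"%']
    # clause -> '(org_id = ? OR labels_json LIKE ?)'
    # The LIKE pattern relies on json.dumps' default ": " separator when
    # labels_json was written, e.g. '{"org_id": "acme"}':
    assert '"org_id": "acme"' in '{"org_id": "acme"}'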

aethergraph/storage/docstore/sqlite_doc_sync.py
@@ -67,7 +67,7 @@ class SQLiteDocStoreSync:
                 (doc_id, payload, now),
             )
         except sqlite3.Error as e:
-            print("
+            print("SQLiteDocStoreSync ERROR during put:", doc_id, repr(e))
             raise
 
     def get(self, doc_id: str) -> dict[str, Any] | None:

aethergraph/storage/memory/event_persist.py
@@ -47,9 +47,9 @@ class EventLogPersistence(Persistence):
         return f"{self._prefix}{doc_id}"
 
     # --------- API ---------
-    async def append_event(self,
+    async def append_event(self, scope_id: str, evt: Event) -> None:
         payload = asdict(evt)
-        payload.setdefault("scope_id",
+        payload.setdefault("scope_id", scope_id)
         payload.setdefault("kind", "memory")
         # you can add tags like ["mem"] if useful
         await self._log.append(payload)

@@ -66,3 +66,43 @@
         if doc is None:
             raise FileNotFoundError(f"Memory JSON not found for URI: {uri}")
         return doc
+
+    async def get_events_by_ids(
+        self,
+        scope_id: str,
+        event_ids: list[str],
+    ) -> list[Event]:
+        """
+        Fetch events for a given scope_id (timeline) by event_id.
+
+        Implementation v0: use EventLog.query and filter in Python.
+        For moderate timeline sizes and small event_ids lists, this is fine.
+        Later, you can optimize by adding a direct get_many API on EventLog
+        or indexing by (scope_id, event_id).
+        """
+        if not event_ids:
+            return []
+
+        # Fetch all events for the scope_id; TODO: add reasonable limits / paging
+        rows = await self._log.query(
+            scope_id=scope_id,
+            since=None,
+            until=None,
+            kinds=None,
+            tags=None,
+            limit=None,
+            offset=0,
+        )
+
+        by_id: dict[str, Event] = {}
+        for row in rows:
+            eid = row.get("event_id")
+            if eid:
+                by_id[eid] = row
+
+        result: list[Event] = []
+        for eid in event_ids:
+            row = by_id.get(eid)
+            if row is not None:
+                result.append(Event(**row))
+        return result
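
One detail worth noting in get_events_by_ids: results come back in the order of the requested ids, and unknown ids are silently dropped. A standalone illustration of that contract with plain dicts:

    rows = [{"event_id": "e1"}, {"event_id": "e2"}, {"event_id": "e3"}]
    by_id = {r["event_id"]: r for r in rows}

    wanted = ["e3", "missing", "e1"]
    result = [by_id[eid] for eid in wanted if eid in by_id]
    assert [r["event_id"] for r in result] == ["e3", "e1"]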

aethergraph/storage/memory/fs_persist.py
@@ -30,9 +30,9 @@ class FSPersistence(Persistence):
 
    # ---------- Event log (append-only JSONL) ----------
 
-    async def append_event(self,
+    async def append_event(self, scope_id: str, evt: Event) -> None:
         day = time.strftime("%Y-%m-%d", time.gmtime())
-        path = self.base_dir / "mem" /
+        path = self.base_dir / "mem" / scope_id / "events" / f"{day}.jsonl"
 
         def _write() -> None:
             path.parent.mkdir(parents=True, exist_ok=True)

@@ -116,3 +116,33 @@
                 return json.load(f)
 
         return await asyncio.to_thread(_read)
+
+    async def get_events_by_ids(
+        self,
+        scope_id: str,
+        event_ids: list[str],
+    ) -> list[Event]:
+        """
+        Fetch events for a given scope_id (timeline) by event_id.
+        """
+        id_set = set(event_ids)
+        found: list[Event] = []
+
+        day = time.strftime("%Y-%m-%d", time.gmtime())
+        path = self.base_dir / "mem" / scope_id / "events" / f"{day}.jsonl"
+
+        if not path.exists():
+            return found
+
+        def _read() -> list[Event]:
+            results: list[Event] = []
+            with self._lock, path.open("r", encoding="utf-8") as f:
+                for line in f:
+                    data = json.loads(line)
+                    if data.get("event_id") in id_set:
+                        evt = Event(**data)
+                        results.append(evt)
+            return results
+
+        found = await asyncio.to_thread(_read)
+        return found
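
Note that this filesystem variant builds the path from today's UTC date, so only events appended on the current day can be found. If older events ever matter, a multi-day scan is straightforward; a sketch under the same on-disk layout as append_event above (the helper name is hypothetical, and it returns raw dicts rather than Event objects):

    import json
    from pathlib import Path

    def scan_all_days(base_dir: Path, scope_id: str, id_set: set[str]) -> list[dict]:
        """Scan every daily JSONL file under mem/<scope_id>/events/."""
        results: list[dict] = []
        events_dir = base_dir / "mem" / scope_id / "events"
        if not events_dir.exists():
            return results
        for day_file in sorted(events_dir.glob("*.jsonl")):
            with day_file.open("r", encoding="utf-8") as f:
                for line in f:
                    data = json.loads(line)
                    if data.get("event_id") in id_set:
                        results.append(data)
        return results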

aethergraph/storage/search_backend/generic_vector_backend.py (new file)
@@ -0,0 +1,230 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+from time import time
+from typing import Any
+
+from aethergraph.contracts.services.llm import EmbeddingClientProtocol
+from aethergraph.contracts.storage.search_backend import ScoredItem, SearchBackend
+from aethergraph.contracts.storage.vector_index import PROMOTED_FIELDS, VectorIndex
+
+from .utils import _parse_time_window
+
+
+@dataclass
+class GenericVectorSearchBackend(SearchBackend):
+    """
+    SearchBackend implementation on top of a VectorIndex + EmbeddingClient.
+
+    - Upserts: embed text and store (vector, metadata) in the index
+    - Search: embed query, retrieve top-k by cosine similarity,
+      then apply Python-level metadata filters.
+    """
+
+    index: VectorIndex
+    embedder: EmbeddingClientProtocol
+
+    # -------- helpers ----------------------------------------------------
+    async def _embed(self, text: str) -> list[float]:
+        vec: Sequence[float] = await self.embedder.embed_one(text)
+        # Ensure a concrete list[float] for numpy/etc
+        return [float(x) for x in vec]
+
+    @staticmethod
+    def _match_value(mv: Any, val: Any) -> bool:
+        """
+        Rich matching semantics for filters:
+        - If val is list/tuple/set:
+            - if mv is list-like too -> match if intersection is non-empty
+            - else -> match if mv is in val
+        - If val is scalar:
+            - if mv is list-like -> match if val is in mv
+            - else -> match if mv == val
+        """
+        if val is None:
+            return True
+
+        def _is_list_like(x: Any) -> bool:
+            return isinstance(x, (list, tuple, set))  # noqa: UP038
+
+        if _is_list_like(val):
+            if _is_list_like(mv):
+                # any overlap between filter values and meta values
+                return any(x in val for x in mv)
+            else:
+                # meta is scalar, filter is list-like
+                return mv in val
+
+        # val is scalar
+        if _is_list_like(mv):
+            return val in mv
+
+        return mv == val
+
+    @staticmethod
+    def _matches_filters(meta: dict[str, Any], filters: dict[str, Any]) -> bool:
+        """
+        Simple AND filter: all filter keys must match exactly.
+        - If filter value is a list, meta[key] must be in that list.
+        - If filter value is None, we don't constrain that key.
+        """
+        for k, v in filters.items():
+            if v is None:
+                continue
+            if k not in meta:
+                return False
+            mv = meta[k]
+            if not GenericVectorSearchBackend._match_value(mv, v):
+                return False
+        return True
+
+    # -------- public APIs ------------------------------------------------
+    async def upsert(
+        self,
+        *,
+        corpus: str,
+        item_id: str,
+        text: str,
+        metadata: dict[str, Any],
+    ) -> None:
+        if not text:
+            # avoid zero vector; caller should ensure text is non-empty
+            text = ""
+
+        vector = await self._embed(text)
+        await self.index.add(
+            corpus_id=corpus,
+            chunk_ids=[item_id],
+            vectors=[vector],
+            metas=[metadata],
+        )
+
+    async def search(
+        self,
+        *,
+        corpus: str,
+        query: str,
+        top_k: int = 10,
+        filters: dict[str, Any] | None = None,
+        time_window: str | None = None,
+        created_at_min: float | None = None,
+        created_at_max: float | None = None,
+    ) -> list[ScoredItem]:
+        filters = filters or {}
+        if not query.strip():
+            return []
+
+        q_vec = await self._embed(query)
+
+        # ---- 1) Handle time constraints ---------------------------------
+        now_ts = time()
+
+        # If time_window is provided and no explicit min, interpret it as [now - window, now]
+        if time_window and created_at_min is None:
+            duration = _parse_time_window(time_window)
+            created_at_min = now_ts - duration
+
+        # If max is not provided but we used a time_window, default to now
+        if time_window and created_at_max is None:
+            created_at_max = now_ts
+
+        # ---- 2) Split filters into index-level vs Python-level ---------
+        index_filters: dict[str, Any] = {}
+        post_filters: dict[str, Any] = {}
+
+        for key, val in filters.items():
+            if val is None:
+                continue
+
+            if key in PROMOTED_FIELDS and not isinstance(val, (list, tuple, set)):  # noqa: UP038
+                index_filters[key] = val
+            else:
+                post_filters[key] = val
+
+        # ---- 3) Ask index for scoped, time-bounded candidates ----------
+        raw_k = max(top_k * 3, top_k)
+        max_candidates = max(top_k * 50, raw_k)  # tunable safety cap
+
+        rows = await self.index.search(
+            corpus_id=corpus,
+            query_vec=q_vec,
+            k=raw_k,
+            where=index_filters,
+            max_candidates=max_candidates,
+            created_at_min=created_at_min,
+            created_at_max=created_at_max,
+        )
+
+        # ---- 4) Apply Python-level filters + build ScoredItem list -----
+        results: list[ScoredItem] = []
+        for row in rows:
+            chunk_id = row["chunk_id"]
+            score = float(row["score"])
+            meta = dict(row.get("meta") or {})
+
+            if post_filters and not self._matches_filters(meta, post_filters):
+                continue
+
+            results.append(
+                ScoredItem(
+                    item_id=chunk_id,
+                    corpus=corpus,
+                    score=score,
+                    metadata=meta,
+                )
+            )
+            if len(results) >= top_k:
+                break
+
+        return results
+
+    async def search_old(
+        self,
+        *,
+        corpus: str,
+        query: str,
+        top_k: int = 10,
+        filters: dict[str, Any] | None = None,
+    ) -> list[ScoredItem]:
+        """
+        1) Embed the query
+        2) Vector search in the underlying index
+        3) Apply metadata filters in Python
+        """
+        filters = filters or {}
+        if not query.strip():
+            # empty query: probably return nothing for now
+            return []
+
+        q_vec = await self._embed(query)
+
+        # Ask underlying VectorIndex for more than top_k, since we may
+        # filter some out. Factor 3 is arbitrary but usually safe.
+        raw_k = max(top_k * 3, top_k)
+        rows = await self.index.search(
+            corpus_id=corpus,
+            query_vec=q_vec,
+            k=raw_k,
+        )
+
+        results: list[ScoredItem] = []
+        for row in rows:
+            chunk_id = row["chunk_id"]
+            score = float(row["score"])
+            meta = dict(row.get("meta") or {})
+
+            if filters and not self._matches_filters(meta, filters):
+                continue
+
+            results.append(
+                ScoredItem(
+                    item_id=chunk_id,
+                    corpus=corpus,
+                    score=score,
+                    metadata=meta,
+                )
+            )
+            if len(results) >= top_k:
+                break
+        return results
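
The filter semantics in _match_value are the part most worth internalizing: list-vs-list means overlap, scalar-vs-list means membership in either direction, scalar-vs-scalar means equality. A standalone restatement covering the four cases:

    def match(mv, val):
        # Same rules as GenericVectorSearchBackend._match_value, restated.
        if val is None:
            return True
        def list_like(x):
            return isinstance(x, (list, tuple, set))
        if list_like(val):
            return any(x in val for x in mv) if list_like(mv) else mv in val
        return val in mv if list_like(mv) else mv == val

    assert match("chat", ["chat", "mail"])          # scalar meta, list filter
    assert match(["a", "b"], ["b", "c"])            # list meta, list filter: overlap
    assert match(["a", "b"], "a")                   # list meta, scalar filter
    assert match("x", "x") and not match("x", "y")  # scalar vs scalar: equality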

aethergraph/storage/search_backend/null_backend.py (new file)
@@ -0,0 +1,34 @@
+from dataclasses import dataclass
+from typing import Any
+
+from aethergraph.contracts.storage.search_backend import ScoredItem, SearchBackend
+
+
+@dataclass
+class NullSearchBackend(SearchBackend):
+    """A no-op search backend that performs no indexing or searching."""
+
+    async def upsert(
+        self,
+        *,
+        corpus: str,
+        item_id: str,
+        text: str,
+        metadata: dict[str, Any],
+    ) -> None:
+        # no-op
+        return
+
+    async def search(
+        self,
+        *,
+        corpus: str,
+        query: str,
+        top_k: int = 10,
+        filters: dict[str, Any] | None = None,
+        time_window: str | None = None,
+        created_at_min: float | None = None,
+        created_at_max: float | None = None,
+    ) -> list[ScoredItem]:
+        # either empty or raise FeatureDisabled
+        return []
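
A minimal check that the null backend behaves as a safe default. The import path follows the file list above; how the new search_factory.py actually selects this backend is not shown in this diff.

    import asyncio

    from aethergraph.storage.search_backend.null_backend import NullSearchBackend

    async def main() -> None:
        backend = NullSearchBackend()
        # Upserts are swallowed, searches return nothing:
        await backend.upsert(corpus="c", item_id="i", text="t", metadata={})
        assert await backend.search(corpus="c", query="anything") == []

    asyncio.run(main())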