aethergraph 0.1.0a2__py3-none-any.whl → 0.1.0a4__py3-none-any.whl

Files changed (114)
  1. aethergraph/__main__.py +3 -0
  2. aethergraph/api/v1/artifacts.py +23 -4
  3. aethergraph/api/v1/schemas.py +7 -0
  4. aethergraph/api/v1/session.py +123 -4
  5. aethergraph/config/config.py +2 -0
  6. aethergraph/config/search.py +49 -0
  7. aethergraph/contracts/services/channel.py +18 -1
  8. aethergraph/contracts/services/execution.py +58 -0
  9. aethergraph/contracts/services/llm.py +26 -0
  10. aethergraph/contracts/services/memory.py +10 -4
  11. aethergraph/contracts/services/planning.py +53 -0
  12. aethergraph/contracts/storage/event_log.py +8 -0
  13. aethergraph/contracts/storage/search_backend.py +47 -0
  14. aethergraph/contracts/storage/vector_index.py +73 -0
  15. aethergraph/core/graph/action_spec.py +76 -0
  16. aethergraph/core/graph/graph_fn.py +75 -2
  17. aethergraph/core/graph/graphify.py +74 -2
  18. aethergraph/core/runtime/graph_runner.py +2 -1
  19. aethergraph/core/runtime/node_context.py +66 -3
  20. aethergraph/core/runtime/node_services.py +8 -0
  21. aethergraph/core/runtime/run_manager.py +263 -271
  22. aethergraph/core/runtime/run_types.py +54 -1
  23. aethergraph/core/runtime/runtime_env.py +35 -14
  24. aethergraph/core/runtime/runtime_services.py +308 -18
  25. aethergraph/plugins/agents/default_chat_agent.py +266 -74
  26. aethergraph/plugins/agents/default_chat_agent_v2.py +487 -0
  27. aethergraph/plugins/channel/adapters/webui.py +69 -21
  28. aethergraph/plugins/channel/routes/webui_routes.py +8 -48
  29. aethergraph/runtime/__init__.py +12 -0
  30. aethergraph/server/app_factory.py +10 -1
  31. aethergraph/server/ui_static/assets/index-CFktGdbW.js +4913 -0
  32. aethergraph/server/ui_static/assets/index-DcfkFlTA.css +1 -0
  33. aethergraph/server/ui_static/index.html +2 -2
  34. aethergraph/services/artifacts/facade.py +157 -21
  35. aethergraph/services/artifacts/types.py +35 -0
  36. aethergraph/services/artifacts/utils.py +42 -0
  37. aethergraph/services/channel/channel_bus.py +3 -1
  38. aethergraph/services/channel/event_hub copy.py +55 -0
  39. aethergraph/services/channel/event_hub.py +81 -0
  40. aethergraph/services/channel/factory.py +3 -2
  41. aethergraph/services/channel/session.py +709 -74
  42. aethergraph/services/container/default_container.py +69 -7
  43. aethergraph/services/execution/__init__.py +0 -0
  44. aethergraph/services/execution/local_python.py +118 -0
  45. aethergraph/services/indices/__init__.py +0 -0
  46. aethergraph/services/indices/global_indices.py +21 -0
  47. aethergraph/services/indices/scoped_indices.py +292 -0
  48. aethergraph/services/llm/generic_client.py +342 -46
  49. aethergraph/services/llm/generic_embed_client.py +359 -0
  50. aethergraph/services/llm/types.py +3 -1
  51. aethergraph/services/memory/distillers/llm_long_term.py +60 -109
  52. aethergraph/services/memory/distillers/llm_long_term_v1.py +180 -0
  53. aethergraph/services/memory/distillers/llm_meta_summary.py +57 -266
  54. aethergraph/services/memory/distillers/llm_meta_summary_v1.py +342 -0
  55. aethergraph/services/memory/distillers/long_term.py +48 -131
  56. aethergraph/services/memory/distillers/long_term_v1.py +170 -0
  57. aethergraph/services/memory/facade/chat.py +18 -8
  58. aethergraph/services/memory/facade/core.py +159 -19
  59. aethergraph/services/memory/facade/distillation.py +86 -31
  60. aethergraph/services/memory/facade/retrieval.py +100 -1
  61. aethergraph/services/memory/factory.py +4 -1
  62. aethergraph/services/planning/__init__.py +0 -0
  63. aethergraph/services/planning/action_catalog.py +271 -0
  64. aethergraph/services/planning/bindings.py +56 -0
  65. aethergraph/services/planning/dependency_index.py +65 -0
  66. aethergraph/services/planning/flow_validator.py +263 -0
  67. aethergraph/services/planning/graph_io_adapter.py +150 -0
  68. aethergraph/services/planning/input_parser.py +312 -0
  69. aethergraph/services/planning/missing_inputs.py +28 -0
  70. aethergraph/services/planning/node_planner.py +613 -0
  71. aethergraph/services/planning/orchestrator.py +112 -0
  72. aethergraph/services/planning/plan_executor.py +506 -0
  73. aethergraph/services/planning/plan_types.py +321 -0
  74. aethergraph/services/planning/planner.py +617 -0
  75. aethergraph/services/planning/planner_service.py +369 -0
  76. aethergraph/services/planning/planning_context_builder.py +43 -0
  77. aethergraph/services/planning/quick_actions.py +29 -0
  78. aethergraph/services/planning/routers/__init__.py +0 -0
  79. aethergraph/services/planning/routers/simple_router.py +26 -0
  80. aethergraph/services/rag/facade.py +0 -3
  81. aethergraph/services/scope/scope.py +30 -30
  82. aethergraph/services/scope/scope_factory.py +15 -7
  83. aethergraph/services/skills/__init__.py +0 -0
  84. aethergraph/services/skills/skill_registry.py +465 -0
  85. aethergraph/services/skills/skills.py +220 -0
  86. aethergraph/services/skills/utils.py +194 -0
  87. aethergraph/storage/artifacts/artifact_index_jsonl.py +16 -10
  88. aethergraph/storage/artifacts/artifact_index_sqlite.py +12 -2
  89. aethergraph/storage/docstore/sqlite_doc_sync.py +1 -1
  90. aethergraph/storage/memory/event_persist.py +42 -2
  91. aethergraph/storage/memory/fs_persist.py +32 -2
  92. aethergraph/storage/search_backend/__init__.py +0 -0
  93. aethergraph/storage/search_backend/generic_vector_backend.py +230 -0
  94. aethergraph/storage/search_backend/null_backend.py +34 -0
  95. aethergraph/storage/search_backend/sqlite_lexical_backend.py +387 -0
  96. aethergraph/storage/search_backend/utils.py +31 -0
  97. aethergraph/storage/search_factory.py +75 -0
  98. aethergraph/storage/vector_index/faiss_index.py +72 -4
  99. aethergraph/storage/vector_index/sqlite_index.py +521 -52
  100. aethergraph/storage/vector_index/sqlite_index_vanila.py +311 -0
  101. aethergraph/storage/vector_index/utils.py +22 -0
  102. {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/METADATA +1 -1
  103. {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/RECORD +108 -64
  104. {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/WHEEL +1 -1
  105. aethergraph/plugins/agents/default_chat_agent copy.py +0 -90
  106. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +0 -1
  107. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +0 -400
  108. aethergraph/services/eventhub/event_hub.py +0 -76
  109. aethergraph/services/llm/generic_client copy.py +0 -691
  110. aethergraph/services/prompts/file_store.py +0 -41
  111. {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/entry_points.txt +0 -0
  112. {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
  113. {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/NOTICE +0 -0
  114. {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/top_level.txt +0 -0
aethergraph/services/skills/utils.py
@@ -0,0 +1,194 @@
+ from pathlib import Path
+ import re
+ from typing import Any
+
+ from .skills import Skill
+
+ try:
+     import yaml  # type: ignore
+ except ImportError:  # pragma: no cover
+     yaml = None  # TODO: enforce PyYAML as a dependency?
+
+ _FRONT_MATTER_DELIM = re.compile(r"^---\s*$")
+ # Only treat H2 (##) as section delimiters, per spec.
+ _SECTION_HEADING_RE = re.compile(r"^(##)\s+(.*)$")
+
+
+ def _split_front_matter(text: str) -> tuple[dict[str, Any], str]:
+     """
+     Split YAML front matter from the rest of the markdown body.
+
+     Expects:
+         ---
+         yaml: here
+         ---
+         # Markdown starts here
+
+     Returns: (meta_dict, body_markdown)
+     """
+     lines = text.splitlines()
+     if not lines or not _FRONT_MATTER_DELIM.match(lines[0].strip()):
+         # No front matter block
+         return {}, text
+
+     # Find closing '---'
+     end_idx = None
+     for i in range(1, len(lines)):
+         if _FRONT_MATTER_DELIM.match(lines[i].strip()):
+             end_idx = i
+             break
+
+     if end_idx is None:
+         # Malformed front matter; treat entire file as body
+         return {}, text
+
+     fm_lines = lines[1:end_idx]
+     body_lines = lines[end_idx + 1 :]
+
+     fm_str = "\n".join(fm_lines)
+     body = "\n".join(body_lines)
+
+     if yaml is None:
+         # If PyYAML is not installed, return empty meta.
+         return {}, body
+
+     try:
+         meta = yaml.safe_load(fm_str) or {}
+     except Exception as exc:
+         # Surface a clear error – this is almost always a YAML indentation / syntax issue
+         raise ValueError(
+             f"Failed to parse YAML front matter: {exc!r}\n" f"Front matter was:\n{fm_str}"
+         ) from exc
+
+     if not isinstance(meta, dict):
+         raise ValueError(
+             f"YAML front matter must be a mapping (dict), got {type(meta)} instead.\n"
+             f"Front matter was:\n{fm_str}"
+         )
+
+     return meta, body
+
+
+ def _normalize_section_key(heading: str) -> str:
+     """
+     Normalize a heading text into a section key.
+
+     Rules:
+     - If the heading already contains a dot (e.g. "chat.system"), keep as-is.
+     - Else, lowercase and replace spaces with underscores, e.g. "Chat System" -> "chat_system".
+     """
+     raw = heading.strip()
+     if "." in raw:
+         return raw.strip()
+     return raw.lower().replace(" ", "_")
+
+
+ def _split_sections_from_body(body: str) -> dict[str, str]:
+     """
+     Split markdown body into sections keyed by normalized heading.
+
+     - Intro text before any heading -> section "body"
+     - Only H2 headings (`## something`) start new sections.
+     - H3+ (`### ...`) are treated as content.
+     """
+     sections: dict[str, list[str]] = {}
+     current_key: str | None = None
+     buffer: list[str] = []
+     preface: list[str] = []
+
+     lines = body.splitlines()
+
+     for line in lines:
+         m = _SECTION_HEADING_RE.match(line)
+         if m:
+             # Flush previous section or preface
+             if current_key is None:
+                 if buffer:
+                     preface.extend(buffer)
+             else:
+                 sections[current_key] = sections.get(current_key, []) + buffer
+
+             buffer = []
+             heading = m.group(2).strip()
+             current_key = _normalize_section_key(heading)
+         else:
+             buffer.append(line)
+
+     # Flush last buffer
+     if current_key is None:
+         if buffer:
+             preface.extend(buffer)
+     else:
+         sections[current_key] = sections.get(current_key, []) + buffer
+
+     out: dict[str, str] = {}
+     if preface:
+         out["body"] = "\n".join(preface).strip()
+
+     for k, lines_ in sections.items():
+         text = "\n".join(lines_).strip()
+         if text:
+             out[k] = text
+
+     return out
+
+
+ def parse_skill_markdown(text: str, path: Path | None = None) -> Skill:
+     """
+     Parse a single markdown file into a Skill.
+
+     The file must have YAML front matter with at least:
+     - id: string
+     - title: string
+
+     Sections are defined by `## section.key` headings.
+     """
+     meta, body = _split_front_matter(text)
+     sections = _split_sections_from_body(body)
+
+     location = str(path) if path is not None else "<string>"
+
+     # ---- Basic validation of YAML meta ----
+     if not meta:
+         raise ValueError(
+             f"Skill file {location} has no YAML front matter. "
+             "Expected at least:\n"
+             "---\n"
+             "id: some.id\n"
+             "title: Some title\n"
+             "---"
+         )
+
+     skill_id = meta.get("id")
+     title = meta.get("title")
+
+     if not isinstance(skill_id, str) or not skill_id.strip():
+         raise ValueError(
+             f"Skill file {location} is missing a valid 'id' in front matter. "
+             f"Got id={skill_id!r} in:\n{meta}"
+         )
+
+     if not isinstance(title, str) or not title.strip():
+         raise ValueError(
+             f"Skill file {location} is missing a valid 'title' in front matter. "
+             f"Got title={title!r} in:\n{meta}"
+         )
+
+     try:
+         skill = Skill.from_dict(
+             meta=meta,
+             sections=sections,
+             raw_markdown=text,
+             path=path,
+         )
+     except Exception as e:
+         raise ValueError(f"Failed to construct Skill from {location}: {e}") from e
+
+     # Extra guard – if Skill.from_dict ever returns None, fail loudly.
+     if skill is None:
+         raise ValueError(
+             f"Skill.from_dict returned None for skill file {location}. "
+             f"Front matter: {meta}, sections: {list(sections.keys())}"
+         )
+
+     return skill
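For orientation, a minimal usage sketch of the parser added above. The expected section keys follow the rules in _normalize_section_key and _split_sections_from_body; the sample skill file itself is invented for illustration:

from aethergraph.services.skills.utils import parse_skill_markdown

doc = """\
---
id: demo.echo
title: Demo echo skill
---
Intro text before any H2 heading lands in the "body" section.

## chat.system
You are an echo bot.

## Usage Notes
Headings without a dot are lowercased and snake_cased.
"""

skill = parse_skill_markdown(doc)
# Expected section keys: "body", "chat.system" (dotted key kept as-is),
# and "usage_notes" ("Usage Notes" normalized).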
aethergraph/storage/artifacts/artifact_index_jsonl.py
@@ -75,20 +75,26 @@ class JsonlArtifactIndexSync:
          if labels:
              for k, v in labels.items():
                  if k in TENANT_KEYS:
-                     # Match against top-level JSON fields
-                     rows = [r for r in rows if r.get(k) == v]
-                     continue
-
-                 # Normal label filters
-                 if isinstance(v, list):
+                     sv = str(v)
                      rows = [
                          r
                          for r in rows
-                         if isinstance(r.get("labels", {}).get(k), list)
-                         and set(v).issubset(set(r["labels"][k]))
+                         if str(r.get(k) or "") == sv
+                         or str((r.get("labels") or {}).get(k) or "") == sv
                      ]
-                 else:
-                     rows = [r for r in rows if r.get("labels", {}).get(k) == v]
+                     continue
+
+                 # Normal label filters
+                 if k == "tags":
+                     want = v if isinstance(v, list) else [v]
+                     want = [str(t).strip() for t in want if str(t).strip()]
+                     if want:
+                         rows = [
+                             r
+                             for r in rows
+                             if any(t in (r.get("labels", {}).get("tags") or []) for t in want)
+                         ]
+                     continue

          if metric and mode:
              rows = [r for r in rows if metric in r.get("metrics", {})]
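A standalone illustration (not package code) of the new tenant-key semantics above: a tenant key such as run_id now matches whether the value is stored as a top-level field or under "labels", and values are compared as strings.

# Three rows that store the tenant key in different places:
rows = [
    {"artifact_id": "a1", "run_id": "r-42", "labels": {}},
    {"artifact_id": "a2", "labels": {"run_id": "r-42"}},
    {"artifact_id": "a3", "labels": {"run_id": "r-99"}},
]
k, sv = "run_id", "r-42"
hits = [
    r
    for r in rows
    if str(r.get(k) or "") == sv or str((r.get("labels") or {}).get(k) or "") == sv
]
# hits -> a1 (top-level field) and a2 (label); a3 does not match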
aethergraph/storage/artifacts/artifact_index_sqlite.py
@@ -273,9 +273,19 @@ class SqliteArtifactIndexSync:
              where.append("(" + " OR ".join(ors) + ")")
              continue

+             # if k in TENANT_KEYS:
+             #     where.append(f"{TENANT_KEYS[k]} = ?")
+             #     params.append(v)
+             #     continue
+
          if k in TENANT_KEYS:
-             where.append(f"{TENANT_KEYS[k]} = ?")
-             params.append(v)
+             col = TENANT_KEYS[k]
+             sv = str(v)
+
+             # column OR labels_json fallback
+             where.append(f"({col} = ? OR labels_json LIKE ?)")
+             params.append(sv)
+             params.append(f'%"{k}": "{sv}"%')
              continue

          where.append("labels_json LIKE ?")
aethergraph/storage/docstore/sqlite_doc_sync.py
@@ -67,7 +67,7 @@ class SQLiteDocStoreSync:
                  (doc_id, payload, now),
              )
          except sqlite3.Error as e:
-             print("🍓 SQLiteDocStoreSync ERROR during put:", doc_id, repr(e))
+             print("SQLiteDocStoreSync ERROR during put:", doc_id, repr(e))
              raise

      def get(self, doc_id: str) -> dict[str, Any] | None:
aethergraph/storage/memory/event_persist.py
@@ -47,9 +47,9 @@ class EventLogPersistence(Persistence):
          return f"{self._prefix}{doc_id}"

      # --------- API ---------
-     async def append_event(self, run_id: str, evt: Event) -> None:
+     async def append_event(self, scope_id: str, evt: Event) -> None:
          payload = asdict(evt)
-         payload.setdefault("scope_id", run_id)
+         payload.setdefault("scope_id", scope_id)
          payload.setdefault("kind", "memory")
          # you can add tags like ["mem"] if useful
          await self._log.append(payload)
@@ -66,3 +66,43 @@ class EventLogPersistence(Persistence):
          if doc is None:
              raise FileNotFoundError(f"Memory JSON not found for URI: {uri}")
          return doc
+
+     async def get_events_by_ids(
+         self,
+         scope_id: str,
+         event_ids: list[str],
+     ) -> list[Event]:
+         """
+         Fetch events for a given scope_id (timeline) by event_id.
+
+         Implementation v0: use EventLog.query and filter in Python.
+         For moderate timeline sizes and small event_ids lists, this is fine.
+         Later, you can optimize by adding a direct get_many API on EventLog
+         or indexing by (scope_id, event_id).
+         """
+         if not event_ids:
+             return []
+
+         # Fetch all events for the scope_id; TODO: add reasonable limits / paging
+         rows = await self._log.query(
+             scope_id=scope_id,
+             since=None,
+             until=None,
+             kinds=None,
+             tags=None,
+             limit=None,
+             offset=0,
+         )
+
+         by_id: dict[str, Event] = {}
+         for row in rows:
+             eid = row.get("event_id")
+             if eid:
+                 by_id[eid] = row
+
+         result: list[Event] = []
+         for eid in event_ids:
+             row = by_id.get(eid)
+             if row is not None:
+                 result.append(Event(**row))
+         return result
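A hedged usage sketch of the new lookup; the persistence instance and the IDs are invented:

events = await persistence.get_events_by_ids(
    scope_id="session:abc123",
    event_ids=["evt-001", "evt-007"],
)
# Results preserve the order of event_ids; unknown IDs are silently skipped.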
aethergraph/storage/memory/fs_persist.py
@@ -30,9 +30,9 @@ class FSPersistence(Persistence):

      # ---------- Event log (append-only JSONL) ----------

-     async def append_event(self, run_id: str, evt: Event) -> None:
+     async def append_event(self, scope_id: str, evt: Event) -> None:
          day = time.strftime("%Y-%m-%d", time.gmtime())
-         path = self.base_dir / "mem" / run_id / "events" / f"{day}.jsonl"
+         path = self.base_dir / "mem" / scope_id / "events" / f"{day}.jsonl"

          def _write() -> None:
              path.parent.mkdir(parents=True, exist_ok=True)
@@ -116,3 +116,33 @@ class FSPersistence(Persistence):
              return json.load(f)

          return await asyncio.to_thread(_read)
+
+     async def get_events_by_ids(
+         self,
+         scope_id: str,
+         event_ids: list[str],
+     ) -> list[Event]:
+         """
+         Fetch events for a given scope_id (timeline) by event_id.
+         """
+         id_set = set(event_ids)
+         found: list[Event] = []
+
+         day = time.strftime("%Y-%m-%d", time.gmtime())
+         path = self.base_dir / "mem" / scope_id / "events" / f"{day}.jsonl"
+
+         if not path.exists():
+             return found
+
+         def _read() -> list[Event]:
+             results: list[Event] = []
+             with self._lock, path.open("r", encoding="utf-8") as f:
+                 for line in f:
+                     data = json.loads(line)
+                     if data.get("event_id") in id_set:
+                         evt = Event(**data)
+                         results.append(evt)
+             return results
+
+         found = await asyncio.to_thread(_read)
+         return found
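For reference, the on-disk layout this implementation assumes (base directory and scope invented): one JSONL shard per UTC day, which also means this v0 of get_events_by_ids only scans today's shard.

from pathlib import Path
import time

base_dir = Path("/var/aethergraph")  # invented
scope_id = "session:abc123"          # invented
day = time.strftime("%Y-%m-%d", time.gmtime())
path = base_dir / "mem" / scope_id / "events" / f"{day}.jsonl"
# e.g. /var/aethergraph/mem/session:abc123/events/2024-01-01.jsonl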
aethergraph/storage/search_backend/__init__.py (file without changes)
aethergraph/storage/search_backend/generic_vector_backend.py
@@ -0,0 +1,230 @@
+ from __future__ import annotations
+
+ from collections.abc import Sequence
+ from dataclasses import dataclass
+ from time import time
+ from typing import Any
+
+ from aethergraph.contracts.services.llm import EmbeddingClientProtocol
+ from aethergraph.contracts.storage.search_backend import ScoredItem, SearchBackend
+ from aethergraph.contracts.storage.vector_index import PROMOTED_FIELDS, VectorIndex
+
+ from .utils import _parse_time_window
+
+
+ @dataclass
+ class GenericVectorSearchBackend(SearchBackend):
+     """
+     SearchBackend implementation on top of a VectorIndex + EmbeddingClient.
+
+     - Upserts: embed text and store (vector, metadata) in the index
+     - Search: embed query, retrieve top-k by cosine similarity,
+       then apply Python-level metadata filters.
+     """
+
+     index: VectorIndex
+     embedder: EmbeddingClientProtocol
+
+     # -------- helpers ----------------------------------------------------
+     async def _embed(self, text: str) -> list[float]:
+         vec: Sequence[float] = await self.embedder.embed_one(text)
+         # Ensure a concrete list[float] for numpy/etc
+         return [float(x) for x in vec]
+
+     @staticmethod
+     def _match_value(mv: Any, val: Any) -> bool:
+         """
+         Rich matching semantics for filters:
+         - If val is list/tuple/set:
+             - if mv is list-like too -> match if intersection is non-empty
+             - else -> match if mv is in val
+         - If val is scalar:
+             - if mv is list-like -> match if val is in mv
+             - else -> match if mv == val
+         """
+         if val is None:
+             return True
+
+         def _is_list_like(x: Any) -> bool:
+             return isinstance(x, (list, tuple, set))  # noqa: UP038
+
+         if _is_list_like(val):
+             if _is_list_like(mv):
+                 # any overlap between filter values and meta values
+                 return any(x in val for x in mv)
+             else:
+                 # meta is scalar, filter is list-like
+                 return mv in val
+
+         # val is scalar
+         if _is_list_like(mv):
+             return val in mv
+
+         return mv == val
+
+     @staticmethod
+     def _matches_filters(meta: dict[str, Any], filters: dict[str, Any]) -> bool:
+         """
+         Simple AND filter: all filter keys must match exactly.
+         - If filter value is a list, meta[key] must be in that list.
+         - If filter value is None, we don't constrain that key.
+         """
+         for k, v in filters.items():
+             if v is None:
+                 continue
+             if k not in meta:
+                 return False
+             mv = meta[k]
+             if not GenericVectorSearchBackend._match_value(mv, v):
+                 return False
+         return True
+
+     # -------- public APIs ------------------------------------------------
+     async def upsert(
+         self,
+         *,
+         corpus: str,
+         item_id: str,
+         text: str,
+         metadata: dict[str, Any],
+     ) -> None:
+         if not text:
+             # avoid zero vector; caller should ensure text is non-empty
+             text = ""
+
+         vector = await self._embed(text)
+         await self.index.add(
+             corpus_id=corpus,
+             chunk_ids=[item_id],
+             vectors=[vector],
+             metas=[metadata],
+         )
+
+     async def search(
+         self,
+         *,
+         corpus: str,
+         query: str,
+         top_k: int = 10,
+         filters: dict[str, Any] | None = None,
+         time_window: str | None = None,
+         created_at_min: float | None = None,
+         created_at_max: float | None = None,
+     ) -> list[ScoredItem]:
+         filters = filters or {}
+         if not query.strip():
+             return []
+
+         q_vec = await self._embed(query)
+
+         # ---- 1) Handle time constraints ---------------------------------
+         now_ts = time()
+
+         # If time_window is provided and no explicit min, interpret it as [now - window, now]
+         if time_window and created_at_min is None:
+             duration = _parse_time_window(time_window)
+             created_at_min = now_ts - duration
+
+         # If max is not provided but we used a time_window, default to now
+         if time_window and created_at_max is None:
+             created_at_max = now_ts
+
+         # ---- 2) Split filters into index-level vs Python-level ---------
+         index_filters: dict[str, Any] = {}
+         post_filters: dict[str, Any] = {}
+
+         for key, val in filters.items():
+             if val is None:
+                 continue
+
+             if key in PROMOTED_FIELDS and not isinstance(val, (list, tuple, set)):  # noqa: UP038
+                 index_filters[key] = val
+             else:
+                 post_filters[key] = val
+
+         # ---- 3) Ask index for scoped, time-bounded candidates ----------
+         raw_k = max(top_k * 3, top_k)
+         max_candidates = max(top_k * 50, raw_k)  # tunable safety cap
+
+         rows = await self.index.search(
+             corpus_id=corpus,
+             query_vec=q_vec,
+             k=raw_k,
+             where=index_filters,
+             max_candidates=max_candidates,
+             created_at_min=created_at_min,
+             created_at_max=created_at_max,
+         )
+
+         # ---- 4) Apply Python-level filters + build ScoredItem list -----
+         results: list[ScoredItem] = []
+         for row in rows:
+             chunk_id = row["chunk_id"]
+             score = float(row["score"])
+             meta = dict(row.get("meta") or {})
+
+             if post_filters and not self._matches_filters(meta, post_filters):
+                 continue
+
+             results.append(
+                 ScoredItem(
+                     item_id=chunk_id,
+                     corpus=corpus,
+                     score=score,
+                     metadata=meta,
+                 )
+             )
+             if len(results) >= top_k:
+                 break
+
+         return results
+
+     async def search_old(
+         self,
+         *,
+         corpus: str,
+         query: str,
+         top_k: int = 10,
+         filters: dict[str, Any] | None = None,
+     ) -> list[ScoredItem]:
+         """
+         1) Embed the query
+         2) Vector search in the underlying index
+         3) Apply metadata filters in Python
+         """
+         filters = filters or {}
+         if not query.strip():
+             # empty query: probably return nothing for now
+             return []
+
+         q_vec = await self._embed(query)
+
+         # Ask underlying VectorIndex for more than top_k, since we may
+         # filter some out. Factor 3 is arbitrary but usually safe.
+         raw_k = max(top_k * 3, top_k)
+         rows = await self.index.search(
+             corpus_id=corpus,
+             query_vec=q_vec,
+             k=raw_k,
+         )
+
+         results: list[ScoredItem] = []
+         for row in rows:
+             chunk_id = row["chunk_id"]
+             score = float(row["score"])
+             meta = dict(row.get("meta") or {})
+
+             if filters and not self._matches_filters(meta, filters):
+                 continue
+
+             results.append(
+                 ScoredItem(
+                     item_id=chunk_id,
+                     corpus=corpus,
+                     score=score,
+                     metadata=meta,
+                 )
+             )
+             if len(results) >= top_k:
+                 break
+         return results
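A hedged usage sketch of the backend above. Construction of the concrete VectorIndex and embedding client is assumed, and the "7d" shorthand assumes _parse_time_window (from the new search_backend/utils.py, not shown here) accepts it:

backend = GenericVectorSearchBackend(index=my_index, embedder=my_embedder)

await backend.upsert(
    corpus="notes",
    item_id="n1",
    text="quarterly planning meeting notes",
    metadata={"kind": "note", "tags": ["planning", "q3"]},
)

hits = await backend.search(
    corpus="notes",
    query="planning",
    top_k=5,
    # Scalar promoted fields go down to the index; list-valued filters
    # (e.g. tags) are applied in Python via _matches_filters.
    filters={"kind": "note", "tags": ["planning"]},
    time_window="7d",  # interpreted as [now - window, now]
)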
aethergraph/storage/search_backend/null_backend.py
@@ -0,0 +1,34 @@
+ from dataclasses import dataclass
+ from typing import Any
+
+ from aethergraph.contracts.storage.search_backend import ScoredItem, SearchBackend
+
+
+ @dataclass
+ class NullSearchBackend(SearchBackend):
+     """A no-op search backend that performs no indexing or searching."""
+
+     async def upsert(
+         self,
+         *,
+         corpus: str,
+         item_id: str,
+         text: str,
+         metadata: dict[str, Any],
+     ) -> None:
+         # no-op
+         return
+
+     async def search(
+         self,
+         *,
+         corpus: str,
+         query: str,
+         top_k: int = 10,
+         filters: dict[str, Any] | None = None,
+         time_window: str | None = None,
+         created_at_min: float | None = None,
+         created_at_max: float | None = None,
+     ) -> list[ScoredItem]:
+         # either empty or raise FeatureDisabled
+         return []
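A short sketch of the disabled-search path this backend exists for (the wiring is invented, not taken from the diff): callers keep using the same SearchBackend interface and simply get empty results.

backend: SearchBackend = NullSearchBackend()

await backend.upsert(corpus="notes", item_id="n1", text="ignored", metadata={})
hits = await backend.search(corpus="notes", query="anything", top_k=5)
assert hits == []  # both operations are no-ops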