aethergraph 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. aethergraph/__init__.py +4 -10
  2. aethergraph/__main__.py +293 -0
  3. aethergraph/api/v1/__init__.py +0 -0
  4. aethergraph/api/v1/agents.py +46 -0
  5. aethergraph/api/v1/apps.py +70 -0
  6. aethergraph/api/v1/artifacts.py +415 -0
  7. aethergraph/api/v1/channels.py +89 -0
  8. aethergraph/api/v1/deps.py +168 -0
  9. aethergraph/api/v1/graphs.py +259 -0
  10. aethergraph/api/v1/identity.py +25 -0
  11. aethergraph/api/v1/memory.py +353 -0
  12. aethergraph/api/v1/misc.py +47 -0
  13. aethergraph/api/v1/pagination.py +29 -0
  14. aethergraph/api/v1/runs.py +568 -0
  15. aethergraph/api/v1/schemas.py +535 -0
  16. aethergraph/api/v1/session.py +323 -0
  17. aethergraph/api/v1/stats.py +201 -0
  18. aethergraph/api/v1/viz.py +152 -0
  19. aethergraph/config/config.py +22 -0
  20. aethergraph/config/loader.py +3 -2
  21. aethergraph/config/storage.py +209 -0
  22. aethergraph/contracts/__init__.py +0 -0
  23. aethergraph/contracts/services/__init__.py +0 -0
  24. aethergraph/contracts/services/artifacts.py +27 -14
  25. aethergraph/contracts/services/memory.py +45 -17
  26. aethergraph/contracts/services/metering.py +129 -0
  27. aethergraph/contracts/services/runs.py +50 -0
  28. aethergraph/contracts/services/sessions.py +87 -0
  29. aethergraph/contracts/services/state_stores.py +3 -0
  30. aethergraph/contracts/services/viz.py +44 -0
  31. aethergraph/contracts/storage/artifact_index.py +88 -0
  32. aethergraph/contracts/storage/artifact_store.py +99 -0
  33. aethergraph/contracts/storage/async_kv.py +34 -0
  34. aethergraph/contracts/storage/blob_store.py +50 -0
  35. aethergraph/contracts/storage/doc_store.py +35 -0
  36. aethergraph/contracts/storage/event_log.py +31 -0
  37. aethergraph/contracts/storage/vector_index.py +48 -0
  38. aethergraph/core/__init__.py +0 -0
  39. aethergraph/core/execution/forward_scheduler.py +13 -2
  40. aethergraph/core/execution/global_scheduler.py +21 -15
  41. aethergraph/core/execution/step_forward.py +10 -1
  42. aethergraph/core/graph/__init__.py +0 -0
  43. aethergraph/core/graph/graph_builder.py +8 -4
  44. aethergraph/core/graph/graph_fn.py +156 -15
  45. aethergraph/core/graph/graph_spec.py +8 -0
  46. aethergraph/core/graph/graphify.py +146 -27
  47. aethergraph/core/graph/node_spec.py +0 -2
  48. aethergraph/core/graph/node_state.py +3 -0
  49. aethergraph/core/graph/task_graph.py +39 -1
  50. aethergraph/core/runtime/__init__.py +0 -0
  51. aethergraph/core/runtime/ad_hoc_context.py +64 -4
  52. aethergraph/core/runtime/base_service.py +28 -4
  53. aethergraph/core/runtime/execution_context.py +13 -15
  54. aethergraph/core/runtime/graph_runner.py +222 -37
  55. aethergraph/core/runtime/node_context.py +510 -6
  56. aethergraph/core/runtime/node_services.py +12 -5
  57. aethergraph/core/runtime/recovery.py +15 -1
  58. aethergraph/core/runtime/run_manager.py +783 -0
  59. aethergraph/core/runtime/run_manager_local.py +204 -0
  60. aethergraph/core/runtime/run_registration.py +2 -2
  61. aethergraph/core/runtime/run_types.py +89 -0
  62. aethergraph/core/runtime/runtime_env.py +136 -7
  63. aethergraph/core/runtime/runtime_metering.py +71 -0
  64. aethergraph/core/runtime/runtime_registry.py +36 -13
  65. aethergraph/core/runtime/runtime_services.py +194 -6
  66. aethergraph/core/tools/builtins/toolset.py +1 -1
  67. aethergraph/core/tools/toolkit.py +5 -0
  68. aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
  69. aethergraph/plugins/agents/default_chat_agent.py +171 -0
  70. aethergraph/plugins/agents/shared.py +81 -0
  71. aethergraph/plugins/channel/adapters/webui.py +112 -112
  72. aethergraph/plugins/channel/routes/webui_routes.py +367 -102
  73. aethergraph/plugins/channel/utils/slack_utils.py +115 -59
  74. aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
  75. aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
  76. aethergraph/runtime/__init__.py +15 -0
  77. aethergraph/server/app_factory.py +190 -34
  78. aethergraph/server/clients/channel_client.py +202 -0
  79. aethergraph/server/http/channel_http_routes.py +116 -0
  80. aethergraph/server/http/channel_ws_routers.py +45 -0
  81. aethergraph/server/loading.py +117 -0
  82. aethergraph/server/server.py +131 -0
  83. aethergraph/server/server_state.py +240 -0
  84. aethergraph/server/start.py +227 -66
  85. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
  86. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
  87. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
  88. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
  89. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
  90. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
  91. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
  92. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
  93. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
  94. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
  95. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
  96. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
  97. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
  98. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
  99. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
  100. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
  101. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
  102. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
  103. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
  104. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
  105. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
  106. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
  107. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
  108. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
  109. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
  110. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
  111. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
  112. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
  113. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
  114. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
  115. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
  116. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
  117. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
  118. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
  119. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
  120. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
  121. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
  122. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
  123. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
  124. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
  125. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
  126. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
  127. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
  128. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
  129. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
  130. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
  131. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
  132. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
  133. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
  134. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
  135. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
  136. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
  137. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
  138. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
  139. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
  140. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
  141. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
  142. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
  143. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
  144. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
  145. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
  146. aethergraph/server/ui_static/index.html +15 -0
  147. aethergraph/server/ui_static/logo.png +0 -0
  148. aethergraph/services/artifacts/__init__.py +0 -0
  149. aethergraph/services/artifacts/facade.py +1239 -132
  150. aethergraph/services/auth/{dev.py → authn.py} +0 -8
  151. aethergraph/services/auth/authz.py +100 -0
  152. aethergraph/services/channel/__init__.py +0 -0
  153. aethergraph/services/channel/channel_bus.py +19 -1
  154. aethergraph/services/channel/factory.py +13 -1
  155. aethergraph/services/channel/ingress.py +311 -0
  156. aethergraph/services/channel/queue_adapter.py +75 -0
  157. aethergraph/services/channel/session.py +502 -19
  158. aethergraph/services/container/default_container.py +122 -43
  159. aethergraph/services/continuations/continuation.py +6 -0
  160. aethergraph/services/continuations/stores/fs_store.py +19 -0
  161. aethergraph/services/eventhub/event_hub.py +76 -0
  162. aethergraph/services/kv/__init__.py +0 -0
  163. aethergraph/services/kv/ephemeral.py +244 -0
  164. aethergraph/services/llm/__init__.py +0 -0
  165. aethergraph/services/llm/generic_client copy.py +691 -0
  166. aethergraph/services/llm/generic_client.py +1288 -187
  167. aethergraph/services/llm/providers.py +3 -1
  168. aethergraph/services/llm/types.py +47 -0
  169. aethergraph/services/llm/utils.py +284 -0
  170. aethergraph/services/logger/std.py +3 -0
  171. aethergraph/services/mcp/__init__.py +9 -0
  172. aethergraph/services/mcp/http_client.py +38 -0
  173. aethergraph/services/mcp/service.py +225 -1
  174. aethergraph/services/mcp/stdio_client.py +41 -6
  175. aethergraph/services/mcp/ws_client.py +44 -2
  176. aethergraph/services/memory/__init__.py +0 -0
  177. aethergraph/services/memory/distillers/llm_long_term.py +234 -0
  178. aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
  179. aethergraph/services/memory/distillers/long_term.py +225 -0
  180. aethergraph/services/memory/facade/__init__.py +3 -0
  181. aethergraph/services/memory/facade/chat.py +440 -0
  182. aethergraph/services/memory/facade/core.py +447 -0
  183. aethergraph/services/memory/facade/distillation.py +424 -0
  184. aethergraph/services/memory/facade/rag.py +410 -0
  185. aethergraph/services/memory/facade/results.py +315 -0
  186. aethergraph/services/memory/facade/retrieval.py +139 -0
  187. aethergraph/services/memory/facade/types.py +77 -0
  188. aethergraph/services/memory/facade/utils.py +43 -0
  189. aethergraph/services/memory/facade_dep.py +1539 -0
  190. aethergraph/services/memory/factory.py +9 -3
  191. aethergraph/services/memory/utils.py +10 -0
  192. aethergraph/services/metering/eventlog_metering.py +470 -0
  193. aethergraph/services/metering/noop.py +25 -4
  194. aethergraph/services/rag/__init__.py +0 -0
  195. aethergraph/services/rag/facade.py +279 -23
  196. aethergraph/services/rag/index_factory.py +2 -2
  197. aethergraph/services/rag/node_rag.py +317 -0
  198. aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
  199. aethergraph/services/registry/__init__.py +0 -0
  200. aethergraph/services/registry/agent_app_meta.py +419 -0
  201. aethergraph/services/registry/registry_key.py +1 -1
  202. aethergraph/services/registry/unified_registry.py +74 -6
  203. aethergraph/services/scope/scope.py +159 -0
  204. aethergraph/services/scope/scope_factory.py +164 -0
  205. aethergraph/services/state_stores/serialize.py +5 -0
  206. aethergraph/services/state_stores/utils.py +2 -1
  207. aethergraph/services/viz/__init__.py +0 -0
  208. aethergraph/services/viz/facade.py +413 -0
  209. aethergraph/services/viz/viz_service.py +69 -0
  210. aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
  211. aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
  212. aethergraph/storage/artifacts/cas_store.py +422 -0
  213. aethergraph/storage/artifacts/fs_cas.py +18 -0
  214. aethergraph/storage/artifacts/s3_cas.py +14 -0
  215. aethergraph/storage/artifacts/utils.py +124 -0
  216. aethergraph/storage/blob/fs_blob.py +86 -0
  217. aethergraph/storage/blob/s3_blob.py +115 -0
  218. aethergraph/storage/continuation_store/fs_cont.py +283 -0
  219. aethergraph/storage/continuation_store/inmem_cont.py +146 -0
  220. aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
  221. aethergraph/storage/docstore/fs_doc.py +63 -0
  222. aethergraph/storage/docstore/sqlite_doc.py +31 -0
  223. aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
  224. aethergraph/storage/eventlog/fs_event.py +136 -0
  225. aethergraph/storage/eventlog/sqlite_event.py +47 -0
  226. aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
  227. aethergraph/storage/factory.py +432 -0
  228. aethergraph/storage/fs_utils.py +28 -0
  229. aethergraph/storage/graph_state_store/state_store.py +64 -0
  230. aethergraph/storage/kv/inmem_kv.py +103 -0
  231. aethergraph/storage/kv/layered_kv.py +52 -0
  232. aethergraph/storage/kv/sqlite_kv.py +39 -0
  233. aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
  234. aethergraph/storage/memory/event_persist.py +68 -0
  235. aethergraph/storage/memory/fs_persist.py +118 -0
  236. aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
  237. aethergraph/{services → storage}/memory/indices.py +31 -7
  238. aethergraph/storage/metering/meter_event.py +55 -0
  239. aethergraph/storage/runs/doc_store.py +280 -0
  240. aethergraph/storage/runs/inmen_store.py +82 -0
  241. aethergraph/storage/runs/sqlite_run_store.py +403 -0
  242. aethergraph/storage/sessions/doc_store.py +183 -0
  243. aethergraph/storage/sessions/inmem_store.py +110 -0
  244. aethergraph/storage/sessions/sqlite_session_store.py +399 -0
  245. aethergraph/storage/vector_index/chroma_index.py +138 -0
  246. aethergraph/storage/vector_index/faiss_index.py +179 -0
  247. aethergraph/storage/vector_index/sqlite_index.py +187 -0
  248. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/METADATA +138 -31
  249. aethergraph-0.1.0a2.dist-info/RECORD +356 -0
  250. aethergraph-0.1.0a2.dist-info/entry_points.txt +3 -0
  251. aethergraph/services/artifacts/factory.py +0 -35
  252. aethergraph/services/artifacts/fs_store.py +0 -656
  253. aethergraph/services/artifacts/jsonl_index.py +0 -123
  254. aethergraph/services/artifacts/sqlite_index.py +0 -209
  255. aethergraph/services/memory/distillers/episode.py +0 -116
  256. aethergraph/services/memory/distillers/rolling.py +0 -74
  257. aethergraph/services/memory/facade.py +0 -633
  258. aethergraph/services/memory/persist_fs.py +0 -40
  259. aethergraph/services/rag/index/base.py +0 -27
  260. aethergraph/services/rag/index/faiss_index.py +0 -121
  261. aethergraph/services/rag/index/sqlite_index.py +0 -134
  262. aethergraph-0.1.0a1.dist-info/RECORD +0 -182
  263. aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
  264. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/WHEEL +0 -0
  265. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
  266. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/NOTICE +0 -0
  267. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import asyncio
3
4
  from dataclasses import dataclass
4
5
  import hashlib
5
6
  import json
@@ -9,6 +10,7 @@ import time
9
10
  from typing import Any
10
11
 
11
12
  from aethergraph.contracts.services.llm import LLMClientProtocol
13
+ from aethergraph.services.scope.scope import Scope
12
14
 
13
15
  from .chunker import TextSplitter
14
16
  from .utils.hybrid import topk_fuse
@@ -94,7 +96,13 @@ class RAGFacade:
94
96
  return os.path.join(self.root, make_fs_key(corpus_id))
95
97
 
96
98
  # ---------- ingestion ----------
97
- async def add_corpus(self, corpus_id: str, meta: dict[str, Any] | None = None):
99
+ async def add_corpus(
100
+ self,
101
+ corpus_id: str,
102
+ meta: dict[str, Any] | None = None,
103
+ *,
104
+ scope_labels: dict[str, str] | None = None,
105
+ ):
98
106
  """Create a new corpus with optional metadata.
99
107
  Args:
100
108
  corpus_id: Unique identifier for the corpus.
@@ -104,18 +112,27 @@ class RAGFacade:
104
112
  os.makedirs(p, exist_ok=True)
105
113
  meta_path = os.path.join(p, "corpus.json")
106
114
  if not os.path.exists(meta_path):
115
+ full_meta = {
116
+ "corpus_id": corpus_id,
117
+ "fs_key": make_fs_key(corpus_id),
118
+ "created_at": _now_iso(),
119
+ "meta": meta or {},
120
+ }
121
+ if scope_labels:
122
+ full_meta.setdefault("meta", {})
123
+ full_meta["meta"]["scope"] = dict(scope_labels)
124
+
107
125
  with open(meta_path, "w", encoding="utf-8") as f:
108
- json.dump(
109
- {
110
- "corpus_id": corpus_id,
111
- "fs_key": make_fs_key(corpus_id), # for reference
112
- "created_at": _now_iso(),
113
- "meta": meta or {},
114
- },
115
- f,
116
- )
126
+ json.dump(full_meta, f)
117
127
 
118
- async def upsert_docs(self, corpus_id: str, docs: list[dict[str, Any]]) -> dict[str, Any]:
128
+ async def upsert_docs(
129
+ self,
130
+ corpus_id: str,
131
+ docs: list[dict[str, Any]],
132
+ *,
133
+ scope: Scope | None = None,
134
+ scope_id: str | None = None, # e.g. memory_scope_id if tied to memory
135
+ ) -> dict[str, Any]:
119
136
  """Ingest and index a list of documents into the specified corpus.
120
137
  Args:
121
138
  corpus_id: The target corpus identifier.
@@ -125,10 +142,16 @@ class RAGFacade:
125
142
  - File-based documents: {"path": "/path/to/doc.pdf", "labels": {...}}
126
143
  - Inline text documents: {"text": "Document content...", "title": "Doc Title", "labels": {...}}
127
144
  """
145
+
128
146
  if not self.embed:
129
147
  raise RuntimeError("RAGFacade: embed client not configured")
130
148
 
131
- await self.add_corpus(corpus_id)
149
+ scope_labels: dict[str, str] = {}
150
+ if scope is not None:
151
+ scope_labels = scope.rag_labels(scope_id=scope_id)
152
+
153
+ await self.add_corpus(corpus_id, meta=None, scope_labels=scope_labels)
154
+
132
155
  cdir = self._cdir(corpus_id)
133
156
  docs_jl = os.path.join(cdir, "docs.jsonl")
134
157
  chunks_jl = os.path.join(cdir, "chunks.jsonl")
@@ -139,7 +162,8 @@ class RAGFacade:
139
162
  total_chunks = 0
140
163
 
141
164
  for d in docs:
142
- labels = d.get("labels", {})
165
+ # Merge scope labels into provided labels
166
+ labels = {**scope_labels, **(d.get("labels", {}) or {})}
143
167
  title = d.get("title") or os.path.basename(d.get("path", "")) or "untitled"
144
168
  doc_id = _stable_id({"title": title, "labels": labels, "ts": _now_iso()})
145
169
  text = None
@@ -150,13 +174,13 @@ class RAGFacade:
150
174
  uri = await self.artifacts.save_file(
151
175
  path=d["path"],
152
176
  kind="doc",
153
- run_id="rag",
154
- graph_id="rag",
155
- node_id="rag",
177
+ run_id=scope.run_id if scope else "rag",
178
+ graph_id=scope.graph_id if scope else "rag",
179
+ node_id=scope.node_id if scope else "rag",
156
180
  tool_name="rag.upsert",
157
181
  tool_version="0.1.0",
158
182
  labels=labels,
159
- cleanup=False,
183
+ cleanup=False, # keep source file as this is the original
160
184
  )
161
185
  path = d["path"].lower()
162
186
  if path.endswith(".pdf"):
@@ -175,8 +199,29 @@ class RAGFacade:
175
199
  else:
176
200
  # inline text doc — persist as artifact first
177
201
  payload = d.get("text", "")
178
- uri = await self.artifacts.save_text(payload=payload) # store as temp artifact
179
- doc_uri = uri.uri if hasattr(uri, "uri") else uri
202
+
203
+ # stage and save:
204
+ staged = await self.artifacts.plan_staging_path(".txt")
205
+ payload = d.get("text", "")
206
+
207
+ def _write_staged(path: str, content: str) -> None:
208
+ with open(path, "w", encoding="utf-8") as f:
209
+ f.write(content)
210
+
211
+ await asyncio.to_thread(_write_staged, staged, payload)
212
+
213
+ a = await self.artifacts.save_file(
214
+ path=staged,
215
+ kind="doc",
216
+ run_id=scope.run_id if scope else "rag",
217
+ graph_id=scope.graph_id if scope else "rag",
218
+ node_id=scope.node_id if scope else "rag",
219
+ tool_name="rag.upsert",
220
+ tool_version="0.1.0",
221
+ labels=labels,
222
+ )
223
+
224
+ doc_uri = a.uri if hasattr(a, "uri") else a
180
225
  text = payload
181
226
 
182
227
  text = (text or "").strip()
@@ -185,7 +230,7 @@ class RAGFacade:
185
230
  self.logger.warning(f"RAG: empty text for doc {title}")
186
231
  continue
187
232
 
188
- # write doc record
233
+ # write doc record with labels including scope
189
234
  with open(docs_jl, "a", encoding="utf-8") as f:
190
235
  f.write(
191
236
  json.dumps(
@@ -207,6 +252,7 @@ class RAGFacade:
207
252
  chunks = self.chunker.split(text)
208
253
  if not chunks:
209
254
  continue
255
+
210
256
  # batch embed
211
257
  vecs = await self.embed.embed(chunks)
212
258
  for i, (chunk_text, vec) in enumerate(zip(chunks, vecs, strict=True)):
@@ -255,6 +301,40 @@ class RAGFacade:
255
301
  out[obj["chunk_id"]] = obj
256
302
  return out
257
303
 
304
+ def _apply_filters(
305
+ self,
306
+ corpus_id: str,
307
+ hits: list[dict[str, Any]],
308
+ filters: dict[str, Any] | None = None,
309
+ ) -> list[dict[str, Any]]:
310
+ """Apply filters to the search hits."""
311
+ if not filters:
312
+ return hits
313
+
314
+ # We need labels to test filters. They are in meta["labels"] for each chunk.
315
+ # hits come from index.search as [{"chunk_id", "score", "meta": {...}}, ...].
316
+ # It works as follows:
317
+ # 1. For each hit, we extract the labels from the meta information.
318
+ # 2. We then check if the labels match the desired filters.
319
+ out = []
320
+ for h in hits:
321
+ meta = h.get("meta", {}) or {}
322
+ labels = meta.get("labels", {}) or {}
323
+ ok = True
324
+ for k, want in filters.items():
325
+ val = labels.get(k)
326
+ if isinstance(want, list | tuple | set):
327
+ if val not in want:
328
+ ok = False
329
+ break
330
+ else:
331
+ if val != want:
332
+ ok = False
333
+ break
334
+ if ok:
335
+ out.append(h)
336
+ return out
337
+
258
338
  async def search(
259
339
  self,
260
340
  corpus_id: str,
@@ -277,7 +357,13 @@ class RAGFacade:
277
357
  # dense search via index then optional lexical fusion
278
358
  qvec = (await self.embed.embed([query]))[0]
279
359
  dense_hits = await self.index.search(corpus_id, qvec, max(24, k))
360
+
361
+ # apply filters before fusion
362
+ dense_hits = self._apply_filters(corpus_id, dense_hits, filters=filters)
363
+
280
364
  chunks_map = self._load_chunks_map(corpus_id)
365
+
366
+ # if only dense or no hits, return directly
281
367
  if mode == "dense" or not dense_hits:
282
368
  dense_hits = dense_hits[:k]
283
369
  return [
@@ -292,6 +378,7 @@ class RAGFacade:
292
378
  for h in dense_hits
293
379
  ]
294
380
 
381
+ # hybrid fusion: i.e. dense + lexical
295
382
  fused = topk_fuse(
296
383
  query, dense_hits, {cid: rec.get("text", "") for cid, rec in chunks_map.items()}, k
297
384
  )
@@ -310,6 +397,38 @@ class RAGFacade:
310
397
  )
311
398
  return out
312
399
 
400
+ async def search_scoped(
401
+ self,
402
+ *,
403
+ curpus_id: str,
404
+ query: str,
405
+ scope: Scope | None = None,
406
+ scope_id: str | None = None, # e.g. memory_scope_id if tied to memory, can be None
407
+ k: int = 8,
408
+ mode: str = "hybrid",
409
+ ) -> list[SearchHit]:
410
+ """
411
+ Convenience wrapper to search with scope-based filters.
412
+ Args:
413
+ curpus_id: Target corpus identifier.
414
+ query: The search query string.
415
+ scope: Scope object for filtering.
416
+ k: Number of top results to return.
417
+ mode: Search mode - "dense", "hybrid".
418
+ """
419
+ filters: dict[str, Any] | None = None
420
+ if scope is not None:
421
+ # build filters from scope labels
422
+ filters = scope.rag_filter(scope_id=scope_id) # scope_id is optional
423
+
424
+ return await self.search(
425
+ curpus_id,
426
+ query,
427
+ k=k,
428
+ filters=filters,
429
+ mode=mode,
430
+ )
431
+
313
432
  async def retrieve(
314
433
  self, corpus_id: str, query: str, k: int = 6, rerank: bool = True
315
434
  ) -> list[SearchHit]:
@@ -320,6 +439,9 @@ class RAGFacade:
320
439
  k: Number of top results to return.
321
440
  rerank: Whether to rerank results using hybrid scoring.
322
441
  """
442
+ print(
443
+ f"🍏 RAGFacade.retrieve: corpus_id={corpus_id}, query={query}, k={k}, rerank={rerank}"
444
+ )
323
445
  # For now, rerank flag is ignored; fused hybrid already sorts reasonably.
324
446
  return await self.search(corpus_id, query, k=k, mode="hybrid")
325
447
 
@@ -332,6 +454,8 @@ class RAGFacade:
332
454
  style: str = "concise",
333
455
  with_citations: bool = True,
334
456
  k: int = 6,
457
+ scope: Scope | None = None,
458
+ scope_id: str | None = None, # e.g. memory_scope_id if tied to memory, can be None
335
459
  ) -> dict[str, Any]:
336
460
  """Answer a question using retrieved context from the corpus.
337
461
  Args:
@@ -346,7 +470,19 @@ class RAGFacade:
346
470
  # use default LLM client
347
471
  llm = self.llm
348
472
 
349
- hits = await self.retrieve(corpus_id, question, k=k, rerank=True)
473
+ filters: dict[str, Any] | None = None
474
+ if scope is not None:
475
+ # build filters from scope labels
476
+ filters = scope.rag_filter(scope_id=scope_id) # scope_id is optional
477
+
478
+ hits = await self.search(
479
+ corpus_id,
480
+ question,
481
+ k=k,
482
+ filters=filters,
483
+ mode="hybrid",
484
+ )
485
+
350
486
  context = "\n\n".join([f"[{i + 1}] {h.text}" for i, h in enumerate(hits)])
351
487
  sys = "You answer strictly from the provided context. Cite chunk numbers like [1],[2]. If insufficient, say you don't know."
352
488
  if style == "detailed":
@@ -406,6 +542,26 @@ class RAGFacade:
406
542
  return out
407
543
 
408
544
  async def list_corpora(self) -> list[dict]:
545
+ """
546
+ List all available corpora managed by this RAGFacade.
547
+
548
+ This method scans the corpus root directory, loads metadata for each corpus,
549
+ and returns a list of corpus records with their logical IDs and metadata.
550
+
551
+ Examples:
552
+ Basic usage to enumerate corpora:
553
+ ```python
554
+ corpora = await context.rag().list_corpora()
555
+ for c in corpora:
556
+ print(c["corpus_id"], c["meta"].get("created_at"))
557
+ ```
558
+
559
+ Returns:
560
+ list[dict]: A list of dictionaries, each containing:
561
+
562
+ - "corpus_id": The logical identifier for the corpus.
563
+ - "meta": The metadata dictionary loaded from corpus.json (may be empty).
564
+ """
409
565
  out = []
410
566
  for d in sorted(os.listdir(self.root)):
411
567
  # cdir = self._cdir(d)
@@ -428,6 +584,32 @@ class RAGFacade:
428
584
  async def list_docs(
429
585
  self, corpus_id: str, limit: int = 200, after: str | None = None
430
586
  ) -> list[dict]:
587
+ """
588
+ List documents from a corpus in a paginated fashion.
589
+
590
+ This method reads documents from the `docs.jsonl` file associated with the given `corpus_id`,
591
+ returning up to `limit` documents after the specified `after` document ID.
592
+ It is typically accessed via `context.rag().list_docs(...)`.
593
+
594
+ Examples:
595
+ Basic usage to list the first 100 documents:
596
+ ```python
597
+ docs = await context.rag().list_docs("my-corpus", limit=100)
598
+ ```
599
+
600
+ Paginating after a specific document:
601
+ ```python
602
+ docs = await context.rag().list_docs("my-corpus", after="doc_123")
603
+ ```
604
+
605
+ Args:
606
+ corpus_id: The unique identifier for the corpus whose documents are to be listed.
607
+ limit: The maximum number of documents to return (default: 200).
608
+ after: If provided, only documents after this document ID will be returned.
609
+
610
+ Returns:
611
+ list[dict]: A list of document objects, each represented as a dictionary.
612
+ """
431
613
  cdir = self._cdir(corpus_id)
432
614
  docs_jl = os.path.join(cdir, "docs.jsonl")
433
615
  if not os.path.exists(docs_jl):
@@ -450,7 +632,32 @@ class RAGFacade:
450
632
 
451
633
  async def delete_docs(self, corpus_id: str, doc_ids: list[str]) -> dict:
452
634
  """
453
- Removes docs from docs.jsonl and any chunks in chunks.jsonl; asks the index to drop vectors if supported.
635
+ Remove one or more documents and their associated chunks from a corpus.
636
+
637
+ This method deletes all records for the specified `doc_ids` from both the `docs.jsonl`
638
+ and `chunks.jsonl` files within the given corpus. It also instructs the vector index
639
+ backend to remove any vectors associated with the deleted chunks, if supported.
640
+
641
+ Examples:
642
+ Basic usage to delete a single document:
643
+ ```python
644
+ await context.rag().delete_docs("my-corpus", ["doc_123"])
645
+ ```
646
+
647
+ Deleting multiple documents at once:
648
+ ```python
649
+ await context.rag().delete_docs("my-corpus", ["doc_1", "doc_2", "doc_3"])
650
+ ```
651
+
652
+ Args:
653
+ corpus_id: The unique identifier for the corpus from which documents will be removed.
654
+ doc_ids: A list of document IDs to delete. All chunks belonging to these documents
655
+ will also be removed.
656
+
657
+ Returns:
658
+ dict: A dictionary containing:
659
+ - "removed_docs": The number of documents removed.
660
+ - "removed_chunks": The number of chunks removed from the index and storage.
454
661
  """
455
662
  cdir = self._cdir(corpus_id)
456
663
  docs_jl = os.path.join(cdir, "docs.jsonl")
@@ -495,7 +702,32 @@ class RAGFacade:
495
702
  self, corpus_id: str, *, doc_ids: list[str] | None = None, batch: int = 64
496
703
  ) -> dict:
497
704
  """
498
- Re-embeds selected docs (or all) and re-adds vectors. Uses the configured embed client or a model override if your client supports it.
705
+ Re-embed vectors for selected documents (or all) in a corpus.
706
+
707
+ This method re-computes embeddings for all chunks belonging to the specified `doc_ids`
708
+ (or for all documents if `doc_ids` is None) and updates the vector index accordingly.
709
+ It uses the currently configured embedding client and can be accessed via `context.rag().reembed(...)`.
710
+
711
+ Examples:
712
+ Re-embed all documents in a corpus:
713
+ ```python
714
+ await context.rag().reembed("my-corpus")
715
+ ```
716
+
717
+ Re-embed only specific documents:
718
+ ```python
719
+ await context.rag().reembed("my-corpus", doc_ids=["doc_123", "doc_456"])
720
+ ```
721
+
722
+ Args:
723
+ corpus_id: The unique identifier for the corpus whose vectors will be re-embedded.
724
+ doc_ids: Optional list of document IDs to re-embed. If None, all documents are processed.
725
+ batch: The number of chunks to embed per batch (default: 64).
726
+
727
+ Returns:
728
+ dict: A dictionary containing:
729
+ - "reembedded": The number of chunks re-embedded.
730
+ - "model": The embedding model used (if available).
499
731
  """
500
732
  cdir = self._cdir(corpus_id)
501
733
  chunks_jl = os.path.join(cdir, "chunks.jsonl")
@@ -526,6 +758,30 @@ class RAGFacade:
526
758
  return {"reembedded": added, "model": getattr(embed, "embed_model", None)}
527
759
 
528
760
  async def stats(self, corpus_id: str) -> dict:
761
+ """
762
+ Retrieve summary statistics for a given corpus.
763
+
764
+ This method counts the number of documents and chunks in the specified corpus,
765
+ and loads the associated corpus metadata. It is typically accessed via
766
+ `context.rag().stats(...)`.
767
+
768
+ Examples:
769
+ Basic usage to get corpus statistics:
770
+ ```python
771
+ stats = await context.rag().stats("my-corpus")
772
+ print(stats["docs"], stats["chunks"])
773
+ ```
774
+
775
+ Args:
776
+ corpus_id: The unique identifier for the corpus whose statistics are to be retrieved.
777
+
778
+ Returns:
779
+ dict: A dictionary containing:
780
+ - "corpus_id": The logical identifier for the corpus.
781
+ - "docs": The number of documents in the corpus.
782
+ - "chunks": The number of text chunks in the corpus.
783
+ - "meta": The metadata dictionary loaded from corpus.json (may be empty).
784
+ """
529
785
  cdir = self._cdir(corpus_id)
530
786
  docs_jl = os.path.join(cdir, "docs.jsonl")
531
787
  chunks_jl = os.path.join(cdir, "chunks.jsonl")
@@ -29,7 +29,7 @@ def create_vector_index(
29
29
  # try FAISS, fallback to sqlite with a warning
30
30
  try:
31
31
  require("faiss", "faiss") # faiss-cpu exposes module 'faiss'
32
- from .index.faiss_index import FAISSVectorIndex
32
+ from aethergraph.storage.vector_index.faiss_index import FAISSVectorIndex
33
33
 
34
34
  path = (
35
35
  str(Path(index_path) / "faiss")
@@ -42,7 +42,7 @@ def create_vector_index(
42
42
  backend = "sqlite"
43
43
 
44
44
  # sqlite (default)
45
- from .index.sqlite_index import SQLiteVectorIndex
45
+ from aethergraph.storage.vector_index.sqlite_index import SQLiteVectorIndex
46
46
 
47
47
  path = (
48
48
  str(Path(index_path) / "sqlite")