aethergraph 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__init__.py +4 -10
- aethergraph/__main__.py +293 -0
- aethergraph/api/v1/__init__.py +0 -0
- aethergraph/api/v1/agents.py +46 -0
- aethergraph/api/v1/apps.py +70 -0
- aethergraph/api/v1/artifacts.py +415 -0
- aethergraph/api/v1/channels.py +89 -0
- aethergraph/api/v1/deps.py +168 -0
- aethergraph/api/v1/graphs.py +259 -0
- aethergraph/api/v1/identity.py +25 -0
- aethergraph/api/v1/memory.py +353 -0
- aethergraph/api/v1/misc.py +47 -0
- aethergraph/api/v1/pagination.py +29 -0
- aethergraph/api/v1/runs.py +568 -0
- aethergraph/api/v1/schemas.py +535 -0
- aethergraph/api/v1/session.py +323 -0
- aethergraph/api/v1/stats.py +201 -0
- aethergraph/api/v1/viz.py +152 -0
- aethergraph/config/config.py +22 -0
- aethergraph/config/loader.py +3 -2
- aethergraph/config/storage.py +209 -0
- aethergraph/contracts/__init__.py +0 -0
- aethergraph/contracts/services/__init__.py +0 -0
- aethergraph/contracts/services/artifacts.py +27 -14
- aethergraph/contracts/services/memory.py +45 -17
- aethergraph/contracts/services/metering.py +129 -0
- aethergraph/contracts/services/runs.py +50 -0
- aethergraph/contracts/services/sessions.py +87 -0
- aethergraph/contracts/services/state_stores.py +3 -0
- aethergraph/contracts/services/viz.py +44 -0
- aethergraph/contracts/storage/artifact_index.py +88 -0
- aethergraph/contracts/storage/artifact_store.py +99 -0
- aethergraph/contracts/storage/async_kv.py +34 -0
- aethergraph/contracts/storage/blob_store.py +50 -0
- aethergraph/contracts/storage/doc_store.py +35 -0
- aethergraph/contracts/storage/event_log.py +31 -0
- aethergraph/contracts/storage/vector_index.py +48 -0
- aethergraph/core/__init__.py +0 -0
- aethergraph/core/execution/forward_scheduler.py +13 -2
- aethergraph/core/execution/global_scheduler.py +21 -15
- aethergraph/core/execution/step_forward.py +10 -1
- aethergraph/core/graph/__init__.py +0 -0
- aethergraph/core/graph/graph_builder.py +8 -4
- aethergraph/core/graph/graph_fn.py +156 -15
- aethergraph/core/graph/graph_spec.py +8 -0
- aethergraph/core/graph/graphify.py +146 -27
- aethergraph/core/graph/node_spec.py +0 -2
- aethergraph/core/graph/node_state.py +3 -0
- aethergraph/core/graph/task_graph.py +39 -1
- aethergraph/core/runtime/__init__.py +0 -0
- aethergraph/core/runtime/ad_hoc_context.py +64 -4
- aethergraph/core/runtime/base_service.py +28 -4
- aethergraph/core/runtime/execution_context.py +13 -15
- aethergraph/core/runtime/graph_runner.py +222 -37
- aethergraph/core/runtime/node_context.py +510 -6
- aethergraph/core/runtime/node_services.py +12 -5
- aethergraph/core/runtime/recovery.py +15 -1
- aethergraph/core/runtime/run_manager.py +783 -0
- aethergraph/core/runtime/run_manager_local.py +204 -0
- aethergraph/core/runtime/run_registration.py +2 -2
- aethergraph/core/runtime/run_types.py +89 -0
- aethergraph/core/runtime/runtime_env.py +136 -7
- aethergraph/core/runtime/runtime_metering.py +71 -0
- aethergraph/core/runtime/runtime_registry.py +36 -13
- aethergraph/core/runtime/runtime_services.py +194 -6
- aethergraph/core/tools/builtins/toolset.py +1 -1
- aethergraph/core/tools/toolkit.py +5 -0
- aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
- aethergraph/plugins/agents/default_chat_agent.py +171 -0
- aethergraph/plugins/agents/shared.py +81 -0
- aethergraph/plugins/channel/adapters/webui.py +112 -112
- aethergraph/plugins/channel/routes/webui_routes.py +367 -102
- aethergraph/plugins/channel/utils/slack_utils.py +115 -59
- aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
- aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
- aethergraph/runtime/__init__.py +15 -0
- aethergraph/server/app_factory.py +190 -34
- aethergraph/server/clients/channel_client.py +202 -0
- aethergraph/server/http/channel_http_routes.py +116 -0
- aethergraph/server/http/channel_ws_routers.py +45 -0
- aethergraph/server/loading.py +117 -0
- aethergraph/server/server.py +131 -0
- aethergraph/server/server_state.py +240 -0
- aethergraph/server/start.py +227 -66
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
- aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
- aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
- aethergraph/server/ui_static/index.html +15 -0
- aethergraph/server/ui_static/logo.png +0 -0
- aethergraph/services/artifacts/__init__.py +0 -0
- aethergraph/services/artifacts/facade.py +1239 -132
- aethergraph/services/auth/{dev.py → authn.py} +0 -8
- aethergraph/services/auth/authz.py +100 -0
- aethergraph/services/channel/__init__.py +0 -0
- aethergraph/services/channel/channel_bus.py +19 -1
- aethergraph/services/channel/factory.py +13 -1
- aethergraph/services/channel/ingress.py +311 -0
- aethergraph/services/channel/queue_adapter.py +75 -0
- aethergraph/services/channel/session.py +502 -19
- aethergraph/services/container/default_container.py +122 -43
- aethergraph/services/continuations/continuation.py +6 -0
- aethergraph/services/continuations/stores/fs_store.py +19 -0
- aethergraph/services/eventhub/event_hub.py +76 -0
- aethergraph/services/kv/__init__.py +0 -0
- aethergraph/services/kv/ephemeral.py +244 -0
- aethergraph/services/llm/__init__.py +0 -0
- aethergraph/services/llm/generic_client copy.py +691 -0
- aethergraph/services/llm/generic_client.py +1288 -187
- aethergraph/services/llm/providers.py +3 -1
- aethergraph/services/llm/types.py +47 -0
- aethergraph/services/llm/utils.py +284 -0
- aethergraph/services/logger/std.py +3 -0
- aethergraph/services/mcp/__init__.py +9 -0
- aethergraph/services/mcp/http_client.py +38 -0
- aethergraph/services/mcp/service.py +225 -1
- aethergraph/services/mcp/stdio_client.py +41 -6
- aethergraph/services/mcp/ws_client.py +44 -2
- aethergraph/services/memory/__init__.py +0 -0
- aethergraph/services/memory/distillers/llm_long_term.py +234 -0
- aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
- aethergraph/services/memory/distillers/long_term.py +225 -0
- aethergraph/services/memory/facade/__init__.py +3 -0
- aethergraph/services/memory/facade/chat.py +440 -0
- aethergraph/services/memory/facade/core.py +447 -0
- aethergraph/services/memory/facade/distillation.py +424 -0
- aethergraph/services/memory/facade/rag.py +410 -0
- aethergraph/services/memory/facade/results.py +315 -0
- aethergraph/services/memory/facade/retrieval.py +139 -0
- aethergraph/services/memory/facade/types.py +77 -0
- aethergraph/services/memory/facade/utils.py +43 -0
- aethergraph/services/memory/facade_dep.py +1539 -0
- aethergraph/services/memory/factory.py +9 -3
- aethergraph/services/memory/utils.py +10 -0
- aethergraph/services/metering/eventlog_metering.py +470 -0
- aethergraph/services/metering/noop.py +25 -4
- aethergraph/services/rag/__init__.py +0 -0
- aethergraph/services/rag/facade.py +279 -23
- aethergraph/services/rag/index_factory.py +2 -2
- aethergraph/services/rag/node_rag.py +317 -0
- aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
- aethergraph/services/registry/__init__.py +0 -0
- aethergraph/services/registry/agent_app_meta.py +419 -0
- aethergraph/services/registry/registry_key.py +1 -1
- aethergraph/services/registry/unified_registry.py +74 -6
- aethergraph/services/scope/scope.py +159 -0
- aethergraph/services/scope/scope_factory.py +164 -0
- aethergraph/services/state_stores/serialize.py +5 -0
- aethergraph/services/state_stores/utils.py +2 -1
- aethergraph/services/viz/__init__.py +0 -0
- aethergraph/services/viz/facade.py +413 -0
- aethergraph/services/viz/viz_service.py +69 -0
- aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
- aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
- aethergraph/storage/artifacts/cas_store.py +422 -0
- aethergraph/storage/artifacts/fs_cas.py +18 -0
- aethergraph/storage/artifacts/s3_cas.py +14 -0
- aethergraph/storage/artifacts/utils.py +124 -0
- aethergraph/storage/blob/fs_blob.py +86 -0
- aethergraph/storage/blob/s3_blob.py +115 -0
- aethergraph/storage/continuation_store/fs_cont.py +283 -0
- aethergraph/storage/continuation_store/inmem_cont.py +146 -0
- aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
- aethergraph/storage/docstore/fs_doc.py +63 -0
- aethergraph/storage/docstore/sqlite_doc.py +31 -0
- aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
- aethergraph/storage/eventlog/fs_event.py +136 -0
- aethergraph/storage/eventlog/sqlite_event.py +47 -0
- aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
- aethergraph/storage/factory.py +432 -0
- aethergraph/storage/fs_utils.py +28 -0
- aethergraph/storage/graph_state_store/state_store.py +64 -0
- aethergraph/storage/kv/inmem_kv.py +103 -0
- aethergraph/storage/kv/layered_kv.py +52 -0
- aethergraph/storage/kv/sqlite_kv.py +39 -0
- aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
- aethergraph/storage/memory/event_persist.py +68 -0
- aethergraph/storage/memory/fs_persist.py +118 -0
- aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
- aethergraph/{services → storage}/memory/indices.py +31 -7
- aethergraph/storage/metering/meter_event.py +55 -0
- aethergraph/storage/runs/doc_store.py +280 -0
- aethergraph/storage/runs/inmen_store.py +82 -0
- aethergraph/storage/runs/sqlite_run_store.py +403 -0
- aethergraph/storage/sessions/doc_store.py +183 -0
- aethergraph/storage/sessions/inmem_store.py +110 -0
- aethergraph/storage/sessions/sqlite_session_store.py +399 -0
- aethergraph/storage/vector_index/chroma_index.py +138 -0
- aethergraph/storage/vector_index/faiss_index.py +179 -0
- aethergraph/storage/vector_index/sqlite_index.py +187 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/METADATA +138 -31
- aethergraph-0.1.0a2.dist-info/RECORD +356 -0
- aethergraph-0.1.0a2.dist-info/entry_points.txt +3 -0
- aethergraph/services/artifacts/factory.py +0 -35
- aethergraph/services/artifacts/fs_store.py +0 -656
- aethergraph/services/artifacts/jsonl_index.py +0 -123
- aethergraph/services/artifacts/sqlite_index.py +0 -209
- aethergraph/services/memory/distillers/episode.py +0 -116
- aethergraph/services/memory/distillers/rolling.py +0 -74
- aethergraph/services/memory/facade.py +0 -633
- aethergraph/services/memory/persist_fs.py +0 -40
- aethergraph/services/rag/index/base.py +0 -27
- aethergraph/services/rag/index/faiss_index.py +0 -121
- aethergraph/services/rag/index/sqlite_index.py +0 -134
- aethergraph-0.1.0a1.dist-info/RECORD +0 -182
- aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/WHEEL +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/NOTICE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import asyncio
|
|
3
4
|
from dataclasses import dataclass
|
|
4
5
|
import hashlib
|
|
5
6
|
import json
|
|
@@ -9,6 +10,7 @@ import time
|
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
11
12
|
from aethergraph.contracts.services.llm import LLMClientProtocol
|
|
13
|
+
from aethergraph.services.scope.scope import Scope
|
|
12
14
|
|
|
13
15
|
from .chunker import TextSplitter
|
|
14
16
|
from .utils.hybrid import topk_fuse
|
|
@@ -94,7 +96,13 @@ class RAGFacade:
|
|
|
94
96
|
return os.path.join(self.root, make_fs_key(corpus_id))
|
|
95
97
|
|
|
96
98
|
# ---------- ingestion ----------
|
|
97
|
-
async def add_corpus(
|
|
99
|
+
async def add_corpus(
|
|
100
|
+
self,
|
|
101
|
+
corpus_id: str,
|
|
102
|
+
meta: dict[str, Any] | None = None,
|
|
103
|
+
*,
|
|
104
|
+
scope_labels: dict[str, str] | None = None,
|
|
105
|
+
):
|
|
98
106
|
"""Create a new corpus with optional metadata.
|
|
99
107
|
Args:
|
|
100
108
|
corpus_id: Unique identifier for the corpus.
|
|
@@ -104,18 +112,27 @@ class RAGFacade:
|
|
|
104
112
|
os.makedirs(p, exist_ok=True)
|
|
105
113
|
meta_path = os.path.join(p, "corpus.json")
|
|
106
114
|
if not os.path.exists(meta_path):
|
|
115
|
+
full_meta = {
|
|
116
|
+
"corpus_id": corpus_id,
|
|
117
|
+
"fs_key": make_fs_key(corpus_id),
|
|
118
|
+
"created_at": _now_iso(),
|
|
119
|
+
"meta": meta or {},
|
|
120
|
+
}
|
|
121
|
+
if scope_labels:
|
|
122
|
+
full_meta.setdefault("meta", {})
|
|
123
|
+
full_meta["meta"]["scope"] = dict(scope_labels)
|
|
124
|
+
|
|
107
125
|
with open(meta_path, "w", encoding="utf-8") as f:
|
|
108
|
-
json.dump(
|
|
109
|
-
{
|
|
110
|
-
"corpus_id": corpus_id,
|
|
111
|
-
"fs_key": make_fs_key(corpus_id), # for reference
|
|
112
|
-
"created_at": _now_iso(),
|
|
113
|
-
"meta": meta or {},
|
|
114
|
-
},
|
|
115
|
-
f,
|
|
116
|
-
)
|
|
126
|
+
json.dump(full_meta, f)
|
|
117
127
|
|
|
118
|
-
async def upsert_docs(
|
|
128
|
+
async def upsert_docs(
|
|
129
|
+
self,
|
|
130
|
+
corpus_id: str,
|
|
131
|
+
docs: list[dict[str, Any]],
|
|
132
|
+
*,
|
|
133
|
+
scope: Scope | None = None,
|
|
134
|
+
scope_id: str | None = None, # e.g. memory_scope_id if tied to memory
|
|
135
|
+
) -> dict[str, Any]:
|
|
119
136
|
"""Ingest and index a list of documents into the specified corpus.
|
|
120
137
|
Args:
|
|
121
138
|
corpus_id: The target corpus identifier.
|
|
@@ -125,10 +142,16 @@ class RAGFacade:
|
|
|
125
142
|
- File-based documents: {"path": "/path/to/doc.pdf", "labels": {...}}
|
|
126
143
|
- Inline text documents: {"text": "Document content...", "title": "Doc Title", "labels": {...}}
|
|
127
144
|
"""
|
|
145
|
+
|
|
128
146
|
if not self.embed:
|
|
129
147
|
raise RuntimeError("RAGFacade: embed client not configured")
|
|
130
148
|
|
|
131
|
-
|
|
149
|
+
scope_labels: dict[str, str] = {}
|
|
150
|
+
if scope is not None:
|
|
151
|
+
scope_labels = scope.rag_labels(scope_id=scope_id)
|
|
152
|
+
|
|
153
|
+
await self.add_corpus(corpus_id, meta=None, scope_labels=scope_labels)
|
|
154
|
+
|
|
132
155
|
cdir = self._cdir(corpus_id)
|
|
133
156
|
docs_jl = os.path.join(cdir, "docs.jsonl")
|
|
134
157
|
chunks_jl = os.path.join(cdir, "chunks.jsonl")
|
|
@@ -139,7 +162,8 @@ class RAGFacade:
|
|
|
139
162
|
total_chunks = 0
|
|
140
163
|
|
|
141
164
|
for d in docs:
|
|
142
|
-
labels
|
|
165
|
+
# Merge scope labels into provided labels
|
|
166
|
+
labels = {**scope_labels, **(d.get("labels", {}) or {})}
|
|
143
167
|
title = d.get("title") or os.path.basename(d.get("path", "")) or "untitled"
|
|
144
168
|
doc_id = _stable_id({"title": title, "labels": labels, "ts": _now_iso()})
|
|
145
169
|
text = None
|
|
@@ -150,13 +174,13 @@ class RAGFacade:
|
|
|
150
174
|
uri = await self.artifacts.save_file(
|
|
151
175
|
path=d["path"],
|
|
152
176
|
kind="doc",
|
|
153
|
-
run_id="rag",
|
|
154
|
-
graph_id="rag",
|
|
155
|
-
node_id="rag",
|
|
177
|
+
run_id=scope.run_id if scope else "rag",
|
|
178
|
+
graph_id=scope.graph_id if scope else "rag",
|
|
179
|
+
node_id=scope.node_id if scope else "rag",
|
|
156
180
|
tool_name="rag.upsert",
|
|
157
181
|
tool_version="0.1.0",
|
|
158
182
|
labels=labels,
|
|
159
|
-
cleanup=False,
|
|
183
|
+
cleanup=False, # keep source file as this is the original
|
|
160
184
|
)
|
|
161
185
|
path = d["path"].lower()
|
|
162
186
|
if path.endswith(".pdf"):
|
|
@@ -175,8 +199,29 @@ class RAGFacade:
|
|
|
175
199
|
else:
|
|
176
200
|
# inline text doc — persist as artifact first
|
|
177
201
|
payload = d.get("text", "")
|
|
178
|
-
|
|
179
|
-
|
|
202
|
+
|
|
203
|
+
# stage and save:
|
|
204
|
+
staged = await self.artifacts.plan_staging_path(".txt")
|
|
205
|
+
payload = d.get("text", "")
|
|
206
|
+
|
|
207
|
+
def _write_staged(path: str, content: str) -> None:
|
|
208
|
+
with open(path, "w", encoding="utf-8") as f:
|
|
209
|
+
f.write(content)
|
|
210
|
+
|
|
211
|
+
await asyncio.to_thread(_write_staged, staged, payload)
|
|
212
|
+
|
|
213
|
+
a = await self.artifacts.save_file(
|
|
214
|
+
path=staged,
|
|
215
|
+
kind="doc",
|
|
216
|
+
run_id=scope.run_id if scope else "rag",
|
|
217
|
+
graph_id=scope.graph_id if scope else "rag",
|
|
218
|
+
node_id=scope.node_id if scope else "rag",
|
|
219
|
+
tool_name="rag.upsert",
|
|
220
|
+
tool_version="0.1.0",
|
|
221
|
+
labels=labels,
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
doc_uri = a.uri if hasattr(a, "uri") else a
|
|
180
225
|
text = payload
|
|
181
226
|
|
|
182
227
|
text = (text or "").strip()
|
|
@@ -185,7 +230,7 @@ class RAGFacade:
|
|
|
185
230
|
self.logger.warning(f"RAG: empty text for doc {title}")
|
|
186
231
|
continue
|
|
187
232
|
|
|
188
|
-
# write doc record
|
|
233
|
+
# write doc record with labels including scope
|
|
189
234
|
with open(docs_jl, "a", encoding="utf-8") as f:
|
|
190
235
|
f.write(
|
|
191
236
|
json.dumps(
|
|
@@ -207,6 +252,7 @@ class RAGFacade:
|
|
|
207
252
|
chunks = self.chunker.split(text)
|
|
208
253
|
if not chunks:
|
|
209
254
|
continue
|
|
255
|
+
|
|
210
256
|
# batch embed
|
|
211
257
|
vecs = await self.embed.embed(chunks)
|
|
212
258
|
for i, (chunk_text, vec) in enumerate(zip(chunks, vecs, strict=True)):
|
|
@@ -255,6 +301,40 @@ class RAGFacade:
|
|
|
255
301
|
out[obj["chunk_id"]] = obj
|
|
256
302
|
return out
|
|
257
303
|
|
|
304
|
+
def _apply_filters(
|
|
305
|
+
self,
|
|
306
|
+
corpus_id: str,
|
|
307
|
+
hits: list[dict[str, Any]],
|
|
308
|
+
filters: dict[str, Any] | None = None,
|
|
309
|
+
) -> list[dict[str, Any]]:
|
|
310
|
+
"""Apply filters to the search hits."""
|
|
311
|
+
if not filters:
|
|
312
|
+
return hits
|
|
313
|
+
|
|
314
|
+
# We need labels to test filters. They are in meta["labels"] for each chunk.
|
|
315
|
+
# hits come from index.search as [{"chunk_id", "score", "meta": {...}}, ...].
|
|
316
|
+
# It works as follows:
|
|
317
|
+
# 1. For each hit, we extract the labels from the meta information.
|
|
318
|
+
# 2. We then check if the labels match the desired filters.
|
|
319
|
+
out = []
|
|
320
|
+
for h in hits:
|
|
321
|
+
meta = h.get("meta", {}) or {}
|
|
322
|
+
labels = meta.get("labels", {}) or {}
|
|
323
|
+
ok = True
|
|
324
|
+
for k, want in filters.items():
|
|
325
|
+
val = labels.get(k)
|
|
326
|
+
if isinstance(want, list | tuple | set):
|
|
327
|
+
if val not in want:
|
|
328
|
+
ok = False
|
|
329
|
+
break
|
|
330
|
+
else:
|
|
331
|
+
if val != want:
|
|
332
|
+
ok = False
|
|
333
|
+
break
|
|
334
|
+
if ok:
|
|
335
|
+
out.append(h)
|
|
336
|
+
return out
|
|
337
|
+
|
|
258
338
|
async def search(
|
|
259
339
|
self,
|
|
260
340
|
corpus_id: str,
|
|
@@ -277,7 +357,13 @@ class RAGFacade:
|
|
|
277
357
|
# dense search via index then optional lexical fusion
|
|
278
358
|
qvec = (await self.embed.embed([query]))[0]
|
|
279
359
|
dense_hits = await self.index.search(corpus_id, qvec, max(24, k))
|
|
360
|
+
|
|
361
|
+
# apply filters before fusion
|
|
362
|
+
dense_hits = self._apply_filters(corpus_id, dense_hits, filters=filters)
|
|
363
|
+
|
|
280
364
|
chunks_map = self._load_chunks_map(corpus_id)
|
|
365
|
+
|
|
366
|
+
# if only dense or no hits, return directly
|
|
281
367
|
if mode == "dense" or not dense_hits:
|
|
282
368
|
dense_hits = dense_hits[:k]
|
|
283
369
|
return [
|
|
@@ -292,6 +378,7 @@ class RAGFacade:
|
|
|
292
378
|
for h in dense_hits
|
|
293
379
|
]
|
|
294
380
|
|
|
381
|
+
# hybrid fusion: i.e. dense + lexical
|
|
295
382
|
fused = topk_fuse(
|
|
296
383
|
query, dense_hits, {cid: rec.get("text", "") for cid, rec in chunks_map.items()}, k
|
|
297
384
|
)
|
|
@@ -310,6 +397,38 @@ class RAGFacade:
|
|
|
310
397
|
)
|
|
311
398
|
return out
|
|
312
399
|
|
|
400
|
+
async def search_scoped(
|
|
401
|
+
self,
|
|
402
|
+
*,
|
|
403
|
+
curpus_id: str,
|
|
404
|
+
query: str,
|
|
405
|
+
scope: Scope | None = None,
|
|
406
|
+
scope_id: str | None = None, # e.g. memory_scope_id if tied to memory, can be None
|
|
407
|
+
k: int = 8,
|
|
408
|
+
mode: str = "hybrid",
|
|
409
|
+
) -> list[SearchHit]:
|
|
410
|
+
"""
|
|
411
|
+
Convenience wrapper to search with scope-based filters.
|
|
412
|
+
Args:
|
|
413
|
+
curpus_id: Target corpus identifier.
|
|
414
|
+
query: The search query string.
|
|
415
|
+
scope: Scope object for filtering.
|
|
416
|
+
k: Number of top results to return.
|
|
417
|
+
mode: Search mode - "dense", "hybrid".
|
|
418
|
+
"""
|
|
419
|
+
filters: dict[str, Any] | None = None
|
|
420
|
+
if scope is not None:
|
|
421
|
+
# build filters from scope labels
|
|
422
|
+
filters = scope.rag_filter(scope_id=scope_id) # scope_id is optional
|
|
423
|
+
|
|
424
|
+
return await self.search(
|
|
425
|
+
curpus_id,
|
|
426
|
+
query,
|
|
427
|
+
k=k,
|
|
428
|
+
filters=filters,
|
|
429
|
+
mode=mode,
|
|
430
|
+
)
|
|
431
|
+
|
|
313
432
|
async def retrieve(
|
|
314
433
|
self, corpus_id: str, query: str, k: int = 6, rerank: bool = True
|
|
315
434
|
) -> list[SearchHit]:
|
|
@@ -320,6 +439,9 @@ class RAGFacade:
|
|
|
320
439
|
k: Number of top results to return.
|
|
321
440
|
rerank: Whether to rerank results using hybrid scoring.
|
|
322
441
|
"""
|
|
442
|
+
print(
|
|
443
|
+
f"🍏 RAGFacade.retrieve: corpus_id={corpus_id}, query={query}, k={k}, rerank={rerank}"
|
|
444
|
+
)
|
|
323
445
|
# For now, rerank flag is ignored; fused hybrid already sorts reasonably.
|
|
324
446
|
return await self.search(corpus_id, query, k=k, mode="hybrid")
|
|
325
447
|
|
|
@@ -332,6 +454,8 @@ class RAGFacade:
|
|
|
332
454
|
style: str = "concise",
|
|
333
455
|
with_citations: bool = True,
|
|
334
456
|
k: int = 6,
|
|
457
|
+
scope: Scope | None = None,
|
|
458
|
+
scope_id: str | None = None, # e.g. memory_scope_id if tied to memory, can be None
|
|
335
459
|
) -> dict[str, Any]:
|
|
336
460
|
"""Answer a question using retrieved context from the corpus.
|
|
337
461
|
Args:
|
|
@@ -346,7 +470,19 @@ class RAGFacade:
|
|
|
346
470
|
# use default LLM client
|
|
347
471
|
llm = self.llm
|
|
348
472
|
|
|
349
|
-
|
|
473
|
+
filters: dict[str, Any] | None = None
|
|
474
|
+
if scope is not None:
|
|
475
|
+
# build filters from scope labels
|
|
476
|
+
filters = scope.rag_filter(scope_id=scope_id) # scope_id is optional
|
|
477
|
+
|
|
478
|
+
hits = await self.search(
|
|
479
|
+
corpus_id,
|
|
480
|
+
question,
|
|
481
|
+
k=k,
|
|
482
|
+
filters=filters,
|
|
483
|
+
mode="hybrid",
|
|
484
|
+
)
|
|
485
|
+
|
|
350
486
|
context = "\n\n".join([f"[{i + 1}] {h.text}" for i, h in enumerate(hits)])
|
|
351
487
|
sys = "You answer strictly from the provided context. Cite chunk numbers like [1],[2]. If insufficient, say you don't know."
|
|
352
488
|
if style == "detailed":
|
|
@@ -406,6 +542,26 @@ class RAGFacade:
|
|
|
406
542
|
return out
|
|
407
543
|
|
|
408
544
|
async def list_corpora(self) -> list[dict]:
|
|
545
|
+
"""
|
|
546
|
+
List all available corpora managed by this RAGFacade.
|
|
547
|
+
|
|
548
|
+
This method scans the corpus root directory, loads metadata for each corpus,
|
|
549
|
+
and returns a list of corpus records with their logical IDs and metadata.
|
|
550
|
+
|
|
551
|
+
Examples:
|
|
552
|
+
Basic usage to enumerate corpora:
|
|
553
|
+
```python
|
|
554
|
+
corpora = await context.rag().list_corpora()
|
|
555
|
+
for c in corpora:
|
|
556
|
+
print(c["corpus_id"], c["meta"].get("created_at"))
|
|
557
|
+
```
|
|
558
|
+
|
|
559
|
+
Returns:
|
|
560
|
+
list[dict]: A list of dictionaries, each containing:
|
|
561
|
+
|
|
562
|
+
- "corpus_id": The logical identifier for the corpus.
|
|
563
|
+
- "meta": The metadata dictionary loaded from corpus.json (may be empty).
|
|
564
|
+
"""
|
|
409
565
|
out = []
|
|
410
566
|
for d in sorted(os.listdir(self.root)):
|
|
411
567
|
# cdir = self._cdir(d)
|
|
@@ -428,6 +584,32 @@ class RAGFacade:
|
|
|
428
584
|
async def list_docs(
|
|
429
585
|
self, corpus_id: str, limit: int = 200, after: str | None = None
|
|
430
586
|
) -> list[dict]:
|
|
587
|
+
"""
|
|
588
|
+
List documents from a corpus in a paginated fashion.
|
|
589
|
+
|
|
590
|
+
This method reads documents from the `docs.jsonl` file associated with the given `corpus_id`,
|
|
591
|
+
returning up to `limit` documents after the specified `after` document ID.
|
|
592
|
+
It is typically accessed via `context.rag().list_docs(...)`.
|
|
593
|
+
|
|
594
|
+
Examples:
|
|
595
|
+
Basic usage to list the first 100 documents:
|
|
596
|
+
```python
|
|
597
|
+
docs = await context.rag().list_docs("my-corpus", limit=100)
|
|
598
|
+
```
|
|
599
|
+
|
|
600
|
+
Paginating after a specific document:
|
|
601
|
+
```python
|
|
602
|
+
docs = await context.rag().list_docs("my-corpus", after="doc_123")
|
|
603
|
+
```
|
|
604
|
+
|
|
605
|
+
Args:
|
|
606
|
+
corpus_id: The unique identifier for the corpus whose documents are to be listed.
|
|
607
|
+
limit: The maximum number of documents to return (default: 200).
|
|
608
|
+
after: If provided, only documents after this document ID will be returned.
|
|
609
|
+
|
|
610
|
+
Returns:
|
|
611
|
+
list[dict]: A list of document objects, each represented as a dictionary.
|
|
612
|
+
"""
|
|
431
613
|
cdir = self._cdir(corpus_id)
|
|
432
614
|
docs_jl = os.path.join(cdir, "docs.jsonl")
|
|
433
615
|
if not os.path.exists(docs_jl):
|
|
@@ -450,7 +632,32 @@ class RAGFacade:
|
|
|
450
632
|
|
|
451
633
|
async def delete_docs(self, corpus_id: str, doc_ids: list[str]) -> dict:
|
|
452
634
|
"""
|
|
453
|
-
|
|
635
|
+
Remove one or more documents and their associated chunks from a corpus.
|
|
636
|
+
|
|
637
|
+
This method deletes all records for the specified `doc_ids` from both the `docs.jsonl`
|
|
638
|
+
and `chunks.jsonl` files within the given corpus. It also instructs the vector index
|
|
639
|
+
backend to remove any vectors associated with the deleted chunks, if supported.
|
|
640
|
+
|
|
641
|
+
Examples:
|
|
642
|
+
Basic usage to delete a single document:
|
|
643
|
+
```python
|
|
644
|
+
await context.rag().delete_docs("my-corpus", ["doc_123"])
|
|
645
|
+
```
|
|
646
|
+
|
|
647
|
+
Deleting multiple documents at once:
|
|
648
|
+
```python
|
|
649
|
+
await context.rag().delete_docs("my-corpus", ["doc_1", "doc_2", "doc_3"])
|
|
650
|
+
```
|
|
651
|
+
|
|
652
|
+
Args:
|
|
653
|
+
corpus_id: The unique identifier for the corpus from which documents will be removed.
|
|
654
|
+
doc_ids: A list of document IDs to delete. All chunks belonging to these documents
|
|
655
|
+
will also be removed.
|
|
656
|
+
|
|
657
|
+
Returns:
|
|
658
|
+
dict: A dictionary containing:
|
|
659
|
+
- "removed_docs": The number of documents removed.
|
|
660
|
+
- "removed_chunks": The number of chunks removed from the index and storage.
|
|
454
661
|
"""
|
|
455
662
|
cdir = self._cdir(corpus_id)
|
|
456
663
|
docs_jl = os.path.join(cdir, "docs.jsonl")
|
|
@@ -495,7 +702,32 @@ class RAGFacade:
|
|
|
495
702
|
self, corpus_id: str, *, doc_ids: list[str] | None = None, batch: int = 64
|
|
496
703
|
) -> dict:
|
|
497
704
|
"""
|
|
498
|
-
Re-
|
|
705
|
+
Re-embed vectors for selected documents (or all) in a corpus.
|
|
706
|
+
|
|
707
|
+
This method re-computes embeddings for all chunks belonging to the specified `doc_ids`
|
|
708
|
+
(or for all documents if `doc_ids` is None) and updates the vector index accordingly.
|
|
709
|
+
It uses the currently configured embedding client and can be accessed via `context.rag().reembed(...)`.
|
|
710
|
+
|
|
711
|
+
Examples:
|
|
712
|
+
Re-embed all documents in a corpus:
|
|
713
|
+
```python
|
|
714
|
+
await context.rag().reembed("my-corpus")
|
|
715
|
+
```
|
|
716
|
+
|
|
717
|
+
Re-embed only specific documents:
|
|
718
|
+
```python
|
|
719
|
+
await context.rag().reembed("my-corpus", doc_ids=["doc_123", "doc_456"])
|
|
720
|
+
```
|
|
721
|
+
|
|
722
|
+
Args:
|
|
723
|
+
corpus_id: The unique identifier for the corpus whose vectors will be re-embedded.
|
|
724
|
+
doc_ids: Optional list of document IDs to re-embed. If None, all documents are processed.
|
|
725
|
+
batch: The number of chunks to embed per batch (default: 64).
|
|
726
|
+
|
|
727
|
+
Returns:
|
|
728
|
+
dict: A dictionary containing:
|
|
729
|
+
- "reembedded": The number of chunks re-embedded.
|
|
730
|
+
- "model": The embedding model used (if available).
|
|
499
731
|
"""
|
|
500
732
|
cdir = self._cdir(corpus_id)
|
|
501
733
|
chunks_jl = os.path.join(cdir, "chunks.jsonl")
|
|
@@ -526,6 +758,30 @@ class RAGFacade:
|
|
|
526
758
|
return {"reembedded": added, "model": getattr(embed, "embed_model", None)}
|
|
527
759
|
|
|
528
760
|
async def stats(self, corpus_id: str) -> dict:
|
|
761
|
+
"""
|
|
762
|
+
Retrieve summary statistics for a given corpus.
|
|
763
|
+
|
|
764
|
+
This method counts the number of documents and chunks in the specified corpus,
|
|
765
|
+
and loads the associated corpus metadata. It is typically accessed via
|
|
766
|
+
`context.rag().stats(...)`.
|
|
767
|
+
|
|
768
|
+
Examples:
|
|
769
|
+
Basic usage to get corpus statistics:
|
|
770
|
+
```python
|
|
771
|
+
stats = await context.rag().stats("my-corpus")
|
|
772
|
+
print(stats["docs"], stats["chunks"])
|
|
773
|
+
```
|
|
774
|
+
|
|
775
|
+
Args:
|
|
776
|
+
corpus_id: The unique identifier for the corpus whose statistics are to be retrieved.
|
|
777
|
+
|
|
778
|
+
Returns:
|
|
779
|
+
dict: A dictionary containing:
|
|
780
|
+
- "corpus_id": The logical identifier for the corpus.
|
|
781
|
+
- "docs": The number of documents in the corpus.
|
|
782
|
+
- "chunks": The number of text chunks in the corpus.
|
|
783
|
+
- "meta": The metadata dictionary loaded from corpus.json (may be empty).
|
|
784
|
+
"""
|
|
529
785
|
cdir = self._cdir(corpus_id)
|
|
530
786
|
docs_jl = os.path.join(cdir, "docs.jsonl")
|
|
531
787
|
chunks_jl = os.path.join(cdir, "chunks.jsonl")
|
|
@@ -29,7 +29,7 @@ def create_vector_index(
|
|
|
29
29
|
# try FAISS, fallback to sqlite with a warning
|
|
30
30
|
try:
|
|
31
31
|
require("faiss", "faiss") # faiss-cpu exposes module 'faiss'
|
|
32
|
-
from .
|
|
32
|
+
from aethergraph.storage.vector_index.faiss_index import FAISSVectorIndex
|
|
33
33
|
|
|
34
34
|
path = (
|
|
35
35
|
str(Path(index_path) / "faiss")
|
|
@@ -42,7 +42,7 @@ def create_vector_index(
|
|
|
42
42
|
backend = "sqlite"
|
|
43
43
|
|
|
44
44
|
# sqlite (default)
|
|
45
|
-
from .
|
|
45
|
+
from aethergraph.storage.vector_index.sqlite_index import SQLiteVectorIndex
|
|
46
46
|
|
|
47
47
|
path = (
|
|
48
48
|
str(Path(index_path) / "sqlite")
|