aethergraph 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. aethergraph/__init__.py +4 -10
  2. aethergraph/__main__.py +293 -0
  3. aethergraph/api/v1/__init__.py +0 -0
  4. aethergraph/api/v1/agents.py +46 -0
  5. aethergraph/api/v1/apps.py +70 -0
  6. aethergraph/api/v1/artifacts.py +415 -0
  7. aethergraph/api/v1/channels.py +89 -0
  8. aethergraph/api/v1/deps.py +168 -0
  9. aethergraph/api/v1/graphs.py +259 -0
  10. aethergraph/api/v1/identity.py +25 -0
  11. aethergraph/api/v1/memory.py +353 -0
  12. aethergraph/api/v1/misc.py +47 -0
  13. aethergraph/api/v1/pagination.py +29 -0
  14. aethergraph/api/v1/runs.py +568 -0
  15. aethergraph/api/v1/schemas.py +535 -0
  16. aethergraph/api/v1/session.py +323 -0
  17. aethergraph/api/v1/stats.py +201 -0
  18. aethergraph/api/v1/viz.py +152 -0
  19. aethergraph/config/config.py +22 -0
  20. aethergraph/config/loader.py +3 -2
  21. aethergraph/config/storage.py +209 -0
  22. aethergraph/contracts/__init__.py +0 -0
  23. aethergraph/contracts/services/__init__.py +0 -0
  24. aethergraph/contracts/services/artifacts.py +27 -14
  25. aethergraph/contracts/services/memory.py +45 -17
  26. aethergraph/contracts/services/metering.py +129 -0
  27. aethergraph/contracts/services/runs.py +50 -0
  28. aethergraph/contracts/services/sessions.py +87 -0
  29. aethergraph/contracts/services/state_stores.py +3 -0
  30. aethergraph/contracts/services/viz.py +44 -0
  31. aethergraph/contracts/storage/artifact_index.py +88 -0
  32. aethergraph/contracts/storage/artifact_store.py +99 -0
  33. aethergraph/contracts/storage/async_kv.py +34 -0
  34. aethergraph/contracts/storage/blob_store.py +50 -0
  35. aethergraph/contracts/storage/doc_store.py +35 -0
  36. aethergraph/contracts/storage/event_log.py +31 -0
  37. aethergraph/contracts/storage/vector_index.py +48 -0
  38. aethergraph/core/__init__.py +0 -0
  39. aethergraph/core/execution/forward_scheduler.py +13 -2
  40. aethergraph/core/execution/global_scheduler.py +21 -15
  41. aethergraph/core/execution/step_forward.py +10 -1
  42. aethergraph/core/graph/__init__.py +0 -0
  43. aethergraph/core/graph/graph_builder.py +8 -4
  44. aethergraph/core/graph/graph_fn.py +156 -15
  45. aethergraph/core/graph/graph_spec.py +8 -0
  46. aethergraph/core/graph/graphify.py +146 -27
  47. aethergraph/core/graph/node_spec.py +0 -2
  48. aethergraph/core/graph/node_state.py +3 -0
  49. aethergraph/core/graph/task_graph.py +39 -1
  50. aethergraph/core/runtime/__init__.py +0 -0
  51. aethergraph/core/runtime/ad_hoc_context.py +64 -4
  52. aethergraph/core/runtime/base_service.py +28 -4
  53. aethergraph/core/runtime/execution_context.py +13 -15
  54. aethergraph/core/runtime/graph_runner.py +222 -37
  55. aethergraph/core/runtime/node_context.py +510 -6
  56. aethergraph/core/runtime/node_services.py +12 -5
  57. aethergraph/core/runtime/recovery.py +15 -1
  58. aethergraph/core/runtime/run_manager.py +783 -0
  59. aethergraph/core/runtime/run_manager_local.py +204 -0
  60. aethergraph/core/runtime/run_registration.py +2 -2
  61. aethergraph/core/runtime/run_types.py +89 -0
  62. aethergraph/core/runtime/runtime_env.py +136 -7
  63. aethergraph/core/runtime/runtime_metering.py +71 -0
  64. aethergraph/core/runtime/runtime_registry.py +36 -13
  65. aethergraph/core/runtime/runtime_services.py +194 -6
  66. aethergraph/core/tools/builtins/toolset.py +1 -1
  67. aethergraph/core/tools/toolkit.py +5 -0
  68. aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
  69. aethergraph/plugins/agents/default_chat_agent.py +171 -0
  70. aethergraph/plugins/agents/shared.py +81 -0
  71. aethergraph/plugins/channel/adapters/webui.py +112 -112
  72. aethergraph/plugins/channel/routes/webui_routes.py +367 -102
  73. aethergraph/plugins/channel/utils/slack_utils.py +115 -59
  74. aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
  75. aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
  76. aethergraph/runtime/__init__.py +15 -0
  77. aethergraph/server/app_factory.py +190 -34
  78. aethergraph/server/clients/channel_client.py +202 -0
  79. aethergraph/server/http/channel_http_routes.py +116 -0
  80. aethergraph/server/http/channel_ws_routers.py +45 -0
  81. aethergraph/server/loading.py +117 -0
  82. aethergraph/server/server.py +131 -0
  83. aethergraph/server/server_state.py +240 -0
  84. aethergraph/server/start.py +227 -66
  85. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
  86. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
  87. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
  88. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
  89. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
  90. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
  91. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
  92. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
  93. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
  94. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
  95. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
  96. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
  97. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
  98. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
  99. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
  100. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
  101. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
  102. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
  103. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
  104. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
  105. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
  106. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
  107. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
  108. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
  109. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
  110. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
  111. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
  112. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
  113. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
  114. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
  115. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
  116. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
  117. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
  118. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
  119. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
  120. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
  121. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
  122. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
  123. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
  124. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
  125. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
  126. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
  127. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
  128. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
  129. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
  130. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
  131. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
  132. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
  133. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
  134. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
  135. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
  136. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
  137. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
  138. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
  139. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
  140. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
  141. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
  142. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
  143. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
  144. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
  145. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
  146. aethergraph/server/ui_static/index.html +15 -0
  147. aethergraph/server/ui_static/logo.png +0 -0
  148. aethergraph/services/artifacts/__init__.py +0 -0
  149. aethergraph/services/artifacts/facade.py +1239 -132
  150. aethergraph/services/auth/{dev.py → authn.py} +0 -8
  151. aethergraph/services/auth/authz.py +100 -0
  152. aethergraph/services/channel/__init__.py +0 -0
  153. aethergraph/services/channel/channel_bus.py +19 -1
  154. aethergraph/services/channel/factory.py +13 -1
  155. aethergraph/services/channel/ingress.py +311 -0
  156. aethergraph/services/channel/queue_adapter.py +75 -0
  157. aethergraph/services/channel/session.py +502 -19
  158. aethergraph/services/container/default_container.py +122 -43
  159. aethergraph/services/continuations/continuation.py +6 -0
  160. aethergraph/services/continuations/stores/fs_store.py +19 -0
  161. aethergraph/services/eventhub/event_hub.py +76 -0
  162. aethergraph/services/kv/__init__.py +0 -0
  163. aethergraph/services/kv/ephemeral.py +244 -0
  164. aethergraph/services/llm/__init__.py +0 -0
  165. aethergraph/services/llm/generic_client copy.py +691 -0
  166. aethergraph/services/llm/generic_client.py +1288 -187
  167. aethergraph/services/llm/providers.py +3 -1
  168. aethergraph/services/llm/types.py +47 -0
  169. aethergraph/services/llm/utils.py +284 -0
  170. aethergraph/services/logger/std.py +3 -0
  171. aethergraph/services/mcp/__init__.py +9 -0
  172. aethergraph/services/mcp/http_client.py +38 -0
  173. aethergraph/services/mcp/service.py +225 -1
  174. aethergraph/services/mcp/stdio_client.py +41 -6
  175. aethergraph/services/mcp/ws_client.py +44 -2
  176. aethergraph/services/memory/__init__.py +0 -0
  177. aethergraph/services/memory/distillers/llm_long_term.py +234 -0
  178. aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
  179. aethergraph/services/memory/distillers/long_term.py +225 -0
  180. aethergraph/services/memory/facade/__init__.py +3 -0
  181. aethergraph/services/memory/facade/chat.py +440 -0
  182. aethergraph/services/memory/facade/core.py +447 -0
  183. aethergraph/services/memory/facade/distillation.py +424 -0
  184. aethergraph/services/memory/facade/rag.py +410 -0
  185. aethergraph/services/memory/facade/results.py +315 -0
  186. aethergraph/services/memory/facade/retrieval.py +139 -0
  187. aethergraph/services/memory/facade/types.py +77 -0
  188. aethergraph/services/memory/facade/utils.py +43 -0
  189. aethergraph/services/memory/facade_dep.py +1539 -0
  190. aethergraph/services/memory/factory.py +9 -3
  191. aethergraph/services/memory/utils.py +10 -0
  192. aethergraph/services/metering/eventlog_metering.py +470 -0
  193. aethergraph/services/metering/noop.py +25 -4
  194. aethergraph/services/rag/__init__.py +0 -0
  195. aethergraph/services/rag/facade.py +279 -23
  196. aethergraph/services/rag/index_factory.py +2 -2
  197. aethergraph/services/rag/node_rag.py +317 -0
  198. aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
  199. aethergraph/services/registry/__init__.py +0 -0
  200. aethergraph/services/registry/agent_app_meta.py +419 -0
  201. aethergraph/services/registry/registry_key.py +1 -1
  202. aethergraph/services/registry/unified_registry.py +74 -6
  203. aethergraph/services/scope/scope.py +159 -0
  204. aethergraph/services/scope/scope_factory.py +164 -0
  205. aethergraph/services/state_stores/serialize.py +5 -0
  206. aethergraph/services/state_stores/utils.py +2 -1
  207. aethergraph/services/viz/__init__.py +0 -0
  208. aethergraph/services/viz/facade.py +413 -0
  209. aethergraph/services/viz/viz_service.py +69 -0
  210. aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
  211. aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
  212. aethergraph/storage/artifacts/cas_store.py +422 -0
  213. aethergraph/storage/artifacts/fs_cas.py +18 -0
  214. aethergraph/storage/artifacts/s3_cas.py +14 -0
  215. aethergraph/storage/artifacts/utils.py +124 -0
  216. aethergraph/storage/blob/fs_blob.py +86 -0
  217. aethergraph/storage/blob/s3_blob.py +115 -0
  218. aethergraph/storage/continuation_store/fs_cont.py +283 -0
  219. aethergraph/storage/continuation_store/inmem_cont.py +146 -0
  220. aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
  221. aethergraph/storage/docstore/fs_doc.py +63 -0
  222. aethergraph/storage/docstore/sqlite_doc.py +31 -0
  223. aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
  224. aethergraph/storage/eventlog/fs_event.py +136 -0
  225. aethergraph/storage/eventlog/sqlite_event.py +47 -0
  226. aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
  227. aethergraph/storage/factory.py +432 -0
  228. aethergraph/storage/fs_utils.py +28 -0
  229. aethergraph/storage/graph_state_store/state_store.py +64 -0
  230. aethergraph/storage/kv/inmem_kv.py +103 -0
  231. aethergraph/storage/kv/layered_kv.py +52 -0
  232. aethergraph/storage/kv/sqlite_kv.py +39 -0
  233. aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
  234. aethergraph/storage/memory/event_persist.py +68 -0
  235. aethergraph/storage/memory/fs_persist.py +118 -0
  236. aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
  237. aethergraph/{services → storage}/memory/indices.py +31 -7
  238. aethergraph/storage/metering/meter_event.py +55 -0
  239. aethergraph/storage/runs/doc_store.py +280 -0
  240. aethergraph/storage/runs/inmen_store.py +82 -0
  241. aethergraph/storage/runs/sqlite_run_store.py +403 -0
  242. aethergraph/storage/sessions/doc_store.py +183 -0
  243. aethergraph/storage/sessions/inmem_store.py +110 -0
  244. aethergraph/storage/sessions/sqlite_session_store.py +399 -0
  245. aethergraph/storage/vector_index/chroma_index.py +138 -0
  246. aethergraph/storage/vector_index/faiss_index.py +179 -0
  247. aethergraph/storage/vector_index/sqlite_index.py +187 -0
  248. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/METADATA +138 -31
  249. aethergraph-0.1.0a2.dist-info/RECORD +356 -0
  250. aethergraph-0.1.0a2.dist-info/entry_points.txt +3 -0
  251. aethergraph/services/artifacts/factory.py +0 -35
  252. aethergraph/services/artifacts/fs_store.py +0 -656
  253. aethergraph/services/artifacts/jsonl_index.py +0 -123
  254. aethergraph/services/artifacts/sqlite_index.py +0 -209
  255. aethergraph/services/memory/distillers/episode.py +0 -116
  256. aethergraph/services/memory/distillers/rolling.py +0 -74
  257. aethergraph/services/memory/facade.py +0 -633
  258. aethergraph/services/memory/persist_fs.py +0 -40
  259. aethergraph/services/rag/index/base.py +0 -27
  260. aethergraph/services/rag/index/faiss_index.py +0 -121
  261. aethergraph/services/rag/index/sqlite_index.py +0 -134
  262. aethergraph-0.1.0a1.dist-info/RECORD +0 -182
  263. aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
  264. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/WHEEL +0 -0
  265. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
  266. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/NOTICE +0 -0
  267. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,422 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import AsyncIterator
4
+ from contextlib import asynccontextmanager
5
+ import datetime
6
+ import json
7
+ import logging
8
+ import os
9
+ from pathlib import Path
10
+ import shutil
11
+ import tempfile
12
+ from typing import Any, BinaryIO
13
+
14
+ from aethergraph.contracts.services.artifacts import Artifact
15
+ from aethergraph.contracts.storage.artifact_store import AsyncArtifactStore
16
+ from aethergraph.contracts.storage.blob_store import BlobStore
17
+
18
+ from .utils import (
19
+ _now_iso,
20
+ _sha256_file,
21
+ _tree_manifest_and_hash,
22
+ to_thread,
23
+ )
24
+
25
+ logger = logging.getLogger("aethergraph.services.artifacts.cas_store")
26
+
27
+
28
+ class CASArtifactStore(AsyncArtifactStore):
29
+ """
30
+ Content-addressed artifact store built on top of a BlobStore.
31
+
32
+ - Uses local staging_dir for temp files/dirs.
33
+ - Stores blobs via BlobStore with keys derived from SHA-256 hashes.
34
+ - Persists minimal manifest/metadata as blobs too (for directories).
35
+ """
36
+
37
def __init__(self, blob: BlobStore, staging_dir: str):
    """Bind the store to a blob backend and prepare the local staging area.

    Args:
        blob: Backend used to persist blobs.
        staging_dir: Local directory for temporary files/dirs. It is made
            absolute and created if it does not exist yet.
    """
    self._blob = blob
    staging_root = os.path.abspath(staging_dir)
    self._staging_dir = staging_root
    os.makedirs(staging_root, exist_ok=True)
    # Most recently created artifact; nothing has been saved yet.
    self.last_artifact: Artifact | None = None
42
+
43
@property
def base_uri(self) -> str:
    """Base URI of the underlying blob store, returned unchanged."""
    backend = self._blob
    return backend.base_uri
46
+
47
def _augment_labels_with_filename(
    self,
    labels: dict | None,
    *,
    suggested_uri: str | None = None,
    path: str | None = None,
) -> dict:
    """
    Return a copy of ``labels`` that carries a 'filename' entry when one can be inferred.

    - The basename of ``suggested_uri`` is preferred.
    - Otherwise the basename of the local ``path`` is used.
    - If the caller already supplied 'filename' or 'name', nothing is added.

    The input mapping is never mutated.
    """
    merged: dict[str, Any] = dict(labels) if labels else {}

    # Explicit naming from the caller always wins.
    if any(key in merged for key in ("filename", "name")):
        return merged

    if suggested_uri:
        inferred = os.path.basename(suggested_uri.rstrip("/"))
    elif path:
        inferred = os.path.basename(path.rstrip(os.sep))
    else:
        inferred = None

    # An empty basename (e.g. a URI consisting only of slashes) adds nothing.
    if inferred:
        merged["filename"] = inferred
    return merged
77
+
78
+ # ---------- staging utils ----------
79
async def plan_staging_path(self, planned_ext: str = "") -> str:
    """Reserve an empty temp file inside the staging directory and return its path.

    The file is created via ``tempfile.mkstemp`` (with ``planned_ext`` as the
    suffix) and its descriptor is closed right away; the work runs through
    ``to_thread``.
    """

    def _reserve() -> str:
        handle, tmp_path = tempfile.mkstemp(dir=self._staging_dir, suffix=planned_ext)
        os.close(handle)
        return tmp_path

    reserved = await to_thread(_reserve)
    return reserved
86
+
87
async def plan_staging_dir(self, suffix: str = "") -> str:
    """Create a fresh ``dir_``-prefixed temp directory in the staging area and return its path."""
    return await to_thread(
        tempfile.mkdtemp,
        prefix="dir_",
        suffix=suffix,
        dir=self._staging_dir,
    )
92
+
93
+ # ---------- basic save / ingest ----------
94
async def save_file(
    self,
    *,
    path: str,
    kind: str,
    run_id: str,
    graph_id: str,
    node_id: str,
    tool_name: str,
    tool_version: str,
    suggested_uri: str | None = None,  # NOTE: only metadata / pretty; impl may ignore
    pin: bool = False,
    labels: dict | None = None,
    metrics: dict | None = None,
    preview_uri: str | None = None,  # NOTE: only metadata / pretty; impl may ignore
    cleanup: bool = True,
) -> Artifact:
    """Hash a local file, store it under a content-derived key and describe it as an Artifact.

    The blob key is ``cas/blobs/<sha256><ext>`` where ``<ext>`` is the
    extension of ``path``. ``cleanup`` is forwarded inverted to the blob
    store as ``keep_source``. The SHA-256 doubles as the artifact id, and the
    resulting artifact is remembered in ``self.last_artifact``.
    """
    digest, size = await to_thread(_sha256_file, path)
    _, suffix = os.path.splitext(path)
    # NOTE(review): os.path.join yields OS-specific separators in the key —
    # confirm the blob backends normalise this on Windows.
    blob_key = os.path.join("cas", "blobs", f"{digest}{suffix}")

    stored_uri = await self._blob.put_file(
        path,
        key=blob_key,
        mime=None,
        keep_source=not cleanup,
    )

    # Add a 'filename' label unless the caller already named the artifact.
    eff_labels = self._augment_labels_with_filename(
        labels, suggested_uri=suggested_uri, path=path
    )

    artifact = Artifact(
        artifact_id=digest,
        uri=stored_uri,
        kind=kind,
        bytes=size,
        sha256=digest,
        mime=None,  # callers can fill in if desired
        run_id=run_id,
        graph_id=graph_id,
        node_id=node_id,
        tool_name=tool_name,
        tool_version=tool_version,
        created_at=_now_iso(),
        labels=eff_labels,
        metrics=metrics or {},
        preview_uri=preview_uri,
        pinned=pin,
    )
    self.last_artifact = artifact
    return artifact
143
+
144
+ # ---------- streaming writer ----------
145
@asynccontextmanager
async def open_writer(
    self,
    *,
    kind: str,
    run_id: str,
    graph_id: str,
    node_id: str,
    tool_name: str,
    tool_version: str,
    planned_ext: str | None = None,
    pin: bool = False,
) -> AsyncIterator[Any]:
    """Stream bytes into a staged temp file, then ingest it as an artifact.

    Yields a writer exposing ``write(chunk)``, ``add_labels(...)`` and
    ``add_metrics(...)``. When the ``async with`` body finishes normally the
    staged file is closed and handed to ``ingest_staged_file``; the resulting
    artifact is then available as ``writer.artifact``.

    If the body or the ingest fails, or the task is cancelled, the staged
    file is removed on a best-effort basis and the original exception
    propagates unchanged.
    """
    staged_path = await self.plan_staging_path(planned_ext or "")

    class _Writer:
        """Helper class for streaming writes to a temp file."""

        def __init__(self, path: str, f: BinaryIO):
            self.tmp_path = path
            self._f = f
            self._labels: dict[str, str] = {}
            self._metrics: dict[str, float] = {}
            self.artifact: Artifact | None = None  # filled after finalize

        def write(self, chunk: bytes) -> None:
            self._f.write(chunk)

        def add_labels(self, labels: dict[str, str]) -> None:
            self._labels.update(labels or {})

        def add_metrics(self, metrics: dict[str, float]) -> None:
            self._metrics.update(metrics or {})

    try:
        # The file stays open for the duration of the caller's writes and is
        # closed when the with-block exits, before ingestion starts.
        with open(staged_path, "wb") as f:
            writer = _Writer(staged_path, f)
            yield writer

        writer.artifact = await self.ingest_staged_file(
            staged_path=staged_path,
            kind=kind,
            run_id=run_id,
            graph_id=graph_id,
            node_id=node_id,
            tool_name=tool_name,
            tool_version=tool_version,
            pin=pin,
            labels=writer._labels,
            metrics=writer._metrics,
        )
    except BaseException:
        # BaseException so that cancellation (not an ``Exception`` subclass)
        # also cleans up. The cleanup itself must never replace the error
        # that brought us here, so its failures are swallowed or logged.
        try:
            os.remove(staged_path)
        except FileNotFoundError:
            pass
        except OSError:
            logger.warning("open_writer: failed to delete staged file %s", staged_path)
        raise
213
+
214
async def ingest_staged_file(
    self,
    *,
    staged_path: str,
    kind: str,
    run_id: str,
    graph_id: str,
    node_id: str,
    tool_name: str,
    tool_version: str,
    pin: bool = False,
    labels: dict | None = None,
    metrics: dict | None = None,
    preview_uri: str | None = None,
    suggested_uri: str | None = None,
) -> Artifact:
    """Ingest a previously staged file and make sure the staged copy is gone.

    Delegates to ``save_file`` (same semantics) and then removes
    ``staged_path``. Removal is best-effort: a file that is already missing
    is not an error, and any other failure is only logged.
    """
    artifact = await self.save_file(
        path=staged_path,
        kind=kind,
        run_id=run_id,
        graph_id=graph_id,
        node_id=node_id,
        tool_name=tool_name,
        tool_version=tool_version,
        suggested_uri=suggested_uri,
        pin=pin,
        labels=labels,
        metrics=metrics,
        preview_uri=preview_uri,
    )
    try:
        os.remove(staged_path)
    except FileNotFoundError:
        # save_file runs with its default cleanup=True (keep_source=False),
        # so the blob store may already have consumed the staged file.
        # The goal — no leftover file — is met; do not warn.
        pass
    except Exception:
        logger.warning(
            "ingest_staged_file: failed to delete staged file %s",
            staged_path,
            exc_info=True,
        )
    return artifact
250
+
251
async def ingest_directory(
    self,
    *,
    staged_dir: str,
    kind: str = "dataset",
    run_id: str,
    graph_id: str,
    node_id: str,
    tool_name: str,
    tool_version: str,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    index_children: bool = False,  # TODO: use later for per-file artifacts
    pin: bool = False,
    labels: dict | None = None,
    metrics: dict | None = None,
    suggested_uri: str | None = None,
    archive: bool = False,
    archive_name: str = "bundle.tar.gz",
    cleanup: bool = True,
    store: str | None = None,  # "archive" | "manifest"
) -> Artifact:
    """Ingest a staged directory as a single tree artifact.

    A manifest (file entries, creation time, tool name/version) is always
    stored at ``cas/trees/<tree_sha>/manifest.json``. In ``"archive"`` mode a
    tar.gz of the manifest's files is additionally built in the staging
    directory and uploaded next to it; its URI becomes the artifact's
    ``preview_uri``.

    Args:
        staged_dir: Local directory to ingest.
        include / exclude: Passed to the manifest builder to select files.
        index_children: Currently unused.
        archive: Legacy switch; only consulted when ``store`` is None.
        archive_name: Object name of the uploaded archive.
        cleanup: Remove ``staged_dir`` after ingesting (best-effort).
        store: ``"archive"`` or ``"manifest"``; defaults from ``archive``.

    Raises:
        ValueError: If ``staged_dir`` is not a directory or ``store`` is not
            a known mode. Both are checked before anything is uploaded.
    """
    if not os.path.isdir(staged_dir):
        raise ValueError(f"ingest_directory: not a directory: {staged_dir}")

    if store is None:
        store = "archive" if archive else "manifest"
    # Fail fast: reject a bad mode before hashing or uploading anything.
    if store not in ("archive", "manifest"):
        raise ValueError(f"unknown store mode: {store}")

    manifest_entries, tree_sha = await to_thread(
        _tree_manifest_and_hash, staged_dir, include, exclude
    )

    # manifest blob
    manifest_key = os.path.join("cas", "trees", tree_sha, "manifest.json")
    manifest_bytes = json.dumps(
        {
            "files": manifest_entries,
            "created_at": _now_iso(),
            "tool_name": tool_name,
            "tool_version": tool_version,
        },
        indent=2,
    ).encode("utf-8")

    # The manifest URI is not needed yet; keep the upload for its side effect.
    await self._blob.put_bytes(
        manifest_bytes,
        key=manifest_key,
        ext=".json",
        mime="application/json",
    )

    archive_uri: str | None = None
    if store == "archive":
        # build tar.gz locally, then upload
        archive_path = os.path.join(self._staging_dir, f"{tree_sha}.tar.gz")

        def _make_tar() -> None:
            import tarfile

            with tarfile.open(archive_path, mode="w:gz") as tar:
                for e in sorted(manifest_entries, key=lambda x: x["path"]):
                    abs_file = os.path.join(staged_dir, e["path"])
                    tar.add(abs_file, arcname=e["path"])

        await to_thread(_make_tar)
        archive_key = os.path.join("cas", "trees", tree_sha, archive_name)
        # The tarball is a throw-away staging file, so let the blob store
        # consume it instead of leaving it behind in the staging directory.
        archive_uri = await self._blob.put_file(
            archive_path,
            key=archive_key,
            mime="application/gzip",
            keep_source=False,
        )

    # Directory "handle" URI: base_uri + prefix.
    # NOTE: we don't require an actual object at dir_prefix; it's a logical handle.
    dir_prefix = os.path.join("cas", "trees", tree_sha)
    dir_uri = self.base_uri.rstrip("/") + "/" + dir_prefix.replace(os.sep, "/")

    total_bytes = sum(e["bytes"] for e in manifest_entries)

    # NOTE(review): archive_name is used as the filename fallback even in
    # "manifest" mode, so the staged_dir basename is never reached — confirm
    # this is intended.
    eff_labels = self._augment_labels_with_filename(
        labels,
        suggested_uri=suggested_uri or archive_name,
        path=staged_dir,
    )

    a = Artifact(
        artifact_id=tree_sha,
        uri=dir_uri,
        kind=kind,
        bytes=total_bytes,
        sha256=tree_sha,
        mime="application/vnd.aethergraph.bundle+dir",
        run_id=run_id,
        graph_id=graph_id,
        node_id=node_id,
        tool_name=tool_name,
        tool_version=tool_version,
        created_at=_now_iso(),
        labels=eff_labels,
        metrics=metrics or {},
        preview_uri=archive_uri,
        pinned=pin,
    )
    self.last_artifact = a

    if cleanup:
        # Deleting a tree can be slow; keep it off the event loop.
        try:
            await to_thread(shutil.rmtree, staged_dir, ignore_errors=True)
        except Exception:
            logger.warning("ingest_directory: failed to cleanup staged dir %s", staged_dir)

    return a
375
+
376
+ # ---------- load ----------
377
async def load_bytes(self, uri):
    """Fetch the raw content stored at ``uri`` from the blob store."""
    payload = await self._blob.load_bytes(uri)
    return payload
379
+
380
async def load_text(self, uri: str, *, encoding: str = "utf-8", errors: str = "strict") -> str:
    """Fetch the content at ``uri`` as text; decoding options are forwarded to the blob store."""
    text = await self._blob.load_text(uri, encoding=encoding, errors=errors)
    return text
382
+
383
async def load_artifact_bytes(self, uri: str) -> bytes:
    """Fetch an artifact's raw bytes; a plain pass-through to the blob store's ``load_bytes``."""
    content = await self._blob.load_bytes(uri)
    return content
385
+
386
async def load_artifact_dir(self, uri):
    """
    Resolve a directory artifact to a local path.

    This generic implementation does no work and hands ``uri`` back as-is.
    The intent is that an FS backend can return the directory directly while
    an S3 backend downloads the files listed in the manifest into a temp
    dir; alternatively ArtifactFacade can provide an
    `as_local_dir(artifact)` helper that handles the download.
    """
    resolved = uri
    return resolved
397
+
398
async def load_artifact(self, uri):
    """Load an artifact by URI: directory handles are resolved, blobs are read.

    A URI counts as a directory handle when it ends with ``/`` or has the
    ``.../cas/trees/<tree_sha>`` shape that ``ingest_directory`` produces.
    Those handles are logical prefixes with no object behind them, so they
    are routed to ``load_artifact_dir`` instead of being read as a blob.
    Everything else is returned as bytes from the blob store.
    """
    # Compatibility: if directory URI, return as-is, else load blob content.
    parent, _, leaf = uri.rstrip("/").rpartition("/")
    is_tree_handle = bool(leaf) and (parent == "cas/trees" or parent.endswith("/cas/trees"))
    if uri.endswith("/") or is_tree_handle:
        # directory handle URI
        return await self.load_artifact_dir(uri)
    # else, blob URI
    return await self._blob.load_bytes(uri)
405
+
406
+ # ---------- cleanup ----------
407
async def cleanup_tmp(self, max_age_hours: int = 24) -> None:
    """Delete staging entries whose mtime is older than ``max_age_hours``.

    Walks the staging directory recursively; old files are unlinked and old
    directories are removed with their contents. The sweep is best-effort:
    entries that disappear in the meantime are skipped and other filesystem
    errors are logged at debug level without aborting the sweep.
    """
    now = datetime.datetime.now(datetime.timezone.utc).timestamp()
    staging_root = Path(self._staging_dir)

    def _cleanup() -> None:
        # Snapshot the listing first: entries are deleted as we go, and
        # mutating the tree under a lazy rglob() walk is fragile.
        for entry in list(staging_root.rglob("*")):
            try:
                age_h = (now - entry.stat().st_mtime) / 3600.0
                if age_h <= max_age_hours:
                    continue
                if entry.is_file():
                    entry.unlink(missing_ok=True)
                else:
                    shutil.rmtree(entry, ignore_errors=True)
            except FileNotFoundError:
                # Already removed, e.g. together with its parent directory.
                continue
            except OSError:
                logger.debug("cleanup_tmp: could not remove %s", entry, exc_info=True)

    await to_thread(_cleanup)
@@ -0,0 +1,18 @@
1
+ import os
2
+
3
+ from aethergraph.storage.artifacts.cas_store import CASArtifactStore
4
+ from aethergraph.storage.blob.fs_blob import FSBlobStore
5
+
6
+
7
class FSArtifactStore(CASArtifactStore):
    """CAS artifact store wired to an ``FSBlobStore`` under a single base directory."""

    def __init__(self, base_dir: str):
        """Build the store from ``base_dir`` (made absolute).

        The blob store is constructed with ``<base_dir>/blobs`` and the
        staging directory is ``<base_dir>/staging``.
        """
        root = os.path.abspath(base_dir)
        super().__init__(
            blob=FSBlobStore(os.path.join(root, "blobs")),
            staging_dir=os.path.join(root, "staging"),
        )
15
+
16
+ # TODO: Add any FS-specific optimizations if needed
17
+ # Optionally override load_artifact_dir to return actual local dir path if uri is file://cas/trees/...
18
+ # and implement FS-only "pretty" symlinks.
@@ -0,0 +1,14 @@
1
+ from aethergraph.storage.artifacts.cas_store import CASArtifactStore
2
+ from aethergraph.storage.blob.s3_blob import S3BlobStore
3
+
4
+
5
class S3ArtifactStore(CASArtifactStore):
    """CAS artifact store wired to an ``S3BlobStore``."""

    def __init__(self, bucket: str, prefix: str, staging_dir: str):
        """Create the S3 blob store from ``bucket``/``prefix`` and pass it on with ``staging_dir``."""
        s3_blob = S3BlobStore(bucket=bucket, prefix=prefix)
        super().__init__(blob=s3_blob, staging_dir=staging_dir)
10
+
11
+ # TODO: Optionally add any S3-specific optimizations if needed
12
+ # - parse tree_sha from uri
13
+ # - download files listed in manifest.json into a local temp dir
14
+ # - return that path
@@ -0,0 +1,124 @@
1
+ import asyncio
2
+ from datetime import datetime, timezone
3
+ from fnmatch import fnmatch
4
+ import hashlib
5
+ import json
6
+ import os
7
+ from pathlib import Path
8
+
9
+
10
def now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
12
+
13
+
14
async def to_thread(fn, *a, **k):
    """Run ``fn(*a, **k)`` through ``asyncio.to_thread`` and return its result."""
    outcome = await asyncio.to_thread(fn, *a, **k)
    return outcome
16
+
17
+
18
+ # ----- helpers ----- NOTE: we have multiple copies of these in different places, consider centralizing -----
19
def _now_iso():
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.isoformat()
21
+
22
+
23
def _sha256_file(path: str, chunk=1024 * 1024) -> tuple[str, int]:
    """Hash a file in fixed-size reads.

    Returns a ``(hex_digest, byte_count)`` pair: the SHA-256 of the file's
    contents and the total number of bytes read. ``chunk`` is the read size.
    """
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as fh:
        # iter() with a sentinel stops at the first empty read (EOF).
        for piece in iter(lambda: fh.read(chunk), b""):
            digest.update(piece)
            size += len(piece)
    return digest.hexdigest(), size
35
+
36
+
37
+ def _content_addr_path(base_dir: str, sha256: str, ext: str | None) -> str:
38
+ """Return a content-addressed path under base_dir for a given sha256 and optional extension.
39
+ Creates subdirectories as needed.
40
+
41
+ It works as follows:
42
+ - Takes the first 4 characters of the sha256 hash to create two levels of subdirectories.
43
+ - The first two characters form the first subdirectory (sub1).
44
+ - The next two characters form the second subdirectory (sub2).
45
+ - The full sha256 hash, optionally followed by the provided file extension, is used as the filename.
46
+ - Ensures that the target directory exists by creating it if necessary.
47
+ - Returns the full path to the content-addressed file.
48
+
49
+ The final path structure will look like:
50
+ base_dir/sub1/sub2/sha256[.ext]
51
+ """
52
+ sub1, sub2 = sha256[:2], sha256[2:4]
53
+ fname = sha256 + (ext or "")
54
+ target_dir = os.path.join(base_dir, sub1, sub2)
55
+ os.makedirs(target_dir, exist_ok=True)
56
+ return os.path.join(target_dir, fname)
57
+
58
+
59
+ def _walk_dir(root: str, include: list[str] | None, exclude: list[str] | None):
60
+ """Yield (relpath, abspath) for files under root honoring include/exclude globs."""
61
+ root_p = Path(root)
62
+ for p in root_p.rglob("*"):
63
+ if not p.is_file():
64
+ continue
65
+ rel = str(p.relative_to(root_p)).replace("\\", "/")
66
+ if exclude and any(fnmatch.fnmatch(rel, pat) for pat in exclude):
67
+ continue
68
+ if include and not any(fnmatch.fnmatch(rel, pat) for pat in include):
69
+ continue
70
+ yield rel, str(p)
71
+
72
+
73
def _tree_manifest_and_hash(root: str, include: list[str] | None, exclude: list[str] | None):
    """Build a deterministic manifest and tree hash for the files under ``root``.

    Files are discovered with ``_walk_dir`` (honoring ``include``/``exclude``)
    and hashed with ``_sha256_file``.

    Returns:
        ``(entries, tree_sha)`` where

        - ``entries`` is ``[{"path": rel, "sha256": sha, "bytes": n}, ...]``
          sorted by ``path`` so the manifest does not depend on the order in
          which the filesystem lists files;
        - ``tree_sha`` is the SHA-256 hex digest over the sorted,
          tab-separated lines ``"<rel>\\t<sha>\\t<bytes>\\n"`` (UTF-8 encoded).
    """
    entries = []
    lines = []
    for rel, abspath in _walk_dir(root, include, exclude):
        sha, nbytes = _sha256_file(abspath)
        entries.append({"path": rel, "sha256": sha, "bytes": nbytes})
        lines.append(f"{rel}\t{sha}\t{nbytes}\n")

    # Directory traversal order is filesystem-dependent; sort both views.
    entries.sort(key=lambda entry: entry["path"])
    # Lines are sorted as whole strings, which keeps the tree hash identical
    # to what earlier versions produced for the same tree.
    lines.sort()

    h = hashlib.sha256()
    for line in lines:
        h.update(line.encode("utf-8"))
    tree_sha = h.hexdigest()
    return entries, tree_sha
91
+
92
+
93
def _content_addr_dir_path(base_dir: str, tree_sha: str):
    """Return (and create) the content-addressed directory for ``tree_sha``.

    Layout: ``base_dir/<tree_sha[0:2]>/<tree_sha[2:4]>/<tree_sha>``. The
    directory is meant to hold the manifest (and optional archive).
    """
    shard_path = os.path.join(base_dir, tree_sha[:2], tree_sha[2:4], tree_sha)
    os.makedirs(shard_path, exist_ok=True)
    return shard_path
99
+
100
+
101
+ def _write_json(path: str, obj: dict | list):
102
+ with open(path, "w", encoding="utf-8") as f:
103
+ json.dump(obj, f, ensure_ascii=False, separators=(",", ":"))
104
+
105
+
106
def _maybe_cleanup_tmp_parent(tmp_root: str, path: str):
    """Prune empty ancestor directories of ``path`` that lie below ``tmp_root``.

    Starting at the directory containing ``path`` and walking upward, each
    directory is removed with ``os.rmdir`` (which only succeeds when it is
    empty). The walk stops at the first directory that cannot be removed,
    when it leaves ``tmp_root``, or when it reaches ``tmp_root`` itself, which
    is never deleted. Any error is swallowed: this is best-effort cleanup.
    """
    try:
        current = os.path.dirname(os.path.abspath(path))
        root_abs = os.path.abspath(tmp_root)

        while True:
            # Stop once we are no longer inside tmp_root.
            if os.path.commonpath([current, root_abs]) != root_abs:
                break
            # Stop at tmp_root itself; it must survive.
            if os.path.normcase(current) == os.path.normcase(root_abs):
                break
            try:
                os.rmdir(current)  # fails (OSError) unless the directory is empty
            except OSError:
                break
            current = os.path.dirname(current)
    except Exception:
        pass
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+
6
+ from aethergraph.contracts.storage.blob_store import BlobStore
7
+ from aethergraph.storage.fs_utils import _from_uri_or_path, _to_file_uri, to_thread
8
+
9
+
10
class FSBlobStore(BlobStore):
    """Blob store that keeps each blob as a file under a local base directory.

    Keys are relative paths joined onto ``base_dir``. Blocking file operations
    are pushed to a worker thread through ``to_thread``. Returned URIs come
    from ``_to_file_uri`` (presumably ``file://`` URIs; see ``fs_utils``).
    """

    def __init__(self, base_dir: str):
        """Remember the absolute ``base_dir`` and make sure it exists."""
        self.base_dir = os.path.abspath(base_dir)
        os.makedirs(self.base_dir, exist_ok=True)

    @property
    def base_uri(self) -> str:
        """URI form of the base directory, as produced by ``_to_file_uri``."""
        return _to_file_uri(self.base_dir)

    def _resolve_key(self, key: str | None, ext: str | None) -> str:
        """Return ``key`` unchanged, or invent ``blobs/<uuid4 hex>[ext]`` when it is None."""
        if key is not None:
            return key

        # fall back to some random-ish name under "blobs/"
        import uuid

        generated = uuid.uuid4().hex + (ext or "")
        return os.path.join("blobs", generated)

    async def put_bytes(
        self,
        data: bytes,
        *,
        key: str | None = None,
        ext: str | None = None,
        mime: str | None = None,
    ) -> str:
        """Write ``data`` to ``base_dir/key`` and return the file's URI.

        When ``key`` is None a random name (with ``ext`` appended) is used.
        ``mime`` is accepted for interface compatibility and not used here.
        """
        target = os.path.join(self.base_dir, self._resolve_key(key, ext))
        os.makedirs(os.path.dirname(target), exist_ok=True)

        def _write():
            with open(target, "wb") as fh:
                fh.write(data)
            return _to_file_uri(target)

        return await to_thread(_write)

    async def put_file(
        self,
        path: str,
        *,
        key: str | None = None,
        mime: str | None = None,
        keep_source: bool = False,
    ) -> str:
        """Place the file at ``path`` into the store and return its URI.

        The source is copied (``shutil.copy2``) when ``keep_source`` is true
        and moved (``shutil.move``) otherwise. When ``key`` is None a random
        name carrying the source file's extension is used. ``mime`` is
        accepted for interface compatibility and not used here.
        """
        suffix = os.path.splitext(path)[1]
        dst = os.path.join(self.base_dir, self._resolve_key(key, suffix))
        os.makedirs(os.path.dirname(dst), exist_ok=True)

        def _transfer():
            src = os.path.abspath(path)
            operation = shutil.copy2 if keep_source else shutil.move
            operation(src, dst)
            return _to_file_uri(dst)

        return await to_thread(_transfer)

    async def load_bytes(self, uri: str) -> bytes:
        """Read and return the raw bytes behind ``uri`` (a URI or plain path)."""
        local_path = _from_uri_or_path(uri)

        def _read():
            with open(local_path, "rb") as fh:
                return fh.read()

        return await to_thread(_read)

    async def load_text(
        self,
        uri: str,
        *,
        encoding: str = "utf-8",
        errors: str = "strict",
    ) -> str:
        """Load the blob at ``uri`` and decode it with ``encoding``/``errors``."""
        raw = await self.load_bytes(uri)
        return raw.decode(encoding, errors)