aethergraph 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__init__.py +4 -10
- aethergraph/__main__.py +293 -0
- aethergraph/api/v1/__init__.py +0 -0
- aethergraph/api/v1/agents.py +46 -0
- aethergraph/api/v1/apps.py +70 -0
- aethergraph/api/v1/artifacts.py +415 -0
- aethergraph/api/v1/channels.py +89 -0
- aethergraph/api/v1/deps.py +168 -0
- aethergraph/api/v1/graphs.py +259 -0
- aethergraph/api/v1/identity.py +25 -0
- aethergraph/api/v1/memory.py +353 -0
- aethergraph/api/v1/misc.py +47 -0
- aethergraph/api/v1/pagination.py +29 -0
- aethergraph/api/v1/runs.py +568 -0
- aethergraph/api/v1/schemas.py +535 -0
- aethergraph/api/v1/session.py +323 -0
- aethergraph/api/v1/stats.py +201 -0
- aethergraph/api/v1/viz.py +152 -0
- aethergraph/config/config.py +22 -0
- aethergraph/config/loader.py +3 -2
- aethergraph/config/storage.py +209 -0
- aethergraph/contracts/__init__.py +0 -0
- aethergraph/contracts/services/__init__.py +0 -0
- aethergraph/contracts/services/artifacts.py +27 -14
- aethergraph/contracts/services/memory.py +45 -17
- aethergraph/contracts/services/metering.py +129 -0
- aethergraph/contracts/services/runs.py +50 -0
- aethergraph/contracts/services/sessions.py +87 -0
- aethergraph/contracts/services/state_stores.py +3 -0
- aethergraph/contracts/services/viz.py +44 -0
- aethergraph/contracts/storage/artifact_index.py +88 -0
- aethergraph/contracts/storage/artifact_store.py +99 -0
- aethergraph/contracts/storage/async_kv.py +34 -0
- aethergraph/contracts/storage/blob_store.py +50 -0
- aethergraph/contracts/storage/doc_store.py +35 -0
- aethergraph/contracts/storage/event_log.py +31 -0
- aethergraph/contracts/storage/vector_index.py +48 -0
- aethergraph/core/__init__.py +0 -0
- aethergraph/core/execution/forward_scheduler.py +13 -2
- aethergraph/core/execution/global_scheduler.py +21 -15
- aethergraph/core/execution/step_forward.py +10 -1
- aethergraph/core/graph/__init__.py +0 -0
- aethergraph/core/graph/graph_builder.py +8 -4
- aethergraph/core/graph/graph_fn.py +156 -15
- aethergraph/core/graph/graph_spec.py +8 -0
- aethergraph/core/graph/graphify.py +146 -27
- aethergraph/core/graph/node_spec.py +0 -2
- aethergraph/core/graph/node_state.py +3 -0
- aethergraph/core/graph/task_graph.py +39 -1
- aethergraph/core/runtime/__init__.py +0 -0
- aethergraph/core/runtime/ad_hoc_context.py +64 -4
- aethergraph/core/runtime/base_service.py +28 -4
- aethergraph/core/runtime/execution_context.py +13 -15
- aethergraph/core/runtime/graph_runner.py +222 -37
- aethergraph/core/runtime/node_context.py +510 -6
- aethergraph/core/runtime/node_services.py +12 -5
- aethergraph/core/runtime/recovery.py +15 -1
- aethergraph/core/runtime/run_manager.py +783 -0
- aethergraph/core/runtime/run_manager_local.py +204 -0
- aethergraph/core/runtime/run_registration.py +2 -2
- aethergraph/core/runtime/run_types.py +89 -0
- aethergraph/core/runtime/runtime_env.py +136 -7
- aethergraph/core/runtime/runtime_metering.py +71 -0
- aethergraph/core/runtime/runtime_registry.py +36 -13
- aethergraph/core/runtime/runtime_services.py +194 -6
- aethergraph/core/tools/builtins/toolset.py +1 -1
- aethergraph/core/tools/toolkit.py +5 -0
- aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
- aethergraph/plugins/agents/default_chat_agent.py +171 -0
- aethergraph/plugins/agents/shared.py +81 -0
- aethergraph/plugins/channel/adapters/webui.py +112 -112
- aethergraph/plugins/channel/routes/webui_routes.py +367 -102
- aethergraph/plugins/channel/utils/slack_utils.py +115 -59
- aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
- aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
- aethergraph/runtime/__init__.py +15 -0
- aethergraph/server/app_factory.py +190 -34
- aethergraph/server/clients/channel_client.py +202 -0
- aethergraph/server/http/channel_http_routes.py +116 -0
- aethergraph/server/http/channel_ws_routers.py +45 -0
- aethergraph/server/loading.py +117 -0
- aethergraph/server/server.py +131 -0
- aethergraph/server/server_state.py +240 -0
- aethergraph/server/start.py +227 -66
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
- aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
- aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
- aethergraph/server/ui_static/index.html +15 -0
- aethergraph/server/ui_static/logo.png +0 -0
- aethergraph/services/artifacts/__init__.py +0 -0
- aethergraph/services/artifacts/facade.py +1239 -132
- aethergraph/services/auth/{dev.py → authn.py} +0 -8
- aethergraph/services/auth/authz.py +100 -0
- aethergraph/services/channel/__init__.py +0 -0
- aethergraph/services/channel/channel_bus.py +19 -1
- aethergraph/services/channel/factory.py +13 -1
- aethergraph/services/channel/ingress.py +311 -0
- aethergraph/services/channel/queue_adapter.py +75 -0
- aethergraph/services/channel/session.py +502 -19
- aethergraph/services/container/default_container.py +122 -43
- aethergraph/services/continuations/continuation.py +6 -0
- aethergraph/services/continuations/stores/fs_store.py +19 -0
- aethergraph/services/eventhub/event_hub.py +76 -0
- aethergraph/services/kv/__init__.py +0 -0
- aethergraph/services/kv/ephemeral.py +244 -0
- aethergraph/services/llm/__init__.py +0 -0
- aethergraph/services/llm/generic_client copy.py +691 -0
- aethergraph/services/llm/generic_client.py +1288 -187
- aethergraph/services/llm/providers.py +3 -1
- aethergraph/services/llm/types.py +47 -0
- aethergraph/services/llm/utils.py +284 -0
- aethergraph/services/logger/std.py +3 -0
- aethergraph/services/mcp/__init__.py +9 -0
- aethergraph/services/mcp/http_client.py +38 -0
- aethergraph/services/mcp/service.py +225 -1
- aethergraph/services/mcp/stdio_client.py +41 -6
- aethergraph/services/mcp/ws_client.py +44 -2
- aethergraph/services/memory/__init__.py +0 -0
- aethergraph/services/memory/distillers/llm_long_term.py +234 -0
- aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
- aethergraph/services/memory/distillers/long_term.py +225 -0
- aethergraph/services/memory/facade/__init__.py +3 -0
- aethergraph/services/memory/facade/chat.py +440 -0
- aethergraph/services/memory/facade/core.py +447 -0
- aethergraph/services/memory/facade/distillation.py +424 -0
- aethergraph/services/memory/facade/rag.py +410 -0
- aethergraph/services/memory/facade/results.py +315 -0
- aethergraph/services/memory/facade/retrieval.py +139 -0
- aethergraph/services/memory/facade/types.py +77 -0
- aethergraph/services/memory/facade/utils.py +43 -0
- aethergraph/services/memory/facade_dep.py +1539 -0
- aethergraph/services/memory/factory.py +9 -3
- aethergraph/services/memory/utils.py +10 -0
- aethergraph/services/metering/eventlog_metering.py +470 -0
- aethergraph/services/metering/noop.py +25 -4
- aethergraph/services/rag/__init__.py +0 -0
- aethergraph/services/rag/facade.py +279 -23
- aethergraph/services/rag/index_factory.py +2 -2
- aethergraph/services/rag/node_rag.py +317 -0
- aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
- aethergraph/services/registry/__init__.py +0 -0
- aethergraph/services/registry/agent_app_meta.py +419 -0
- aethergraph/services/registry/registry_key.py +1 -1
- aethergraph/services/registry/unified_registry.py +74 -6
- aethergraph/services/scope/scope.py +159 -0
- aethergraph/services/scope/scope_factory.py +164 -0
- aethergraph/services/state_stores/serialize.py +5 -0
- aethergraph/services/state_stores/utils.py +2 -1
- aethergraph/services/viz/__init__.py +0 -0
- aethergraph/services/viz/facade.py +413 -0
- aethergraph/services/viz/viz_service.py +69 -0
- aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
- aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
- aethergraph/storage/artifacts/cas_store.py +422 -0
- aethergraph/storage/artifacts/fs_cas.py +18 -0
- aethergraph/storage/artifacts/s3_cas.py +14 -0
- aethergraph/storage/artifacts/utils.py +124 -0
- aethergraph/storage/blob/fs_blob.py +86 -0
- aethergraph/storage/blob/s3_blob.py +115 -0
- aethergraph/storage/continuation_store/fs_cont.py +283 -0
- aethergraph/storage/continuation_store/inmem_cont.py +146 -0
- aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
- aethergraph/storage/docstore/fs_doc.py +63 -0
- aethergraph/storage/docstore/sqlite_doc.py +31 -0
- aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
- aethergraph/storage/eventlog/fs_event.py +136 -0
- aethergraph/storage/eventlog/sqlite_event.py +47 -0
- aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
- aethergraph/storage/factory.py +432 -0
- aethergraph/storage/fs_utils.py +28 -0
- aethergraph/storage/graph_state_store/state_store.py +64 -0
- aethergraph/storage/kv/inmem_kv.py +103 -0
- aethergraph/storage/kv/layered_kv.py +52 -0
- aethergraph/storage/kv/sqlite_kv.py +39 -0
- aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
- aethergraph/storage/memory/event_persist.py +68 -0
- aethergraph/storage/memory/fs_persist.py +118 -0
- aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
- aethergraph/{services → storage}/memory/indices.py +31 -7
- aethergraph/storage/metering/meter_event.py +55 -0
- aethergraph/storage/runs/doc_store.py +280 -0
- aethergraph/storage/runs/inmen_store.py +82 -0
- aethergraph/storage/runs/sqlite_run_store.py +403 -0
- aethergraph/storage/sessions/doc_store.py +183 -0
- aethergraph/storage/sessions/inmem_store.py +110 -0
- aethergraph/storage/sessions/sqlite_session_store.py +399 -0
- aethergraph/storage/vector_index/chroma_index.py +138 -0
- aethergraph/storage/vector_index/faiss_index.py +179 -0
- aethergraph/storage/vector_index/sqlite_index.py +187 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/METADATA +138 -31
- aethergraph-0.1.0a2.dist-info/RECORD +356 -0
- aethergraph-0.1.0a2.dist-info/entry_points.txt +3 -0
- aethergraph/services/artifacts/factory.py +0 -35
- aethergraph/services/artifacts/fs_store.py +0 -656
- aethergraph/services/artifacts/jsonl_index.py +0 -123
- aethergraph/services/artifacts/sqlite_index.py +0 -209
- aethergraph/services/memory/distillers/episode.py +0 -116
- aethergraph/services/memory/distillers/rolling.py +0 -74
- aethergraph/services/memory/facade.py +0 -633
- aethergraph/services/memory/persist_fs.py +0 -40
- aethergraph/services/rag/index/base.py +0 -27
- aethergraph/services/rag/index/faiss_index.py +0 -121
- aethergraph/services/rag/index/sqlite_index.py +0 -134
- aethergraph-0.1.0a1.dist-info/RECORD +0 -182
- aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/WHEEL +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/NOTICE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import AsyncIterator
|
|
4
|
+
from contextlib import asynccontextmanager
|
|
5
|
+
import datetime
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import shutil
|
|
11
|
+
import tempfile
|
|
12
|
+
from typing import Any, BinaryIO
|
|
13
|
+
|
|
14
|
+
from aethergraph.contracts.services.artifacts import Artifact
|
|
15
|
+
from aethergraph.contracts.storage.artifact_store import AsyncArtifactStore
|
|
16
|
+
from aethergraph.contracts.storage.blob_store import BlobStore
|
|
17
|
+
|
|
18
|
+
from .utils import (
|
|
19
|
+
_now_iso,
|
|
20
|
+
_sha256_file,
|
|
21
|
+
_tree_manifest_and_hash,
|
|
22
|
+
to_thread,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger("aethergraph.services.artifacts.cas_store")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class CASArtifactStore(AsyncArtifactStore):
    """
    Content-addressed artifact store built on top of a BlobStore.

    - Uses local staging_dir for temp files/dirs.
    - Stores blobs via BlobStore with keys derived from SHA-256 hashes.
    - Persists minimal manifest/metadata as blobs too (for directories).
    """

    def __init__(self, blob: BlobStore, staging_dir: str):
        """Bind the store to ``blob`` and make sure ``staging_dir`` exists.

        Args:
            blob: Backend that receives every blob/manifest/archive.
            staging_dir: Local directory for temp files; stored as an absolute
                path and created if missing.
        """
        self._blob = blob
        self._staging_dir = os.path.abspath(staging_dir)
        os.makedirs(self._staging_dir, exist_ok=True)
        # Most recently produced Artifact (set by save_file / ingest_directory).
        self.last_artifact: Artifact | None = None

    @property
    def base_uri(self) -> str:
        """Base URI of the underlying blob store."""
        return self._blob.base_uri

    # ---------- internal helpers ----------
    @staticmethod
    def _remove_staged(path: str, *, context: str) -> None:
        """Best-effort delete of a local staging file; never raises.

        A missing file is treated as success: callers hand the file to the
        blob store first, which may already have consumed it
        (NOTE(review): presumably ``keep_source=False`` moves the file --
        confirm against the BlobStore implementations).
        """
        try:
            os.remove(path)
        except FileNotFoundError:
            return
        except OSError:
            logger.warning("%s: failed to delete staged file %s", context, path)

    @staticmethod
    def _is_tree_handle(uri: str) -> bool:
        """Return True if ``uri`` looks like ``<base>/cas/trees/<id>``.

        This is the shape of the logical directory handle built by
        ``ingest_directory`` (which carries no trailing slash).
        """
        parts = uri.rstrip("/").rsplit("/", 3)
        return len(parts) == 4 and parts[1] == "cas" and parts[2] == "trees" and bool(parts[3])

    def _augment_labels_with_filename(
        self,
        labels: dict | None,
        *,
        suggested_uri: str | None = None,
        path: str | None = None,
    ) -> dict:
        """
        Ensure labels contains a stable 'filename' key when we can infer one.

        - Prefer an explicit suggested_uri basename.
        - Fallback to the local path basename.
        - Do NOT override an existing 'filename' or 'name' key.

        Always returns a new dict; the ``labels`` argument is not mutated.
        """
        out: dict[str, Any] = dict(labels or {})

        # Don't stomp on explicit naming
        if "filename" in out or "name" in out:
            return out

        candidate: str | None = None
        if suggested_uri:
            candidate = os.path.basename(suggested_uri.rstrip("/"))
        elif path:
            candidate = os.path.basename(path.rstrip(os.sep))

        if candidate:
            out["filename"] = candidate

        return out

    # ---------- staging utils ----------
    async def plan_staging_path(self, planned_ext: str = "") -> str:
        """Create an empty temp file in the staging dir and return its path.

        ``planned_ext`` is used verbatim as the file name suffix.
        """

        def _mk() -> str:
            fd, p = tempfile.mkstemp(suffix=planned_ext, dir=self._staging_dir)
            # Only the reserved path is needed; the caller reopens the file.
            os.close(fd)
            return p

        return await to_thread(_mk)

    async def plan_staging_dir(self, suffix: str = "") -> str:
        """Create a fresh ``dir_*`` temp directory in the staging dir and return its path."""

        def _mkd() -> str:
            return tempfile.mkdtemp(prefix="dir_", suffix=suffix, dir=self._staging_dir)

        return await to_thread(_mkd)

    # ---------- basic save / ingest ----------
    async def save_file(
        self,
        *,
        path: str,
        kind: str,
        run_id: str,
        graph_id: str,
        node_id: str,
        tool_name: str,
        tool_version: str,
        suggested_uri: str | None = None,  # NOTE: only metadata / pretty; impl may ignore
        pin: bool = False,
        labels: dict | None = None,
        metrics: dict | None = None,
        preview_uri: str | None = None,  # NOTE: only metadata / pretty; impl may ignore
        cleanup: bool = True,
    ) -> Artifact:
        """Hash a local file, store it under ``cas/blobs/<sha256><ext>`` and return its Artifact.

        Args:
            path: Local file to store; its extension is kept in the blob key.
            kind, run_id, graph_id, node_id, tool_name, tool_version: Copied
                onto the returned Artifact.
            suggested_uri: Only used to derive a ``filename`` label.
            pin: Stored as ``Artifact.pinned``.
            labels: Extra labels; a ``filename`` label is added when none of
                ``filename``/``name`` is present.
            metrics: Stored on the Artifact (``{}`` when omitted).
            preview_uri: Stored on the Artifact as-is.
            cleanup: Forwarded to the blob store as ``keep_source=not cleanup``.

        Returns:
            The new Artifact; also remembered as ``self.last_artifact``.
        """
        sha, nbytes = await to_thread(_sha256_file, path)
        ext = os.path.splitext(path)[1]
        key = os.path.join("cas", "blobs", f"{sha}{ext}")

        blob_uri = await self._blob.put_file(path, key=key, mime=None, keep_source=not cleanup)

        eff_labels = self._augment_labels_with_filename(
            labels,
            suggested_uri=suggested_uri,
            path=path,
        )

        a = Artifact(
            artifact_id=sha,
            uri=blob_uri,
            kind=kind,
            bytes=nbytes,
            sha256=sha,
            mime=None,  # callers can fill in if desired
            run_id=run_id,
            graph_id=graph_id,
            node_id=node_id,
            tool_name=tool_name,
            tool_version=tool_version,
            created_at=_now_iso(),
            labels=eff_labels,
            metrics=metrics or {},
            preview_uri=preview_uri,
            pinned=pin,
        )
        self.last_artifact = a
        return a

    # ---------- streaming writer ----------
    @asynccontextmanager
    async def open_writer(
        self,
        *,
        kind: str,
        run_id: str,
        graph_id: str,
        node_id: str,
        tool_name: str,
        tool_version: str,
        planned_ext: str | None = None,
        pin: bool = False,
    ) -> AsyncIterator[Any]:
        """Async context manager yielding a writer for streaming an artifact.

        The yielded object exposes ``write(chunk)``, ``add_labels(...)``,
        ``add_metrics(...)`` and ``tmp_path``. When the ``async with`` body
        finishes normally the staged file is ingested and the resulting
        Artifact is available as ``writer.artifact``.

        On any error (including cancellation) the staged file is removed on a
        best-effort basis and the *original* exception is re-raised.
        """
        staged_path = await self.plan_staging_path(planned_ext or "")

        class _Writer:
            """Helper class for streaming writes to a temp file."""

            def __init__(self, path: str, f: BinaryIO):
                self.tmp_path = path
                self._f = f
                self._labels: dict[str, str] = {}
                self._metrics: dict[str, float] = {}
                self.artifact: Artifact | None = None  # filled after finalize

            def write(self, chunk: bytes) -> None:
                self._f.write(chunk)

            def add_labels(self, labels: dict[str, str]) -> None:
                self._labels.update(labels or {})

            def add_metrics(self, metrics: dict[str, float]) -> None:
                self._metrics.update(metrics or {})

        try:
            # The file stays open for the duration of the caller's writes and
            # is closed when the with-block exits, before ingestion.
            with open(staged_path, "wb") as f:
                writer = _Writer(staged_path, f)
                # Yield to caller; they can await inside and call writer.write(...)
                yield writer

            # Now ingest the staged file into CAS and create the Artifact
            writer.artifact = await self.ingest_staged_file(
                staged_path=staged_path,
                kind=kind,
                run_id=run_id,
                graph_id=graph_id,
                node_id=node_id,
                tool_name=tool_name,
                tool_version=tool_version,
                pin=pin,
                labels=writer._labels,
                metrics=writer._metrics,
            )
        except BaseException:
            # Best-effort cleanup; _remove_staged never raises, so the bare
            # raise below always propagates the caller's original exception
            # instead of a secondary deletion error.
            self._remove_staged(staged_path, context="open_writer")
            raise

    async def ingest_staged_file(
        self,
        *,
        staged_path: str,
        kind: str,
        run_id: str,
        graph_id: str,
        node_id: str,
        tool_name: str,
        tool_version: str,
        pin: bool = False,
        labels: dict | None = None,
        metrics: dict | None = None,
        preview_uri: str | None = None,
        suggested_uri: str | None = None,
    ) -> Artifact:
        """Store a staged file via ``save_file`` and make sure the staged copy is gone.

        Parameters mirror ``save_file``; ``staged_path`` is passed as ``path``.
        Failure to delete the staged file is logged, not raised.
        """
        # just delegate to save_file (same semantics)
        a = await self.save_file(
            path=staged_path,
            kind=kind,
            run_id=run_id,
            graph_id=graph_id,
            node_id=node_id,
            tool_name=tool_name,
            tool_version=tool_version,
            suggested_uri=suggested_uri,
            pin=pin,
            labels=labels,
            metrics=metrics,
            preview_uri=preview_uri,
        )
        # A file that is already gone is not an error worth a warning.
        self._remove_staged(staged_path, context="ingest_staged_file")
        return a

    async def ingest_directory(
        self,
        *,
        staged_dir: str,
        kind: str = "dataset",
        run_id: str,
        graph_id: str,
        node_id: str,
        tool_name: str,
        tool_version: str,
        include: list[str] | None = None,
        exclude: list[str] | None = None,
        index_children: bool = False,  # TODO: use later for per-file artifacts
        pin: bool = False,
        labels: dict | None = None,
        metrics: dict | None = None,
        suggested_uri: str | None = None,
        archive: bool = False,
        archive_name: str = "bundle.tar.gz",
        cleanup: bool = True,
        store: str | None = None,  # "archive" | "manifest"
    ) -> Artifact:
        """Ingest a local directory as a single tree artifact.

        A ``manifest.json`` is always written under ``cas/trees/<tree_sha>/``.
        In ``"archive"`` mode a tar.gz of the manifest entries is uploaded next
        to it and its URI becomes the Artifact's ``preview_uri``.

        Args:
            staged_dir: Existing local directory to ingest.
            include, exclude: Forwarded to ``_tree_manifest_and_hash``.
            index_children: Accepted but currently unused.
            archive: Used only when ``store`` is None (True -> "archive").
            archive_name: Blob name of the archive; also the fallback source
                for the ``filename`` label when ``suggested_uri`` is empty.
            cleanup: When true, ``staged_dir`` is removed afterwards
                (best-effort).
            store: ``"archive"`` or ``"manifest"``.

        Returns:
            The directory Artifact; also remembered as ``self.last_artifact``.

        Raises:
            ValueError: ``staged_dir`` is not a directory, or ``store`` is not
                a known mode. Both are checked before anything is uploaded.
        """
        if not os.path.isdir(staged_dir):
            raise ValueError(f"ingest_directory: not a directory: {staged_dir}")

        if store is None:
            store = "archive" if archive else "manifest"
        # Validate up front so a bad mode cannot leave an orphan manifest blob.
        if store not in ("archive", "manifest"):
            raise ValueError(f"unknown store mode: {store}")

        manifest_entries, tree_sha = await to_thread(
            _tree_manifest_and_hash, staged_dir, include, exclude
        )

        # manifest blob
        manifest_key = os.path.join("cas", "trees", tree_sha, "manifest.json")

        def _dump_manifest() -> bytes:
            return json.dumps(
                {
                    "files": manifest_entries,
                    "created_at": _now_iso(),
                    "tool_name": tool_name,
                    "tool_version": tool_version,
                },
                indent=2,
            ).encode("utf-8")

        # manifest URI -> use it in future if needed
        _ = await self._blob.put_bytes(
            _dump_manifest(),
            key=manifest_key,
            ext=".json",
            mime="application/json",
        )

        archive_uri: str | None = None
        if store == "archive":
            # build tar.gz locally, then upload
            archive_path = os.path.join(self._staging_dir, f"{tree_sha}.tar.gz")

            def _make_tar() -> str:
                import tarfile

                with tarfile.open(archive_path, mode="w:gz") as tar:
                    for e in sorted(manifest_entries, key=lambda x: x["path"]):
                        abs_file = os.path.join(staged_dir, e["path"])
                        tar.add(abs_file, arcname=e["path"])
                return archive_path

            try:
                await to_thread(_make_tar)
                archive_key = os.path.join("cas", "trees", tree_sha, archive_name)
                archive_uri = await self._blob.put_file(
                    archive_path,
                    key=archive_key,
                    mime="application/gzip",
                )
            finally:
                # The local tarball is only a build product; drop it whether
                # the upload succeeded, failed, or already consumed the file.
                self._remove_staged(archive_path, context="ingest_directory")
        # "manifest" mode needs no extra upload: the directory is represented
        # by the manifest blob alone.

        # Directory "handle" URI: base_uri + prefix
        dir_prefix = os.path.join("cas", "trees", tree_sha)
        # NOTE: we don't require an actual object at dir_prefix; it's a logical handle.
        dir_uri = self.base_uri.rstrip("/") + "/" + dir_prefix.replace(os.sep, "/")

        total_bytes = sum(e["bytes"] for e in manifest_entries)

        eff_labels = self._augment_labels_with_filename(
            labels,
            suggested_uri=suggested_uri or archive_name,
            path=staged_dir,
        )

        a = Artifact(
            artifact_id=tree_sha,
            uri=dir_uri,
            kind=kind,
            bytes=total_bytes,
            sha256=tree_sha,
            mime="application/vnd.aethergraph.bundle+dir",
            run_id=run_id,
            graph_id=graph_id,
            node_id=node_id,
            tool_name=tool_name,
            tool_version=tool_version,
            created_at=_now_iso(),
            labels=eff_labels,
            metrics=metrics or {},
            preview_uri=archive_uri,
            pinned=pin,
        )
        self.last_artifact = a

        if cleanup:
            # rmtree with ignore_errors never raises, so detect failure by
            # checking whether the directory survived; run off the event loop.
            await to_thread(shutil.rmtree, staged_dir, ignore_errors=True)
            if os.path.exists(staged_dir):
                logger.warning("ingest_directory: failed to cleanup staged dir %s", staged_dir)

        return a

    # ---------- load ----------
    async def load_bytes(self, uri: str) -> bytes:
        """Return the raw bytes stored at ``uri`` (delegates to the blob store)."""
        return await self._blob.load_bytes(uri)

    async def load_text(self, uri: str, *, encoding: str = "utf-8", errors: str = "strict") -> str:
        """Return the blob at ``uri`` decoded as text (delegates to the blob store)."""
        return await self._blob.load_text(uri, encoding=encoding, errors=errors)

    async def load_artifact_bytes(self, uri: str) -> bytes:
        """Alias of ``load_bytes``."""
        return await self._blob.load_bytes(uri)

    async def load_artifact_dir(self, uri: str) -> str:
        """
        Normalize a directory artifact to a local path.

        FS backend can simply return the directory; S3 backend
        will download files described by manifest into a temp dir.
        For now, implement generic: if it's already a file:// path,
        just return as-is; otherwise, ArtifactFacade can add a helper
        `as_local_dir(artifact)` that handles S3 download.

        The current implementation returns ``uri`` unchanged.
        """
        return uri

    async def load_artifact(self, uri: str) -> bytes | str:
        """Load an artifact by URI.

        Directory handles are returned via ``load_artifact_dir``; anything
        else is loaded as blob bytes. A URI counts as a directory handle when
        it ends with ``/`` or has the ``<base>/cas/trees/<id>`` shape that
        ``ingest_directory`` produces (which has no trailing slash).
        """
        # Compatibility: if directory URI, return as-is, else load blob content
        if uri.endswith("/") or self._is_tree_handle(uri):
            # directory handle URI
            return await self.load_artifact_dir(uri)
        # else, blob URI
        return await self._blob.load_bytes(uri)

    # ---------- cleanup ----------
    async def cleanup_tmp(self, max_age_hours: int = 24) -> None:
        """Delete staging entries whose mtime is older than ``max_age_hours``.

        Walks the staging directory recursively; old files are unlinked and
        old directories removed with their contents. Per-entry filesystem
        errors are skipped so one bad entry cannot abort the sweep.
        """
        now = datetime.datetime.now(datetime.timezone.utc).timestamp()

        def _cleanup() -> None:
            # Snapshot the listing first: entries are deleted during the loop,
            # and mutating the tree under a lazy rglob walk is fragile.
            for p in list(Path(self._staging_dir).rglob("*")):
                try:
                    age_h = (now - p.stat().st_mtime) / 3600.0
                    if age_h <= max_age_hours:
                        continue
                    if p.is_file():
                        p.unlink(missing_ok=True)
                    else:
                        shutil.rmtree(p, ignore_errors=True)
                except OSError:
                    # Typically the entry vanished with an already-removed
                    # parent directory; nothing left to do for it.
                    logger.debug("cleanup_tmp: skipped %s", p)

        await to_thread(_cleanup)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from aethergraph.storage.artifacts.cas_store import CASArtifactStore
|
|
4
|
+
from aethergraph.storage.blob.fs_blob import FSBlobStore
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class FSArtifactStore(CASArtifactStore):
    """CAS artifact store rooted at a single local base directory.

    Blobs go through an ``FSBlobStore`` at ``<base_dir>/blobs`` and temporary
    files are staged under ``<base_dir>/staging``.
    """

    def __init__(self, base_dir: str):
        # Resolve once so both sub-directories share the same absolute root.
        root = os.path.abspath(base_dir)
        super().__init__(
            blob=FSBlobStore(os.path.join(root, "blobs")),
            staging_dir=os.path.join(root, "staging"),
        )

    # TODO: Add any FS-specific optimizations if needed
    # Optionally override load_artifact_dir to return actual local dir path if uri is file://cas/trees/...
    # and implement FS-only "pretty" symlinks.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from aethergraph.storage.artifacts.cas_store import CASArtifactStore
|
|
2
|
+
from aethergraph.storage.blob.s3_blob import S3BlobStore
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class S3ArtifactStore(CASArtifactStore):
    """Artifact store whose blobs are kept in an ``S3BlobStore``.

    ``bucket`` and ``prefix`` are forwarded to ``S3BlobStore``;
    ``staging_dir`` is forwarded to the base class unchanged.
    """

    def __init__(self, bucket: str, prefix: str, staging_dir: str):
        super().__init__(
            blob=S3BlobStore(bucket=bucket, prefix=prefix),
            staging_dir=staging_dir,
        )
|
|
10
|
+
|
|
11
|
+
# TODO: Optionally add any S3-specific optimizations if needed
|
|
12
|
+
# - parse tree_sha from uri
|
|
13
|
+
# - download files listed in manifest.json into a local temp dir
|
|
14
|
+
# - return that path
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
from fnmatch import fnmatch
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def to_thread(fn, *a, **k):
    """Run ``fn(*a, **k)`` via ``asyncio.to_thread`` and return its result."""
    pending = asyncio.to_thread(fn, *a, **k)
    return await pending
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ----- helpers ----- NOTE: we have multiple copies of these in different places, consider centralizing -----
|
|
19
|
+
def _now_iso():
|
|
20
|
+
return datetime.now(timezone.utc).isoformat()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _sha256_file(path: str, chunk=1024 * 1024) -> tuple[str, int]:
|
|
24
|
+
"""Return (sha256 hex, size in bytes) of a file."""
|
|
25
|
+
h = hashlib.sha256()
|
|
26
|
+
total = 0
|
|
27
|
+
with open(path, "rb") as f:
|
|
28
|
+
while True:
|
|
29
|
+
b = f.read(chunk)
|
|
30
|
+
if not b:
|
|
31
|
+
break
|
|
32
|
+
h.update(b)
|
|
33
|
+
total += len(b)
|
|
34
|
+
return h.hexdigest(), total
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _content_addr_path(base_dir: str, sha256: str, ext: str | None) -> str:
|
|
38
|
+
"""Return a content-addressed path under base_dir for a given sha256 and optional extension.
|
|
39
|
+
Creates subdirectories as needed.
|
|
40
|
+
|
|
41
|
+
It works as follows:
|
|
42
|
+
- Takes the first 4 characters of the sha256 hash to create two levels of subdirectories.
|
|
43
|
+
- The first two characters form the first subdirectory (sub1).
|
|
44
|
+
- The next two characters form the second subdirectory (sub2).
|
|
45
|
+
- The full sha256 hash, optionally followed by the provided file extension, is used as the filename.
|
|
46
|
+
- Ensures that the target directory exists by creating it if necessary.
|
|
47
|
+
- Returns the full path to the content-addressed file.
|
|
48
|
+
|
|
49
|
+
The final path structure will look like:
|
|
50
|
+
base_dir/sub1/sub2/sha256[.ext]
|
|
51
|
+
"""
|
|
52
|
+
sub1, sub2 = sha256[:2], sha256[2:4]
|
|
53
|
+
fname = sha256 + (ext or "")
|
|
54
|
+
target_dir = os.path.join(base_dir, sub1, sub2)
|
|
55
|
+
os.makedirs(target_dir, exist_ok=True)
|
|
56
|
+
return os.path.join(target_dir, fname)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _walk_dir(root: str, include: list[str] | None, exclude: list[str] | None):
|
|
60
|
+
"""Yield (relpath, abspath) for files under root honoring include/exclude globs."""
|
|
61
|
+
root_p = Path(root)
|
|
62
|
+
for p in root_p.rglob("*"):
|
|
63
|
+
if not p.is_file():
|
|
64
|
+
continue
|
|
65
|
+
rel = str(p.relative_to(root_p)).replace("\\", "/")
|
|
66
|
+
if exclude and any(fnmatch.fnmatch(rel, pat) for pat in exclude):
|
|
67
|
+
continue
|
|
68
|
+
if include and not any(fnmatch.fnmatch(rel, pat) for pat in include):
|
|
69
|
+
continue
|
|
70
|
+
yield rel, str(p)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _tree_manifest_and_hash(root: str, include: list[str] | None, exclude: list[str] | None):
    """Build a deterministic manifest and tree hash for the files under ``root``.

    Returns ``(entries, tree_sha)`` where

    * ``entries`` is ``[{"path": rel, "sha256": sha, "bytes": n}, ...]``,
      sorted by ``path`` so the manifest does not depend on the order in
      which the filesystem happens to list files;
    * ``tree_sha`` is the SHA-256 hex digest over the sorted, UTF-8 encoded
      lines ``"<rel>\\t<sha>\\t<bytes>\\n"`` (tab-separated, one per file).

    ``include`` / ``exclude`` are passed through to ``_walk_dir``.
    """
    entries = []
    for rel, abspath in _walk_dir(root, include, exclude):
        sha, nbytes = _sha256_file(abspath)
        entries.append({"path": rel, "sha256": sha, "bytes": nbytes})

    # Directory traversal order is filesystem-dependent; pin the manifest order.
    entries.sort(key=lambda e: e["path"])

    # The hash input stays sorted by whole line so existing tree hashes
    # remain unchanged.
    lines = sorted(f"{e['path']}\t{e['sha256']}\t{e['bytes']}\n" for e in entries)
    h = hashlib.sha256()
    for line in lines:
        h.update(line.encode("utf-8"))
    return entries, h.hexdigest()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _content_addr_dir_path(base_dir: str, tree_sha: str):
|
|
94
|
+
# content-addressed folder to hold manifest (and optional archive)
|
|
95
|
+
sub1, sub2 = tree_sha[:2], tree_sha[2:4]
|
|
96
|
+
target_dir = os.path.join(base_dir, sub1, sub2, tree_sha)
|
|
97
|
+
os.makedirs(target_dir, exist_ok=True)
|
|
98
|
+
return target_dir
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _write_json(path: str, obj: dict | list):
|
|
102
|
+
with open(path, "w", encoding="utf-8") as f:
|
|
103
|
+
json.dump(obj, f, ensure_ascii=False, separators=(",", ":"))
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _maybe_cleanup_tmp_parent(tmp_root: str, path: str):
|
|
107
|
+
"""Remove empty parent dirs strictly under tmp_root (never _tmp itself)."""
|
|
108
|
+
try:
|
|
109
|
+
parent = os.path.dirname(os.path.abspath(path))
|
|
110
|
+
tmp_root_abs = os.path.abspath(tmp_root)
|
|
111
|
+
|
|
112
|
+
# Only operate if `parent` is inside tmp_root
|
|
113
|
+
while (
|
|
114
|
+
os.path.commonpath([parent, tmp_root_abs]) == tmp_root_abs
|
|
115
|
+
and os.path.normcase(parent)
|
|
116
|
+
!= os.path.normcase(tmp_root_abs) # don't delete _tmp itself
|
|
117
|
+
):
|
|
118
|
+
try:
|
|
119
|
+
os.rmdir(parent) # only removes if empty
|
|
120
|
+
except OSError:
|
|
121
|
+
break
|
|
122
|
+
parent = os.path.dirname(parent)
|
|
123
|
+
except Exception:
|
|
124
|
+
pass
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
|
|
6
|
+
from aethergraph.contracts.storage.blob_store import BlobStore
|
|
7
|
+
from aethergraph.storage.fs_utils import _from_uri_or_path, _to_file_uri, to_thread
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FSBlobStore(BlobStore):
    """Blob store that keeps each blob as a file under a single base directory.

    Keys are joined onto ``base_dir`` with ``os.path.join``. URIs returned to
    callers come from ``_to_file_uri`` and are turned back into paths with
    ``_from_uri_or_path`` (both live in ``aethergraph.storage.fs_utils``;
    presumably ``file://`` URIs — confirm there).
    """

    def __init__(self, base_dir: str):
        """Store the absolute form of ``base_dir`` and create the directory if missing."""
        self.base_dir = os.path.abspath(base_dir)
        os.makedirs(self.base_dir, exist_ok=True)

    @property
    def base_uri(self) -> str:
        """URI form of the base directory, as produced by ``_to_file_uri``."""
        return _to_file_uri(self.base_dir)

    def _resolve_key(self, key: str | None, ext: str | None) -> str:
        """Return ``key`` unchanged, or a generated ``blobs/<uuid4 hex><ext>`` key when it is ``None``."""
        if key is not None:
            return key

        import uuid

        generated = uuid.uuid4().hex + (ext or "")
        return os.path.join("blobs", generated)

    async def put_bytes(
        self,
        data: bytes,
        *,
        key: str | None = None,
        ext: str | None = None,
        mime: str | None = None,
    ) -> str:
        """Write ``data`` to the file for ``key`` and return its URI.

        A key is generated (using ``ext``) when none is given. ``mime`` is
        accepted but not used by this implementation.
        """
        target = os.path.join(self.base_dir, self._resolve_key(key, ext))
        os.makedirs(os.path.dirname(target), exist_ok=True)

        def _write() -> str:
            with open(target, "wb") as out:
                out.write(data)
            return _to_file_uri(target)

        return await to_thread(_write)

    async def put_file(
        self,
        path: str,
        *,
        key: str | None = None,
        mime: str | None = None,
        keep_source: bool = False,
    ) -> str:
        """Place the file at ``path`` into the store and return its URI.

        The source is copied (``shutil.copy2``) when ``keep_source`` is true
        and moved (``shutil.move``) otherwise. When no key is given, one is
        generated with the source file's extension. ``mime`` is accepted but
        not used by this implementation.
        """
        source_ext = os.path.splitext(path)[1]
        dst = os.path.join(self.base_dir, self._resolve_key(key, source_ext))
        os.makedirs(os.path.dirname(dst), exist_ok=True)

        def _move() -> str:
            transfer = shutil.copy2 if keep_source else shutil.move
            transfer(os.path.abspath(path), dst)
            return _to_file_uri(dst)

        return await to_thread(_move)

    async def load_bytes(self, uri: str) -> bytes:
        """Read and return the full content of the file that ``uri`` resolves to."""
        local_path = _from_uri_or_path(uri)

        def _read() -> bytes:
            with open(local_path, "rb") as src:
                return src.read()

        return await to_thread(_read)

    async def load_text(
        self,
        uri: str,
        *,
        encoding: str = "utf-8",
        errors: str = "strict",
    ) -> str:
        """Load the blob at ``uri`` and decode it with ``encoding`` / ``errors``."""
        raw = await self.load_bytes(uri)
        return raw.decode(encoding, errors)
|