aethergraph 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__init__.py +49 -0
- aethergraph/config/__init__.py +0 -0
- aethergraph/config/config.py +121 -0
- aethergraph/config/context.py +16 -0
- aethergraph/config/llm.py +26 -0
- aethergraph/config/loader.py +60 -0
- aethergraph/config/runtime.py +9 -0
- aethergraph/contracts/errors/errors.py +44 -0
- aethergraph/contracts/services/artifacts.py +142 -0
- aethergraph/contracts/services/channel.py +72 -0
- aethergraph/contracts/services/continuations.py +23 -0
- aethergraph/contracts/services/eventbus.py +12 -0
- aethergraph/contracts/services/kv.py +24 -0
- aethergraph/contracts/services/llm.py +17 -0
- aethergraph/contracts/services/mcp.py +22 -0
- aethergraph/contracts/services/memory.py +108 -0
- aethergraph/contracts/services/resume.py +28 -0
- aethergraph/contracts/services/state_stores.py +33 -0
- aethergraph/contracts/services/wakeup.py +28 -0
- aethergraph/core/execution/base_scheduler.py +77 -0
- aethergraph/core/execution/forward_scheduler.py +777 -0
- aethergraph/core/execution/global_scheduler.py +634 -0
- aethergraph/core/execution/retry_policy.py +22 -0
- aethergraph/core/execution/step_forward.py +411 -0
- aethergraph/core/execution/step_result.py +18 -0
- aethergraph/core/execution/wait_types.py +72 -0
- aethergraph/core/graph/graph_builder.py +192 -0
- aethergraph/core/graph/graph_fn.py +219 -0
- aethergraph/core/graph/graph_io.py +67 -0
- aethergraph/core/graph/graph_refs.py +154 -0
- aethergraph/core/graph/graph_spec.py +115 -0
- aethergraph/core/graph/graph_state.py +59 -0
- aethergraph/core/graph/graphify.py +128 -0
- aethergraph/core/graph/interpreter.py +145 -0
- aethergraph/core/graph/node_handle.py +33 -0
- aethergraph/core/graph/node_spec.py +46 -0
- aethergraph/core/graph/node_state.py +63 -0
- aethergraph/core/graph/task_graph.py +747 -0
- aethergraph/core/graph/task_node.py +82 -0
- aethergraph/core/graph/utils.py +37 -0
- aethergraph/core/graph/visualize.py +239 -0
- aethergraph/core/runtime/ad_hoc_context.py +61 -0
- aethergraph/core/runtime/base_service.py +153 -0
- aethergraph/core/runtime/bind_adapter.py +42 -0
- aethergraph/core/runtime/bound_memory.py +69 -0
- aethergraph/core/runtime/execution_context.py +220 -0
- aethergraph/core/runtime/graph_runner.py +349 -0
- aethergraph/core/runtime/lifecycle.py +26 -0
- aethergraph/core/runtime/node_context.py +203 -0
- aethergraph/core/runtime/node_services.py +30 -0
- aethergraph/core/runtime/recovery.py +159 -0
- aethergraph/core/runtime/run_registration.py +33 -0
- aethergraph/core/runtime/runtime_env.py +157 -0
- aethergraph/core/runtime/runtime_registry.py +32 -0
- aethergraph/core/runtime/runtime_services.py +224 -0
- aethergraph/core/runtime/wakeup_watcher.py +40 -0
- aethergraph/core/tools/__init__.py +10 -0
- aethergraph/core/tools/builtins/channel_tools.py +194 -0
- aethergraph/core/tools/builtins/toolset.py +134 -0
- aethergraph/core/tools/toolkit.py +510 -0
- aethergraph/core/tools/waitable.py +109 -0
- aethergraph/plugins/channel/__init__.py +0 -0
- aethergraph/plugins/channel/adapters/__init__.py +0 -0
- aethergraph/plugins/channel/adapters/console.py +106 -0
- aethergraph/plugins/channel/adapters/file.py +102 -0
- aethergraph/plugins/channel/adapters/slack.py +285 -0
- aethergraph/plugins/channel/adapters/telegram.py +302 -0
- aethergraph/plugins/channel/adapters/webhook.py +104 -0
- aethergraph/plugins/channel/adapters/webui.py +134 -0
- aethergraph/plugins/channel/routes/__init__.py +0 -0
- aethergraph/plugins/channel/routes/console_routes.py +86 -0
- aethergraph/plugins/channel/routes/slack_routes.py +49 -0
- aethergraph/plugins/channel/routes/telegram_routes.py +26 -0
- aethergraph/plugins/channel/routes/webui_routes.py +136 -0
- aethergraph/plugins/channel/utils/__init__.py +0 -0
- aethergraph/plugins/channel/utils/slack_utils.py +278 -0
- aethergraph/plugins/channel/utils/telegram_utils.py +324 -0
- aethergraph/plugins/channel/websockets/slack_ws.py +68 -0
- aethergraph/plugins/channel/websockets/telegram_polling.py +151 -0
- aethergraph/plugins/mcp/fs_server.py +128 -0
- aethergraph/plugins/mcp/http_server.py +101 -0
- aethergraph/plugins/mcp/ws_server.py +180 -0
- aethergraph/plugins/net/http.py +10 -0
- aethergraph/plugins/utils/data_io.py +359 -0
- aethergraph/runner/__init__.py +5 -0
- aethergraph/runtime/__init__.py +62 -0
- aethergraph/server/__init__.py +3 -0
- aethergraph/server/app_factory.py +84 -0
- aethergraph/server/start.py +122 -0
- aethergraph/services/__init__.py +10 -0
- aethergraph/services/artifacts/facade.py +284 -0
- aethergraph/services/artifacts/factory.py +35 -0
- aethergraph/services/artifacts/fs_store.py +656 -0
- aethergraph/services/artifacts/jsonl_index.py +123 -0
- aethergraph/services/artifacts/paths.py +23 -0
- aethergraph/services/artifacts/sqlite_index.py +209 -0
- aethergraph/services/artifacts/utils.py +124 -0
- aethergraph/services/auth/dev.py +16 -0
- aethergraph/services/channel/channel_bus.py +293 -0
- aethergraph/services/channel/factory.py +44 -0
- aethergraph/services/channel/session.py +511 -0
- aethergraph/services/channel/wait_helpers.py +57 -0
- aethergraph/services/clock/clock.py +9 -0
- aethergraph/services/container/default_container.py +320 -0
- aethergraph/services/continuations/continuation.py +56 -0
- aethergraph/services/continuations/factory.py +34 -0
- aethergraph/services/continuations/stores/fs_store.py +264 -0
- aethergraph/services/continuations/stores/inmem_store.py +95 -0
- aethergraph/services/eventbus/inmem.py +21 -0
- aethergraph/services/features/static.py +10 -0
- aethergraph/services/kv/ephemeral.py +90 -0
- aethergraph/services/kv/factory.py +27 -0
- aethergraph/services/kv/layered.py +41 -0
- aethergraph/services/kv/sqlite_kv.py +128 -0
- aethergraph/services/llm/factory.py +157 -0
- aethergraph/services/llm/generic_client.py +542 -0
- aethergraph/services/llm/providers.py +3 -0
- aethergraph/services/llm/service.py +105 -0
- aethergraph/services/logger/base.py +36 -0
- aethergraph/services/logger/compat.py +50 -0
- aethergraph/services/logger/formatters.py +106 -0
- aethergraph/services/logger/std.py +203 -0
- aethergraph/services/mcp/helpers.py +23 -0
- aethergraph/services/mcp/http_client.py +70 -0
- aethergraph/services/mcp/mcp_tools.py +21 -0
- aethergraph/services/mcp/registry.py +14 -0
- aethergraph/services/mcp/service.py +100 -0
- aethergraph/services/mcp/stdio_client.py +70 -0
- aethergraph/services/mcp/ws_client.py +115 -0
- aethergraph/services/memory/bound.py +106 -0
- aethergraph/services/memory/distillers/episode.py +116 -0
- aethergraph/services/memory/distillers/rolling.py +74 -0
- aethergraph/services/memory/facade.py +633 -0
- aethergraph/services/memory/factory.py +78 -0
- aethergraph/services/memory/hotlog_kv.py +27 -0
- aethergraph/services/memory/indices.py +74 -0
- aethergraph/services/memory/io_helpers.py +72 -0
- aethergraph/services/memory/persist_fs.py +40 -0
- aethergraph/services/memory/resolver.py +152 -0
- aethergraph/services/metering/noop.py +4 -0
- aethergraph/services/prompts/file_store.py +41 -0
- aethergraph/services/rag/chunker.py +29 -0
- aethergraph/services/rag/facade.py +593 -0
- aethergraph/services/rag/index/base.py +27 -0
- aethergraph/services/rag/index/faiss_index.py +121 -0
- aethergraph/services/rag/index/sqlite_index.py +134 -0
- aethergraph/services/rag/index_factory.py +52 -0
- aethergraph/services/rag/parsers/md.py +7 -0
- aethergraph/services/rag/parsers/pdf.py +14 -0
- aethergraph/services/rag/parsers/txt.py +7 -0
- aethergraph/services/rag/utils/hybrid.py +39 -0
- aethergraph/services/rag/utils/make_fs_key.py +62 -0
- aethergraph/services/redactor/simple.py +16 -0
- aethergraph/services/registry/key_parsing.py +44 -0
- aethergraph/services/registry/registry_key.py +19 -0
- aethergraph/services/registry/unified_registry.py +185 -0
- aethergraph/services/resume/multi_scheduler_resume_bus.py +65 -0
- aethergraph/services/resume/router.py +73 -0
- aethergraph/services/schedulers/registry.py +41 -0
- aethergraph/services/secrets/base.py +7 -0
- aethergraph/services/secrets/env.py +8 -0
- aethergraph/services/state_stores/externalize.py +135 -0
- aethergraph/services/state_stores/graph_observer.py +131 -0
- aethergraph/services/state_stores/json_store.py +67 -0
- aethergraph/services/state_stores/resume_policy.py +119 -0
- aethergraph/services/state_stores/serialize.py +249 -0
- aethergraph/services/state_stores/utils.py +91 -0
- aethergraph/services/state_stores/validate.py +78 -0
- aethergraph/services/tracing/noop.py +18 -0
- aethergraph/services/waits/wait_registry.py +91 -0
- aethergraph/services/wakeup/memory_queue.py +57 -0
- aethergraph/services/wakeup/scanner_producer.py +56 -0
- aethergraph/services/wakeup/worker.py +31 -0
- aethergraph/tools/__init__.py +25 -0
- aethergraph/utils/optdeps.py +8 -0
- aethergraph-0.1.0a1.dist-info/METADATA +410 -0
- aethergraph-0.1.0a1.dist-info/RECORD +182 -0
- aethergraph-0.1.0a1.dist-info/WHEEL +5 -0
- aethergraph-0.1.0a1.dist-info/entry_points.txt +2 -0
- aethergraph-0.1.0a1.dist-info/licenses/LICENSE +176 -0
- aethergraph-0.1.0a1.dist-info/licenses/NOTICE +31 -0
- aethergraph-0.1.0a1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# aethergraph/artifacts/index_jsonl.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import threading
|
|
8
|
+
from typing import Literal
|
|
9
|
+
|
|
10
|
+
from aethergraph.contracts.services.artifacts import Artifact
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class JsonlArtifactIndexSync:
    """Append-only JSONL artifact index for small to medium scale use cases.

    Records are appended to ``path``; on load, later lines overwrite earlier
    ones (last write wins) into an in-memory map keyed by ``artifact_id``.
    Not suitable for very large scale (millions of artifacts) due to linear scans.
    """

    def __init__(self, path: str, occurrences_path: str | None = None):
        self.path = path
        # Occurrence log defaults to "<path-without-ext>_occurrences.jsonl".
        self.occ_path = occurrences_path or (os.path.splitext(path)[0] + "_occurrences.jsonl")
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # small in-memory map for quick lookup / dedup of last write
        self._by_id = {}
        self._lock = threading.Lock()
        if os.path.exists(self.path):
            with open(self.path, encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue
                    rec = json.loads(line)
                    # Last record per artifact_id wins.
                    self._by_id[rec["artifact_id"]] = rec

    def upsert(self, a: Artifact) -> None:
        """Upsert an artifact record (appends a new JSONL line)."""
        with self._lock:
            rec = a.to_dict()
            self._by_id[a.artifact_id] = rec
            with open(self.path, "a", encoding="utf-8") as f:
                f.write(json.dumps(rec) + "\n")

    def list_for_run(self, run_id: str) -> list[Artifact]:
        """List all artifacts for a given run_id."""
        # Snapshot under the lock so a concurrent upsert cannot mutate the
        # map mid-iteration.
        with self._lock:
            rows = [r for r in self._by_id.values() if r.get("run_id") == run_id]
        return [Artifact(**r) for r in rows]

    def search(
        self,
        *,
        kind: str | None = None,
        labels: dict[str, str] | None = None,
        metric: str | None = None,
        mode: Literal["max", "min"] | None = None,
    ) -> list[Artifact]:
        """Search artifacts by kind, labels (exact match), and metric (min/max)."""
        with self._lock:
            rows = list(self._by_id.values())
        if kind:
            rows = [r for r in rows if r.get("kind") == kind]
        if labels:
            for k, v in labels.items():
                rows = [r for r in rows if r.get("labels", {}).get(k) == v]
        if metric and mode:
            # Keep only rows carrying the metric, then rank best-first.
            rows = [r for r in rows if metric in r.get("metrics", {})]
            rows.sort(key=lambda r: r["metrics"][metric], reverse=(mode == "max"))
        return [Artifact(**r) for r in rows]

    def best(
        self,
        *,
        kind: str,
        metric: str,
        mode: Literal["max", "min"],
        filters: dict[str, str] | None = None,
    ) -> Artifact | None:
        """Get the best artifact by metric with optional label filters."""
        rows = self.search(kind=kind, labels=filters, metric=metric, mode=mode)
        return rows[0] if rows else None

    def pin(self, artifact_id: str, pinned: bool = True) -> None:
        """Pin or unpin an artifact by artifact_id (no-op for unknown ids).

        Fix: now holds the same lock as upsert(), since it mutates _by_id and
        appends to the same JSONL file; previously this raced with upsert().
        """
        with self._lock:
            rec = self._by_id.get(artifact_id)
            if rec is None:
                return
            rec["pinned"] = bool(pinned)
            with open(self.path, "a", encoding="utf-8") as f:
                f.write(json.dumps(rec) + "\n")

    def record_occurrence(self, a: Artifact, extra_labels: dict | None = None):
        """
        Append-only log that this artifact appeared in this run/node at this time.
        Keeps lineage even if bytes are identical across runs.
        """
        row = {
            "artifact_id": a.artifact_id,
            "run_id": a.run_id,
            "graph_id": a.graph_id,
            "node_id": a.node_id,
            "tool_name": a.tool_name,
            "tool_version": a.tool_version,
            "created_at": a.created_at,
            "labels": a.labels | (extra_labels or {}),
        }
        with open(self.occ_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(row) + "\n")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class JsonlArtifactIndex:  # implements AsyncArtifactIndex
    """Async façade over :class:`JsonlArtifactIndexSync`.

    Each call is delegated to the synchronous index on a worker thread via
    ``asyncio.to_thread`` so the event loop is never blocked by file I/O.
    """

    def __init__(self, path: str, occurrences_path: str | None = None):
        # All state lives in the wrapped synchronous index.
        self._sync = JsonlArtifactIndexSync(path, occurrences_path)

    async def upsert(self, a: Artifact) -> None:
        """Insert or update one artifact record."""
        return await asyncio.to_thread(self._sync.upsert, a)

    async def list_for_run(self, run_id: str) -> list[Artifact]:
        """All artifacts recorded under *run_id*."""
        result = await asyncio.to_thread(self._sync.list_for_run, run_id)
        return result

    async def search(self, **kw) -> list[Artifact]:
        """Delegate a filtered search; see the sync index for parameters."""
        result = await asyncio.to_thread(self._sync.search, **kw)
        return result

    async def best(self, **kw) -> Artifact | None:
        """Best artifact by metric, or None when nothing matches."""
        return await asyncio.to_thread(self._sync.best, **kw)

    async def pin(self, artifact_id: str, pinned: bool = True) -> None:
        """Pin or unpin one artifact by id."""
        return await asyncio.to_thread(self._sync.pin, artifact_id, pinned)

    async def record_occurrence(self, a: Artifact, extra_labels: dict | None = None):
        """Append one occurrence row to the lineage log."""
        return await asyncio.to_thread(self._sync.record_occurrence, a, extra_labels)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# at top of the file
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from urllib.parse import unquote, urlparse
|
|
4
|
+
from urllib.request import url2pathname
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _from_uri_or_path(s: str | Path) -> Path:
|
|
8
|
+
"""Turn a file:// URI or plain path into a local Path (Windows-safe)."""
|
|
9
|
+
if isinstance(s, Path):
|
|
10
|
+
return s
|
|
11
|
+
if not isinstance(s, str):
|
|
12
|
+
raise TypeError(f"Expected str/Path, got {type(s)}")
|
|
13
|
+
if "://" not in s:
|
|
14
|
+
return Path(s)
|
|
15
|
+
u = urlparse(s)
|
|
16
|
+
if (u.scheme or "").lower() != "file":
|
|
17
|
+
# Not a local FS location; return a Path of the original to keep type uniform
|
|
18
|
+
# Callers can decide what to do; or raise if you want to enforce FS-only.
|
|
19
|
+
return Path(s)
|
|
20
|
+
# UNC: file://server/share/path -> \\server\share\path
|
|
21
|
+
# Local: file:///C:/path -> C:\path
|
|
22
|
+
raw = (f"//{u.netloc}{u.path}") if u.netloc else u.path
|
|
23
|
+
return Path(url2pathname(unquote(raw)))
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# aethergraph/artifacts/index_sqlite.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import json
|
|
6
|
+
import sqlite3
|
|
7
|
+
from typing import Literal
|
|
8
|
+
|
|
9
|
+
from aethergraph.contracts.services.artifacts import Artifact
|
|
10
|
+
from aethergraph.services.artifacts.jsonl_index import JsonlArtifactIndexSync
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SqliteArtifactIndexSync:
    """SQLite-based artifact index for medium to large scale use cases.

    Suitable for larger scale (millions of artifacts) with indexing.
    Every operation opens a short-lived connection; connections are closed in
    ``finally`` blocks so they are not leaked when a query raises (the
    previous code left the handle open on any exception).
    """

    def __init__(self, db_path: str):
        self.db_path = db_path
        self._init()

    def _init(self):
        """Create the artifacts table and lookup indexes if they do not exist."""
        con = sqlite3.connect(self.db_path)
        try:
            cur = con.cursor()
            cur.execute("""
            CREATE TABLE IF NOT EXISTS artifacts (
              artifact_id TEXT PRIMARY KEY,
              uri TEXT NOT NULL,
              kind TEXT NOT NULL,
              bytes INTEGER,
              sha256 TEXT,
              mime TEXT,
              run_id TEXT,
              graph_id TEXT,
              node_id TEXT,
              tool_name TEXT,
              tool_version TEXT,
              created_at TEXT,
              labels TEXT,
              metrics TEXT,
              params TEXT,
              preview_uri TEXT,
              pinned INTEGER DEFAULT 0
            )""")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_kind ON artifacts(kind)")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_run ON artifacts(run_id)")
            con.commit()
        finally:
            con.close()

    def upsert(self, a: Artifact) -> None:
        """Insert or replace the row for ``a.artifact_id``."""
        con = sqlite3.connect(self.db_path)
        try:
            cur = con.cursor()
            cur.execute(
                """
                INSERT INTO artifacts
                (artifact_id, uri, kind, bytes, sha256, mime, run_id, graph_id, node_id,
                 tool_name, tool_version, created_at, labels, metrics, params, preview_uri, pinned)
                VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                ON CONFLICT(artifact_id) DO UPDATE SET
                  uri=excluded.uri, kind=excluded.kind, bytes=excluded.bytes,
                  sha256=excluded.sha256, mime=excluded.mime, run_id=excluded.run_id,
                  graph_id=excluded.graph_id, node_id=excluded.node_id,
                  tool_name=excluded.tool_name, tool_version=excluded.tool_version,
                  created_at=excluded.created_at, labels=excluded.labels,
                  metrics=excluded.metrics, params=excluded.params,
                  preview_uri=excluded.preview_uri, pinned=excluded.pinned
                """,
                (
                    a.artifact_id,
                    a.uri,
                    a.kind,
                    a.bytes,
                    a.sha256,
                    a.mime,
                    a.run_id,
                    a.graph_id,
                    a.node_id,
                    a.tool_name,
                    a.tool_version,
                    a.created_at,
                    # dict-valued fields are stored as JSON text columns
                    json.dumps(a.labels),
                    json.dumps(a.metrics),
                    json.dumps(a.params),
                    a.preview_uri,
                    1 if a.pinned else 0,
                ),
            )
            con.commit()
        finally:
            con.close()

    def list_for_run(self, run_id: str) -> list[Artifact]:
        """All artifacts for *run_id*, ordered by created_at."""
        con = sqlite3.connect(self.db_path)
        try:
            cur = con.cursor()
            cur.execute("SELECT * FROM artifacts WHERE run_id=? ORDER BY created_at", (run_id,))
            rows = cur.fetchall()
        finally:
            con.close()
        return [self._row_to_artifact(r) for r in rows]

    def search(
        self,
        *,
        kind: str | None = None,
        labels: dict[str, str] | None = None,
        metric: str | None = None,
        mode: Literal["max", "min"] | None = None,
    ) -> list[Artifact]:
        """Search by kind and exact label matches; optionally rank by metric."""
        con = sqlite3.connect(self.db_path)
        try:
            cur = con.cursor()
            q = "SELECT * FROM artifacts WHERE 1=1"
            args = []
            if kind:
                q += " AND kind=?"
                args.append(kind)
            # naive label filter: all requested label kv must be contained in labels json
            # (uses SQLite JSON1 json_extract; assumes simple keys without '.')
            if labels:
                for k, v in labels.items():
                    q += " AND json_extract(labels, ?) = ?"
                    args += (f"$.{k}", v)
            cur.execute(q, args)
            rows = [self._row_to_artifact(r) for r in cur.fetchall()]
        finally:
            con.close()
        if metric and mode and rows:
            rows = [r for r in rows if r.metrics and metric in r.metrics]
            reverse = mode == "max"
            rows.sort(key=lambda a: a.metrics.get(metric, float("-inf")), reverse=reverse)
        return rows

    def best(
        self,
        *,
        kind: str,
        metric: str,
        mode: Literal["max", "min"],
        filters: dict[str, str] | None = None,
    ) -> Artifact | None:
        """Best-ranked artifact for (kind, metric, mode), or None if no match."""
        rows = self.search(kind=kind, labels=filters, metric=metric, mode=mode)
        return rows[0] if rows else None

    def pin(self, artifact_id: str, pinned: bool = True) -> None:
        """Set or clear the pinned flag (no-op for unknown ids)."""
        con = sqlite3.connect(self.db_path)
        try:
            cur = con.cursor()
            cur.execute(
                "UPDATE artifacts SET pinned=? WHERE artifact_id=?", (1 if pinned else 0, artifact_id)
            )
            con.commit()
        finally:
            con.close()

    def _row_to_artifact(self, r) -> Artifact:
        """Map a SELECT * row tuple to an Artifact.

        Column order must match the CREATE TABLE statement in _init().
        """
        (
            artifact_id,
            uri,
            kind,
            bytes_,
            sha256,
            mime,
            run_id,
            graph_id,
            node_id,
            tool_name,
            tool_version,
            created_at,
            labels,
            metrics,
            params,
            preview_uri,
            pinned,
        ) = r
        return Artifact(
            artifact_id=artifact_id,
            uri=uri,
            kind=kind,
            bytes=bytes_,
            sha256=sha256,
            mime=mime,
            run_id=run_id,
            graph_id=graph_id,
            node_id=node_id,
            tool_name=tool_name,
            tool_version=tool_version,
            created_at=created_at,
            labels=json.loads(labels or "{}"),
            metrics=json.loads(metrics or "{}"),
            params=json.loads(params or "{}"),
            preview_uri=preview_uri,
            pinned=bool(pinned),
        )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class SqliteArtifactIndex:  # implements AsyncArtifactIndex
    """Async artifact index backed by SQLite.

    Fix: this class previously instantiated ``JsonlArtifactIndexSync`` (a
    copy-paste from the JSONL variant), so the "sqlite" index silently wrote
    JSONL records to *path*. It now delegates to ``SqliteArtifactIndexSync``.
    Occurrence lineage is kept as an append-only JSONL side log (same row
    schema as the JSONL index), because the sync SQLite index does not model
    occurrences.
    """

    def __init__(self, path: str, occurrences_path: str | None = None):
        # Local import: this module does not import os at top level.
        import os

        self._sync = SqliteArtifactIndexSync(path)
        self.occ_path = occurrences_path or (os.path.splitext(path)[0] + "_occurrences.jsonl")

    async def upsert(self, a: Artifact) -> None:
        """Insert or update one artifact record."""
        await asyncio.to_thread(self._sync.upsert, a)

    async def list_for_run(self, run_id: str) -> list[Artifact]:
        """All artifacts recorded under *run_id*."""
        return await asyncio.to_thread(self._sync.list_for_run, run_id)

    async def search(self, **kw) -> list[Artifact]:
        """Delegate a filtered search; see the sync index for parameters."""
        return await asyncio.to_thread(self._sync.search, **kw)

    async def best(self, **kw) -> Artifact | None:
        """Best artifact by metric, or None when nothing matches."""
        return await asyncio.to_thread(self._sync.best, **kw)

    async def pin(self, artifact_id: str, pinned: bool = True) -> None:
        """Pin or unpin one artifact by id."""
        await asyncio.to_thread(self._sync.pin, artifact_id, pinned)

    def _record_occurrence_sync(self, a: Artifact, extra_labels: dict | None = None) -> None:
        """Append one occurrence row to the JSONL lineage log."""
        row = {
            "artifact_id": a.artifact_id,
            "run_id": a.run_id,
            "graph_id": a.graph_id,
            "node_id": a.node_id,
            "tool_name": a.tool_name,
            "tool_version": a.tool_version,
            "created_at": a.created_at,
            "labels": a.labels | (extra_labels or {}),
        }
        with open(self.occ_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(row) + "\n")

    async def record_occurrence(self, a: Artifact, extra_labels: dict | None = None):
        """Append-only log that this artifact appeared in this run/node."""
        await asyncio.to_thread(self._record_occurrence_sync, a, extra_labels)
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from datetime import datetime, timezone
|
|
3
|
+
from fnmatch import fnmatch
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def now_iso() -> str:
    """Current UTC time as a timezone-aware ISO-8601 string."""
    moment = datetime.now(timezone.utc)
    return moment.isoformat()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def to_thread(fn, *a, **k):
    """Run ``fn(*a, **k)`` on a worker thread and return its result."""
    result = await asyncio.to_thread(fn, *a, **k)
    return result
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ----- helpers ----- NOTE: we have multiple copies of these in different places, consider centralizing -----
|
|
19
|
+
def _now_iso() -> str:
    """Current UTC time as an ISO-8601 string.

    Fix: this was a byte-for-byte duplicate of :func:`now_iso` (flagged by the
    NOTE above); it now delegates so there is a single source of truth.
    """
    return now_iso()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _sha256_file(path: str, chunk=1024 * 1024) -> tuple[str, int]:
|
|
24
|
+
"""Return (sha256 hex, size in bytes) of a file."""
|
|
25
|
+
h = hashlib.sha256()
|
|
26
|
+
total = 0
|
|
27
|
+
with open(path, "rb") as f:
|
|
28
|
+
while True:
|
|
29
|
+
b = f.read(chunk)
|
|
30
|
+
if not b:
|
|
31
|
+
break
|
|
32
|
+
h.update(b)
|
|
33
|
+
total += len(b)
|
|
34
|
+
return h.hexdigest(), total
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _content_addr_path(base_dir: str, sha256: str, ext: str | None) -> str:
|
|
38
|
+
"""Return a content-addressed path under base_dir for a given sha256 and optional extension.
|
|
39
|
+
Creates subdirectories as needed.
|
|
40
|
+
|
|
41
|
+
It works as follows:
|
|
42
|
+
- Takes the first 4 characters of the sha256 hash to create two levels of subdirectories.
|
|
43
|
+
- The first two characters form the first subdirectory (sub1).
|
|
44
|
+
- The next two characters form the second subdirectory (sub2).
|
|
45
|
+
- The full sha256 hash, optionally followed by the provided file extension, is used as the filename.
|
|
46
|
+
- Ensures that the target directory exists by creating it if necessary.
|
|
47
|
+
- Returns the full path to the content-addressed file.
|
|
48
|
+
|
|
49
|
+
The final path structure will look like:
|
|
50
|
+
base_dir/sub1/sub2/sha256[.ext]
|
|
51
|
+
"""
|
|
52
|
+
sub1, sub2 = sha256[:2], sha256[2:4]
|
|
53
|
+
fname = sha256 + (ext or "")
|
|
54
|
+
target_dir = os.path.join(base_dir, sub1, sub2)
|
|
55
|
+
os.makedirs(target_dir, exist_ok=True)
|
|
56
|
+
return os.path.join(target_dir, fname)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _walk_dir(root: str, include: list[str] | None, exclude: list[str] | None):
|
|
60
|
+
"""Yield (relpath, abspath) for files under root honoring include/exclude globs."""
|
|
61
|
+
root_p = Path(root)
|
|
62
|
+
for p in root_p.rglob("*"):
|
|
63
|
+
if not p.is_file():
|
|
64
|
+
continue
|
|
65
|
+
rel = str(p.relative_to(root_p)).replace("\\", "/")
|
|
66
|
+
if exclude and any(fnmatch.fnmatch(rel, pat) for pat in exclude):
|
|
67
|
+
continue
|
|
68
|
+
if include and not any(fnmatch.fnmatch(rel, pat) for pat in include):
|
|
69
|
+
continue
|
|
70
|
+
yield rel, str(p)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _tree_manifest_and_hash(root: str, include: list[str] | None, exclude: list[str] | None):
    """
    Build a deterministic manifest of files: [{"path": rel, "sha256": sha, "bytes": n}, ...]
    The tree hash is sha256 over lines "<rel>\t<sha>\t<bytes>\n" sorted by rel.

    Fix: the manifest entries themselves are now sorted by path, so the
    returned manifest (not just the hash) no longer depends on filesystem
    walk order. Relative paths are unique, so sorting entries by path yields
    the same line order as the previous whole-line sort and the tree hash is
    unchanged.
    """
    entries = []
    for rel, abspath in _walk_dir(root, include, exclude):
        sha, nbytes = _sha256_file(abspath)
        entries.append({"path": rel, "sha256": sha, "bytes": nbytes})
    # sort for determinism (manifest and hash alike)
    entries.sort(key=lambda e: e["path"])
    h = hashlib.sha256()
    for e in entries:
        h.update(f"{e['path']}\t{e['sha256']}\t{e['bytes']}\n".encode("utf-8"))
    return entries, h.hexdigest()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _content_addr_dir_path(base_dir: str, tree_sha: str):
|
|
94
|
+
# content-addressed folder to hold manifest (and optional archive)
|
|
95
|
+
sub1, sub2 = tree_sha[:2], tree_sha[2:4]
|
|
96
|
+
target_dir = os.path.join(base_dir, sub1, sub2, tree_sha)
|
|
97
|
+
os.makedirs(target_dir, exist_ok=True)
|
|
98
|
+
return target_dir
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _write_json(path: str, obj: dict | list):
|
|
102
|
+
with open(path, "w", encoding="utf-8") as f:
|
|
103
|
+
json.dump(obj, f, ensure_ascii=False, separators=(",", ":"))
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _maybe_cleanup_tmp_parent(tmp_root: str, path: str):
|
|
107
|
+
"""Remove empty parent dirs strictly under tmp_root (never _tmp itself)."""
|
|
108
|
+
try:
|
|
109
|
+
parent = os.path.dirname(os.path.abspath(path))
|
|
110
|
+
tmp_root_abs = os.path.abspath(tmp_root)
|
|
111
|
+
|
|
112
|
+
# Only operate if `parent` is inside tmp_root
|
|
113
|
+
while (
|
|
114
|
+
os.path.commonpath([parent, tmp_root_abs]) == tmp_root_abs
|
|
115
|
+
and os.path.normcase(parent)
|
|
116
|
+
!= os.path.normcase(tmp_root_abs) # don't delete _tmp itself
|
|
117
|
+
):
|
|
118
|
+
try:
|
|
119
|
+
os.rmdir(parent) # only removes if empty
|
|
120
|
+
except OSError:
|
|
121
|
+
break
|
|
122
|
+
parent = os.path.dirname(parent)
|
|
123
|
+
except Exception:
|
|
124
|
+
pass
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# services/auth/dev.py
|
|
2
|
+
class DevTokenAuthn:
|
|
3
|
+
"""Development token authenticator. Accepts any token, returns 'dev' as subject."""
|
|
4
|
+
|
|
5
|
+
def __init__(self, header="x-dev-token"):
|
|
6
|
+
self.header = header
|
|
7
|
+
|
|
8
|
+
async def whoami(self, token: str | None) -> dict:
|
|
9
|
+
return {"subject": token or "dev", "roles": ["admin"]}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AllowAllAuthz:
    """Development authorizer that allows all actions.

    Every (actor, action, resource) triple is permitted unconditionally —
    for local development only.
    """

    async def allow(self, actor: dict, action: str, resource: str) -> bool:
        """Always grant; arguments are accepted but never inspected."""
        return True
|