aethergraph 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. aethergraph/__init__.py +49 -0
  2. aethergraph/config/__init__.py +0 -0
  3. aethergraph/config/config.py +121 -0
  4. aethergraph/config/context.py +16 -0
  5. aethergraph/config/llm.py +26 -0
  6. aethergraph/config/loader.py +60 -0
  7. aethergraph/config/runtime.py +9 -0
  8. aethergraph/contracts/errors/errors.py +44 -0
  9. aethergraph/contracts/services/artifacts.py +142 -0
  10. aethergraph/contracts/services/channel.py +72 -0
  11. aethergraph/contracts/services/continuations.py +23 -0
  12. aethergraph/contracts/services/eventbus.py +12 -0
  13. aethergraph/contracts/services/kv.py +24 -0
  14. aethergraph/contracts/services/llm.py +17 -0
  15. aethergraph/contracts/services/mcp.py +22 -0
  16. aethergraph/contracts/services/memory.py +108 -0
  17. aethergraph/contracts/services/resume.py +28 -0
  18. aethergraph/contracts/services/state_stores.py +33 -0
  19. aethergraph/contracts/services/wakeup.py +28 -0
  20. aethergraph/core/execution/base_scheduler.py +77 -0
  21. aethergraph/core/execution/forward_scheduler.py +777 -0
  22. aethergraph/core/execution/global_scheduler.py +634 -0
  23. aethergraph/core/execution/retry_policy.py +22 -0
  24. aethergraph/core/execution/step_forward.py +411 -0
  25. aethergraph/core/execution/step_result.py +18 -0
  26. aethergraph/core/execution/wait_types.py +72 -0
  27. aethergraph/core/graph/graph_builder.py +192 -0
  28. aethergraph/core/graph/graph_fn.py +219 -0
  29. aethergraph/core/graph/graph_io.py +67 -0
  30. aethergraph/core/graph/graph_refs.py +154 -0
  31. aethergraph/core/graph/graph_spec.py +115 -0
  32. aethergraph/core/graph/graph_state.py +59 -0
  33. aethergraph/core/graph/graphify.py +128 -0
  34. aethergraph/core/graph/interpreter.py +145 -0
  35. aethergraph/core/graph/node_handle.py +33 -0
  36. aethergraph/core/graph/node_spec.py +46 -0
  37. aethergraph/core/graph/node_state.py +63 -0
  38. aethergraph/core/graph/task_graph.py +747 -0
  39. aethergraph/core/graph/task_node.py +82 -0
  40. aethergraph/core/graph/utils.py +37 -0
  41. aethergraph/core/graph/visualize.py +239 -0
  42. aethergraph/core/runtime/ad_hoc_context.py +61 -0
  43. aethergraph/core/runtime/base_service.py +153 -0
  44. aethergraph/core/runtime/bind_adapter.py +42 -0
  45. aethergraph/core/runtime/bound_memory.py +69 -0
  46. aethergraph/core/runtime/execution_context.py +220 -0
  47. aethergraph/core/runtime/graph_runner.py +349 -0
  48. aethergraph/core/runtime/lifecycle.py +26 -0
  49. aethergraph/core/runtime/node_context.py +203 -0
  50. aethergraph/core/runtime/node_services.py +30 -0
  51. aethergraph/core/runtime/recovery.py +159 -0
  52. aethergraph/core/runtime/run_registration.py +33 -0
  53. aethergraph/core/runtime/runtime_env.py +157 -0
  54. aethergraph/core/runtime/runtime_registry.py +32 -0
  55. aethergraph/core/runtime/runtime_services.py +224 -0
  56. aethergraph/core/runtime/wakeup_watcher.py +40 -0
  57. aethergraph/core/tools/__init__.py +10 -0
  58. aethergraph/core/tools/builtins/channel_tools.py +194 -0
  59. aethergraph/core/tools/builtins/toolset.py +134 -0
  60. aethergraph/core/tools/toolkit.py +510 -0
  61. aethergraph/core/tools/waitable.py +109 -0
  62. aethergraph/plugins/channel/__init__.py +0 -0
  63. aethergraph/plugins/channel/adapters/__init__.py +0 -0
  64. aethergraph/plugins/channel/adapters/console.py +106 -0
  65. aethergraph/plugins/channel/adapters/file.py +102 -0
  66. aethergraph/plugins/channel/adapters/slack.py +285 -0
  67. aethergraph/plugins/channel/adapters/telegram.py +302 -0
  68. aethergraph/plugins/channel/adapters/webhook.py +104 -0
  69. aethergraph/plugins/channel/adapters/webui.py +134 -0
  70. aethergraph/plugins/channel/routes/__init__.py +0 -0
  71. aethergraph/plugins/channel/routes/console_routes.py +86 -0
  72. aethergraph/plugins/channel/routes/slack_routes.py +49 -0
  73. aethergraph/plugins/channel/routes/telegram_routes.py +26 -0
  74. aethergraph/plugins/channel/routes/webui_routes.py +136 -0
  75. aethergraph/plugins/channel/utils/__init__.py +0 -0
  76. aethergraph/plugins/channel/utils/slack_utils.py +278 -0
  77. aethergraph/plugins/channel/utils/telegram_utils.py +324 -0
  78. aethergraph/plugins/channel/websockets/slack_ws.py +68 -0
  79. aethergraph/plugins/channel/websockets/telegram_polling.py +151 -0
  80. aethergraph/plugins/mcp/fs_server.py +128 -0
  81. aethergraph/plugins/mcp/http_server.py +101 -0
  82. aethergraph/plugins/mcp/ws_server.py +180 -0
  83. aethergraph/plugins/net/http.py +10 -0
  84. aethergraph/plugins/utils/data_io.py +359 -0
  85. aethergraph/runner/__init__.py +5 -0
  86. aethergraph/runtime/__init__.py +62 -0
  87. aethergraph/server/__init__.py +3 -0
  88. aethergraph/server/app_factory.py +84 -0
  89. aethergraph/server/start.py +122 -0
  90. aethergraph/services/__init__.py +10 -0
  91. aethergraph/services/artifacts/facade.py +284 -0
  92. aethergraph/services/artifacts/factory.py +35 -0
  93. aethergraph/services/artifacts/fs_store.py +656 -0
  94. aethergraph/services/artifacts/jsonl_index.py +123 -0
  95. aethergraph/services/artifacts/paths.py +23 -0
  96. aethergraph/services/artifacts/sqlite_index.py +209 -0
  97. aethergraph/services/artifacts/utils.py +124 -0
  98. aethergraph/services/auth/dev.py +16 -0
  99. aethergraph/services/channel/channel_bus.py +293 -0
  100. aethergraph/services/channel/factory.py +44 -0
  101. aethergraph/services/channel/session.py +511 -0
  102. aethergraph/services/channel/wait_helpers.py +57 -0
  103. aethergraph/services/clock/clock.py +9 -0
  104. aethergraph/services/container/default_container.py +320 -0
  105. aethergraph/services/continuations/continuation.py +56 -0
  106. aethergraph/services/continuations/factory.py +34 -0
  107. aethergraph/services/continuations/stores/fs_store.py +264 -0
  108. aethergraph/services/continuations/stores/inmem_store.py +95 -0
  109. aethergraph/services/eventbus/inmem.py +21 -0
  110. aethergraph/services/features/static.py +10 -0
  111. aethergraph/services/kv/ephemeral.py +90 -0
  112. aethergraph/services/kv/factory.py +27 -0
  113. aethergraph/services/kv/layered.py +41 -0
  114. aethergraph/services/kv/sqlite_kv.py +128 -0
  115. aethergraph/services/llm/factory.py +157 -0
  116. aethergraph/services/llm/generic_client.py +542 -0
  117. aethergraph/services/llm/providers.py +3 -0
  118. aethergraph/services/llm/service.py +105 -0
  119. aethergraph/services/logger/base.py +36 -0
  120. aethergraph/services/logger/compat.py +50 -0
  121. aethergraph/services/logger/formatters.py +106 -0
  122. aethergraph/services/logger/std.py +203 -0
  123. aethergraph/services/mcp/helpers.py +23 -0
  124. aethergraph/services/mcp/http_client.py +70 -0
  125. aethergraph/services/mcp/mcp_tools.py +21 -0
  126. aethergraph/services/mcp/registry.py +14 -0
  127. aethergraph/services/mcp/service.py +100 -0
  128. aethergraph/services/mcp/stdio_client.py +70 -0
  129. aethergraph/services/mcp/ws_client.py +115 -0
  130. aethergraph/services/memory/bound.py +106 -0
  131. aethergraph/services/memory/distillers/episode.py +116 -0
  132. aethergraph/services/memory/distillers/rolling.py +74 -0
  133. aethergraph/services/memory/facade.py +633 -0
  134. aethergraph/services/memory/factory.py +78 -0
  135. aethergraph/services/memory/hotlog_kv.py +27 -0
  136. aethergraph/services/memory/indices.py +74 -0
  137. aethergraph/services/memory/io_helpers.py +72 -0
  138. aethergraph/services/memory/persist_fs.py +40 -0
  139. aethergraph/services/memory/resolver.py +152 -0
  140. aethergraph/services/metering/noop.py +4 -0
  141. aethergraph/services/prompts/file_store.py +41 -0
  142. aethergraph/services/rag/chunker.py +29 -0
  143. aethergraph/services/rag/facade.py +593 -0
  144. aethergraph/services/rag/index/base.py +27 -0
  145. aethergraph/services/rag/index/faiss_index.py +121 -0
  146. aethergraph/services/rag/index/sqlite_index.py +134 -0
  147. aethergraph/services/rag/index_factory.py +52 -0
  148. aethergraph/services/rag/parsers/md.py +7 -0
  149. aethergraph/services/rag/parsers/pdf.py +14 -0
  150. aethergraph/services/rag/parsers/txt.py +7 -0
  151. aethergraph/services/rag/utils/hybrid.py +39 -0
  152. aethergraph/services/rag/utils/make_fs_key.py +62 -0
  153. aethergraph/services/redactor/simple.py +16 -0
  154. aethergraph/services/registry/key_parsing.py +44 -0
  155. aethergraph/services/registry/registry_key.py +19 -0
  156. aethergraph/services/registry/unified_registry.py +185 -0
  157. aethergraph/services/resume/multi_scheduler_resume_bus.py +65 -0
  158. aethergraph/services/resume/router.py +73 -0
  159. aethergraph/services/schedulers/registry.py +41 -0
  160. aethergraph/services/secrets/base.py +7 -0
  161. aethergraph/services/secrets/env.py +8 -0
  162. aethergraph/services/state_stores/externalize.py +135 -0
  163. aethergraph/services/state_stores/graph_observer.py +131 -0
  164. aethergraph/services/state_stores/json_store.py +67 -0
  165. aethergraph/services/state_stores/resume_policy.py +119 -0
  166. aethergraph/services/state_stores/serialize.py +249 -0
  167. aethergraph/services/state_stores/utils.py +91 -0
  168. aethergraph/services/state_stores/validate.py +78 -0
  169. aethergraph/services/tracing/noop.py +18 -0
  170. aethergraph/services/waits/wait_registry.py +91 -0
  171. aethergraph/services/wakeup/memory_queue.py +57 -0
  172. aethergraph/services/wakeup/scanner_producer.py +56 -0
  173. aethergraph/services/wakeup/worker.py +31 -0
  174. aethergraph/tools/__init__.py +25 -0
  175. aethergraph/utils/optdeps.py +8 -0
  176. aethergraph-0.1.0a1.dist-info/METADATA +410 -0
  177. aethergraph-0.1.0a1.dist-info/RECORD +182 -0
  178. aethergraph-0.1.0a1.dist-info/WHEEL +5 -0
  179. aethergraph-0.1.0a1.dist-info/entry_points.txt +2 -0
  180. aethergraph-0.1.0a1.dist-info/licenses/LICENSE +176 -0
  181. aethergraph-0.1.0a1.dist-info/licenses/NOTICE +31 -0
  182. aethergraph-0.1.0a1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,123 @@
1
+ # aethergraph/services/artifacts/jsonl_index.py
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import json
6
+ import os
7
+ import threading
8
+ from typing import Literal
9
+
10
+ from aethergraph.contracts.services.artifacts import Artifact
11
+
12
+
13
class JsonlArtifactIndexSync:
    """Simple JSONL-based artifact index for small to medium scale use cases.

    Every write appends one JSON record to ``path``; on construction the file
    is replayed so the last record per ``artifact_id`` wins. Lookups are linear
    scans over an in-memory map, so this is not suitable for very large scale
    (millions of artifacts).

    All access to the in-memory map and to the backing files goes through a
    single lock, because the async wrapper invokes these methods from worker
    threads.
    """

    def __init__(self, path: str, occurrences_path: str | None = None):
        """Open (or create) the index.

        Args:
            path: JSONL file holding artifact records.
            occurrences_path: JSONL file for the occurrence log. Defaults to
                ``<path without extension>_occurrences.jsonl``.
        """
        self.path = path
        self.occ_path = occurrences_path or (os.path.splitext(path)[0] + "_occurrences.jsonl")
        # dirname() is "" for a bare filename and os.makedirs("") raises, so
        # only create parents that are actually named.
        for target in (self.path, self.occ_path):
            parent = os.path.dirname(target)
            if parent:
                os.makedirs(parent, exist_ok=True)
        # small in-memory map for quick lookup / dedup of last write
        self._by_id: dict[str, dict] = {}
        self._lock = threading.Lock()
        if os.path.exists(self.path):
            with open(self.path, encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue
                    rec = json.loads(line)
                    self._by_id[rec["artifact_id"]] = rec

    def _append(self, file_path: str, rec: dict) -> None:
        """Append ``rec`` as one JSON line to ``file_path`` (caller holds the lock)."""
        with open(file_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(rec) + "\n")

    def _snapshot(self) -> list[dict]:
        """Return the current records as a list, taken under the lock."""
        with self._lock:
            return list(self._by_id.values())

    def upsert(self, a: Artifact) -> None:
        """Upsert an artifact record."""
        with self._lock:
            rec = a.to_dict()
            self._by_id[a.artifact_id] = rec
            self._append(self.path, rec)

    def list_for_run(self, run_id: str) -> list[Artifact]:
        """List all artifacts for a given run_id."""
        return [Artifact(**r) for r in self._snapshot() if r.get("run_id") == run_id]

    def search(
        self,
        *,
        kind: str | None = None,
        labels: dict[str, str] | None = None,
        metric: str | None = None,
        mode: Literal["max", "min"] | None = None,
    ) -> list[Artifact]:
        """Search artifacts by kind, labels (exact match), and metric (min/max).

        When both ``metric`` and ``mode`` are given, records lacking that metric
        are dropped and the rest are sorted so the best value comes first.
        """
        rows = self._snapshot()
        if kind:
            rows = [r for r in rows if r.get("kind") == kind]
        if labels:
            # "or {}" tolerates records whose labels were serialized as null
            rows = [
                r
                for r in rows
                if all((r.get("labels") or {}).get(k) == v for k, v in labels.items())
            ]
        if metric and mode:
            rows = [r for r in rows if metric in (r.get("metrics") or {})]
            rows.sort(key=lambda r: r["metrics"][metric], reverse=(mode == "max"))
        return [Artifact(**r) for r in rows]

    def best(
        self,
        *,
        kind: str,
        metric: str,
        mode: Literal["max", "min"],
        filters: dict[str, str] | None = None,
    ) -> Artifact | None:
        """Get the best artifact by metric with optional filters."""
        rows = self.search(kind=kind, labels=filters, metric=metric, mode=mode)
        return rows[0] if rows else None

    def pin(self, artifact_id: str, pinned: bool = True) -> None:
        """Pin or unpin an artifact by artifact_id (unknown ids are ignored)."""
        with self._lock:
            rec = self._by_id.get(artifact_id)
            if rec is None:
                return
            rec["pinned"] = bool(pinned)
            self._append(self.path, rec)

    def record_occurrence(self, a: Artifact, extra_labels: dict | None = None):
        """
        Append-only log that this artifact appeared in this run/node at this time.
        Keeps lineage even if bytes are identical across runs.
        """
        row = {
            "artifact_id": a.artifact_id,
            "run_id": a.run_id,
            "graph_id": a.graph_id,
            "node_id": a.node_id,
            "tool_name": a.tool_name,
            "tool_version": a.tool_version,
            "created_at": a.created_at,
            "labels": a.labels | (extra_labels or {}),
        }
        with self._lock:
            self._append(self.occ_path, row)
101
+
102
+
103
class JsonlArtifactIndex:  # implements AsyncArtifactIndex
    """Async facade over ``JsonlArtifactIndexSync``.

    Each coroutine forwards its arguments to the same-named method of the
    wrapped synchronous index via ``asyncio.to_thread`` and returns that
    method's result.
    """

    def __init__(self, path: str, occurrences_path: str | None = None):
        self._sync = JsonlArtifactIndexSync(path, occurrences_path)

    async def upsert(self, a: Artifact) -> None:
        """Insert or replace the record for ``a``."""
        sync_upsert = self._sync.upsert
        await asyncio.to_thread(sync_upsert, a)

    async def list_for_run(self, run_id: str) -> list[Artifact]:
        """Return the artifacts recorded for ``run_id``."""
        found = await asyncio.to_thread(self._sync.list_for_run, run_id)
        return found

    async def search(self, **kw) -> list[Artifact]:
        """Forward keyword filters to the synchronous ``search``."""
        matches = await asyncio.to_thread(self._sync.search, **kw)
        return matches

    async def best(self, **kw) -> Artifact | None:
        """Forward keyword arguments to the synchronous ``best``."""
        winner = await asyncio.to_thread(self._sync.best, **kw)
        return winner

    async def pin(self, artifact_id: str, pinned: bool = True) -> None:
        """Set or clear the pinned flag of ``artifact_id``."""
        sync_pin = self._sync.pin
        await asyncio.to_thread(sync_pin, artifact_id, pinned)

    async def record_occurrence(self, a: Artifact, extra_labels: dict | None = None):
        """Append an occurrence row for ``a`` with optional extra labels."""
        sync_record = self._sync.record_occurrence
        await asyncio.to_thread(sync_record, a, extra_labels)
@@ -0,0 +1,23 @@
1
+ # at top of the file
2
+ from pathlib import Path
3
+ from urllib.parse import unquote, urlparse
4
+ from urllib.request import url2pathname
5
+
6
+
7
+ def _from_uri_or_path(s: str | Path) -> Path:
8
+ """Turn a file:// URI or plain path into a local Path (Windows-safe)."""
9
+ if isinstance(s, Path):
10
+ return s
11
+ if not isinstance(s, str):
12
+ raise TypeError(f"Expected str/Path, got {type(s)}")
13
+ if "://" not in s:
14
+ return Path(s)
15
+ u = urlparse(s)
16
+ if (u.scheme or "").lower() != "file":
17
+ # Not a local FS location; return a Path of the original to keep type uniform
18
+ # Callers can decide what to do; or raise if you want to enforce FS-only.
19
+ return Path(s)
20
+ # UNC: file://server/share/path -> \\server\share\path
21
+ # Local: file:///C:/path -> C:\path
22
+ raw = (f"//{u.netloc}{u.path}") if u.netloc else u.path
23
+ return Path(url2pathname(unquote(raw)))
@@ -0,0 +1,209 @@
1
+ # aethergraph/services/artifacts/sqlite_index.py
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import json
6
+ import sqlite3
7
+ from typing import Literal
8
+
9
+ from aethergraph.contracts.services.artifacts import Artifact
10
+ from aethergraph.services.artifacts.jsonl_index import JsonlArtifactIndexSync
11
+
12
+
13
class SqliteArtifactIndexSync:
    """SQLite-based artifact index for medium to large scale use cases.

    Artifacts live in a single ``artifacts`` table keyed by ``artifact_id``,
    with ``labels``, ``metrics`` and ``params`` stored as JSON text. Every
    operation opens its own connection and always closes it, even when the
    statement raises. Occurrences are appended to a JSONL side file, using the
    same row layout as the JSONL index.
    """

    # Single source of truth for column order: used by INSERT, SELECT and
    # _row_to_artifact so they cannot drift apart.
    _COLUMNS = (
        "artifact_id",
        "uri",
        "kind",
        "bytes",
        "sha256",
        "mime",
        "run_id",
        "graph_id",
        "node_id",
        "tool_name",
        "tool_version",
        "created_at",
        "labels",
        "metrics",
        "params",
        "preview_uri",
        "pinned",
    )
    _SELECT = "SELECT " + ", ".join(_COLUMNS) + " FROM artifacts"

    def __init__(self, db_path: str, occurrences_path: str | None = None):
        """Open (or create) the database and make sure the schema exists.

        Args:
            db_path: Path of the SQLite database file.
            occurrences_path: JSONL file for the occurrence log. Defaults to
                ``<db_path without extension>_occurrences.jsonl``.
        """
        import os  # local import: this module has no file-level ``import os``

        self.db_path = db_path
        self.occ_path = occurrences_path or (os.path.splitext(db_path)[0] + "_occurrences.jsonl")
        # dirname() is "" for a bare filename; only create named parents.
        for target in (self.db_path, self.occ_path):
            parent = os.path.dirname(target)
            if parent:
                os.makedirs(parent, exist_ok=True)
        self._init()

    def _run(self, sql: str, args=(), *, fetch: bool = False) -> list:
        """Execute one statement on a fresh connection, commit, and close it.

        Returns the fetched rows when ``fetch`` is true, otherwise an empty list.
        """
        con = sqlite3.connect(self.db_path)
        try:
            cur = con.execute(sql, args)
            rows = cur.fetchall() if fetch else []
            con.commit()
            return rows
        finally:
            con.close()

    def _init(self):
        """Create the ``artifacts`` table and its indexes if they are missing."""
        con = sqlite3.connect(self.db_path)
        try:
            cur = con.cursor()
            cur.execute("""
                CREATE TABLE IF NOT EXISTS artifacts (
                    artifact_id TEXT PRIMARY KEY,
                    uri TEXT NOT NULL,
                    kind TEXT NOT NULL,
                    bytes INTEGER,
                    sha256 TEXT,
                    mime TEXT,
                    run_id TEXT,
                    graph_id TEXT,
                    node_id TEXT,
                    tool_name TEXT,
                    tool_version TEXT,
                    created_at TEXT,
                    labels TEXT,
                    metrics TEXT,
                    params TEXT,
                    preview_uri TEXT,
                    pinned INTEGER DEFAULT 0
                )""")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_kind ON artifacts(kind)")
            cur.execute("CREATE INDEX IF NOT EXISTS idx_run ON artifacts(run_id)")
            con.commit()
        finally:
            con.close()

    def upsert(self, a: Artifact) -> None:
        """Insert the artifact, or overwrite every column if its id already exists."""
        self._run(
            """
            INSERT INTO artifacts
            (artifact_id, uri, kind, bytes, sha256, mime, run_id, graph_id, node_id,
             tool_name, tool_version, created_at, labels, metrics, params, preview_uri, pinned)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
            ON CONFLICT(artifact_id) DO UPDATE SET
                uri=excluded.uri, kind=excluded.kind, bytes=excluded.bytes,
                sha256=excluded.sha256, mime=excluded.mime, run_id=excluded.run_id,
                graph_id=excluded.graph_id, node_id=excluded.node_id,
                tool_name=excluded.tool_name, tool_version=excluded.tool_version,
                created_at=excluded.created_at, labels=excluded.labels,
                metrics=excluded.metrics, params=excluded.params,
                preview_uri=excluded.preview_uri, pinned=excluded.pinned
            """,
            (
                a.artifact_id,
                a.uri,
                a.kind,
                a.bytes,
                a.sha256,
                a.mime,
                a.run_id,
                a.graph_id,
                a.node_id,
                a.tool_name,
                a.tool_version,
                a.created_at,
                json.dumps(a.labels),
                json.dumps(a.metrics),
                json.dumps(a.params),
                a.preview_uri,
                1 if a.pinned else 0,
            ),
        )

    def list_for_run(self, run_id: str) -> list[Artifact]:
        """Return the artifacts of ``run_id`` ordered by ``created_at``."""
        rows = self._run(
            self._SELECT + " WHERE run_id=? ORDER BY created_at", (run_id,), fetch=True
        )
        return [self._row_to_artifact(r) for r in rows]

    @staticmethod
    def _label_path(key: str) -> str:
        """Build the JSON path that addresses the top-level label ``key``.

        The key is quoted so that characters such as "." are treated as part of
        the key, not as path syntax. Keys containing a double quote cannot be
        quoted this way and fall back to the unquoted form.
        """
        if '"' in key:
            return f"$.{key}"
        return f'$."{key}"'

    def search(
        self,
        *,
        kind: str | None = None,
        labels: dict[str, str] | None = None,
        metric: str | None = None,
        mode: Literal["max", "min"] | None = None,
    ) -> list[Artifact]:
        """Search artifacts by kind, labels (exact match), and metric (min/max).

        Kind and label filters run in SQL. When both ``metric`` and ``mode``
        are given, artifacts lacking that metric are dropped and the rest are
        sorted in Python so the best value comes first.
        """
        q = self._SELECT + " WHERE 1=1"
        args: list = []
        if kind:
            q += " AND kind=?"
            args.append(kind)
        # naive label filter: all requested label kv must be contained in labels json
        if labels:
            for k, v in labels.items():
                q += " AND json_extract(labels, ?) = ?"
                args += (self._label_path(k), v)
        rows = [self._row_to_artifact(r) for r in self._run(q, args, fetch=True)]
        if metric and mode and rows:
            rows = [r for r in rows if r.metrics and metric in r.metrics]
            rows.sort(key=lambda a: a.metrics[metric], reverse=(mode == "max"))
        return rows

    def best(
        self,
        *,
        kind: str,
        metric: str,
        mode: Literal["max", "min"],
        filters: dict[str, str] | None = None,
    ) -> Artifact | None:
        """Get the best artifact by metric with optional label filters."""
        rows = self.search(kind=kind, labels=filters, metric=metric, mode=mode)
        return rows[0] if rows else None

    def pin(self, artifact_id: str, pinned: bool = True) -> None:
        """Pin or unpin an artifact by artifact_id (unknown ids match no row)."""
        self._run(
            "UPDATE artifacts SET pinned=? WHERE artifact_id=?", (1 if pinned else 0, artifact_id)
        )

    def record_occurrence(self, a: Artifact, extra_labels: dict | None = None):
        """
        Append-only log that this artifact appeared in this run/node at this time.
        Keeps lineage even if bytes are identical across runs.
        """
        row = {
            "artifact_id": a.artifact_id,
            "run_id": a.run_id,
            "graph_id": a.graph_id,
            "node_id": a.node_id,
            "tool_name": a.tool_name,
            "tool_version": a.tool_version,
            "created_at": a.created_at,
            "labels": a.labels | (extra_labels or {}),
        }
        with open(self.occ_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(row) + "\n")

    def _row_to_artifact(self, r) -> Artifact:
        """Convert a row selected in ``_COLUMNS`` order into an ``Artifact``."""
        fields = dict(zip(self._COLUMNS, r))
        for name in ("labels", "metrics", "params"):
            fields[name] = json.loads(fields[name] or "{}")
        fields["pinned"] = bool(fields["pinned"])
        return Artifact(**fields)


class SqliteArtifactIndex:  # implements AsyncArtifactIndex
    """Async facade over ``SqliteArtifactIndexSync``.

    Each coroutine runs the same-named synchronous method in a worker thread
    via ``asyncio.to_thread``.
    """

    def __init__(self, path: str, occurrences_path: str | None = None):
        # Back the SQLite index with the SQLite implementation (the JSONL
        # class was constructed here before, so SQLite was never used).
        self._sync = SqliteArtifactIndexSync(path, occurrences_path)

    async def upsert(self, a: Artifact) -> None:
        await asyncio.to_thread(self._sync.upsert, a)

    async def list_for_run(self, run_id: str) -> list[Artifact]:
        return await asyncio.to_thread(self._sync.list_for_run, run_id)

    async def search(self, **kw) -> list[Artifact]:
        return await asyncio.to_thread(self._sync.search, **kw)

    async def best(self, **kw) -> Artifact | None:
        return await asyncio.to_thread(self._sync.best, **kw)

    async def pin(self, artifact_id: str, pinned: bool = True) -> None:
        await asyncio.to_thread(self._sync.pin, artifact_id, pinned)

    async def record_occurrence(self, a: Artifact, extra_labels: dict | None = None):
        await asyncio.to_thread(self._sync.record_occurrence, a, extra_labels)
@@ -0,0 +1,124 @@
1
+ import asyncio
2
+ from datetime import datetime, timezone
3
+ from fnmatch import fnmatch
4
+ import hashlib
5
+ import json
6
+ import os
7
+ from pathlib import Path
8
+
9
+
10
def now_iso() -> str:
    """Return the current time in UTC as an ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
12
+
13
+
14
async def to_thread(fn, *a, **k):
    """Run ``fn(*a, **k)`` through ``asyncio.to_thread`` and return its result."""
    outcome = await asyncio.to_thread(fn, *a, **k)
    return outcome
16
+
17
+
18
+ # ----- helpers ----- NOTE: we have multiple copies of these in different places, consider centralizing -----
19
def _now_iso():
    """Return the current time in UTC as an ISO 8601 string."""
    stamp = datetime.now(tz=timezone.utc)
    return stamp.isoformat()
21
+
22
+
23
def _sha256_file(path: str, chunk=1024 * 1024) -> tuple[str, int]:
    """Hash a file in fixed-size reads.

    Returns:
        ``(hex_digest, size_in_bytes)`` for the file at ``path``.
    """
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as fh:
        # iter() with a sentinel stops at the first empty read (end of file)
        for piece in iter(lambda: fh.read(chunk), b""):
            digest.update(piece)
            size += len(piece)
    return digest.hexdigest(), size
35
+
36
+
37
+ def _content_addr_path(base_dir: str, sha256: str, ext: str | None) -> str:
38
+ """Return a content-addressed path under base_dir for a given sha256 and optional extension.
39
+ Creates subdirectories as needed.
40
+
41
+ It works as follows:
42
+ - Takes the first 4 characters of the sha256 hash to create two levels of subdirectories.
43
+ - The first two characters form the first subdirectory (sub1).
44
+ - The next two characters form the second subdirectory (sub2).
45
+ - The full sha256 hash, optionally followed by the provided file extension, is used as the filename.
46
+ - Ensures that the target directory exists by creating it if necessary.
47
+ - Returns the full path to the content-addressed file.
48
+
49
+ The final path structure will look like:
50
+ base_dir/sub1/sub2/sha256[.ext]
51
+ """
52
+ sub1, sub2 = sha256[:2], sha256[2:4]
53
+ fname = sha256 + (ext or "")
54
+ target_dir = os.path.join(base_dir, sub1, sub2)
55
+ os.makedirs(target_dir, exist_ok=True)
56
+ return os.path.join(target_dir, fname)
57
+
58
+
59
+ def _walk_dir(root: str, include: list[str] | None, exclude: list[str] | None):
60
+ """Yield (relpath, abspath) for files under root honoring include/exclude globs."""
61
+ root_p = Path(root)
62
+ for p in root_p.rglob("*"):
63
+ if not p.is_file():
64
+ continue
65
+ rel = str(p.relative_to(root_p)).replace("\\", "/")
66
+ if exclude and any(fnmatch.fnmatch(rel, pat) for pat in exclude):
67
+ continue
68
+ if include and not any(fnmatch.fnmatch(rel, pat) for pat in include):
69
+ continue
70
+ yield rel, str(p)
71
+
72
+
73
def _tree_manifest_and_hash(root: str, include: list[str] | None, exclude: list[str] | None):
    """
    Build a deterministic manifest of files: [{"path": rel, "sha256": sha, "bytes": n}, ...]
    sorted by path, plus a hash of the whole tree.

    The tree hash is sha256 over the lines "<rel>\\t<sha>\\t<bytes>\\n" (tab-separated),
    sorted as strings, so it does not depend on filesystem walk order.

    Returns:
        ``(entries, tree_sha)``.
    """
    entries = []
    for rel, abspath in _walk_dir(root, include, exclude):
        sha, nbytes = _sha256_file(abspath)
        entries.append({"path": rel, "sha256": sha, "bytes": nbytes})
    # The walk order is filesystem-dependent; sort so the manifest is stable.
    entries.sort(key=lambda e: e["path"])
    # Lines are sorted as whole strings (not by path) to keep existing tree
    # hashes unchanged.
    lines = sorted(f"{e['path']}\t{e['sha256']}\t{e['bytes']}\n" for e in entries)
    h = hashlib.sha256()
    for line in lines:
        h.update(line.encode("utf-8"))
    tree_sha = h.hexdigest()
    return entries, tree_sha
91
+
92
+
93
def _content_addr_dir_path(base_dir: str, tree_sha: str):
    """Return (and create) the content-addressed directory for ``tree_sha``.

    Layout: ``base_dir/<sha[0:2]>/<sha[2:4]>/<sha>``. The folder is meant to
    hold the manifest (and optional archive).
    """
    shard_a = tree_sha[:2]
    shard_b = tree_sha[2:4]
    folder = os.path.join(base_dir, shard_a, shard_b, tree_sha)
    os.makedirs(folder, exist_ok=True)
    return folder
99
+
100
+
101
+ def _write_json(path: str, obj: dict | list):
102
+ with open(path, "w", encoding="utf-8") as f:
103
+ json.dump(obj, f, ensure_ascii=False, separators=(",", ":"))
104
+
105
+
106
def _maybe_cleanup_tmp_parent(tmp_root: str, path: str):
    """Remove empty parent dirs strictly under tmp_root (never _tmp itself).

    Starting at the directory containing ``path``, walk upwards and remove
    each directory while it is empty. Stops at ``tmp_root``, at the first
    directory that cannot be removed, or as soon as the walk leaves
    ``tmp_root``. Best effort: any error is swallowed.
    """
    try:
        root_abs = os.path.abspath(tmp_root)
        current = os.path.dirname(os.path.abspath(path))
        while True:
            # Only operate while `current` is inside tmp_root
            if os.path.commonpath([current, root_abs]) != root_abs:
                break
            # don't delete _tmp itself
            if os.path.normcase(current) == os.path.normcase(root_abs):
                break
            try:
                os.rmdir(current)  # only removes if empty
            except OSError:
                break
            current = os.path.dirname(current)
    except Exception:
        pass
@@ -0,0 +1,16 @@
1
+ # services/auth/dev.py
2
+ class DevTokenAuthn:
3
+ """Development token authenticator. Accepts any token, returns 'dev' as subject."""
4
+
5
+ def __init__(self, header="x-dev-token"):
6
+ self.header = header
7
+
8
+ async def whoami(self, token: str | None) -> dict:
9
+ return {"subject": token or "dev", "roles": ["admin"]}
10
+
11
+
12
class AllowAllAuthz:
    """Development authorizer that allows all actions."""

    async def allow(self, actor: dict, action: str, resource: str) -> bool:
        """Return True for every actor, action and resource."""
        permitted = True
        return permitted