aethergraph 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. aethergraph/__init__.py +49 -0
  2. aethergraph/config/__init__.py +0 -0
  3. aethergraph/config/config.py +121 -0
  4. aethergraph/config/context.py +16 -0
  5. aethergraph/config/llm.py +26 -0
  6. aethergraph/config/loader.py +60 -0
  7. aethergraph/config/runtime.py +9 -0
  8. aethergraph/contracts/errors/errors.py +44 -0
  9. aethergraph/contracts/services/artifacts.py +142 -0
  10. aethergraph/contracts/services/channel.py +72 -0
  11. aethergraph/contracts/services/continuations.py +23 -0
  12. aethergraph/contracts/services/eventbus.py +12 -0
  13. aethergraph/contracts/services/kv.py +24 -0
  14. aethergraph/contracts/services/llm.py +17 -0
  15. aethergraph/contracts/services/mcp.py +22 -0
  16. aethergraph/contracts/services/memory.py +108 -0
  17. aethergraph/contracts/services/resume.py +28 -0
  18. aethergraph/contracts/services/state_stores.py +33 -0
  19. aethergraph/contracts/services/wakeup.py +28 -0
  20. aethergraph/core/execution/base_scheduler.py +77 -0
  21. aethergraph/core/execution/forward_scheduler.py +777 -0
  22. aethergraph/core/execution/global_scheduler.py +634 -0
  23. aethergraph/core/execution/retry_policy.py +22 -0
  24. aethergraph/core/execution/step_forward.py +411 -0
  25. aethergraph/core/execution/step_result.py +18 -0
  26. aethergraph/core/execution/wait_types.py +72 -0
  27. aethergraph/core/graph/graph_builder.py +192 -0
  28. aethergraph/core/graph/graph_fn.py +219 -0
  29. aethergraph/core/graph/graph_io.py +67 -0
  30. aethergraph/core/graph/graph_refs.py +154 -0
  31. aethergraph/core/graph/graph_spec.py +115 -0
  32. aethergraph/core/graph/graph_state.py +59 -0
  33. aethergraph/core/graph/graphify.py +128 -0
  34. aethergraph/core/graph/interpreter.py +145 -0
  35. aethergraph/core/graph/node_handle.py +33 -0
  36. aethergraph/core/graph/node_spec.py +46 -0
  37. aethergraph/core/graph/node_state.py +63 -0
  38. aethergraph/core/graph/task_graph.py +747 -0
  39. aethergraph/core/graph/task_node.py +82 -0
  40. aethergraph/core/graph/utils.py +37 -0
  41. aethergraph/core/graph/visualize.py +239 -0
  42. aethergraph/core/runtime/ad_hoc_context.py +61 -0
  43. aethergraph/core/runtime/base_service.py +153 -0
  44. aethergraph/core/runtime/bind_adapter.py +42 -0
  45. aethergraph/core/runtime/bound_memory.py +69 -0
  46. aethergraph/core/runtime/execution_context.py +220 -0
  47. aethergraph/core/runtime/graph_runner.py +349 -0
  48. aethergraph/core/runtime/lifecycle.py +26 -0
  49. aethergraph/core/runtime/node_context.py +203 -0
  50. aethergraph/core/runtime/node_services.py +30 -0
  51. aethergraph/core/runtime/recovery.py +159 -0
  52. aethergraph/core/runtime/run_registration.py +33 -0
  53. aethergraph/core/runtime/runtime_env.py +157 -0
  54. aethergraph/core/runtime/runtime_registry.py +32 -0
  55. aethergraph/core/runtime/runtime_services.py +224 -0
  56. aethergraph/core/runtime/wakeup_watcher.py +40 -0
  57. aethergraph/core/tools/__init__.py +10 -0
  58. aethergraph/core/tools/builtins/channel_tools.py +194 -0
  59. aethergraph/core/tools/builtins/toolset.py +134 -0
  60. aethergraph/core/tools/toolkit.py +510 -0
  61. aethergraph/core/tools/waitable.py +109 -0
  62. aethergraph/plugins/channel/__init__.py +0 -0
  63. aethergraph/plugins/channel/adapters/__init__.py +0 -0
  64. aethergraph/plugins/channel/adapters/console.py +106 -0
  65. aethergraph/plugins/channel/adapters/file.py +102 -0
  66. aethergraph/plugins/channel/adapters/slack.py +285 -0
  67. aethergraph/plugins/channel/adapters/telegram.py +302 -0
  68. aethergraph/plugins/channel/adapters/webhook.py +104 -0
  69. aethergraph/plugins/channel/adapters/webui.py +134 -0
  70. aethergraph/plugins/channel/routes/__init__.py +0 -0
  71. aethergraph/plugins/channel/routes/console_routes.py +86 -0
  72. aethergraph/plugins/channel/routes/slack_routes.py +49 -0
  73. aethergraph/plugins/channel/routes/telegram_routes.py +26 -0
  74. aethergraph/plugins/channel/routes/webui_routes.py +136 -0
  75. aethergraph/plugins/channel/utils/__init__.py +0 -0
  76. aethergraph/plugins/channel/utils/slack_utils.py +278 -0
  77. aethergraph/plugins/channel/utils/telegram_utils.py +324 -0
  78. aethergraph/plugins/channel/websockets/slack_ws.py +68 -0
  79. aethergraph/plugins/channel/websockets/telegram_polling.py +151 -0
  80. aethergraph/plugins/mcp/fs_server.py +128 -0
  81. aethergraph/plugins/mcp/http_server.py +101 -0
  82. aethergraph/plugins/mcp/ws_server.py +180 -0
  83. aethergraph/plugins/net/http.py +10 -0
  84. aethergraph/plugins/utils/data_io.py +359 -0
  85. aethergraph/runner/__init__.py +5 -0
  86. aethergraph/runtime/__init__.py +62 -0
  87. aethergraph/server/__init__.py +3 -0
  88. aethergraph/server/app_factory.py +84 -0
  89. aethergraph/server/start.py +122 -0
  90. aethergraph/services/__init__.py +10 -0
  91. aethergraph/services/artifacts/facade.py +284 -0
  92. aethergraph/services/artifacts/factory.py +35 -0
  93. aethergraph/services/artifacts/fs_store.py +656 -0
  94. aethergraph/services/artifacts/jsonl_index.py +123 -0
  95. aethergraph/services/artifacts/paths.py +23 -0
  96. aethergraph/services/artifacts/sqlite_index.py +209 -0
  97. aethergraph/services/artifacts/utils.py +124 -0
  98. aethergraph/services/auth/dev.py +16 -0
  99. aethergraph/services/channel/channel_bus.py +293 -0
  100. aethergraph/services/channel/factory.py +44 -0
  101. aethergraph/services/channel/session.py +511 -0
  102. aethergraph/services/channel/wait_helpers.py +57 -0
  103. aethergraph/services/clock/clock.py +9 -0
  104. aethergraph/services/container/default_container.py +320 -0
  105. aethergraph/services/continuations/continuation.py +56 -0
  106. aethergraph/services/continuations/factory.py +34 -0
  107. aethergraph/services/continuations/stores/fs_store.py +264 -0
  108. aethergraph/services/continuations/stores/inmem_store.py +95 -0
  109. aethergraph/services/eventbus/inmem.py +21 -0
  110. aethergraph/services/features/static.py +10 -0
  111. aethergraph/services/kv/ephemeral.py +90 -0
  112. aethergraph/services/kv/factory.py +27 -0
  113. aethergraph/services/kv/layered.py +41 -0
  114. aethergraph/services/kv/sqlite_kv.py +128 -0
  115. aethergraph/services/llm/factory.py +157 -0
  116. aethergraph/services/llm/generic_client.py +542 -0
  117. aethergraph/services/llm/providers.py +3 -0
  118. aethergraph/services/llm/service.py +105 -0
  119. aethergraph/services/logger/base.py +36 -0
  120. aethergraph/services/logger/compat.py +50 -0
  121. aethergraph/services/logger/formatters.py +106 -0
  122. aethergraph/services/logger/std.py +203 -0
  123. aethergraph/services/mcp/helpers.py +23 -0
  124. aethergraph/services/mcp/http_client.py +70 -0
  125. aethergraph/services/mcp/mcp_tools.py +21 -0
  126. aethergraph/services/mcp/registry.py +14 -0
  127. aethergraph/services/mcp/service.py +100 -0
  128. aethergraph/services/mcp/stdio_client.py +70 -0
  129. aethergraph/services/mcp/ws_client.py +115 -0
  130. aethergraph/services/memory/bound.py +106 -0
  131. aethergraph/services/memory/distillers/episode.py +116 -0
  132. aethergraph/services/memory/distillers/rolling.py +74 -0
  133. aethergraph/services/memory/facade.py +633 -0
  134. aethergraph/services/memory/factory.py +78 -0
  135. aethergraph/services/memory/hotlog_kv.py +27 -0
  136. aethergraph/services/memory/indices.py +74 -0
  137. aethergraph/services/memory/io_helpers.py +72 -0
  138. aethergraph/services/memory/persist_fs.py +40 -0
  139. aethergraph/services/memory/resolver.py +152 -0
  140. aethergraph/services/metering/noop.py +4 -0
  141. aethergraph/services/prompts/file_store.py +41 -0
  142. aethergraph/services/rag/chunker.py +29 -0
  143. aethergraph/services/rag/facade.py +593 -0
  144. aethergraph/services/rag/index/base.py +27 -0
  145. aethergraph/services/rag/index/faiss_index.py +121 -0
  146. aethergraph/services/rag/index/sqlite_index.py +134 -0
  147. aethergraph/services/rag/index_factory.py +52 -0
  148. aethergraph/services/rag/parsers/md.py +7 -0
  149. aethergraph/services/rag/parsers/pdf.py +14 -0
  150. aethergraph/services/rag/parsers/txt.py +7 -0
  151. aethergraph/services/rag/utils/hybrid.py +39 -0
  152. aethergraph/services/rag/utils/make_fs_key.py +62 -0
  153. aethergraph/services/redactor/simple.py +16 -0
  154. aethergraph/services/registry/key_parsing.py +44 -0
  155. aethergraph/services/registry/registry_key.py +19 -0
  156. aethergraph/services/registry/unified_registry.py +185 -0
  157. aethergraph/services/resume/multi_scheduler_resume_bus.py +65 -0
  158. aethergraph/services/resume/router.py +73 -0
  159. aethergraph/services/schedulers/registry.py +41 -0
  160. aethergraph/services/secrets/base.py +7 -0
  161. aethergraph/services/secrets/env.py +8 -0
  162. aethergraph/services/state_stores/externalize.py +135 -0
  163. aethergraph/services/state_stores/graph_observer.py +131 -0
  164. aethergraph/services/state_stores/json_store.py +67 -0
  165. aethergraph/services/state_stores/resume_policy.py +119 -0
  166. aethergraph/services/state_stores/serialize.py +249 -0
  167. aethergraph/services/state_stores/utils.py +91 -0
  168. aethergraph/services/state_stores/validate.py +78 -0
  169. aethergraph/services/tracing/noop.py +18 -0
  170. aethergraph/services/waits/wait_registry.py +91 -0
  171. aethergraph/services/wakeup/memory_queue.py +57 -0
  172. aethergraph/services/wakeup/scanner_producer.py +56 -0
  173. aethergraph/services/wakeup/worker.py +31 -0
  174. aethergraph/tools/__init__.py +25 -0
  175. aethergraph/utils/optdeps.py +8 -0
  176. aethergraph-0.1.0a1.dist-info/METADATA +410 -0
  177. aethergraph-0.1.0a1.dist-info/RECORD +182 -0
  178. aethergraph-0.1.0a1.dist-info/WHEEL +5 -0
  179. aethergraph-0.1.0a1.dist-info/entry_points.txt +2 -0
  180. aethergraph-0.1.0a1.dist-info/licenses/LICENSE +176 -0
  181. aethergraph-0.1.0a1.dist-info/licenses/NOTICE +31 -0
  182. aethergraph-0.1.0a1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,101 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import urllib.parse
6
+
7
+ from fastapi import FastAPI, HTTPException, Request
8
+ import httpx
9
+ from pydantic import BaseModel
10
+ import uvicorn
11
+
12
+ # TODO: move it to tests/examples later
13
+ DEMO_HTTP_TOKEN = os.getenv("DEMO_HTTP_TOKEN")
14
+
15
+ app = FastAPI()
16
+
17
+ TOOLS = [
18
+ {
19
+ "name": "search",
20
+ "description": "Search Wikipedia and return top hits.",
21
+ "input_schema": {
22
+ "type": "object",
23
+ "properties": {"q": {"type": "string"}, "k": {"type": "integer"}},
24
+ "required": ["q"],
25
+ },
26
+ }
27
+ ]
28
+
29
+
30
class RPCReq(BaseModel):
    """Request body accepted by the ``/rpc`` endpoint (JSON-RPC style envelope)."""

    # Protocol version string sent by the client; only its type is validated here.
    jsonrpc: str
    # Request identifier; the handler echoes it back in the response envelope.
    id: int | str | None = None
    # Method name the handler dispatches on (e.g. "tools/list", "tools/call").
    method: str
    # Optional method arguments; the handler treats a missing value as an empty dict.
    params: dict | None = None
35
+
36
+
37
def ok(i, result):
    """Build a JSON-RPC 2.0 success envelope carrying ``result`` for request id ``i``."""
    envelope = {"jsonrpc": "2.0", "id": i}
    envelope["result"] = result
    return envelope
39
+
40
+
41
def err(i, msg, code=-32000, data=None):
    """Build a JSON-RPC 2.0 error envelope for request id ``i``.

    ``data`` is attached under ``error.data`` only when it is not ``None``.
    """
    error = {"code": code, "message": msg}
    if data is not None:
        error["data"] = data
    return {"jsonrpc": "2.0", "id": i, "error": error}
46
+
47
+
48
async def do_search(q: str, k: int = 5):
    """Query the Wikipedia search API and return ``{"hits": [...]}``.

    Args:
        q: Search string sent as ``srsearch``.
        k: Requested number of hits; falsy values become 5 and the result is
            clamped to the range 1..10 before being sent as ``srlimit``.

    Returns:
        A dict with a ``"hits"`` list of ``{"title", "url", "snippet"}`` dicts.

    Raises:
        httpx.HTTPStatusError: if the API answers with an error status.
    """
    limit = max(1, min(int(k or 5), 10))
    query = {
        "action": "query",
        "list": "search",
        "format": "json",
        "srsearch": q,
        "srlimit": limit,
    }
    async with httpx.AsyncClient(timeout=10) as client:
        response = await client.get("https://en.wikipedia.org/w/api.php", params=query)
        response.raise_for_status()
        payload = response.json()

    hits = []
    for entry in payload.get("query", {}).get("search") or []:
        title = entry.get("title", "")
        # Article URLs use underscores instead of spaces; the rest is percent-encoded.
        slug = urllib.parse.quote(title.replace(" ", "_"))
        hits.append(
            {
                "title": title,
                "url": "https://en.wikipedia.org/wiki/" + slug,
                "snippet": entry.get("snippet", ""),
            }
        )
    return {"hits": hits}
67
+
68
+
69
@app.post("/rpc")
async def rpc(req: RPCReq, request: Request):
    """Handle one JSON-RPC style request for the demo MCP HTTP server.

    When ``DEMO_HTTP_TOKEN`` is set, the request must carry
    ``Authorization: Bearer <token>``; otherwise a warning is logged and the
    check is skipped.

    Supported methods: ``tools/list``, ``tools/call`` (tools ``search`` and
    ``query``), ``resources/list`` and ``resources/read``. Anything else, and
    any exception raised while dispatching, is reported as a JSON-RPC error
    envelope rather than an HTTP error.

    Raises:
        HTTPException: 401 when a token is configured and the header does not match.
    """
    import hmac  # function-scope import: only needed for the token comparison

    if DEMO_HTTP_TOKEN:
        supplied = request.headers.get("authorization", "")
        expected = f"Bearer {DEMO_HTTP_TOKEN}"
        # Constant-time comparison so response timing does not leak how much of
        # the token matched. Bytes are used because compare_digest rejects
        # non-ASCII str values.
        if not hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8")):
            raise HTTPException(status_code=401, detail="Unauthorized")
    else:
        logger = logging.getLogger("aethergraph.plugins.mcp.http_server")
        logger.warning(
            "No auth token DEMO_HTTP_TOKEN set, skipping auth check. Set up DEMO_HTTP_TOKEN in env for test."
        )
    try:
        p = req.params or {}
        if req.method == "tools/list":
            return ok(req.id, TOOLS)
        if req.method == "tools/call":
            name = (p.get("name") or "").strip()
            args = p.get("arguments") or {}
            if name in ("search", "query"):
                # do_search coerces and clamps k itself; passing the raw value
                # (defaulting falsy ones) avoids a TypeError on an explicit
                # `"k": null`.
                res = await do_search(args.get("q", ""), args.get("k") or 5)
                return ok(req.id, res)
            return err(req.id, f"Unknown tool: {name}")
        if req.method == "resources/list":
            return ok(req.id, [])
        if req.method == "resources/read":
            return ok(req.id, {"uri": p.get("uri"), "data": None})
        return err(req.id, f"Unknown method: {req.method}")
    except Exception as e:
        # Boundary handler: report failures in-band as a JSON-RPC error.
        return err(req.id, str(e))
98
+
99
+
100
+ if __name__ == "__main__":
101
+ uvicorn.run(app, host="127.0.0.1", port=8769)
@@ -0,0 +1,180 @@
1
+ # ws_mcp_server.py (robust for websockets v15, with optional token auth)
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import json
6
+ import logging
7
+ import os
8
+ import urllib.parse
9
+
10
+ import httpx
11
+ from websockets import exceptions as ws_exceptions, serve
12
+ from websockets.http import Headers
13
+
14
+ # -------- Config --------
15
+ DEMO_WS_TOKEN = os.getenv("DEMO_WS_TOKEN", "").strip()
16
+ REQUIRE_HEADER_BEARER = True # require Authorization header when token set
17
+ ALLOW_FIRST_MESSAGE_AUTH = True # also allow in-band JSON-RPC auth frame
18
+
19
+ TOOLS = [
20
+ {
21
+ "name": "search",
22
+ "description": "Search Wikipedia and return top hits.",
23
+ "input_schema": {
24
+ "type": "object",
25
+ "properties": {"q": {"type": "string"}, "k": {"type": "integer"}},
26
+ "required": ["q"],
27
+ },
28
+ }
29
+ ]
30
+
31
+
32
def ok(i, result):
    """Return a JSON-RPC 2.0 success envelope for request id ``i``."""
    return dict(jsonrpc="2.0", id=i, result=result)
34
+
35
+
36
def err(i, msg, code=-32000, data=None):
    """Return a JSON-RPC 2.0 error envelope for request id ``i``.

    The optional ``data`` payload is included only when it is not ``None``.
    """
    details = {"code": code, "message": msg}
    if data is not None:
        details["data"] = data
    return {"jsonrpc": "2.0", "id": i, "error": details}
41
+
42
+
43
async def do_search(q: str, k: int = 5):
    """Search Wikipedia for ``q`` and return ``{"hits": [...]}``.

    ``k`` is coerced to int (falsy values become 5) and clamped to 1..10.
    Each hit is a dict with ``title``, ``url`` and ``snippet`` keys.

    Raises:
        httpx.HTTPStatusError: if the API answers with an error status.
    """
    limit = max(1, min(int(k or 5), 10))
    query_string = urllib.parse.urlencode(
        {
            "action": "query",
            "list": "search",
            "format": "json",
            "srsearch": q,
            "srlimit": limit,
        }
    )
    endpoint = "https://en.wikipedia.org/w/api.php?" + query_string
    async with httpx.AsyncClient(timeout=15.0) as http:
        response = await http.get(endpoint)
        response.raise_for_status()
        payload = response.json()

    found = payload.get("query", {}).get("search") or []
    hits = []
    for entry in found:
        title = entry.get("title", "")
        # Article URLs use underscores for spaces; everything else is percent-encoded.
        article = "https://en.wikipedia.org/wiki/" + urllib.parse.quote(title.replace(" ", "_"))
        hits.append({"title": title, "url": article, "snippet": entry.get("snippet", "")})
    return {"hits": hits}
62
+
63
+
64
+ # ---------- Handshake-time token check (recommended) ----------
65
async def process_request(path, request_headers):
    """If DEMO_WS_TOKEN is set, enforce ``Authorization: Bearer <token>`` at handshake.

    The hook tolerates both calling conventions used by the ``websockets``
    library:

    * legacy server: ``process_request(path, request_headers)`` where the
      rejection is a ``(http.HTTPStatus, headers, body)`` tuple;
    * newer asyncio server: ``process_request(connection, request)`` where the
      headers live on ``request.headers`` and the rejection is the response
      object built by ``connection.respond(...)``.

    Args:
        path: Request path (legacy) or the server connection object (newer API).
        request_headers: Header mapping (legacy) or the request object (newer API).

    Returns:
        ``None`` to continue the WebSocket handshake, otherwise a 401 response
        in the shape expected by the calling API.
    """
    import hmac  # function-scope imports keep this fix self-contained
    from http import HTTPStatus

    if not DEMO_WS_TOKEN or not REQUIRE_HEADER_BEARER:
        return None  # accept; continue with handshake

    # Newer API passes a request object exposing `.headers`; legacy passes the
    # header mapping itself.
    headers_in = getattr(request_headers, "headers", request_headers)
    supplied = headers_in.get("Authorization", "") or ""
    expected = f"Bearer {DEMO_WS_TOKEN}"
    # Constant-time comparison; bytes because compare_digest rejects non-ASCII str.
    if hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8")):
        return None  # ok

    challenge = 'Bearer realm="mcp-ws", error="invalid_token"'

    # Newer API: the first argument is the connection and can build the response.
    respond = getattr(path, "respond", None)
    if callable(respond):
        response = respond(HTTPStatus.UNAUTHORIZED, "Unauthorized")
        response.headers["WWW-Authenticate"] = challenge
        return response

    # Legacy API: reject handshake with a (status, headers, body) tuple. The
    # status must be an HTTPStatus because the legacy writer reads
    # `.value`/`.phrase` from it.
    body = b"Unauthorized"
    headers = [
        ("Content-Type", "text/plain; charset=utf-8"),
        ("Content-Length", str(len(body))),
        ("WWW-Authenticate", challenge),
    ]
    return (HTTPStatus.UNAUTHORIZED, headers, body)
82
+
83
+
84
+ # ---------- Handler ----------
85
async def handle(ws):
    """Serve JSON-RPC requests on one WebSocket connection.

    If a token is configured and header auth is disabled, the first frame must
    be an ``auth/bearer`` request carrying the token; otherwise the connection
    is closed. After that, each frame is parsed as JSON and dispatched on its
    ``method`` (``tools/list``, ``tools/call``, ``resources/list``,
    ``resources/read``). Per-frame failures are answered with a JSON-RPC error
    and the session stays open.
    """
    import hmac  # function-scope import: only needed for the token comparison

    # Optional: in-band first-message auth if header was not used
    if DEMO_WS_TOKEN and ALLOW_FIRST_MESSAGE_AUTH and (not REQUIRE_HEADER_BEARER):
        try:
            first_raw = await asyncio.wait_for(ws.recv(), timeout=5.0)
            first = json.loads(first_raw)
            if first.get("method") != "auth/bearer":
                await ws.send(
                    json.dumps(err(first.get("id"), "Unauthorized: expected auth/bearer"))
                )
                await ws.close()
                return
            tok = (first.get("params") or {}).get("token", "")
            # Constant-time comparison; non-string tokens are rejected outright.
            token_ok = isinstance(tok, str) and hmac.compare_digest(
                tok.encode("utf-8"), DEMO_WS_TOKEN.encode("utf-8")
            )
            if not token_ok:
                await ws.send(json.dumps(err(first.get("id"), "Unauthorized: bad token")))
                await ws.close()
                return
            # auth ok; reply success
            await ws.send(json.dumps(ok(first.get("id"), {"ok": True})))
        except Exception:
            # couldn't read/parse first frame or wrong shape
            try:
                await ws.send(json.dumps(err(None, "Unauthorized")))
            finally:
                await ws.close()
            return

    try:
        async for raw in ws:
            # Reset per frame so the error path below can never report the id
            # of a previous request when this frame fails to parse.
            req = None
            try:
                req = json.loads(raw)
                mid = req.get("id")
                method = req.get("method")
                params = req.get("params") or {}

                if method == "tools/list":
                    await ws.send(json.dumps(ok(mid, TOOLS)))
                    continue

                if method == "tools/call":
                    name = (params.get("name") or "").strip()
                    args = params.get("arguments") or {}
                    if name in ("search", "query"):
                        # do_search coerces and clamps k itself; defaulting
                        # falsy values here avoids a TypeError on an explicit
                        # `"k": null`.
                        res = await do_search(args.get("q", ""), args.get("k") or 5)
                        await ws.send(json.dumps(ok(mid, res)))
                        continue
                    await ws.send(json.dumps(err(mid, f"Unknown tool: {name}")))
                    continue

                if method == "resources/list":
                    await ws.send(json.dumps(ok(mid, [])))
                    continue

                if method == "resources/read":
                    await ws.send(json.dumps(ok(mid, {"uri": params.get("uri"), "data": None})))
                    continue

                await ws.send(json.dumps(err(mid, f"Unknown method: {method}")))
            except Exception as e:
                # Return JSON-RPC error but keep the session alive
                rid = req.get("id") if isinstance(req, dict) else None
                await ws.send(json.dumps(err(rid, str(e))))
    except (ws_exceptions.ConnectionClosedOK, ws_exceptions.ConnectionClosedError):
        return
152
+
153
+
154
async def main(host="0.0.0.0", port=8765):
    """Start the demo MCP WebSocket server and run until cancelled.

    Logs the listening address and, when a token is configured, which auth
    modes are active.

    NOTE(review): the default host binds every interface; confirm that is
    intended for a demo server.
    """
    # To require header-based auth and disable in-band auth:
    # set REQUIRE_HEADER_BEARER=True and ALLOW_FIRST_MESSAGE_AUTH=False
    server = serve(
        handle,
        host,
        port,
        ping_interval=20,
        ping_timeout=10,
        close_timeout=2,
        max_queue=32,
        process_request=process_request,  # handshake auth hook
    )
    async with server:
        log = logging.getLogger("aethergraph.plugins.mcp.ws_server")
        log.info(f"MCP WS server listening on ws://{host}:{port}")
        if DEMO_WS_TOKEN:
            candidates = (
                ("header", REQUIRE_HEADER_BEARER),
                ("first-message", ALLOW_FIRST_MESSAGE_AUTH and not REQUIRE_HEADER_BEARER),
            )
            mode = [label for label, enabled in candidates if enabled]
            log.info(f"Auth enabled: token set; modes: {', '.join(mode) or 'none'}")
        # A future that is never resolved keeps the server alive until cancellation.
        await asyncio.Future()
177
+
178
+
179
+ if __name__ == "__main__":
180
+ asyncio.run(main())
@@ -0,0 +1,10 @@
1
+ # aethergraph/plugins/net/http.py
2
+ from contextlib import asynccontextmanager
3
+
4
+ import httpx
5
+
6
+
7
@asynccontextmanager
async def get_async_client(timeout_s: float = 10.0, headers: dict | None = None):
    """Yield an ``httpx.AsyncClient`` that is closed when the ``async with`` block exits.

    Args:
        timeout_s: Value passed to the client as ``timeout``.
        headers: Optional default headers passed to the client.
    """
    managed_client = httpx.AsyncClient(timeout=timeout_s, headers=headers)
    async with managed_client as session:
        yield session
@@ -0,0 +1,359 @@
1
+ # aethergraph/plugins/utils/data_io.py
2
+ from __future__ import annotations
3
+
4
+ import csv
5
+ import hashlib
6
+ import io
7
+ import os
8
+ from typing import Any
9
+
10
+ # Optional deps
11
import importlib.util


def _has_module(name: str) -> bool:
    """Return True when top-level module *name* can be located, without importing it."""
    try:
        return importlib.util.find_spec(name) is not None
    except (ImportError, ValueError):
        # find_spec can raise for broken or partially initialised modules;
        # treat those as unavailable.
        return False


# pandas and Pillow are imported lazily inside the helpers that use them, so
# only their availability is probed here. The flags must reflect the real
# environment: an unconditional True sends callers down the "installed" path
# and surfaces a raw ImportError instead of the documented fallbacks.
_HAS_PANDAS = _has_module("pandas")
_HAS_PIL = _has_module("PIL")

try:
    import numpy as np

    _HAS_NUMPY = True
except Exception:
    _HAS_NUMPY = False

try:
    from pypdf import PdfReader  # lightweight text extractor

    _HAS_PYPDF = True
except Exception:
    _HAS_PYPDF = False
34
+
35
+
36
+ # ---------- URI helpers ----------
37
+
38
+
39
def _resolve_local_path(uri: str) -> str | None:
    """Return the part of *uri* after a ``file://`` prefix, or ``None`` for any other URI.

    The remainder is returned verbatim (no percent-decoding is applied).
    """
    scheme_prefix = "file://"
    if not uri.startswith(scheme_prefix):
        return None
    return uri[len(scheme_prefix) :]
43
+
44
+
45
def load_bytes(artifact_store, uri: str) -> bytes:
    """Read and return the raw bytes behind *uri*.

    Only ``file://`` URIs that point at an existing local path are supported;
    ``artifact_store`` is accepted for interface compatibility but not used.
    TODO: deprecate this function as artifact_store.get_bytes(uri) is preferred.

    Raises:
        FileNotFoundError: if the URI is not a ``file://`` URI or the path does not exist.
    """
    local_path = _resolve_local_path(uri)
    if not local_path or not os.path.exists(local_path):
        # Optional: if later add artifact_store.get_bytes(uri), handle here.
        raise FileNotFoundError(f"Cannot resolve bytes for URI: {uri}")
    with open(local_path, "rb") as handle:
        return handle.read()
55
+
56
+
57
+ # ---------- MIME normalization & classification ----------
58
+
59
+ _EXTENSION_TO_MIME = {
60
+ # images
61
+ "png": "image/png",
62
+ "jpg": "image/jpeg",
63
+ "jpeg": "image/jpeg",
64
+ "gif": "image/gif",
65
+ "webp": "image/webp",
66
+ "tif": "image/tiff",
67
+ "tiff": "image/tiff",
68
+ "bmp": "image/bmp",
69
+ "svg": "image/svg+xml",
70
+ "heic": "image/heic",
71
+ "heif": "image/heif",
72
+ # text/docs
73
+ "txt": "text/plain",
74
+ "log": "text/plain",
75
+ "md": "text/markdown",
76
+ "csv": "text/csv",
77
+ "tsv": "text/tab-separated-values",
78
+ "json": "application/json",
79
+ "yaml": "text/yaml",
80
+ "yml": "text/yaml",
81
+ "xml": "application/xml",
82
+ "pdf": "application/pdf",
83
+ # archives
84
+ "zip": "application/zip",
85
+ "gz": "application/gzip",
86
+ "tar": "application/x-tar",
87
+ "7z": "application/x-7z-compressed",
88
+ # office
89
+ "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
90
+ "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
91
+ "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
92
+ # proprietary examples
93
+ "mph": "application/octet-stream", # COMSOL
94
+ }
95
+
96
+
97
def normalize_mime(name: str | None, mimetype_hint: str | None) -> str:
    """Pick a MIME type for an artifact.

    A non-empty ``mimetype_hint`` wins (lower-cased and stripped). Otherwise
    the extension after the last dot in ``name`` is looked up in
    ``_EXTENSION_TO_MIME``. ``application/octet-stream`` is returned when
    neither yields a match.
    """
    fallback = "application/octet-stream"

    hinted = (mimetype_hint or "").lower().strip()
    if hinted:
        return hinted

    lowered = name.lower() if name else ""
    if "." not in lowered:
        return fallback

    extension = lowered.rpartition(".")[2]
    return _EXTENSION_TO_MIME.get(extension, fallback)
107
+
108
+
109
def classify_for_processing(mime: str) -> str:
    """Map a MIME type to a coarse processing kind.

    Parameters after ``;`` (e.g. ``; charset=utf-8``) and surrounding
    whitespace are ignored, so ``application/json; charset=utf-8`` is
    classified the same as ``application/json``.

    Returns:
        One of ``"image"``, ``"xml"``, ``"text"``, ``"pdf"``, ``"archive"``,
        ``"xlsx"`` or ``"binary"`` (the fallback for unknown/proprietary types).
    """
    # Keep only the type/subtype essence; parameters would defeat the
    # exact-match checks below.
    m = (mime or "").split(";", 1)[0].strip().lower()

    # XML-flavoured types are checked before the generic image/text prefixes
    # so SVG and text/xml are not treated as raster images or plain text.
    if m in ("image/svg+xml", "application/xml", "text/xml"):
        return "xml"
    if m.startswith("image/"):
        return "image"
    if m.startswith("text/") or m == "application/json":
        return "text"
    if m == "application/pdf":
        return "pdf"
    if m in (
        "application/zip",
        "application/gzip",
        "application/x-tar",
        "application/x-7z-compressed",
    ):
        return "archive"
    if m == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
        return "xlsx"
    return "binary"  # unknown/proprietary (e.g., COMSOL .mph)
129
+
130
+
131
+ # ---------- Text helpers ----------
132
+
133
+
134
def try_decode_text(b: bytes) -> str:
    """Decode *b* to text using a best-effort encoding guess; never raises on bad bytes.

    Order of attempts:

    1. UTF-16 when the data starts with a UTF-16 byte-order mark.
    2. UTF-8 (a leading UTF-8 BOM, if present, is stripped).
    3. BOM-less UTF-16, only when NUL bytes are present; endianness is chosen
       from where the NUL bytes fall.
    4. Latin-1, which accepts every byte value.

    BOM-less UTF-16 is not tried blindly because almost any even-length byte
    string decodes "successfully" as UTF-16, which would turn Latin-1 text
    into garbage. The trade-off is that BOM-less UTF-16 without a single NUL
    byte (e.g. pure CJK text with no ASCII at all) falls through to Latin-1.
    """
    # 1. Explicit UTF-16 BOM (little- or big-endian).
    if b[:2] in (b"\xff\xfe", b"\xfe\xff"):
        try:
            return b.decode("utf-16")
        except UnicodeDecodeError:
            pass

    # 2. UTF-8; "utf-8-sig" also handles BOM-less input and drops a BOM if present.
    try:
        return b.decode("utf-8-sig")
    except UnicodeDecodeError:
        pass

    # 3. BOM-less UTF-16: text containing ASCII-range characters has a zero
    #    high byte in every such code unit - at odd offsets for little-endian,
    #    even offsets for big-endian.
    if b"\x00" in b:
        nuls_at_odd = b[1::2].count(0)
        nuls_at_even = b[0::2].count(0)
        encoding = "utf-16le" if nuls_at_odd >= nuls_at_even else "utf-16be"
        try:
            return b.decode(encoding)
        except UnicodeDecodeError:
            pass

    # 4. Latin-1 maps every byte to a code point, so this cannot fail.
    return b.decode("latin-1", errors="replace")
141
+
142
+
143
+ # ---------- CSV helpers ----------
144
+
145
+
146
def read_csv_any(b: bytes) -> dict[str, Any]:
    """Return a lightweight preview of CSV/TSV data.

    The delimiter is sniffed from the first line (falling back to the Excel
    dialect) and the first ten rows are parsed with the ``csv`` module. When
    pandas can be imported, a DataFrame preview parsed with the same delimiter
    is added.

    Returns:
        A dict with ``rows_preview`` (list of rows) and ``num_rows_previewed``;
        plus ``pandas_head``, ``columns`` and ``shape`` when the pandas parse
        succeeds.
    """
    from itertools import islice  # function-scope import: used only here

    txt = try_decode_text(b)

    # Dialect sniff on the header line only.
    try:
        dialect = csv.Sniffer().sniff(txt.splitlines()[0] if txt else "")
    except Exception:
        dialect = csv.excel

    # Only the first ten rows are needed; stop parsing after that.
    preview_rows = list(islice(csv.reader(io.StringIO(txt), dialect=dialect), 10))
    out: dict[str, Any] = {"rows_preview": preview_rows, "num_rows_previewed": len(preview_rows)}

    if _HAS_PANDAS:
        try:
            import pandas as pd  # type: ignore

            # Reuse the sniffed delimiter so TSV/semicolon files are split
            # into columns the same way as rows_preview. Unusual sniff results
            # fall back to comma, since the sniffer can misfire on odd input.
            sniffed = dialect.delimiter
            sep = sniffed if sniffed in (",", "\t", ";", "|") else ","
            df = pd.read_csv(io.StringIO(txt), sep=sep)
            out["pandas_head"] = df.head(10)  # DataFrame (caller may display)
            out["columns"] = list(df.columns)
            out["shape"] = tuple(df.shape)
        except Exception:
            # Best-effort enrichment: the csv-module preview above is still returned.
            pass
    return out
172
+
173
+
174
+ # ---------- Image helpers ----------
175
+
176
+
177
def decode_image_pil(b: bytes, *, fix_orientation: bool = True, to_rgb: bool = True):
    """Decode image bytes into a Pillow image.

    Args:
        b: Encoded image data.
        fix_orientation: Apply the EXIF orientation via ``ImageOps.exif_transpose``.
        to_rgb: Convert to ``"RGB"`` unless the mode is already ``"RGB"`` or ``"RGBA"``.

    Returns:
        The decoded image. An embedded ICC profile is converted to sRGB on a
        best-effort basis; failures in that step are ignored.

    Raises:
        RuntimeError: if the module-level Pillow flag reports it as unavailable.
    """
    if not _HAS_PIL:
        raise RuntimeError("Pillow not installed; cannot decode image.")
    from PIL import Image, ImageCms, ImageOps  # local import for safety

    image = Image.open(io.BytesIO(b))
    if fix_orientation:
        image = ImageOps.exif_transpose(image)

    # Colour management is optional: keep the image as-is if conversion fails.
    try:
        if "icc_profile" in image.info:
            embedded = ImageCms.ImageCmsProfile(io.BytesIO(image.info.get("icc_profile")))
            srgb = ImageCms.createProfile("sRGB")
            image = ImageCms.profileToProfile(image, embedded, srgb, outputMode=image.mode)
    except Exception:
        pass

    needs_conversion = to_rgb and image.mode not in ("RGB", "RGBA")
    return image.convert("RGB") if needs_conversion else image
195
+
196
+
197
def pil_to_numpy(im) -> np.ndarray:
    """Convert an image to a NumPy array with at least three dimensions.

    A 2-D result (single channel) gets a trailing axis of length 1.

    Raises:
        RuntimeError: if NumPy is not installed.
    """
    if not _HAS_NUMPY:
        raise RuntimeError("NumPy not installed; cannot convert image to array.")
    import numpy as np  # type: ignore

    pixels = np.asarray(im)
    return pixels[:, :, None] if pixels.ndim == 2 else pixels
206
+
207
+
208
+ # ---------- PDF helpers ----------
209
+
210
+
211
def extract_pdf_text(b: bytes, max_pages: int = 5) -> dict[str, Any]:
    """Extract text from the first pages of a PDF.

    Args:
        b: PDF file contents.
        max_pages: Upper bound on the number of pages to extract.

    Returns:
        ``{"num_pages": total, "preview_pages": extracted, "text_preview": [str, ...]}``.
        A page whose extraction fails contributes an empty string.

    Raises:
        RuntimeError: if pypdf is not installed.
    """
    if not _HAS_PYPDF:
        raise RuntimeError("pypdf not installed; cannot extract PDF text.")

    reader = PdfReader(io.BytesIO(b))
    total_pages = len(reader.pages)
    shown = min(total_pages, max_pages)

    previews = []
    for index in range(shown):
        try:
            page_text = reader.pages[index].extract_text() or ""
        except Exception:
            page_text = ""
        previews.append(page_text)

    return {"num_pages": total_pages, "preview_pages": shown, "text_preview": previews}
223
+
224
+
225
+ # ---------- XLSX helpers (cheap preview without heavy engines) ----------
226
+
227
+
228
def preview_xlsx(b: bytes) -> dict[str, Any]:
    """Preview an .xlsx workbook: sheet names plus the first rows of up to three sheets.

    Returns:
        ``{"sheets": [...], "previews": {sheet: DataFrame | "unreadable"}}`` on
        success, ``{"note": ...}`` when pandas is unavailable, or
        ``{"error": ...}`` when the workbook cannot be opened.
    """
    if not _HAS_PANDAS:
        return {"note": "pandas not installed; cannot preview xlsx"}
    try:
        import pandas as pd  # type: ignore

        # Parse every sheet while both the buffer and the ExcelFile are still
        # open, and close the ExcelFile deterministically when done.
        with io.BytesIO(b) as bio, pd.ExcelFile(bio) as xl:
            sheets = xl.sheet_names
            out: dict[str, Any] = {"sheets": sheets, "previews": {}}
            for s in sheets[:3]:
                try:
                    out["previews"][s] = xl.parse(s, nrows=10)
                except Exception:
                    # One unreadable sheet must not hide the others.
                    out["previews"][s] = "unreadable"
        return out
    except Exception as e:
        return {"error": f"xlsx preview failed: {e}"}
247
+
248
+
249
+ # ---------- Dispatcher ----------
250
+
251
+
252
def quick_decode(
    artifact_store, name: str | None, mimetype_hint: str | None, uri: str
) -> dict[str, Any]:
    """Load the artifact at *uri* and build a small, loggable description of it.

    Returns: {
      'uri': str, 'mimetype': str, 'kind': 'image'|'text'|'pdf'|'archive'|'xlsx'|'binary',
      'meta': {...},     # shape/columns/pages etc, always including 'sha256'
      'preview': ...     # small human-friendly preview (safe to log/send)
    }

    Kinds without a dedicated branch (including 'xml') are reported as
    'binary' with an "opaque binary" preview.
    """
    b = load_bytes(artifact_store, uri)
    mime = normalize_mime(name, mimetype_hint)
    kind = classify_for_processing(mime)
    sha = hashlib.sha256(b).hexdigest()

    def _describe(meta, preview, kind_label=kind):
        # Common result envelope shared by every branch.
        return {
            "uri": uri,
            "mimetype": mime,
            "kind": kind_label,
            "meta": meta,
            "preview": preview,
        }

    if kind == "image":
        if not _HAS_PIL:
            return _describe({"sha256": sha}, "Pillow missing")
        im = decode_image_pil(b)
        w, h = im.size
        image_meta = {"width": w, "height": h, "mode": im.mode, "sha256": sha}
        if _HAS_NUMPY:
            image_meta["array_shape"] = tuple(pil_to_numpy(im).shape)
        return _describe(image_meta, f"{w}x{h} {im.mode}")

    if kind == "text":
        first_lines = try_decode_text(b).splitlines()[:20]
        return _describe({"bytes": len(b), "sha256": sha}, "\n".join(first_lines))

    if kind == "pdf":
        if not _HAS_PYPDF:
            return _describe({"bytes": len(b), "sha256": sha}, "pypdf missing")
        pdf_meta = extract_pdf_text(b)
        return _describe(
            {"bytes": len(b), **pdf_meta, "sha256": sha},
            "\n---\n".join(pdf_meta["text_preview"]),
        )

    if kind == "archive":
        # We don't auto-unpack; just list ZIP members if it's zip
        import zipfile

        buffer = io.BytesIO(b)
        if not zipfile.is_zipfile(buffer):
            return _describe({"bytes": len(b), "sha256": sha}, "archive (non-zip)")
        with zipfile.ZipFile(buffer) as archive:
            members = archive.namelist()[:20]
        return _describe({"bytes": len(b), "sha256": sha}, "\n".join(members))

    if kind == "xlsx":
        sheet_meta = preview_xlsx(b)
        merged_meta = {"bytes": len(b), **sheet_meta, "sha256": sha}
        if isinstance(sheet_meta, dict):
            listing = ", ".join(sheet_meta.get("sheets", []))
        else:
            listing = sheet_meta
        return _describe(merged_meta, f"sheets: {listing}")

    # binary / unknown (e.g., COMSOL .mph)
    return _describe({"bytes": len(b), "sha256": sha}, "opaque binary", kind_label="binary")
@@ -0,0 +1,5 @@
1
+ # Aethergraph Runner -- direct aethergraph.core.runtime.graph_runner for clean imports
2
+
3
+ from aethergraph.core.runtime.graph_runner import run, run_async
4
+
5
+ __all__ = ["run", "run_async"]