aethergraph 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (267)
  1. aethergraph/__init__.py +4 -10
  2. aethergraph/__main__.py +293 -0
  3. aethergraph/api/v1/__init__.py +0 -0
  4. aethergraph/api/v1/agents.py +46 -0
  5. aethergraph/api/v1/apps.py +70 -0
  6. aethergraph/api/v1/artifacts.py +415 -0
  7. aethergraph/api/v1/channels.py +89 -0
  8. aethergraph/api/v1/deps.py +168 -0
  9. aethergraph/api/v1/graphs.py +259 -0
  10. aethergraph/api/v1/identity.py +25 -0
  11. aethergraph/api/v1/memory.py +353 -0
  12. aethergraph/api/v1/misc.py +47 -0
  13. aethergraph/api/v1/pagination.py +29 -0
  14. aethergraph/api/v1/runs.py +568 -0
  15. aethergraph/api/v1/schemas.py +535 -0
  16. aethergraph/api/v1/session.py +323 -0
  17. aethergraph/api/v1/stats.py +201 -0
  18. aethergraph/api/v1/viz.py +152 -0
  19. aethergraph/config/config.py +22 -0
  20. aethergraph/config/loader.py +3 -2
  21. aethergraph/config/storage.py +209 -0
  22. aethergraph/contracts/__init__.py +0 -0
  23. aethergraph/contracts/services/__init__.py +0 -0
  24. aethergraph/contracts/services/artifacts.py +27 -14
  25. aethergraph/contracts/services/memory.py +45 -17
  26. aethergraph/contracts/services/metering.py +129 -0
  27. aethergraph/contracts/services/runs.py +50 -0
  28. aethergraph/contracts/services/sessions.py +87 -0
  29. aethergraph/contracts/services/state_stores.py +3 -0
  30. aethergraph/contracts/services/viz.py +44 -0
  31. aethergraph/contracts/storage/artifact_index.py +88 -0
  32. aethergraph/contracts/storage/artifact_store.py +99 -0
  33. aethergraph/contracts/storage/async_kv.py +34 -0
  34. aethergraph/contracts/storage/blob_store.py +50 -0
  35. aethergraph/contracts/storage/doc_store.py +35 -0
  36. aethergraph/contracts/storage/event_log.py +31 -0
  37. aethergraph/contracts/storage/vector_index.py +48 -0
  38. aethergraph/core/__init__.py +0 -0
  39. aethergraph/core/execution/forward_scheduler.py +13 -2
  40. aethergraph/core/execution/global_scheduler.py +21 -15
  41. aethergraph/core/execution/step_forward.py +10 -1
  42. aethergraph/core/graph/__init__.py +0 -0
  43. aethergraph/core/graph/graph_builder.py +8 -4
  44. aethergraph/core/graph/graph_fn.py +156 -15
  45. aethergraph/core/graph/graph_spec.py +8 -0
  46. aethergraph/core/graph/graphify.py +146 -27
  47. aethergraph/core/graph/node_spec.py +0 -2
  48. aethergraph/core/graph/node_state.py +3 -0
  49. aethergraph/core/graph/task_graph.py +39 -1
  50. aethergraph/core/runtime/__init__.py +0 -0
  51. aethergraph/core/runtime/ad_hoc_context.py +64 -4
  52. aethergraph/core/runtime/base_service.py +28 -4
  53. aethergraph/core/runtime/execution_context.py +13 -15
  54. aethergraph/core/runtime/graph_runner.py +222 -37
  55. aethergraph/core/runtime/node_context.py +510 -6
  56. aethergraph/core/runtime/node_services.py +12 -5
  57. aethergraph/core/runtime/recovery.py +15 -1
  58. aethergraph/core/runtime/run_manager.py +783 -0
  59. aethergraph/core/runtime/run_manager_local.py +204 -0
  60. aethergraph/core/runtime/run_registration.py +2 -2
  61. aethergraph/core/runtime/run_types.py +89 -0
  62. aethergraph/core/runtime/runtime_env.py +136 -7
  63. aethergraph/core/runtime/runtime_metering.py +71 -0
  64. aethergraph/core/runtime/runtime_registry.py +36 -13
  65. aethergraph/core/runtime/runtime_services.py +194 -6
  66. aethergraph/core/tools/builtins/toolset.py +1 -1
  67. aethergraph/core/tools/toolkit.py +5 -0
  68. aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
  69. aethergraph/plugins/agents/default_chat_agent.py +171 -0
  70. aethergraph/plugins/agents/shared.py +81 -0
  71. aethergraph/plugins/channel/adapters/webui.py +112 -112
  72. aethergraph/plugins/channel/routes/webui_routes.py +367 -102
  73. aethergraph/plugins/channel/utils/slack_utils.py +115 -59
  74. aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
  75. aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
  76. aethergraph/runtime/__init__.py +15 -0
  77. aethergraph/server/app_factory.py +190 -34
  78. aethergraph/server/clients/channel_client.py +202 -0
  79. aethergraph/server/http/channel_http_routes.py +116 -0
  80. aethergraph/server/http/channel_ws_routers.py +45 -0
  81. aethergraph/server/loading.py +117 -0
  82. aethergraph/server/server.py +131 -0
  83. aethergraph/server/server_state.py +240 -0
  84. aethergraph/server/start.py +227 -66
  85. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
  86. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
  87. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
  88. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
  89. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
  90. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
  91. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
  92. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
  93. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
  94. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
  95. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
  96. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
  97. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
  98. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
  99. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
  100. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
  101. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
  102. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
  103. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
  104. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
  105. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
  106. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
  107. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
  108. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
  109. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
  110. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
  111. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
  112. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
  113. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
  114. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
  115. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
  116. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
  117. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
  118. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
  119. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
  120. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
  121. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
  122. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
  123. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
  124. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
  125. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
  126. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
  127. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
  128. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
  129. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
  130. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
  131. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
  132. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
  133. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
  134. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
  135. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
  136. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
  137. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
  138. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
  139. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
  140. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
  141. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
  142. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
  143. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
  144. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
  145. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
  146. aethergraph/server/ui_static/index.html +15 -0
  147. aethergraph/server/ui_static/logo.png +0 -0
  148. aethergraph/services/artifacts/__init__.py +0 -0
  149. aethergraph/services/artifacts/facade.py +1239 -132
  150. aethergraph/services/auth/{dev.py → authn.py} +0 -8
  151. aethergraph/services/auth/authz.py +100 -0
  152. aethergraph/services/channel/__init__.py +0 -0
  153. aethergraph/services/channel/channel_bus.py +19 -1
  154. aethergraph/services/channel/factory.py +13 -1
  155. aethergraph/services/channel/ingress.py +311 -0
  156. aethergraph/services/channel/queue_adapter.py +75 -0
  157. aethergraph/services/channel/session.py +502 -19
  158. aethergraph/services/container/default_container.py +122 -43
  159. aethergraph/services/continuations/continuation.py +6 -0
  160. aethergraph/services/continuations/stores/fs_store.py +19 -0
  161. aethergraph/services/eventhub/event_hub.py +76 -0
  162. aethergraph/services/kv/__init__.py +0 -0
  163. aethergraph/services/kv/ephemeral.py +244 -0
  164. aethergraph/services/llm/__init__.py +0 -0
  165. aethergraph/services/llm/generic_client copy.py +691 -0
  166. aethergraph/services/llm/generic_client.py +1288 -187
  167. aethergraph/services/llm/providers.py +3 -1
  168. aethergraph/services/llm/types.py +47 -0
  169. aethergraph/services/llm/utils.py +284 -0
  170. aethergraph/services/logger/std.py +3 -0
  171. aethergraph/services/mcp/__init__.py +9 -0
  172. aethergraph/services/mcp/http_client.py +38 -0
  173. aethergraph/services/mcp/service.py +225 -1
  174. aethergraph/services/mcp/stdio_client.py +41 -6
  175. aethergraph/services/mcp/ws_client.py +44 -2
  176. aethergraph/services/memory/__init__.py +0 -0
  177. aethergraph/services/memory/distillers/llm_long_term.py +234 -0
  178. aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
  179. aethergraph/services/memory/distillers/long_term.py +225 -0
  180. aethergraph/services/memory/facade/__init__.py +3 -0
  181. aethergraph/services/memory/facade/chat.py +440 -0
  182. aethergraph/services/memory/facade/core.py +447 -0
  183. aethergraph/services/memory/facade/distillation.py +424 -0
  184. aethergraph/services/memory/facade/rag.py +410 -0
  185. aethergraph/services/memory/facade/results.py +315 -0
  186. aethergraph/services/memory/facade/retrieval.py +139 -0
  187. aethergraph/services/memory/facade/types.py +77 -0
  188. aethergraph/services/memory/facade/utils.py +43 -0
  189. aethergraph/services/memory/facade_dep.py +1539 -0
  190. aethergraph/services/memory/factory.py +9 -3
  191. aethergraph/services/memory/utils.py +10 -0
  192. aethergraph/services/metering/eventlog_metering.py +470 -0
  193. aethergraph/services/metering/noop.py +25 -4
  194. aethergraph/services/rag/__init__.py +0 -0
  195. aethergraph/services/rag/facade.py +279 -23
  196. aethergraph/services/rag/index_factory.py +2 -2
  197. aethergraph/services/rag/node_rag.py +317 -0
  198. aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
  199. aethergraph/services/registry/__init__.py +0 -0
  200. aethergraph/services/registry/agent_app_meta.py +419 -0
  201. aethergraph/services/registry/registry_key.py +1 -1
  202. aethergraph/services/registry/unified_registry.py +74 -6
  203. aethergraph/services/scope/scope.py +159 -0
  204. aethergraph/services/scope/scope_factory.py +164 -0
  205. aethergraph/services/state_stores/serialize.py +5 -0
  206. aethergraph/services/state_stores/utils.py +2 -1
  207. aethergraph/services/viz/__init__.py +0 -0
  208. aethergraph/services/viz/facade.py +413 -0
  209. aethergraph/services/viz/viz_service.py +69 -0
  210. aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
  211. aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
  212. aethergraph/storage/artifacts/cas_store.py +422 -0
  213. aethergraph/storage/artifacts/fs_cas.py +18 -0
  214. aethergraph/storage/artifacts/s3_cas.py +14 -0
  215. aethergraph/storage/artifacts/utils.py +124 -0
  216. aethergraph/storage/blob/fs_blob.py +86 -0
  217. aethergraph/storage/blob/s3_blob.py +115 -0
  218. aethergraph/storage/continuation_store/fs_cont.py +283 -0
  219. aethergraph/storage/continuation_store/inmem_cont.py +146 -0
  220. aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
  221. aethergraph/storage/docstore/fs_doc.py +63 -0
  222. aethergraph/storage/docstore/sqlite_doc.py +31 -0
  223. aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
  224. aethergraph/storage/eventlog/fs_event.py +136 -0
  225. aethergraph/storage/eventlog/sqlite_event.py +47 -0
  226. aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
  227. aethergraph/storage/factory.py +432 -0
  228. aethergraph/storage/fs_utils.py +28 -0
  229. aethergraph/storage/graph_state_store/state_store.py +64 -0
  230. aethergraph/storage/kv/inmem_kv.py +103 -0
  231. aethergraph/storage/kv/layered_kv.py +52 -0
  232. aethergraph/storage/kv/sqlite_kv.py +39 -0
  233. aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
  234. aethergraph/storage/memory/event_persist.py +68 -0
  235. aethergraph/storage/memory/fs_persist.py +118 -0
  236. aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
  237. aethergraph/{services → storage}/memory/indices.py +31 -7
  238. aethergraph/storage/metering/meter_event.py +55 -0
  239. aethergraph/storage/runs/doc_store.py +280 -0
  240. aethergraph/storage/runs/inmen_store.py +82 -0
  241. aethergraph/storage/runs/sqlite_run_store.py +403 -0
  242. aethergraph/storage/sessions/doc_store.py +183 -0
  243. aethergraph/storage/sessions/inmem_store.py +110 -0
  244. aethergraph/storage/sessions/sqlite_session_store.py +399 -0
  245. aethergraph/storage/vector_index/chroma_index.py +138 -0
  246. aethergraph/storage/vector_index/faiss_index.py +179 -0
  247. aethergraph/storage/vector_index/sqlite_index.py +187 -0
  248. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/METADATA +138 -31
  249. aethergraph-0.1.0a2.dist-info/RECORD +356 -0
  250. aethergraph-0.1.0a2.dist-info/entry_points.txt +3 -0
  251. aethergraph/services/artifacts/factory.py +0 -35
  252. aethergraph/services/artifacts/fs_store.py +0 -656
  253. aethergraph/services/artifacts/jsonl_index.py +0 -123
  254. aethergraph/services/artifacts/sqlite_index.py +0 -209
  255. aethergraph/services/memory/distillers/episode.py +0 -116
  256. aethergraph/services/memory/distillers/rolling.py +0 -74
  257. aethergraph/services/memory/facade.py +0 -633
  258. aethergraph/services/memory/persist_fs.py +0 -40
  259. aethergraph/services/rag/index/base.py +0 -27
  260. aethergraph/services/rag/index/faiss_index.py +0 -121
  261. aethergraph/services/rag/index/sqlite_index.py +0 -134
  262. aethergraph-0.1.0a1.dist-info/RECORD +0 -182
  263. aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
  264. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/WHEEL +0 -0
  265. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
  266. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/NOTICE +0 -0
  267. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/top_level.txt +0 -0
aethergraph/services/llm/generic_client copy.py
@@ -0,0 +1,691 @@
+ from __future__ import annotations
+
+ import asyncio
+ import logging
+ import os
+ import time
+ from typing import Any
+
+ import httpx
+
+ from aethergraph.config.config import RateLimitSettings
+ from aethergraph.contracts.services.llm import LLMClientProtocol
+ from aethergraph.contracts.services.metering import MeteringService
+ from aethergraph.core.runtime.runtime_metering import current_meter_context, current_metering
+
+
+ # ---- Helpers --------------------------------------------------------------
+ class _Retry:
+     def __init__(self, tries=4, base=0.5, cap=8.0):
+         self.tries, self.base, self.cap = tries, base, cap
+
+     async def run(self, fn, *a, **k):
+         exc = None
+         for i in range(self.tries):
+             try:
+                 return await fn(*a, **k)
+             except (httpx.ReadTimeout, httpx.ConnectError, httpx.HTTPStatusError) as e:
+                 exc = e
+                 await asyncio.sleep(min(self.cap, self.base * (2**i)))
+         raise exc
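+ # (A worked example of the backoff above, with the defaults tries=4, base=0.5,
+ # cap=8.0: each failed attempt sleeps min(8, 0.5 * 2**i), i.e. 0.5s, 1s, 2s,
+ # then 4s, after which the last exception is re-raised.)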
+
+
+ def _first_text(choices):
+     """Extract the first completion's text from an OpenAI-style choices list.
+
+     Usage is returned as an empty dict; callers read usage from the response body.
+     """
+     if not choices:
+         return "", {}
+     c = choices[0]
+     text = (c.get("message", {}) or {}).get("content") or c.get("text") or ""
+     usage = {}
+     return text, usage
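+ # (Illustrative choice shapes: chat completions return
+ # {"message": {"content": "..."}} while legacy completions return
+ # {"text": "..."}; _first_text handles both.)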
+
+
+ # ---- Generic client -------------------------------------------------------
+ class GenericLLMClient(LLMClientProtocol):
+     """
+     provider: one of {"openai", "azure", "anthropic", "google", "openrouter", "lmstudio", "ollama"}
+
+     Configuration (read from env by default; each value can also be passed in):
+       - OPENAI_API_KEY / OPENAI_BASE_URL
+       - AZURE_OPENAI_KEY / AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_DEPLOYMENT
+       - ANTHROPIC_API_KEY
+       - GOOGLE_API_KEY
+       - OPENROUTER_API_KEY
+       - LMSTUDIO_BASE_URL (defaults to http://localhost:1234/v1)
+       - OLLAMA_BASE_URL (defaults to http://localhost:11434/v1)
+     """
+
+     def __init__(
+         self,
+         provider: str | None = None,
+         model: str | None = None,
+         embed_model: str | None = None,
+         *,
+         base_url: str | None = None,
+         api_key: str | None = None,
+         azure_deployment: str | None = None,
+         timeout: float = 60.0,
+         # metering
+         metering: MeteringService | None = None,
+         # rate limiting
+         rate_limit_cfg: RateLimitSettings | None = None,
+     ):
+         self.provider = (provider or os.getenv("LLM_PROVIDER") or "openai").lower()
+         self.model = model or os.getenv("LLM_MODEL") or "gpt-4o-mini"
+         self.embed_model = embed_model or os.getenv("EMBED_MODEL") or "text-embedding-3-small"
+         self._retry = _Retry()
+         self._client = httpx.AsyncClient(timeout=timeout)
+         self._bound_loop = None
+
+         # Resolve credentials and base URL
+         self.api_key = (
+             api_key
+             or os.getenv("OPENAI_API_KEY")
+             or os.getenv("ANTHROPIC_API_KEY")
+             or os.getenv("GOOGLE_API_KEY")
+             or os.getenv("OPENROUTER_API_KEY")
+         )
+
+         self.base_url = (
+             base_url
+             or {
+                 "openai": os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
+                 "azure": os.getenv("AZURE_OPENAI_ENDPOINT", "").rstrip("/"),
+                 "anthropic": "https://api.anthropic.com",
+                 "google": "https://generativelanguage.googleapis.com",
+                 "openrouter": "https://openrouter.ai/api/v1",
+                 "lmstudio": os.getenv("LMSTUDIO_BASE_URL", "http://localhost:1234/v1"),
+                 "ollama": os.getenv("OLLAMA_BASE_URL", "http://localhost:11434/v1"),
+             }[self.provider]
+         )
+         self.azure_deployment = azure_deployment or os.getenv("AZURE_OPENAI_DEPLOYMENT")
+
+         self.metering = metering
+
+         # Rate-limit settings and per-run usage counters
+         self._rate_limit_cfg = rate_limit_cfg
+         self._per_run_calls: dict[str, int] = {}
+         self._per_run_tokens: dict[str, int] = {}
+
+     # ---------------- internal helpers for metering ----------------
+     @staticmethod
+     def _normalize_usage(usage: dict[str, Any]) -> tuple[int, int]:
+         """Normalize a provider usage dict into (prompt_tokens, completion_tokens)."""
+         if not usage:
+             return 0, 0
+
+         prompt = usage.get("prompt_tokens") or usage.get("input_tokens")
+         completion = usage.get("completion_tokens") or usage.get("output_tokens")
+
+         try:
+             prompt_i = int(prompt) if prompt is not None else 0
+         except (ValueError, TypeError):
+             prompt_i = 0
+         try:
+             completion_i = int(completion) if completion is not None else 0
+         except (ValueError, TypeError):
+             completion_i = 0
+
+         return prompt_i, completion_i
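+     # (Illustrative: OpenAI-style usage {"prompt_tokens": 12, "completion_tokens": 34}
+     # and Anthropic-style usage {"input_tokens": 12, "output_tokens": 34} both
+     # normalize to (12, 34).)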
+
+     def _get_rate_limit_cfg(self) -> RateLimitSettings | None:
+         if self._rate_limit_cfg is not None:
+             return self._rate_limit_cfg
+         # Lazy-load from the service container if available
+         try:
+             from aethergraph.core.runtime.runtime_services import (
+                 current_services,  # local import to avoid cycles
+             )
+
+             container = current_services()
+             settings = getattr(container, "settings", None)
+             if settings is not None and getattr(settings, "rate_limit", None) is not None:
+                 self._rate_limit_cfg = settings.rate_limit
+                 return self._rate_limit_cfg
+         except Exception:
+             pass
+         return None
+
+     def _enforce_llm_limits_for_run(self, *, usage: dict[str, Any]) -> None:
+         cfg = self._get_rate_limit_cfg()
+         if cfg is None or not cfg.enabled:
+             return
+
+         # get the current run_id from context
+         ctx = current_meter_context.get()
+         run_id = ctx.get("run_id")
+         if not run_id:
+             # no run_id in context; cannot enforce per-run limits
+             return
+
+         prompt_tokens, completion_tokens = self._normalize_usage(usage)
+         total_tokens = prompt_tokens + completion_tokens
+
+         calls = self._per_run_calls.get(run_id, 0) + 1
+         tokens = self._per_run_tokens.get(run_id, 0) + total_tokens
+
+         # store updated counts
+         self._per_run_calls[run_id] = calls
+         self._per_run_tokens[run_id] = tokens
+
+         if cfg.max_llm_calls_per_run and calls > cfg.max_llm_calls_per_run:
+             raise RuntimeError(
+                 f"LLM call limit exceeded for this run "
+                 f"({calls} > {cfg.max_llm_calls_per_run}). "
+                 "Consider simplifying the graph or raising the limit."
+             )
+
+         if cfg.max_llm_tokens_per_run and tokens > cfg.max_llm_tokens_per_run:
+             raise RuntimeError(
+                 f"LLM token limit exceeded for this run "
+                 f"({tokens} > {cfg.max_llm_tokens_per_run}). "
+                 "Consider simplifying the graph or raising the limit."
+             )
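+     # (A hypothetical configuration: RateLimitSettings(enabled=True,
+     # max_llm_calls_per_run=100, max_llm_tokens_per_run=200_000) would make the
+     # 101st call in a run, or the call pushing the run past 200k tokens, raise.)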
+
+     async def _record_llm_usage(
+         self,
+         *,
+         model: str,
+         usage: dict[str, Any],
+         latency_ms: int | None = None,
+     ) -> None:
+         self.metering = self.metering or current_metering()
+         prompt_tokens, completion_tokens = self._normalize_usage(usage)
+         ctx = current_meter_context.get()
+         user_id = ctx.get("user_id")
+         org_id = ctx.get("org_id")
+         run_id = ctx.get("run_id")
+
+         try:
+             await self.metering.record_llm(
+                 user_id=user_id,
+                 org_id=org_id,
+                 run_id=run_id,
+                 model=model,
+                 provider=self.provider,
+                 prompt_tokens=prompt_tokens,
+                 completion_tokens=completion_tokens,
+                 latency_ms=latency_ms,
+             )
+         except Exception as e:
+             # Never fail the LLM call due to metering issues
+             logger = logging.getLogger("aethergraph.services.llm.generic_client")
+             logger.warning(f"llm_metering_failed: {e}")
+
+     async def _ensure_client(self):
+         """Ensure the httpx client is bound to the current event loop.
+
+         This allows safe usage across multiple async contexts.
+         """
+         loop = asyncio.get_running_loop()
+         if self._client is None or self._bound_loop != loop:
+             # capture the timeout before closing, then replace the old client
+             timeout = self._client.timeout if self._client is not None else 60.0
+             if self._client is not None:
+                 try:
+                     await self._client.aclose()
+                 except Exception:
+                     logger = logging.getLogger("aethergraph.services.llm.generic_client")
+                     logger.warning("llm_client_close_failed")
+             self._client = httpx.AsyncClient(timeout=timeout)
+             self._bound_loop = loop
+
+     async def chat(
+         self,
+         messages: list[dict[str, Any]],
+         *,
+         reasoning_effort: str | None = None,
+         max_output_tokens: int | None = None,
+         **kw: Any,
+     ) -> tuple[str, dict[str, int]]:
+         await self._ensure_client()
+         model = kw.get("model", self.model)
+
+         if self.provider != "openai":
+             # _chat_by_provider also returns (text, usage), wrapping
+             # provider-specific structures into plain text.
+             start = time.perf_counter()
+             text, usage = await self._chat_by_provider(messages, **kw)
+             latency_ms = int((time.perf_counter() - start) * 1000)
+
+             # Enforce rate limits
+             self._enforce_llm_limits_for_run(usage=usage)
+
+             # Record metering
+             await self._record_llm_usage(
+                 model=model,
+                 usage=usage,
+                 latency_ms=latency_ms,
+             )
+             return text, usage
+
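+         # OpenAI requests go through the Responses API below, which takes
+         # `input` rather than `messages`.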
+         body: dict[str, Any] = {
+             "model": model,
+             "input": messages,
+         }
+         if reasoning_effort is not None:
+             body["reasoning"] = {"effort": reasoning_effort}
+         if max_output_tokens is not None:
+             body["max_output_tokens"] = max_output_tokens
+
+         temperature = kw.get("temperature")
+         top_p = kw.get("top_p")
+         if temperature is not None:
+             body["temperature"] = temperature
+         if top_p is not None:
+             body["top_p"] = top_p
+
+         async def _call():
+             r = await self._client.post(
+                 f"{self.base_url}/responses",
+                 headers=self._headers_openai_like(),
+                 json=body,
+             )
+
+             try:
+                 r.raise_for_status()
+             except httpx.HTTPStatusError as e:
+                 raise RuntimeError(f"OpenAI Responses API error: {e.response.text}") from e
+
+             data = r.json()
+             output = data.get("output")
+             txt = ""
+
+             # Handle the list-of-messages output shape
+             if isinstance(output, list) and output:
+                 first = output[0]
+                 if isinstance(first, dict) and first.get("type") == "message":
+                     parts = first.get("content") or []
+                     chunks: list[str] = []
+                     for p in parts:
+                         if "text" in p:
+                             chunks.append(p["text"])
+                     txt = "".join(chunks)
+
+             elif isinstance(output, dict) and output.get("type") == "message":
+                 msg = output.get("message") or output
+                 parts = msg.get("content") or []
+                 chunks = []
+                 for p in parts:
+                     if "text" in p:
+                         chunks.append(p["text"])
+                 txt = "".join(chunks)
+
+             elif isinstance(output, str):
+                 txt = output
+
+             else:
+                 txt = str(output) if output is not None else ""
+
+             usage = data.get("usage", {}) or {}
+             return txt, usage
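+         # (Illustrative Responses payload handled above: {"output": [{"type":
+         # "message", "content": [{"type": "output_text", "text": "..."}]}],
+         # "usage": {...}}; only the "text" fields are concatenated.)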
+
+         # Measure latency for metering
+         start = time.perf_counter()
+         text, usage = await self._retry.run(_call)
+         latency_ms = int((time.perf_counter() - start) * 1000)
+
+         # Enforce rate limits
+         self._enforce_llm_limits_for_run(usage=usage)
+
+         # Record metering
+         await self._record_llm_usage(
+             model=model,
+             usage=usage,
+             latency_ms=latency_ms,
+         )
+
+         return text, usage
+
+     # ---------------- Chat ----------------
+     async def _chat_by_provider(
+         self, messages: list[dict[str, Any]], **kw
+     ) -> tuple[str, dict[str, int]]:
+         await self._ensure_client()
+
+         temperature = kw.get("temperature", 0.5)
+         top_p = kw.get("top_p", 1.0)
+         model = kw.get("model", self.model)
+
+         if self.provider in {"openrouter", "lmstudio", "ollama"}:
+
+             async def _call():
+                 body = {
+                     "model": model,
+                     "messages": messages,
+                     "temperature": temperature,
+                     "top_p": top_p,
+                 }
+
+                 r = await self._client.post(
+                     f"{self.base_url}/chat/completions",
+                     headers=self._headers_openai_like(),
+                     json=body,
+                 )
+
+                 try:
+                     r.raise_for_status()
+                 except httpx.HTTPStatusError as e:
+                     raise RuntimeError(f"Chat completions API error: {e.response.text}") from e
+                 data = r.json()
+                 txt, _ = _first_text(data.get("choices", []))
+                 return txt, data.get("usage", {}) or {}
+
+             return await self._retry.run(_call)
+
+         if self.provider == "azure":
+             if not (self.base_url and self.azure_deployment):
+                 raise RuntimeError(
+                     "Azure OpenAI requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_DEPLOYMENT"
+                 )
+
+             async def _call():
+                 r = await self._client.post(
+                     f"{self.base_url}/openai/deployments/{self.azure_deployment}/chat/completions?api-version=2024-08-01-preview",
+                     headers={"api-key": self.api_key, "Content-Type": "application/json"},
+                     json={"messages": messages, "temperature": temperature, "top_p": top_p},
+                 )
+                 try:
+                     r.raise_for_status()
+                 except httpx.HTTPStatusError as e:
+                     raise RuntimeError(f"Azure OpenAI chat error: {e.response.text}") from e
+
+                 data = r.json()
+                 txt, _ = _first_text(data.get("choices", []))
+                 return txt, data.get("usage", {}) or {}
+
+             return await self._retry.run(_call)
+
+         if self.provider == "anthropic":
+             # Convert OpenAI-style messages -> Anthropic Messages API format
+             # 1) Collect system messages (as strings)
+             sys_msgs = [m["content"] for m in messages if m["role"] == "system"]
+
+             # 2) Convert non-system messages into Anthropic blocks
+             conv = []
+             for m in messages:
+                 role = m["role"]
+                 if role == "system":
+                     continue  # handled via the `system` field
+
+                 # Anthropic only accepts "user" or "assistant"
+                 anthro_role = "assistant" if role == "assistant" else "user"
+
+                 content = m["content"]
+                 # Wrap string content into text blocks; if the caller is already
+                 # passing blocks, forward them unchanged.
+                 if isinstance(content, str):
+                     content_blocks = [{"type": "text", "text": content}]
+                 else:
+                     # Assume the caller knows what they're doing for multimodal content
+                     content_blocks = content
+
+                 conv.append({"role": anthro_role, "content": content_blocks})
+
+             # 3) Build payload
+             payload = {
+                 "model": model,
+                 "max_tokens": kw.get("max_tokens", 1024),
+                 "messages": conv,
+                 "temperature": temperature,
+                 "top_p": top_p,
+             }
+
+             # Anthropic's v1/messages takes a top-level `system` prompt, so the
+             # collected system messages are joined into one string.
+             if sys_msgs:
+                 payload["system"] = "\n\n".join(sys_msgs)
+
+             async def _call():
+                 r = await self._client.post(
+                     f"{self.base_url}/v1/messages",
+                     headers={
+                         "x-api-key": self.api_key,
+                         "anthropic-version": "2023-06-01",
+                         "Content-Type": "application/json",
+                     },
+                     json=payload,
+                 )
+                 try:
+                     r.raise_for_status()
+                 except httpx.HTTPStatusError as e:
+                     # keep the useful debugging message
+                     raise RuntimeError(f"Anthropic API error: {e.response.text}") from e
+
+                 data = r.json()
+                 # data["content"] is a list of blocks
+                 blocks = data.get("content") or []
+                 txt = "".join(b.get("text", "") for b in blocks if b.get("type") == "text")
+                 return txt, data.get("usage", {}) or {}
+
+             return await self._retry.run(_call)
+
+         if self.provider == "google":
+             # Merge system messages into a single preamble
+             system = "\n".join([m["content"] for m in messages if m["role"] == "system"])
+
+             # Non-system messages
+             turns = [
+                 {
+                     "role": "user" if m["role"] == "user" else "model",
+                     "parts": [{"text": m["content"]}],
+                 }
+                 for m in messages
+                 if m["role"] != "system"
+             ]
+
+             if system:
+                 turns.insert(
+                     0,
+                     {
+                         "role": "user",
+                         "parts": [{"text": f"System instructions: {system}"}],
+                     },
+                 )
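+             # (The generateContent request below sends no separate system field,
+             # so system text is injected as a leading user turn instead.)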
+
+             async def _call():
+                 payload = {
+                     "contents": turns,
+                     "generationConfig": {
+                         "temperature": temperature,
+                         "topP": top_p,
+                     },
+                 }
+
+                 r = await self._client.post(
+                     f"{self.base_url}/v1/models/{model}:generateContent?key={self.api_key}",
+                     headers={"Content-Type": "application/json"},
+                     json=payload,
+                 )
+                 try:
+                     r.raise_for_status()
+                 except httpx.HTTPStatusError as e:
+                     raise RuntimeError(
+                         f"Gemini generateContent failed ({e.response.status_code}): {e.response.text}"
+                     ) from e
+
+                 data = r.json()
+                 cand = (data.get("candidates") or [{}])[0]
+                 txt = "".join(
+                     p.get("text", "") for p in (cand.get("content", {}).get("parts") or [])
+                 )
+                 return txt, {}  # usage parsing optional
+
+             return await self._retry.run(_call)
+
+         if self.provider == "openai":
+             raise RuntimeError(
+                 "Internal error: OpenAI provider should use chat() or responses_chat() directly."
+             )
+
+         raise NotImplementedError(f"provider {self.provider}")
+
+     # ---------------- Embeddings ----------------
+     async def embed(self, texts: list[str], **kw) -> list[list[float]]:
+         # model override order: kw > self.embed_model > ENV > default
+         await self._ensure_client()
+
+         model = (
+             kw.get("model")
+             or self.embed_model
+             or os.getenv("EMBED_MODEL")
+             or "text-embedding-3-small"
+         )
+
+         if self.provider in {"openai", "openrouter", "lmstudio", "ollama"}:
+
+             async def _call():
+                 r = await self._client.post(
+                     f"{self.base_url}/embeddings",
+                     headers=self._headers_openai_like(),
+                     json={"model": model, "input": texts},
+                 )
+                 try:
+                     r.raise_for_status()
+                 except httpx.HTTPStatusError as e:
+                     # Re-raise with more context
+                     msg = f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                     raise RuntimeError(msg) from e
+
+                 data = r.json()
+                 return [d["embedding"] for d in data.get("data", [])]
+
+             return await self._retry.run(_call)
+
+         if self.provider == "azure":
+
+             async def _call():
+                 r = await self._client.post(
+                     f"{self.base_url}/openai/deployments/{self.azure_deployment}/embeddings?api-version=2024-08-01-preview",
+                     headers={"api-key": self.api_key, "Content-Type": "application/json"},
+                     json={"input": texts},
+                 )
+                 try:
+                     r.raise_for_status()
+                 except httpx.HTTPStatusError as e:
+                     # Re-raise with more context
+                     msg = f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                     raise RuntimeError(msg) from e
+
+                 data = r.json()
+                 return [d["embedding"] for d in data.get("data", [])]
+
+             return await self._retry.run(_call)
+
+         if self.provider == "google":
+
+             async def _call():
+                 r = await self._client.post(
+                     f"{self.base_url}/v1/models/{model}:embedContent?key={self.api_key}",
+                     headers={"Content-Type": "application/json"},
+                     json={"content": {"parts": [{"text": "\n".join(texts)}]}},
+                 )
+                 try:
+                     r.raise_for_status()
+                 except httpx.HTTPStatusError as e:
+                     raise RuntimeError(
+                         f"Gemini embedContent failed ({e.response.status_code}): {e.response.text}"
+                     ) from e
+
+                 data = r.json()
+                 return [data.get("embedding", {}).get("values", [])]
+
+             return await self._retry.run(_call)
+
+         # Anthropic: no embeddings endpoint
+         raise NotImplementedError(f"Embeddings not supported for {self.provider}")
+
+     # ---------------- Internals ----------------
+     def _headers_openai_like(self):
+         hdr = {"Content-Type": "application/json"}
+         if self.provider in {"openai", "openrouter"}:
+             hdr["Authorization"] = f"Bearer {self.api_key}"
+         return hdr
+
+     async def aclose(self):
+         await self._client.aclose()
+
+     def _default_headers_for_raw(self) -> dict[str, str]:
+         hdr = {"Content-Type": "application/json"}
+
+         if self.provider in {"openai", "openrouter"}:
+             if self.api_key:
+                 hdr["Authorization"] = f"Bearer {self.api_key}"
+             else:
+                 raise RuntimeError("OpenAI/OpenRouter requires an API key for raw() calls.")
+
+         elif self.provider == "anthropic":
+             if self.api_key:
+                 hdr.update(
+                     {
+                         "x-api-key": self.api_key,
+                         "anthropic-version": "2023-06-01",
+                     }
+                 )
+             else:
+                 raise RuntimeError("Anthropic requires an API key for raw() calls.")
+
+         elif self.provider == "azure":
+             if self.api_key:
+                 hdr["api-key"] = self.api_key
+             else:
+                 raise RuntimeError("Azure OpenAI requires an API key for raw() calls.")
+
+         # For google, lmstudio, ollama we usually put keys in the URL or
+         # they're local; leave headers minimal unless the user overrides.
+         return hdr
+
+     async def raw(
+         self,
+         *,
+         method: str = "POST",
+         path: str | None = None,
+         url: str | None = None,
+         json: Any | None = None,
+         params: dict[str, Any] | None = None,
+         headers: dict[str, str] | None = None,
+         return_response: bool = False,
+     ) -> Any:
+         """
+         Low-level escape hatch: send a raw HTTP request using this client's
+         base_url, auth, and retry logic.
+
+         - If `url` is provided, it is used as-is.
+         - Otherwise, `path` is joined to `self.base_url`.
+         - `json` and `params` are forwarded to httpx.
+         - Provider-specific default headers (auth, version, etc.) are applied,
+           then overridden by `headers` if provided.
+
+         Returns:
+             - r.json() by default
+             - or the raw `httpx.Response` if `return_response=True`
+         """
+         await self._ensure_client()
+
+         if not url and not path:
+             raise ValueError("Either `url` or `path` must be provided to raw().")
+
+         if not url:
+             url = f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"
+
+         base_headers = self._default_headers_for_raw()
+         if headers:
+             base_headers.update(headers)
+
+         async def _call():
+             r = await self._client.request(
+                 method=method,
+                 url=url,
+                 headers=base_headers,
+                 json=json,
+                 params=params,
+             )
+             try:
+                 r.raise_for_status()
+             except httpx.HTTPStatusError as e:
+                 raise RuntimeError(
+                     f"{self.provider} raw API error ({e.response.status_code}): {e.response.text}"
+                 ) from e
+
+             return r if return_response else r.json()
+
+         return await self._retry.run(_call)
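+     # (Illustrative use of the escape hatch with the OpenAI provider:
+     # `await client.raw(method="GET", path="/models")` lists models through the
+     # same auth and retry machinery.)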
+
+
+ # Convenience factory
+ def llm_from_env() -> GenericLLMClient:
+     return GenericLLMClient()
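+
+
+ # A minimal usage sketch (assumes the relevant env vars, e.g. OPENAI_API_KEY,
+ # are set; the message and texts below are illustrative):
+ #
+ #     client = llm_from_env()
+ #     text, usage = await client.chat([{"role": "user", "content": "hello"}])
+ #     vectors = await client.embed(["hello world"])
+ #     await client.aclose()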