aethergraph 0.1.0a1__py3-none-any.whl → 0.1.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267)
  1. aethergraph/__init__.py +4 -10
  2. aethergraph/__main__.py +293 -0
  3. aethergraph/api/v1/__init__.py +0 -0
  4. aethergraph/api/v1/agents.py +46 -0
  5. aethergraph/api/v1/apps.py +70 -0
  6. aethergraph/api/v1/artifacts.py +415 -0
  7. aethergraph/api/v1/channels.py +89 -0
  8. aethergraph/api/v1/deps.py +168 -0
  9. aethergraph/api/v1/graphs.py +259 -0
  10. aethergraph/api/v1/identity.py +25 -0
  11. aethergraph/api/v1/memory.py +353 -0
  12. aethergraph/api/v1/misc.py +47 -0
  13. aethergraph/api/v1/pagination.py +29 -0
  14. aethergraph/api/v1/runs.py +568 -0
  15. aethergraph/api/v1/schemas.py +535 -0
  16. aethergraph/api/v1/session.py +323 -0
  17. aethergraph/api/v1/stats.py +201 -0
  18. aethergraph/api/v1/viz.py +152 -0
  19. aethergraph/config/config.py +22 -0
  20. aethergraph/config/loader.py +3 -2
  21. aethergraph/config/storage.py +209 -0
  22. aethergraph/contracts/__init__.py +0 -0
  23. aethergraph/contracts/services/__init__.py +0 -0
  24. aethergraph/contracts/services/artifacts.py +27 -14
  25. aethergraph/contracts/services/memory.py +45 -17
  26. aethergraph/contracts/services/metering.py +129 -0
  27. aethergraph/contracts/services/runs.py +50 -0
  28. aethergraph/contracts/services/sessions.py +87 -0
  29. aethergraph/contracts/services/state_stores.py +3 -0
  30. aethergraph/contracts/services/viz.py +44 -0
  31. aethergraph/contracts/storage/artifact_index.py +88 -0
  32. aethergraph/contracts/storage/artifact_store.py +99 -0
  33. aethergraph/contracts/storage/async_kv.py +34 -0
  34. aethergraph/contracts/storage/blob_store.py +50 -0
  35. aethergraph/contracts/storage/doc_store.py +35 -0
  36. aethergraph/contracts/storage/event_log.py +31 -0
  37. aethergraph/contracts/storage/vector_index.py +48 -0
  38. aethergraph/core/__init__.py +0 -0
  39. aethergraph/core/execution/forward_scheduler.py +13 -2
  40. aethergraph/core/execution/global_scheduler.py +21 -15
  41. aethergraph/core/execution/step_forward.py +10 -1
  42. aethergraph/core/graph/__init__.py +0 -0
  43. aethergraph/core/graph/graph_builder.py +8 -4
  44. aethergraph/core/graph/graph_fn.py +156 -15
  45. aethergraph/core/graph/graph_spec.py +8 -0
  46. aethergraph/core/graph/graphify.py +146 -27
  47. aethergraph/core/graph/node_spec.py +0 -2
  48. aethergraph/core/graph/node_state.py +3 -0
  49. aethergraph/core/graph/task_graph.py +39 -1
  50. aethergraph/core/runtime/__init__.py +0 -0
  51. aethergraph/core/runtime/ad_hoc_context.py +64 -4
  52. aethergraph/core/runtime/base_service.py +28 -4
  53. aethergraph/core/runtime/execution_context.py +13 -15
  54. aethergraph/core/runtime/graph_runner.py +222 -37
  55. aethergraph/core/runtime/node_context.py +510 -6
  56. aethergraph/core/runtime/node_services.py +12 -5
  57. aethergraph/core/runtime/recovery.py +15 -1
  58. aethergraph/core/runtime/run_manager.py +783 -0
  59. aethergraph/core/runtime/run_manager_local.py +204 -0
  60. aethergraph/core/runtime/run_registration.py +2 -2
  61. aethergraph/core/runtime/run_types.py +89 -0
  62. aethergraph/core/runtime/runtime_env.py +136 -7
  63. aethergraph/core/runtime/runtime_metering.py +71 -0
  64. aethergraph/core/runtime/runtime_registry.py +36 -13
  65. aethergraph/core/runtime/runtime_services.py +194 -6
  66. aethergraph/core/tools/builtins/toolset.py +1 -1
  67. aethergraph/core/tools/toolkit.py +5 -0
  68. aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
  69. aethergraph/plugins/agents/default_chat_agent.py +171 -0
  70. aethergraph/plugins/agents/shared.py +81 -0
  71. aethergraph/plugins/channel/adapters/webui.py +112 -112
  72. aethergraph/plugins/channel/routes/webui_routes.py +367 -102
  73. aethergraph/plugins/channel/utils/slack_utils.py +115 -59
  74. aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
  75. aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
  76. aethergraph/runtime/__init__.py +15 -0
  77. aethergraph/server/app_factory.py +190 -34
  78. aethergraph/server/clients/channel_client.py +202 -0
  79. aethergraph/server/http/channel_http_routes.py +116 -0
  80. aethergraph/server/http/channel_ws_routers.py +45 -0
  81. aethergraph/server/loading.py +117 -0
  82. aethergraph/server/server.py +131 -0
  83. aethergraph/server/server_state.py +240 -0
  84. aethergraph/server/start.py +227 -66
  85. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
  86. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
  87. aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
  88. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
  89. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
  90. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
  91. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
  92. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
  93. aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
  94. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
  95. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
  96. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
  97. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
  98. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
  99. aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
  100. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
  101. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
  102. aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
  103. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
  104. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
  105. aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
  106. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
  107. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
  108. aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
  109. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
  110. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
  111. aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
  112. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
  113. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
  114. aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
  115. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
  116. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
  117. aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
  118. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
  119. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
  120. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
  121. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
  122. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
  123. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
  124. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
  125. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
  126. aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
  127. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
  128. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
  129. aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
  130. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
  131. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
  132. aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
  133. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
  134. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
  135. aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
  136. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
  137. aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
  138. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
  139. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
  140. aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
  141. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
  142. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
  143. aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
  144. aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
  145. aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
  146. aethergraph/server/ui_static/index.html +15 -0
  147. aethergraph/server/ui_static/logo.png +0 -0
  148. aethergraph/services/artifacts/__init__.py +0 -0
  149. aethergraph/services/artifacts/facade.py +1239 -132
  150. aethergraph/services/auth/{dev.py → authn.py} +0 -8
  151. aethergraph/services/auth/authz.py +100 -0
  152. aethergraph/services/channel/__init__.py +0 -0
  153. aethergraph/services/channel/channel_bus.py +19 -1
  154. aethergraph/services/channel/factory.py +13 -1
  155. aethergraph/services/channel/ingress.py +311 -0
  156. aethergraph/services/channel/queue_adapter.py +75 -0
  157. aethergraph/services/channel/session.py +502 -19
  158. aethergraph/services/container/default_container.py +122 -43
  159. aethergraph/services/continuations/continuation.py +6 -0
  160. aethergraph/services/continuations/stores/fs_store.py +19 -0
  161. aethergraph/services/eventhub/event_hub.py +76 -0
  162. aethergraph/services/kv/__init__.py +0 -0
  163. aethergraph/services/kv/ephemeral.py +244 -0
  164. aethergraph/services/llm/__init__.py +0 -0
  165. aethergraph/services/llm/generic_client copy.py +691 -0
  166. aethergraph/services/llm/generic_client.py +1288 -187
  167. aethergraph/services/llm/providers.py +3 -1
  168. aethergraph/services/llm/types.py +47 -0
  169. aethergraph/services/llm/utils.py +284 -0
  170. aethergraph/services/logger/std.py +3 -0
  171. aethergraph/services/mcp/__init__.py +9 -0
  172. aethergraph/services/mcp/http_client.py +38 -0
  173. aethergraph/services/mcp/service.py +225 -1
  174. aethergraph/services/mcp/stdio_client.py +41 -6
  175. aethergraph/services/mcp/ws_client.py +44 -2
  176. aethergraph/services/memory/__init__.py +0 -0
  177. aethergraph/services/memory/distillers/llm_long_term.py +234 -0
  178. aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
  179. aethergraph/services/memory/distillers/long_term.py +225 -0
  180. aethergraph/services/memory/facade/__init__.py +3 -0
  181. aethergraph/services/memory/facade/chat.py +440 -0
  182. aethergraph/services/memory/facade/core.py +447 -0
  183. aethergraph/services/memory/facade/distillation.py +424 -0
  184. aethergraph/services/memory/facade/rag.py +410 -0
  185. aethergraph/services/memory/facade/results.py +315 -0
  186. aethergraph/services/memory/facade/retrieval.py +139 -0
  187. aethergraph/services/memory/facade/types.py +77 -0
  188. aethergraph/services/memory/facade/utils.py +43 -0
  189. aethergraph/services/memory/facade_dep.py +1539 -0
  190. aethergraph/services/memory/factory.py +9 -3
  191. aethergraph/services/memory/utils.py +10 -0
  192. aethergraph/services/metering/eventlog_metering.py +470 -0
  193. aethergraph/services/metering/noop.py +25 -4
  194. aethergraph/services/rag/__init__.py +0 -0
  195. aethergraph/services/rag/facade.py +279 -23
  196. aethergraph/services/rag/index_factory.py +2 -2
  197. aethergraph/services/rag/node_rag.py +317 -0
  198. aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
  199. aethergraph/services/registry/__init__.py +0 -0
  200. aethergraph/services/registry/agent_app_meta.py +419 -0
  201. aethergraph/services/registry/registry_key.py +1 -1
  202. aethergraph/services/registry/unified_registry.py +74 -6
  203. aethergraph/services/scope/scope.py +159 -0
  204. aethergraph/services/scope/scope_factory.py +164 -0
  205. aethergraph/services/state_stores/serialize.py +5 -0
  206. aethergraph/services/state_stores/utils.py +2 -1
  207. aethergraph/services/viz/__init__.py +0 -0
  208. aethergraph/services/viz/facade.py +413 -0
  209. aethergraph/services/viz/viz_service.py +69 -0
  210. aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
  211. aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
  212. aethergraph/storage/artifacts/cas_store.py +422 -0
  213. aethergraph/storage/artifacts/fs_cas.py +18 -0
  214. aethergraph/storage/artifacts/s3_cas.py +14 -0
  215. aethergraph/storage/artifacts/utils.py +124 -0
  216. aethergraph/storage/blob/fs_blob.py +86 -0
  217. aethergraph/storage/blob/s3_blob.py +115 -0
  218. aethergraph/storage/continuation_store/fs_cont.py +283 -0
  219. aethergraph/storage/continuation_store/inmem_cont.py +146 -0
  220. aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
  221. aethergraph/storage/docstore/fs_doc.py +63 -0
  222. aethergraph/storage/docstore/sqlite_doc.py +31 -0
  223. aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
  224. aethergraph/storage/eventlog/fs_event.py +136 -0
  225. aethergraph/storage/eventlog/sqlite_event.py +47 -0
  226. aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
  227. aethergraph/storage/factory.py +432 -0
  228. aethergraph/storage/fs_utils.py +28 -0
  229. aethergraph/storage/graph_state_store/state_store.py +64 -0
  230. aethergraph/storage/kv/inmem_kv.py +103 -0
  231. aethergraph/storage/kv/layered_kv.py +52 -0
  232. aethergraph/storage/kv/sqlite_kv.py +39 -0
  233. aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
  234. aethergraph/storage/memory/event_persist.py +68 -0
  235. aethergraph/storage/memory/fs_persist.py +118 -0
  236. aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
  237. aethergraph/{services → storage}/memory/indices.py +31 -7
  238. aethergraph/storage/metering/meter_event.py +55 -0
  239. aethergraph/storage/runs/doc_store.py +280 -0
  240. aethergraph/storage/runs/inmen_store.py +82 -0
  241. aethergraph/storage/runs/sqlite_run_store.py +403 -0
  242. aethergraph/storage/sessions/doc_store.py +183 -0
  243. aethergraph/storage/sessions/inmem_store.py +110 -0
  244. aethergraph/storage/sessions/sqlite_session_store.py +399 -0
  245. aethergraph/storage/vector_index/chroma_index.py +138 -0
  246. aethergraph/storage/vector_index/faiss_index.py +179 -0
  247. aethergraph/storage/vector_index/sqlite_index.py +187 -0
  248. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/METADATA +138 -31
  249. aethergraph-0.1.0a2.dist-info/RECORD +356 -0
  250. aethergraph-0.1.0a2.dist-info/entry_points.txt +3 -0
  251. aethergraph/services/artifacts/factory.py +0 -35
  252. aethergraph/services/artifacts/fs_store.py +0 -656
  253. aethergraph/services/artifacts/jsonl_index.py +0 -123
  254. aethergraph/services/artifacts/sqlite_index.py +0 -209
  255. aethergraph/services/memory/distillers/episode.py +0 -116
  256. aethergraph/services/memory/distillers/rolling.py +0 -74
  257. aethergraph/services/memory/facade.py +0 -633
  258. aethergraph/services/memory/persist_fs.py +0 -40
  259. aethergraph/services/rag/index/base.py +0 -27
  260. aethergraph/services/rag/index/faiss_index.py +0 -121
  261. aethergraph/services/rag/index/sqlite_index.py +0 -134
  262. aethergraph-0.1.0a1.dist-info/RECORD +0 -182
  263. aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
  264. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/WHEEL +0 -0
  265. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
  266. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/NOTICE +0 -0
  267. {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,41 @@
  from __future__ import annotations

  import asyncio
+ import json
  import logging
  import os
+
+ # from time import time
+ import time
  from typing import Any

  import httpx

+ from aethergraph.config.config import RateLimitSettings
  from aethergraph.contracts.services.llm import LLMClientProtocol
+ from aethergraph.contracts.services.metering import MeteringService
+ from aethergraph.core.runtime.runtime_metering import current_meter_context, current_metering
+ from aethergraph.services.llm.types import (
+ ChatOutputFormat,
+ GeneratedImage,
+ ImageFormat,
+ ImageGenerationResult,
+ ImageResponseFormat,
+ LLMUnsupportedFeatureError,
+ )
+ from aethergraph.services.llm.utils import (
+ _azure_images_generations_url,
+ _data_url_to_b64_and_mime,
+ _ensure_system_json_directive,
+ _extract_json_text,
+ _guess_mime_from_format,
+ _is_data_url,
+ _normalize_base_url_no_trailing_slash,
+ _normalize_openai_responses_input,
+ _to_anthropic_blocks,
+ _to_gemini_parts,
+ _validate_json_schema,
+ )


  # ---- Helpers --------------------------------------------------------------
@@ -60,6 +88,10 @@ class GenericLLMClient(LLMClientProtocol):
  api_key: str | None = None,
  azure_deployment: str | None = None,
  timeout: float = 60.0,
+ # metering
+ metering: MeteringService | None = None,
+ # rate limit
+ rate_limit_cfg: RateLimitSettings | None = None,
  ):
  self.provider = (provider or os.getenv("LLM_PROVIDER") or "openai").lower()
  self.model = model or os.getenv("LLM_MODEL") or "gpt-4o-mini"
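For reference, a minimal construction sketch using the new constructor parameters above. The RateLimitSettings field names (enabled, max_llm_calls_per_run, max_llm_tokens_per_run) are the attributes read by the per-run limit checks added later in this file; constructing the settings object with them as keyword arguments is an assumption about its model and is illustrative only.

```python
# Sketch only: field names come from _enforce_llm_limits_for_run below;
# building RateLimitSettings this way is an assumption, not confirmed API.
from aethergraph.config.config import RateLimitSettings
from aethergraph.services.llm.generic_client import GenericLLMClient

client = GenericLLMClient(
    provider="openai",
    model="gpt-4o-mini",
    api_key="sk-...",   # or rely on the environment-variable fallbacks shown above
    timeout=60.0,
    metering=None,      # falls back to current_metering() when usage is recorded
    rate_limit_cfg=RateLimitSettings(
        enabled=True,
        max_llm_calls_per_run=50,
        max_llm_tokens_per_run=200_000,
    ),
)
```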
@@ -87,10 +119,122 @@ class GenericLLMClient(LLMClientProtocol):
  "openrouter": "https://openrouter.ai/api/v1",
  "lmstudio": os.getenv("LMSTUDIO_BASE_URL", "http://localhost:1234/v1"),
  "ollama": os.getenv("OLLAMA_BASE_URL", "http://localhost:11434/v1"),
+ "dummy": "http://localhost:8745", # for testing with a dummy server
  }[self.provider]
  )
  self.azure_deployment = azure_deployment or os.getenv("AZURE_OPENAI_DEPLOYMENT")

+ self.metering = metering
+
+ # Rate limit settings
+ self._rate_limit_cfg = rate_limit_cfg
+ self._per_run_calls: dict[str, int] = {}
+ self._per_run_tokens: dict[str, int] = {}
+
+ # ---------------- internal helpers for metering ----------------
+ @staticmethod
+ def _normalize_usage(usage: dict[str, Any]) -> tuple[int, int]:
+ """Normalize usage dict to standard keys: prompt_tokens, completion_tokens."""
+ if not usage:
+ return 0, 0
+
+ prompt = usage.get("prompt_tokens") or usage.get("input_tokens")
+ completion = usage.get("completion_tokens") or usage.get("output_tokens")
+
+ try:
+ prompt_i = int(prompt) if prompt is not None else 0
+ except (ValueError, TypeError):
+ prompt_i = 0
+ try:
+ completion_i = int(completion) if completion is not None else 0
+ except (ValueError, TypeError):
+ completion_i = 0
+
+ return prompt_i, completion_i
+
+ def _get_rate_limit_cfg(self) -> RateLimitSettings | None:
+ if self._rate_limit_cfg is not None:
+ return self._rate_limit_cfg
+ # Lazy-load from container if available
+ try:
+ from aethergraph.core.runtime.runtime_services import (
+ current_services, # local import to avoid cycles
+ )
+
+ container = current_services()
+ settings = getattr(container, "settings", None)
+ if settings is not None and getattr(settings, "rate_limit", None) is not None:
+ self._rate_limit_cfg = settings.rate_limit
+ return self._rate_limit_cfg
+ except Exception:
+ pass
+
+ def _enforce_llm_limits_for_run(self, *, usage: dict[str, Any]) -> None:
+ cfg = self._get_rate_limit_cfg()
+ if cfg is None or not cfg.enabled:
+ return
+
+ # get current run_id from context
+ ctx = current_meter_context.get()
+ run_id = ctx.get("run_id")
+ if not run_id:
+ # no run_id context; cannot enforce per-run limits
+ return
+
+ prompt_tokens, completion_tokens = self._normalize_usage(usage)
+ total_tokens = prompt_tokens + completion_tokens
+
+ calls = self._per_run_calls.get(run_id, 0) + 1
+ tokens = self._per_run_tokens.get(run_id, 0) + total_tokens
+
+ # store updated counts
+ self._per_run_calls[run_id] = calls
+ self._per_run_tokens[run_id] = tokens
+
+ if cfg.max_llm_calls_per_run and calls > cfg.max_llm_calls_per_run:
+ raise RuntimeError(
+ f"LLM call limit exceeded for this run "
+ f"({calls} > {cfg.max_llm_calls_per_run}). "
+ "Consider simplifying the graph or raising the limit."
+ )
+
+ if cfg.max_llm_tokens_per_run and tokens > cfg.max_llm_tokens_per_run:
+ raise RuntimeError(
+ f"LLM token limit exceeded for this run "
+ f"({tokens} > {cfg.max_llm_tokens_per_run}). "
+ "Consider simplifying the graph or raising the limit."
+ )
+
+ async def _record_llm_usage(
+ self,
+ *,
+ model: str,
+ usage: dict[str, Any],
+ latency_ms: int | None = None,
+ ) -> None:
+ self.metering = self.metering or current_metering()
+ prompt_tokens, completion_tokens = self._normalize_usage(usage)
+ ctx = current_meter_context.get()
+ user_id = ctx.get("user_id")
+ org_id = ctx.get("org_id")
+ run_id = ctx.get("run_id")
+
+ try:
+ await self.metering.record_llm(
+ user_id=user_id,
+ org_id=org_id,
+ run_id=run_id,
+ model=model,
+ provider=self.provider,
+ prompt_tokens=prompt_tokens,
+ completion_tokens=completion_tokens,
+ latency_ms=latency_ms,
+ )
+ except Exception as e:
+ # Never fail the LLM call due to metering issues
+ logger = logging.getLogger("aethergraph.services.llm.generic_client")
+ logger.warning(f"llm_metering_failed: {e}")
+
  async def _ensure_client(self):
  """Ensure the httpx client is bound to the current event loop.
  This allows safe usage across multiple async contexts.
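The _normalize_usage helper added above accepts either prompt_tokens/completion_tokens (chat-completions style) or input_tokens/output_tokens (Responses/Anthropic/Gemini style) and coerces both to integers. A short behavior sketch derived directly from that code:

```python
from aethergraph.services.llm.generic_client import GenericLLMClient

# Both naming conventions normalize to (prompt, completion) integers.
GenericLLMClient._normalize_usage({"prompt_tokens": 12, "completion_tokens": 34})  # (12, 34)
GenericLLMClient._normalize_usage({"input_tokens": "7", "output_tokens": None})    # (7, 0)
GenericLLMClient._normalize_usage({})                                              # (0, 0)
```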
@@ -113,260 +257,954 @@ class GenericLLMClient(LLMClientProtocol):
  *,
  reasoning_effort: str | None = None,
  max_output_tokens: int | None = None,
+ output_format: ChatOutputFormat = "text",
+ json_schema: dict[str, Any] | None = None,
+ schema_name: str = "output",
+ strict_schema: bool = True,
+ validate_json: bool = True,
+ fail_on_unsupported: bool = True,
  **kw: Any,
  ) -> tuple[str, dict[str, int]]:
+ """
+ Send a chat request to the LLM provider and return the response in a normalized format.
+ This method handles provider-specific dispatch, output postprocessing,
+ rate limiting, and usage metering. It supports structured output via JSON schema
+ validation and flexible output formats.
+
+ Examples:
+ Basic usage with a list of messages:
+ ```python
+ response, usage = await context.llm().chat([
+ {"role": "user", "content": "Hello, assistant!"}
+ ])
+ ```
+
+ Requesting structured output with a JSON schema:
+ ```python
+ response, usage = await context.llm().chat(
+ messages=[{"role": "user", "content": "Summarize this text."}],
+ output_format="json",
+ json_schema={"type": "object", "properties": {"summary": {"type": "string"}}}
+ ```
+
+ Args:
+ messages: List of message dicts, each with "role" and "content" keys.
+ reasoning_effort: Optional string to control model reasoning depth.
+ max_output_tokens: Optional maximum number of output tokens.
+ output_format: Output format, e.g., "text" or "json".
+ json_schema: Optional JSON schema for validating structured output.
+ schema_name: Name for the root schema object (default: "output").
+ strict_schema: If True, enforce strict schema validation.
+ validate_json: If True, validate JSON output against schema.
+ fail_on_unsupported: If True, raise error for unsupported features.
+ **kw: Additional provider-specific keyword arguments.
+
+ Returns:
+ tuple[str, dict[str, int]]: The model response (text or structured output) and usage statistics.
+
+ Raises:
+ NotImplementedError: If the provider is not supported.
+ RuntimeError: For various errors including invalid JSON output or rate limit violations.
+ LLMUnsupportedFeatureError: If a requested feature is unsupported by the provider.
+
+ Notes:
+ - This method centralizes handling of different LLM providers, ensuring consistent behavior.
+ - Structured output support allows for robust integration with downstream systems.
+ - Rate limiting and metering help manage resource usage effectively.
+ """
  await self._ensure_client()
  model = kw.get("model", self.model)

- if self.provider != "openai":
- # Make sure _chat_by_provider ALSO returns (str, usage),
- # or wraps provider-specific structures into text.
- return await self._chat_by_provider(messages, **kw)
+ start = time.perf_counter()
+
+ # Provider-specific call (now symmetric)
+ text, usage = await self._chat_dispatch(
+ messages,
+ model=model,
+ reasoning_effort=reasoning_effort,
+ max_output_tokens=max_output_tokens,
+ output_format=output_format,
+ json_schema=json_schema,
+ schema_name=schema_name,
+ strict_schema=strict_schema,
+ validate_json=validate_json,
+ fail_on_unsupported=fail_on_unsupported,
+ **kw,
+ )

- body: dict[str, Any] = {
- "model": model,
- "input": messages,
- }
+ # JSON postprocessing/validation is centralized here (consistent behavior)
+ text = self._postprocess_structured_output(
+ text=text,
+ output_format=output_format,
+ json_schema=json_schema,
+ strict_schema=strict_schema,
+ validate_json=validate_json,
+ )
+
+ latency_ms = int((time.perf_counter() - start) * 1000)
+
+ # Enforce rate limits (existing)
+ self._enforce_llm_limits_for_run(usage=usage)
+
+ # Metering (existing)
+ await self._record_llm_usage(
+ model=model,
+ usage=usage,
+ latency_ms=latency_ms,
+ )
+
+ return text, usage
+
+ async def _chat_dispatch(
+ self,
+ messages: list[dict[str, Any]],
+ *,
+ model: str,
+ reasoning_effort: str | None,
+ max_output_tokens: int | None,
+ output_format: ChatOutputFormat,
+ json_schema: dict[str, Any] | None,
+ schema_name: str,
+ strict_schema: bool,
+ validate_json: bool,
+ fail_on_unsupported: bool,
+ **kw: Any,
+ ) -> tuple[str, dict[str, int]]:
+ # OpenAI is now symmetric too
+ if self.provider == "openai":
+ return await self._chat_openai_responses(
+ messages,
+ model=model,
+ reasoning_effort=reasoning_effort,
+ max_output_tokens=max_output_tokens,
+ output_format=output_format,
+ json_schema=json_schema,
+ schema_name=schema_name,
+ strict_schema=strict_schema,
+ )
+
+ # Everyone else
+ if self.provider in {"openrouter", "lmstudio", "ollama"}:
+ return await self._chat_openai_like_chat_completions(
+ messages,
+ model=model,
+ output_format=output_format,
+ json_schema=json_schema,
+ fail_on_unsupported=fail_on_unsupported,
+ **kw,
+ )
+
+ if self.provider == "azure":
+ return await self._chat_azure_chat_completions(
+ messages,
+ model=model,
+ output_format=output_format,
+ json_schema=json_schema,
+ fail_on_unsupported=fail_on_unsupported,
+ **kw,
+ )
+
+ if self.provider == "anthropic":
+ return await self._chat_anthropic_messages(
+ messages,
+ model=model,
+ output_format=output_format,
+ json_schema=json_schema,
+ **kw,
+ )
+
+ if self.provider == "google":
+ return await self._chat_gemini_generate_content(
+ messages,
+ model=model,
+ output_format=output_format,
+ json_schema=json_schema,
+ fail_on_unsupported=fail_on_unsupported,
+ **kw,
+ )
+
+ raise NotImplementedError(f"provider {self.provider}")
+
+ def _postprocess_structured_output(
+ self,
+ *,
+ text: str,
+ output_format: ChatOutputFormat,
+ json_schema: dict[str, Any] | None,
+ strict_schema: bool,
+ validate_json: bool,
+ ) -> str:
+ if output_format not in ("json_object", "json_schema"):
+ return text
+
+ if not validate_json:
+ return text
+
+ json_text = _extract_json_text(text)
+ try:
+ obj = json.loads(json_text)
+ except Exception as e:
+ raise RuntimeError(f"Model did not return valid JSON. Raw output:\n{text}") from e
+
+ if output_format == "json_schema" and json_schema is not None and strict_schema:
+ _validate_json_schema(obj, json_schema)
+
+ # Canonical JSON string output (makes downstream robust)
+ return json.dumps(obj, ensure_ascii=False)
+
+ async def _chat_openai_responses(
+ self,
+ messages: list[dict[str, Any]],
+ *,
+ model: str,
+ reasoning_effort: str | None,
+ max_output_tokens: int | None,
+ output_format: ChatOutputFormat,
+ json_schema: dict[str, Any] | None,
+ schema_name: str,
+ strict_schema: bool,
+ ) -> tuple[str, dict[str, int]]:
+ await self._ensure_client()
+ assert self._client is not None
+
+ url = f"{self.base_url}/responses"
+ headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+
+ # Normalize input so vision works if caller used image_url parts
+ input_messages = _normalize_openai_responses_input(messages)
+
+ body: dict[str, Any] = {"model": model, "input": input_messages}
  if reasoning_effort is not None:
  body["reasoning"] = {"effort": reasoning_effort}
  if max_output_tokens is not None:
  body["max_output_tokens"] = max_output_tokens

- temperature = kw.get("temperature")
- top_p = kw.get("top_p")
- if temperature is not None:
- body["temperature"] = temperature
- if top_p is not None:
- body["top_p"] = top_p
+ # Structured output (Responses API style)
+ if output_format == "json_object":
+ body["text"] = {"format": {"type": "json_object"}}
+ elif output_format == "json_schema":
+ if json_schema is None:
+ raise ValueError("output_format='json_schema' requires json_schema")
+ body["text"] = {
+ "format": {
+ "type": "json_schema",
+ "name": schema_name,
+ "schema": json_schema,
+ "strict": bool(strict_schema),
+ }
+ }

  async def _call():
- r = await self._client.post(
- f"{self.base_url}/responses",
- headers=self._headers_openai_like(),
- json=body,
- )
-
+ r = await self._client.post(url, headers=headers, json=body)
  try:
  r.raise_for_status()
- except httpx.HTTPError as e:
+ except httpx.HTTPStatusError as e:
  raise RuntimeError(f"OpenAI Responses API error: {e.response.text}") from e

  data = r.json()
  output = data.get("output")
  txt = ""

- # NEW: handle list-of-messages shape
+ # Your existing parsing logic, but robust for list shape
  if isinstance(output, list) and output:
- first = output[0]
- if isinstance(first, dict) and first.get("type") == "message":
- parts = first.get("content") or []
- chunks: list[str] = []
- for p in parts:
- if "text" in p:
- chunks.append(p["text"])
- txt = "".join(chunks)
+ # concat all message outputs if multiple
+ chunks: list[str] = []
+ for item in output:
+ if isinstance(item, dict) and item.get("type") == "message":
+ parts = item.get("content") or []
+ for p in parts:
+ if isinstance(p, dict) and "text" in p:
+ chunks.append(p["text"])
+ txt = "".join(chunks)

  elif isinstance(output, dict) and output.get("type") == "message":
  msg = output.get("message") or output
  parts = msg.get("content") or []
  chunks: list[str] = []
  for p in parts:
- if "text" in p:
+ if isinstance(p, dict) and "text" in p:
  chunks.append(p["text"])
  txt = "".join(chunks)

  elif isinstance(output, str):
  txt = output
-
  else:
- txt = str(output) if output is not None else ""
+ txt = ""

  usage = data.get("usage", {}) or {}
  return txt, usage

  return await self._retry.run(_call)

- # ---------------- Chat ----------------
- async def _chat_by_provider(
- self, messages: list[dict[str, Any]], **kw
+ async def _chat_openai_like_chat_completions(
+ self,
+ messages: list[dict[str, Any]],
+ *,
+ model: str,
+ output_format: ChatOutputFormat,
+ json_schema: dict[str, Any] | None,
+ fail_on_unsupported: bool,
+ **kw: Any,
  ) -> tuple[str, dict[str, int]]:
+ """
+ Docstring for _chat_openai_like_chat_completions
+
+ :param self: Description
+ :param messages: Description
+ :type messages: list[dict[str, Any]]
+ :param model: Description
+ :type model: str
+ :param output_format: Description
+ :type output_format: ChatOutputFormat
+ :param json_schema: Description
+ :type json_schema: dict[str, Any] | None
+ :param fail_on_unsupported: Description
+ :type fail_on_unsupported: bool
+ :param kw: Description
+ :type kw: Any
+ :return: Description
+ :rtype: tuple[str, dict[str, int]]
+
+ Call OpenAI-like /chat/completions endpoint.
+ """
  await self._ensure_client()
+ assert self._client is not None

  temperature = kw.get("temperature", 0.5)
  top_p = kw.get("top_p", 1.0)
- model = kw.get("model", self.model)

- if self.provider in {"openrouter", "lmstudio", "ollama"}:
+ msg_for_provider = messages
+ response_format = None

- async def _call():
- body = {
- "model": model,
- "messages": messages,
- }
+ if output_format == "json_object":
+ response_format = {"type": "json_object"}
+ msg_for_provider = _ensure_system_json_directive(messages, schema=None)
+ elif output_format == "json_schema":
+ # not truly native in most openai-like providers
+ if fail_on_unsupported:
+ raise RuntimeError(f"provider {self.provider} does not support native json_schema")
+ msg_for_provider = _ensure_system_json_directive(messages, schema=json_schema)

- r = await self._client.post(
- f"{self.base_url}/chat/completions",
- headers=self._headers_openai_like(),
- json=body,
- )
+ async def _call():
+ body: dict[str, Any] = {
+ "model": model,
+ "messages": msg_for_provider,
+ "temperature": temperature,
+ "top_p": top_p,
+ }
+ if response_format is not None:
+ body["response_format"] = response_format

- try:
- r.raise_for_status()
- except httpx.HTTPError as e:
- raise RuntimeError(f"OpenAI Responses API error: {e.response.text}") from e
- data = r.json()
- txt, _ = _first_text(data.get("choices", []))
- return txt, data.get("usage", {}) or {}
+ r = await self._client.post(
+ f"{self.base_url}/chat/completions",
+ headers=self._headers_openai_like(),
+ json=body,
+ )
+ try:
+ r.raise_for_status()
+ except httpx.HTTPError as e:
+ raise RuntimeError(f"OpenAI-like chat/completions error: {e.response.text}") from e

- return await self._retry.run(_call)
+ data = r.json()
+ txt, _ = _first_text(data.get("choices", [])) # you already have _first_text in file
+ usage = data.get("usage", {}) or {}
+ return txt, usage

- if self.provider == "azure":
- if not (self.base_url and self.azure_deployment):
+ return await self._retry.run(_call)
+
+ async def _chat_azure_chat_completions(
+ self,
+ messages: list[dict[str, Any]],
+ *,
+ model: str,
+ output_format: ChatOutputFormat,
+ json_schema: dict[str, Any] | None,
+ fail_on_unsupported: bool,
+ **kw: Any,
+ ) -> tuple[str, dict[str, int]]:
+ await self._ensure_client()
+ assert self._client is not None
+
+ if not (self.base_url and self.azure_deployment):
+ raise RuntimeError(
+ "Azure OpenAI requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_DEPLOYMENT"
+ )
+
+ temperature = kw.get("temperature", 0.5)
+ top_p = kw.get("top_p", 1.0)
+
+ msg_for_provider = messages
+ payload: dict[str, Any] = {
+ "messages": msg_for_provider,
+ "temperature": temperature,
+ "top_p": top_p,
+ }
+
+ if output_format == "json_object":
+ payload["response_format"] = {"type": "json_object"}
+ payload["messages"] = _ensure_system_json_directive(messages, schema=None)
+ elif output_format == "json_schema":
+ if fail_on_unsupported:
  raise RuntimeError(
- "Azure OpenAI requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_DEPLOYMENT"
+ "Azure native json_schema not guaranteed; set fail_on_unsupported=False for best-effort"
  )
+ payload["messages"] = _ensure_system_json_directive(messages, schema=json_schema)

- async def _call():
- r = await self._client.post(
- f"{self.base_url}/openai/deployments/{self.azure_deployment}/chat/completions?api-version=2024-08-01-preview",
- headers={"api-key": self.api_key, "Content-Type": "application/json"},
- json={"messages": messages, "temperature": temperature, "top_p": top_p},
+ async def _call():
+ r = await self._client.post(
+ f"{self.base_url}/openai/deployments/{self.azure_deployment}/chat/completions?api-version=2024-08-01-preview",
+ headers={"api-key": self.api_key, "Content-Type": "application/json"},
+ json=payload,
+ )
+ try:
+ r.raise_for_status()
+ except httpx.HTTPError as e:
+ raise RuntimeError(f"Azure chat/completions error: {e.response.text}") from e
+
+ data = r.json()
+ txt, _ = _first_text(data.get("choices", []))
+ usage = data.get("usage", {}) or {}
+ return txt, usage
+
+ return await self._retry.run(_call)
+
+ async def _chat_anthropic_messages(
+ self,
+ messages: list[dict[str, Any]],
+ *,
+ model: str,
+ output_format: ChatOutputFormat,
+ json_schema: dict[str, Any] | None,
+ **kw: Any,
+ ) -> tuple[str, dict[str, int]]:
+ await self._ensure_client()
+ assert self._client is not None
+
+ temperature = kw.get("temperature", 0.5)
+ top_p = kw.get("top_p", 1.0)
+
+ # System text aggregation
+ sys_msgs: list[str] = []
+ for m in messages:
+ if m.get("role") == "system":
+ c = m.get("content")
+ sys_msgs.append(c if isinstance(c, str) else str(c))
+
+ if output_format in ("json_object", "json_schema"):
+ sys_msgs.insert(0, "Return ONLY valid JSON. No markdown, no commentary.")
+ if output_format == "json_schema" and json_schema is not None:
+ sys_msgs.insert(
+ 1,
+ "JSON MUST conform to this schema:\n"
+ + json.dumps(json_schema, ensure_ascii=False),
  )
- try:
- r.raise_for_status()
- except httpx.HTTPError as e:
- raise RuntimeError(f"OpenAI Responses API error: {e.response.text}") from e

- data = r.json()
- txt, _ = _first_text(data.get("choices", []))
- return txt, data.get("usage", {}) or {}
+ # Convert messages to Anthropic format (blocks)
+ conv: list[dict[str, Any]] = []
+ for m in messages:
+ role = m.get("role")
+ if role == "system":
+ continue
+ anthro_role = "assistant" if role == "assistant" else "user"
+ content_blocks = _to_anthropic_blocks(m.get("content"))
+ conv.append({"role": anthro_role, "content": content_blocks})
+
+ payload: dict[str, Any] = {
+ "model": model,
+ "max_tokens": kw.get("max_tokens", 1024),
+ "messages": conv,
+ "temperature": temperature,
+ "top_p": top_p,
+ }
+ if sys_msgs:
+ payload["system"] = "\n\n".join(sys_msgs)

- return await self._retry.run(_call)
+ async def _call():
+ r = await self._client.post(
+ f"{self.base_url}/v1/messages",
+ headers={
+ "x-api-key": self.api_key,
+ "anthropic-version": "2023-06-01",
+ "Content-Type": "application/json",
+ },
+ json=payload,
+ )
+ try:
+ r.raise_for_status()
+ except httpx.HTTPStatusError as e:
+ body = e.response.text or ""
+ if e.response.status_code == 404:
+ # Often model not found, or wrong base URL.
+ hint = (
+ "Anthropic returned 404. Common causes:\n"
+ "1) base_url should be https://api.anthropic.com (no /v1 suffix)\n"
+ "2) model id is invalid / unavailable for your key\n"
+ f"Request URL: {e.request.url}\n"
+ )
+ raise RuntimeError(hint + "Response body:\n" + body) from e
+
+ raise RuntimeError(f"Anthropic API error ({e.response.status_code}): {body}") from e

- if self.provider == "anthropic":
- # Convert OpenAI-style messages -> Anthropic Messages API format
- # 1) Collect system messages (as strings)
- sys_msgs = [m["content"] for m in messages if m["role"] == "system"]
-
- # 2) Convert non-system messages into Anthropic blocks
- conv = []
- for m in messages:
- role = m["role"]
- if role == "system":
- continue # handled via `system` field
-
- # Anthropic only accepts "user" or "assistant"
- anthro_role = "assistant" if role == "assistant" else "user"
-
- content = m["content"]
- # Wrap string content into text blocks; if caller is already giving blocks, pass them through.
- if isinstance(content, str):
- content_blocks = [{"type": "text", "text": content}]
- else:
- # Assume caller knows what they're doing for multimodal content
- content_blocks = content
-
- conv.append({"role": anthro_role, "content": content_blocks})
-
- # 3) Build payload
- payload = {
- "model": model,
- "max_tokens": kw.get("max_tokens", 1024),
- "messages": conv,
- "temperature": temperature,
- "top_p": top_p,
+ data = r.json()
+ blocks = data.get("content") or []
+ txt = "".join(b.get("text", "") for b in blocks if b.get("type") == "text")
+ usage = data.get("usage", {}) or {}
+ return txt, usage
+
+ return await self._retry.run(_call)
+
+ async def _chat_gemini_generate_content(
+ self,
+ messages: list[dict[str, Any]],
+ *,
+ model: str,
+ output_format: ChatOutputFormat,
+ json_schema: dict[str, Any] | None,
+ fail_on_unsupported: bool,
+ **kw: Any,
+ ) -> tuple[str, dict[str, int]]:
+ await self._ensure_client()
+ assert self._client is not None
+
+ temperature = kw.get("temperature", 0.5)
+ top_p = kw.get("top_p", 1.0)
+
+ # Merge system messages into preamble
+ system_parts: list[str] = []
+ for m in messages:
+ if m.get("role") == "system":
+ c = m.get("content")
+ system_parts.append(c if isinstance(c, str) else str(c))
+ system = "\n".join(system_parts)
+
+ turns: list[dict[str, Any]] = []
+ for m in messages:
+ if m.get("role") == "system":
+ continue
+ role = "user" if m.get("role") == "user" else "model"
+ parts = _to_gemini_parts(m.get("content"))
+ turns.append({"role": role, "parts": parts})
+
+ if system:
+ turns.insert(0, {"role": "user", "parts": [{"text": f"System instructions: {system}"}]})
+
+ async def _call():
+ gen_cfg: dict[str, Any] = {"temperature": temperature, "topP": top_p}
+
+ # Gemini native structured outputs
+ if output_format == "json_object":
+ gen_cfg["responseMimeType"] = "application/json"
+ elif output_format == "json_schema":
+ if json_schema is None:
+ raise ValueError("output_format='json_schema' requires json_schema")
+ gen_cfg["responseMimeType"] = "application/json"
+ gen_cfg["responseJsonSchema"] = json_schema
+
+ payload = {"contents": turns, "generationConfig": gen_cfg}
+
+ r = await self._client.post(
+ f"{self.base_url}/v1/models/{model}:generateContent?key={self.api_key}",
+ headers={"Content-Type": "application/json"},
+ json=payload,
+ )
+ try:
+ r.raise_for_status()
+ except httpx.HTTPStatusError as e:
+ raise RuntimeError(
+ f"Gemini generateContent failed ({e.response.status_code}): {e.response.text}"
+ ) from e
+
+ data = r.json()
+ cand = (data.get("candidates") or [{}])[0]
+ txt = "".join(p.get("text", "") for p in (cand.get("content", {}).get("parts") or []))
+
+ um = data.get("usageMetadata") or {}
+ usage = {
+ "input_tokens": int(um.get("promptTokenCount", 0) or 0),
+ "output_tokens": int(um.get("candidatesTokenCount", 0) or 0),
  }
+ return txt, usage

- # Anthropic v1/messages now expects `system` to be a list
- if sys_msgs:
- payload["system"] = "\n\n".join(sys_msgs)
+ return await self._retry.run(_call)

- async def _call():
- r = await self._client.post(
- f"{self.base_url}/v1/messages",
- headers={
- "x-api-key": self.api_key,
- "anthropic-version": "2023-06-01",
- "Content-Type": "application/json",
- },
- json=payload,
- )
- try:
- r.raise_for_status()
- except httpx.HTTPStatusError as e:
- # keep the nice debugging message
- raise RuntimeError(f"Anthropic API error: {e.response.text}") from e
+ # ---------------- Image Generation ----------------

- data = r.json()
- # data["content"] is a list of blocks
- blocks = data.get("content") or []
- txt = "".join(b.get("text", "") for b in blocks if b.get("type") == "text")
- return txt, data.get("usage", {}) or {}
+ async def generate_image(
+ self,
+ prompt: str,
+ *,
+ model: str | None = None,
+ n: int = 1,
+ size: str | None = None, # e.g. "1024x1024"
+ quality: str | None = None, # OpenAI: "high|medium|low|auto" or dall-e: "hd|standard"
+ style: str | None = None, # dall-e-3: "vivid|natural"
+ output_format: ImageFormat | None = None, # OpenAI GPT image models: png|jpeg|webp
+ response_format: ImageResponseFormat | None = None, # dall-e: url|b64_json (OpenAI/azure)
+ background: str | None = None, # OpenAI GPT image models: "transparent|opaque|auto"
+ # Optional image inputs for providers that can do edit-style generation via "prompt + image(s)"
+ input_images: list[str] | None = None, # data: URLs (base64) for now
+ # Provider-specific knobs
+ azure_api_version: str | None = None,
+ **kw: Any,
+ ) -> ImageGenerationResult:
+ """
+ Generate images from a text prompt using the configured LLM provider.
+
+ This method supports provider-agnostic image generation, including OpenAI, Azure, and Google Gemini.
+ It automatically handles rate limiting, usage metering, and provider-specific options.
+
+ Examples:
+ Basic usage with a prompt:
+ ```python
+ result = await context.llm().generate_image("A cat riding a bicycle")
+ ```
+
+ Requesting multiple images with custom size and style:
+ ```python
+ result = await context.llm().generate_image(
+ "A futuristic cityscape",
+ n=3,
+ size="1024x1024",
+ style="vivid"
+ )
+ ```

- return await self._retry.run(_call)
+ Supplying input images for edit-style generation (Gemini):
+ ```python
+ result = await context.llm().generate_image(
+ "Make this image brighter",
+ input_images=[my_data_url]
+ )
+ ```
+
+ Args:
+ prompt: The text prompt describing the desired image(s).
+ model: Optional model name to override the default.
+ n: Number of images to generate (default: 1).
+ size: Image size, e.g., "1024x1024".
+ quality: Image quality setting (provider-specific).
+ style: Artistic style (provider-specific).
+ output_format: Desired image format, e.g., "png", "jpeg".
+ response_format: Response format, e.g., "url" or "b64_json".
+ background: Background setting, e.g., "transparent".
+ input_images: List of input images (as data URLs) for edit-style generation.
+ azure_api_version: Azure-specific API version override.
+ **kw: Additional provider-specific keyword arguments.
+
+ Returns:
+ ImageGenerationResult: An object containing generated images, usage statistics, and raw response data.
+
+ Raises:
+ LLMUnsupportedFeatureError: If the provider does not support image generation.
+ RuntimeError: For provider-specific errors or invalid configuration.
+
+ Notes:
+ - This method is accessed via `context.llm().generate_image(...)`.
+ - Usage metering and rate limits are enforced automatically. However, token usage is typically not reported for image generation.
+ - The returned `ImageGenerationResult` includes both images and metadata.
+ """
+ await self._ensure_client()
+ model = model or self.model
+
+ start = time.perf_counter()
+
+ result = await self._image_dispatch(
+ prompt,
+ model=model,
+ n=n,
+ size=size,
+ quality=quality,
+ style=style,
+ output_format=output_format,
+ response_format=response_format,
+ background=background,
+ input_images=input_images,
+ azure_api_version=azure_api_version,
+ **kw,
+ )
+
+ # Rate limits: count as a call; tokens are typically not reported for images
+ self._enforce_llm_limits_for_run(usage=result.usage or {})
+
+ latency_ms = int((time.perf_counter() - start) * 1000)
+ await self._record_llm_usage(model=model, usage=result.usage or {}, latency_ms=latency_ms)
+
+ return result
+
+ async def _image_dispatch(
+ self,
+ prompt: str,
+ *,
+ model: str,
+ n: int,
+ size: str | None,
+ quality: str | None,
+ style: str | None,
+ output_format: ImageFormat | None,
+ response_format: ImageResponseFormat | None,
+ background: str | None,
+ input_images: list[str] | None,
+ azure_api_version: str | None,
+ **kw: Any,
+ ) -> ImageGenerationResult:
+ if self.provider == "openai":
+ return await self._image_openai_generate(
+ prompt,
+ model=model,
+ n=n,
+ size=size,
+ quality=quality,
+ style=style,
+ output_format=output_format,
+ response_format=response_format,
+ background=background,
+ **kw,
+ )
+
+ if self.provider == "azure":
+ return await self._image_azure_generate(
+ prompt,
+ model=model,
+ n=n,
+ size=size,
+ quality=quality,
+ style=style,
+ output_format=output_format,
+ response_format=response_format,
+ background=background,
+ azure_api_version=azure_api_version,
+ **kw,
+ )

  if self.provider == "google":
- # Merge system messages into a single preamble
- system = "\n".join([m["content"] for m in messages if m["role"] == "system"])
-
- # Non-system messages
- turns = [
- {
- "role": "user" if m["role"] == "user" else "model",
- "parts": [{"text": m["content"]}],
- }
- for m in messages
- if m["role"] != "system"
- ]
+ return await self._image_gemini_generate(
+ prompt,
+ model=model,
+ input_images=input_images,
+ **kw,
+ )

- if system:
- turns.insert(
- 0,
- {
- "role": "user",
- "parts": [{"text": f"System instructions: {system}"}],
- },
- )
+ if self.provider == "anthropic":
+ raise LLMUnsupportedFeatureError(
+ "Anthropic does not support image generation via Claude API (vision is input-only)."
+ )

- async def _call():
- payload = {
- "contents": turns,
- "generationConfig": {
- "temperature": temperature,
- "topP": top_p,
- },
- }
+ # openrouter/lmstudio/ollama: no single standard image endpoint
+ raise LLMUnsupportedFeatureError(
+ f"provider '{self.provider}' does not support generate_image() in this client."
+ )

- r = await self._client.post(
- f"{self.base_url}/v1/models/{model}:generateContent?key={self.api_key}",
- headers={"Content-Type": "application/json"},
- json=payload,
- )
- try:
- r.raise_for_status()
- except httpx.HTTPStatusError as e:
- raise RuntimeError(
- f"Gemini generateContent failed ({e.response.status_code}): {e.response.text}"
- ) from e
+ async def _image_openai_generate(
+ self,
+ prompt: str,
+ *,
+ model: str,
+ n: int,
+ size: str | None,
+ quality: str | None,
+ style: str | None,
+ output_format: ImageFormat | None,
+ response_format: ImageResponseFormat | None,
+ background: str | None,
+ **kw: Any,
+ ) -> ImageGenerationResult:
+ assert self._client is not None

- data = r.json()
- cand = (data.get("candidates") or [{}])[0]
- txt = "".join(
- p.get("text", "") for p in (cand.get("content", {}).get("parts") or [])
+ url = f"{_normalize_base_url_no_trailing_slash(self.base_url)}/images/generations"
+ headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+
+ body: dict[str, Any] = {
+ "model": model,
+ "prompt": prompt,
+ "n": n,
+ }
+ if size is not None:
+ body["size"] = size
+ if quality is not None:
+ body["quality"] = quality
+ if style is not None:
+ body["style"] = style
+ if output_format is not None:
+ body["output_format"] = output_format
+ if background is not None:
+ body["background"] = background
+
+ # For dall-e models, response_format can be url|b64_json.
+ # GPT image models generally return base64 and may ignore response_format. :contentReference[oaicite:4]{index=4}
+ if response_format is not None:
+ body["response_format"] = response_format
+
+ async def _call():
+ r = await self._client.post(url, headers=headers, json=body)
+ try:
+ r.raise_for_status()
+ except Exception as e:
+ raise RuntimeError(f"OpenAI image generation error: {r.text}") from e
+
+ data = r.json()
+ imgs: list[GeneratedImage] = []
+ for item in data.get("data", []) or []:
+ imgs.append(
+ GeneratedImage(
+ b64=item.get("b64_json"),
+ url=item.get("url"),
+ mime_type=_guess_mime_from_format(output_format or "png")
+ if item.get("b64_json")
+ else None,
+ revised_prompt=item.get("revised_prompt"),
+ )
  )
357
- return txt, {} # usage parsing optional
358
1057
 
359
- return await self._retry.run(_call)
1058
+ # OpenAI images endpoints often don't return token usage; keep empty usage.
1059
+ return ImageGenerationResult(images=imgs, usage=data.get("usage", {}) or {}, raw=data)
360
1060
 
361
- if self.provider == "openai":
1061
+ return await self._retry.run(_call)
1062
+
1063
+ async def _image_azure_generate(
1064
+ self,
1065
+ prompt: str,
1066
+ *,
1067
+ model: str,
1068
+ n: int,
1069
+ size: str | None,
1070
+ quality: str | None,
1071
+ style: str | None,
1072
+ output_format: ImageFormat | None,
1073
+ response_format: ImageResponseFormat | None,
1074
+ background: str | None,
1075
+ azure_api_version: str | None,
1076
+ **kw: Any,
1077
+ ) -> ImageGenerationResult:
1078
+ assert self._client is not None
1079
+
1080
+ if not self.base_url or not self.azure_deployment:
362
1081
  raise RuntimeError(
363
- "Internal error: OpenAI provider should use chat() or responses_chat() directly."
1082
+ "Azure generate_image requires base_url=<resource endpoint> and azure_deployment=<deployment name>"
364
1083
  )
365
1084
 
366
- raise NotImplementedError(f"provider {self.provider}")
1085
+ api_version = (
1086
+ azure_api_version or "2025-04-01-preview"
1087
+ ) # default taken from Azure docs' GPT-image-1 series example
1088
+ url = _azure_images_generations_url(self.base_url, self.azure_deployment, api_version)
1089
+
1090
+ headers = {"api-key": self.api_key, "Content-Type": "application/json"}
1091
+
1092
+ body: dict[str, Any] = {"prompt": prompt, "n": n}
1093
+
1094
+ # For the GPT-image-1 series, Azure expects "model" in the request body (per docs).
1095
+ if model:
1096
+ body["model"] = model
1097
+
1098
+ if size is not None:
1099
+ body["size"] = size
1100
+ if quality is not None:
1101
+ body["quality"] = quality
1102
+ if style is not None:
1103
+ body["style"] = style
1104
+
1105
+ # Azure docs: GPT-image-1 series returns base64; DALL-E supports url/b64_json.
1106
+ if response_format is not None:
1107
+ body["response_format"] = response_format
1108
+ if output_format is not None:
1109
+ # Azure accepts output_format values like PNG/JPEG for some image models; pass it through as-is.
1110
+ body["output_format"] = output_format.upper()
1111
+ if background is not None:
1112
+ body["background"] = background
1113
+
1114
+ async def _call():
1115
+ r = await self._client.post(url, headers=headers, json=body)
1116
+ try:
1117
+ r.raise_for_status()
1118
+ except Exception as e:
1119
+ raise RuntimeError(f"Azure image generation error: {r.text}") from e
1120
+
1121
+ data = r.json()
1122
+ imgs: list[GeneratedImage] = []
1123
+ for item in data.get("data", []) or []:
1124
+ imgs.append(
1125
+ GeneratedImage(
1126
+ b64=item.get("b64_json"),
1127
+ url=item.get("url"),
1128
+ mime_type=_guess_mime_from_format((output_format or "png").lower())
1129
+ if item.get("b64_json")
1130
+ else None,
1131
+ revised_prompt=item.get("revised_prompt"),
1132
+ )
1133
+ )
1134
+
1135
+ return ImageGenerationResult(images=imgs, usage=data.get("usage", {}) or {}, raw=data)
1136
+
1137
+ return await self._retry.run(_call)
1138
+
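`_azure_images_generations_url` is defined outside this hunk. For orientation, Azure's documented image-generation route has the shape sketched below, so the helper presumably builds something equivalent; this is an assumption about the helper, not its actual body:

```python
def azure_images_generations_url(endpoint: str, deployment: str, api_version: str) -> str:
    """Assumed shape of the Azure OpenAI images route used above:
    {endpoint}/openai/deployments/{deployment}/images/generations?api-version=<version>
    """
    base = endpoint.rstrip("/")
    return (
        f"{base}/openai/deployments/{deployment}/images/generations"
        f"?api-version={api_version}"
    )

# e.g. azure_images_generations_url(
#     "https://my-resource.openai.azure.com", "gpt-image-1", "2025-04-01-preview"
# )
```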
1139
+ async def _image_gemini_generate(
1140
+ self,
1141
+ prompt: str,
1142
+ *,
1143
+ model: str,
1144
+ input_images: list[str] | None,
1145
+ **kw: Any,
1146
+ ) -> ImageGenerationResult:
1147
+ assert self._client is not None
1148
+
1149
+ # Gemini REST endpoint uses generativelanguage.googleapis.com and API key header.
1150
+ # Your self.base_url should already be something like: https://generativelanguage.googleapis.com
1151
+ base = (
1152
+ _normalize_base_url_no_trailing_slash(self.base_url)
1153
+ or "https://generativelanguage.googleapis.com"
1154
+ )
1155
+ url = f"{base}/v1beta/models/{model}:generateContent"
1156
+
1157
+ parts: list[dict[str, Any]] = []
1158
+ if input_images:
1159
+ for img in input_images:
1160
+ if not _is_data_url(img):
1161
+ raise ValueError("Gemini input_images must be data: URLs (base64) for now.")
1162
+ b64, mime = _data_url_to_b64_and_mime(img)
1163
+ parts.append({"inline_data": {"mime_type": mime, "data": b64}})
1164
+
1165
+ parts.append({"text": prompt})
1166
+
1167
+ payload: dict[str, Any] = {
1168
+ "contents": [{"parts": parts}],
1169
+ }
1170
+ # Optional: ImageConfig etc. could be added here later per Gemini docs.
1171
+
1172
+ async def _call():
1173
+ r = await self._client.post(
1174
+ url,
1175
+ headers={"x-goog-api-key": self.api_key, "Content-Type": "application/json"},
1176
+ json=payload,
1177
+ )
1178
+ try:
1179
+ r.raise_for_status()
1180
+ except Exception as e:
1181
+ raise RuntimeError(f"Gemini image generation error: {r.text}") from e
1182
+
1183
+ data = r.json()
1184
+ cand = (data.get("candidates") or [{}])[0]
1185
+ content = cand.get("content") or {}
1186
+ out_parts = content.get("parts") or []
1187
+
1188
+ imgs: list[GeneratedImage] = []
1189
+ for p in out_parts:
1190
+ inline = p.get("inlineData") or p.get("inline_data")
1191
+ if inline and inline.get("data"):
1192
+ mime = inline.get("mimeType") or inline.get("mime_type")
1193
+ imgs.append(GeneratedImage(b64=inline["data"], mime_type=mime))
1194
+
1195
+ # Usage shape varies; keep best-effort.
1196
+ um = data.get("usageMetadata") or {}
1197
+ usage = {
1198
+ "input_tokens": int(um.get("promptTokenCount", 0) or 0),
1199
+ "output_tokens": int(um.get("candidatesTokenCount", 0) or 0),
1200
+ }
1201
+
1202
+ return ImageGenerationResult(images=imgs, usage=usage, raw=data)
1203
+
1204
+ return await self._retry.run(_call)
367
1205
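Because this path only accepts reference images as data: URLs, callers holding a local file or raw bytes need a small encoding step first. A standard-library sketch (the file path is a placeholder; `_data_url_to_b64_and_mime` above performs the reverse conversion):

```python
import base64
import mimetypes

def to_data_url(path: str) -> str:
    """Encode a local image file as a data: URL, the form required by input_images above."""
    mime = mimetypes.guess_type(path)[0] or "image/png"
    with open(path, "rb") as f:
        payload = base64.b64encode(f.read()).decode("ascii")
    return f"data:{mime};base64,{payload}"

# input_images=[to_data_url("reference.png")]
```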
 
368
1206
  # ---------------- Embeddings ----------------
369
- async def embed(self, texts: list[str], **kw) -> list[list[float]]:
1207
+ async def embed_deprecated(self, texts: list[str], **kw) -> list[list[float]]:
370
1208
  # model override order: kw > self.embed_model > ENV > default
371
1209
  await self._ensure_client()
372
1210
 
@@ -440,6 +1278,224 @@ class GenericLLMClient(LLMClientProtocol):
440
1278
  # Anthropic: no embeddings endpoint
441
1279
  raise NotImplementedError(f"Embeddings not supported for {self.provider}")
442
1280
 
1281
+ async def embed(self, texts: list[str], **kw) -> list[list[float]]:
1282
+ """
1283
+ Generate vector embeddings for a batch of texts using the configured LLM provider.
1284
+
1285
+ This method provides a provider-agnostic interface for embedding text, automatically
1286
+ handling model selection, batching, and provider-specific API quirks. It ensures the
1287
+ output shape matches the input and raises informative errors for configuration issues.
1288
+
1289
+ Examples:
1290
+ Basic usage with a list of texts:
1291
+ ```python
1292
+ embeddings = await context.llm().embed([
1293
+ "The quick brown fox.",
1294
+ "Jumped over the lazy dog."
1295
+ ])
1296
+ ```
1297
+
1298
+ Specifying a custom embedding model:
1299
+ ```python
1300
+ embeddings = await context.llm().embed(
1301
+ ["Hello world!"],
1302
+ model="text-embedding-3-large"
1303
+ )
1304
+ ```
1305
+
1306
+ Args:
1307
+ texts: List of input strings to embed.
1308
+ model: Optional model name to override the default embedding model.
1309
+ azure_api_version: Optional Azure API version override.
1310
+ extra_body: Optional dict of extra fields to pass to the provider.
1311
+ **kw: Additional provider-specific keyword arguments.
1312
+
1313
+ Returns:
1314
+ list[list[float]]: List of embedding vectors, one per input text.
1315
+
1316
+ Raises:
1317
+ TypeError: If `texts` is not a list of strings.
1318
+ RuntimeError: For provider/model/configuration errors or shape mismatches.
1319
+ NotImplementedError: If embeddings are not supported for the provider.
1320
+
1321
+ Notes:
1322
+ - For Google Gemini, uses batch embedding if available, otherwise falls back to per-item embedding.
1323
+ - For Azure, requires `azure_deployment` to be set.
1324
+ - The returned list always matches the length of `texts`.
1325
+ """
1326
+ await self._ensure_client()
1327
+ assert self._client is not None
1328
+
1329
+ # ---- validate input ----
1330
+ if not isinstance(texts, list) or any(not isinstance(t, str) for t in texts):
1331
+ raise TypeError("embed(texts) expects list[str]")
1332
+ if len(texts) == 0:
1333
+ return []
1334
+
1335
+ # ---- resolve model ----
1336
+ # model override order: kw > self.embed_model > ENV > default
1337
+ model = (
1338
+ kw.get("model")
1339
+ or self.embed_model
1340
+ or os.getenv("EMBED_MODEL")
1341
+ or "text-embedding-3-small"
1342
+ )
1343
+
1344
+ # ---- capability + config checks ----
1345
+ if self.provider == "anthropic":
1346
+ raise NotImplementedError("Embeddings not supported for anthropic")
1347
+
1348
+ if self.provider == "azure" and not self.azure_deployment:
1349
+ raise RuntimeError(
1350
+ "Azure embeddings requires AZURE_OPENAI_DEPLOYMENT (azure_deployment)"
1351
+ )
1352
+
1353
+ # Optional knobs
1354
+ azure_api_version = kw.get("azure_api_version") or "2024-08-01-preview"
1355
+ # For OpenAI-like, some providers support extra fields like dimensions/user; pass-through if present
1356
+ extra_body = kw.get("extra_body") or {}
1357
+
1358
+ # ---- build request spec (within one function) ----
1359
+ # spec = (url, headers, json_body, parser_fn)
1360
+ if self.provider in {"openai", "openrouter", "lmstudio", "ollama"}:
1361
+ url = f"{self.base_url}/embeddings"
1362
+ headers = self._headers_openai_like()
1363
+ body: dict[str, object] = {"model": model, "input": texts}
1364
+ if isinstance(extra_body, dict):
1365
+ body.update(extra_body)
1366
+
1367
+ def parse(data: dict) -> list[list[float]]:
1368
+ items = data.get("data", []) or []
1369
+ embs = [d.get("embedding") for d in items]
1370
+ # Ensure shape consistency
1371
+ if len(embs) != len(texts) or any(e is None for e in embs):
1372
+ raise RuntimeError(
1373
+ f"Embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
1374
+ )
1375
+ return embs # type: ignore[return-value]
1376
+
1377
+ async def _call():
1378
+ r = await self._client.post(url, headers=headers, json=body)
1379
+ try:
1380
+ r.raise_for_status()
1381
+ except httpx.HTTPStatusError as e:
1382
+ raise RuntimeError(
1383
+ f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
1384
+ ) from e
1385
+ return parse(r.json())
1386
+
1387
+ return await self._retry.run(_call)
1388
+
1389
+ if self.provider == "azure":
1390
+ # Azure embeddings are typically per-deployment; model sometimes optional/ignored
1391
+ url = f"{self.base_url}/openai/deployments/{self.azure_deployment}/embeddings?api-version={azure_api_version}"
1392
+ headers = {"api-key": self.api_key, "Content-Type": "application/json"}
1393
+ body: dict[str, object] = {"input": texts}
1394
+ # Some Azure variants also accept "model" or dimensions; keep pass-through flexible
1395
+ if model:
1396
+ body["model"] = model
1397
+ if isinstance(extra_body, dict):
1398
+ body.update(extra_body)
1399
+
1400
+ def parse(data: dict) -> list[list[float]]:
1401
+ items = data.get("data", []) or []
1402
+ embs = [d.get("embedding") for d in items]
1403
+ if len(embs) != len(texts) or any(e is None for e in embs):
1404
+ raise RuntimeError(
1405
+ f"Azure embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
1406
+ )
1407
+ return embs # type: ignore[return-value]
1408
+
1409
+ async def _call():
1410
+ r = await self._client.post(url, headers=headers, json=body)
1411
+ try:
1412
+ r.raise_for_status()
1413
+ except httpx.HTTPStatusError as e:
1414
+ raise RuntimeError(
1415
+ f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
1416
+ ) from e
1417
+ return parse(r.json())
1418
+
1419
+ return await self._retry.run(_call)
1420
+
1421
+ if self.provider == "google":
1422
+ # Goal: return one embedding per input.
1423
+ # Preferred: batchEmbedContents if supported by your endpoint/model.
1424
+ # If it 404s/400s, fall back to per-item embedContent.
1425
+ base = self.base_url.rstrip("/")
1426
+ # Newer APIs often live under v1beta; this client defaults to v1. Try v1 first, then fall back to v1beta if needed.
1427
+ batch_url_v1 = f"{base}/v1/models/{model}:batchEmbedContents?key={self.api_key}"
1428
+ embed_url_v1 = f"{base}/v1/models/{model}:embedContent?key={self.api_key}"
1429
+ batch_url_v1beta = f"{base}/v1beta/models/{model}:batchEmbedContents?key={self.api_key}"
1430
+ embed_url_v1beta = f"{base}/v1beta/models/{model}:embedContent?key={self.api_key}"
1431
+
1432
+ headers = {"Content-Type": "application/json"}
1433
+
1434
+ def parse_single(data: dict) -> list[float]:
1435
+ return (data.get("embedding") or {}).get("values") or []
1436
+
1437
+ def parse_batch(data: dict) -> list[list[float]]:
1438
+ # Typical shape: {"embeddings":[{"values":[...]} , ...]}
1439
+ embs = []
1440
+ for e in data.get("embeddings") or []:
1441
+ embs.append((e or {}).get("values") or [])
1442
+ if len(embs) != len(texts):
1443
+ raise RuntimeError(
1444
+ f"Gemini batch embeddings mismatch: got {len(embs)} for {len(texts)}"
1445
+ )
1446
+ return embs
1447
+
1448
+ async def try_batch(url: str) -> list[list[float]] | None:
1449
+ body = {"requests": [{"content": {"parts": [{"text": t}]}} for t in texts]}
1450
+ r = await self._client.post(url, headers=headers, json=body)
1451
+ if r.status_code in (404, 400):
1452
+ return None
1453
+ try:
1454
+ r.raise_for_status()
1455
+ except httpx.HTTPStatusError as e:
1456
+ raise RuntimeError(
1457
+ f"Gemini batchEmbedContents failed ({e.response.status_code}): {e.response.text}"
1458
+ ) from e
1459
+ return parse_batch(r.json())
1460
+
1461
+ async def call_single(url: str) -> list[list[float]]:
1462
+ out: list[list[float]] = []
1463
+ for t in texts:
1464
+ r = await self._client.post(
1465
+ url, headers=headers, json={"content": {"parts": [{"text": t}]}}
1466
+ )
1467
+ try:
1468
+ r.raise_for_status()
1469
+ except httpx.HTTPStatusError as e:
1470
+ raise RuntimeError(
1471
+ f"Gemini embedContent failed ({e.response.status_code}): {e.response.text}"
1472
+ ) from e
1473
+ out.append(parse_single(r.json()))
1474
+ if len(out) != len(texts):
1475
+ raise RuntimeError(
1476
+ f"Gemini embeddings mismatch: got {len(out)} for {len(texts)}"
1477
+ )
1478
+ return out
1479
+
1480
+ async def _call():
1481
+ # Try v1 batch, then v1beta batch, then fall back to v1 single, then v1beta single
1482
+ res = await try_batch(batch_url_v1)
1483
+ if res is not None:
1484
+ return res
1485
+ res = await try_batch(batch_url_v1beta)
1486
+ if res is not None:
1487
+ return res
1488
+
1489
+ # fallback loop
1490
+ try:
1491
+ return await call_single(embed_url_v1)
1492
+ except RuntimeError:
1493
+ return await call_single(embed_url_v1beta)
1494
+
1495
+ return await self._retry.run(_call)
1496
+
1497
+ raise NotImplementedError(f"Embeddings not supported for {self.provider}")
1498
+
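Since `embed()` guarantees one vector per input, in input order, simple similarity ranking can sit directly on top of it. A minimal sketch, where `llm` stands in for the `context.llm()` handle used in the docstring examples and the query/document strings are placeholders:

```python
import math

def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(y * y for y in b))
    return dot / (na * nb) if na and nb else 0.0

async def rank_by_similarity(llm, query: str, docs: list[str]) -> list[tuple[float, str]]:
    # One batched call: index 0 is the query, the remaining vectors line up with docs.
    vectors = await llm.embed([query, *docs])
    qvec, dvecs = vectors[0], vectors[1:]
    return sorted(((cosine(qvec, dv), d) for dv, d in zip(dvecs, docs)), reverse=True)
```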
443
1499
  # ---------------- Internals ----------------
444
1500
  def _headers_openai_like(self):
445
1501
  hdr = {"Content-Type": "application/json"}
@@ -492,18 +1548,63 @@ class GenericLLMClient(LLMClientProtocol):
492
1548
  return_response: bool = False,
493
1549
  ) -> Any:
494
1550
  """
495
- Low-level escape hatch: send a raw HTTP request using this client’s
496
- base_url, auth, and retry logic.
497
-
498
- - If `url` is provided, it is used as-is.
499
- - Otherwise, `path` is joined to `self.base_url`.
500
- - `json` and `params` are forwarded to httpx.
501
- - Provider-specific default headers (auth, version, etc.) are applied,
502
- then overridden by `headers` if provided.
1551
+ Send a low-level HTTP request using the configured LLM provider’s client.
1552
+
1553
+ This method provides direct access to the underlying HTTP transport, automatically
1554
+ applying provider-specific authentication, base URL resolution, and retry logic.
1555
+ It is intended for advanced use cases where you need to call custom endpoints
1556
+ or experiment with provider APIs not covered by higher-level methods.
1557
+
1558
+ Examples:
1559
+ Basic usage with a relative path:
1560
+ ```python
1561
+ result = await context.llm().raw(
1562
+ method="POST",
1563
+ path="/custom/endpoint",
1564
+ json={"foo": "bar"}
1565
+ )
1566
+ ```
1567
+
1568
+ Sending a GET request to an absolute URL:
1569
+ ```python
1570
+ response = await context.llm().raw(
1571
+ method="GET",
1572
+ url="https://api.openai.com/v1/models",
1573
+ return_response=True
1574
+ )
1575
+ ```
1576
+
1577
+ Overriding headers and query parameters:
1578
+ ```python
1579
+ result = await context.llm().raw(
1580
+ path="/v1/special",
1581
+ headers={"X-Custom": "123"},
1582
+ params={"q": "search"}
1583
+ )
1584
+ ```
1585
+
1586
+ Args:
1587
+ method: HTTP method to use (e.g., "POST", "GET").
1588
+ path: Relative path to append to the provider’s base URL.
1589
+ url: Absolute URL to call (overrides `path` and `base_url`).
1590
+ json: JSON-serializable body to send with the request.
1591
+ params: Dictionary of query parameters.
1592
+ headers: Dictionary of HTTP headers to override defaults.
1593
+ return_response: If True, return the raw `httpx.Response` object;
1594
+ otherwise, return the parsed JSON response.
503
1595
 
504
1596
  Returns:
505
- - r.json() by default
506
- - or the raw `httpx.Response` if `return_response=True`
1597
+ Any: The parsed JSON response by default, or the raw `httpx.Response`
1598
+ if `return_response=True`.
1599
+
1600
+ Raises:
1601
+ ValueError: If neither `url` nor `path` is provided.
1602
+ RuntimeError: For HTTP errors or provider-specific failures.
1603
+
1604
+ Notes:
1605
+ - This method is accessed via `context.llm().raw(...)`.
1606
+ - Provider authentication and retry logic are handled automatically.
1607
+ - Use with caution; malformed requests may result in provider errors.
507
1608
  """
508
1609
  await self._ensure_client()
509
1610