aethergraph 0.1.0a1__py3-none-any.whl → 0.1.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__init__.py +4 -10
- aethergraph/__main__.py +296 -0
- aethergraph/api/v1/__init__.py +0 -0
- aethergraph/api/v1/agents.py +46 -0
- aethergraph/api/v1/apps.py +70 -0
- aethergraph/api/v1/artifacts.py +415 -0
- aethergraph/api/v1/channels.py +89 -0
- aethergraph/api/v1/deps.py +168 -0
- aethergraph/api/v1/graphs.py +259 -0
- aethergraph/api/v1/identity.py +25 -0
- aethergraph/api/v1/memory.py +353 -0
- aethergraph/api/v1/misc.py +47 -0
- aethergraph/api/v1/pagination.py +29 -0
- aethergraph/api/v1/runs.py +568 -0
- aethergraph/api/v1/schemas.py +535 -0
- aethergraph/api/v1/session.py +323 -0
- aethergraph/api/v1/stats.py +201 -0
- aethergraph/api/v1/viz.py +152 -0
- aethergraph/config/config.py +22 -0
- aethergraph/config/loader.py +3 -2
- aethergraph/config/storage.py +209 -0
- aethergraph/contracts/__init__.py +0 -0
- aethergraph/contracts/services/__init__.py +0 -0
- aethergraph/contracts/services/artifacts.py +27 -14
- aethergraph/contracts/services/memory.py +45 -17
- aethergraph/contracts/services/metering.py +129 -0
- aethergraph/contracts/services/runs.py +50 -0
- aethergraph/contracts/services/sessions.py +87 -0
- aethergraph/contracts/services/state_stores.py +3 -0
- aethergraph/contracts/services/viz.py +44 -0
- aethergraph/contracts/storage/artifact_index.py +88 -0
- aethergraph/contracts/storage/artifact_store.py +99 -0
- aethergraph/contracts/storage/async_kv.py +34 -0
- aethergraph/contracts/storage/blob_store.py +50 -0
- aethergraph/contracts/storage/doc_store.py +35 -0
- aethergraph/contracts/storage/event_log.py +31 -0
- aethergraph/contracts/storage/vector_index.py +48 -0
- aethergraph/core/__init__.py +0 -0
- aethergraph/core/execution/forward_scheduler.py +13 -2
- aethergraph/core/execution/global_scheduler.py +21 -15
- aethergraph/core/execution/step_forward.py +10 -1
- aethergraph/core/graph/__init__.py +0 -0
- aethergraph/core/graph/graph_builder.py +8 -4
- aethergraph/core/graph/graph_fn.py +156 -15
- aethergraph/core/graph/graph_spec.py +8 -0
- aethergraph/core/graph/graphify.py +146 -27
- aethergraph/core/graph/node_spec.py +0 -2
- aethergraph/core/graph/node_state.py +3 -0
- aethergraph/core/graph/task_graph.py +39 -1
- aethergraph/core/runtime/__init__.py +0 -0
- aethergraph/core/runtime/ad_hoc_context.py +64 -4
- aethergraph/core/runtime/base_service.py +28 -4
- aethergraph/core/runtime/execution_context.py +13 -15
- aethergraph/core/runtime/graph_runner.py +222 -37
- aethergraph/core/runtime/node_context.py +510 -6
- aethergraph/core/runtime/node_services.py +12 -5
- aethergraph/core/runtime/recovery.py +15 -1
- aethergraph/core/runtime/run_manager.py +783 -0
- aethergraph/core/runtime/run_manager_local.py +204 -0
- aethergraph/core/runtime/run_registration.py +2 -2
- aethergraph/core/runtime/run_types.py +89 -0
- aethergraph/core/runtime/runtime_env.py +136 -7
- aethergraph/core/runtime/runtime_metering.py +71 -0
- aethergraph/core/runtime/runtime_registry.py +36 -13
- aethergraph/core/runtime/runtime_services.py +194 -6
- aethergraph/core/tools/builtins/toolset.py +1 -1
- aethergraph/core/tools/toolkit.py +5 -0
- aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
- aethergraph/plugins/agents/default_chat_agent.py +171 -0
- aethergraph/plugins/agents/shared.py +81 -0
- aethergraph/plugins/channel/adapters/webui.py +112 -112
- aethergraph/plugins/channel/routes/webui_routes.py +367 -102
- aethergraph/plugins/channel/utils/slack_utils.py +115 -59
- aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
- aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
- aethergraph/runtime/__init__.py +15 -0
- aethergraph/server/app_factory.py +196 -34
- aethergraph/server/clients/channel_client.py +202 -0
- aethergraph/server/http/channel_http_routes.py +116 -0
- aethergraph/server/http/channel_ws_routers.py +45 -0
- aethergraph/server/loading.py +117 -0
- aethergraph/server/server.py +131 -0
- aethergraph/server/server_state.py +240 -0
- aethergraph/server/start.py +227 -66
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
- aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
- aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
- aethergraph/server/ui_static/index.html +15 -0
- aethergraph/server/ui_static/logo.png +0 -0
- aethergraph/services/artifacts/__init__.py +0 -0
- aethergraph/services/artifacts/facade.py +1239 -132
- aethergraph/services/auth/{dev.py → authn.py} +0 -8
- aethergraph/services/auth/authz.py +100 -0
- aethergraph/services/channel/__init__.py +0 -0
- aethergraph/services/channel/channel_bus.py +19 -1
- aethergraph/services/channel/factory.py +13 -1
- aethergraph/services/channel/ingress.py +311 -0
- aethergraph/services/channel/queue_adapter.py +75 -0
- aethergraph/services/channel/session.py +502 -19
- aethergraph/services/container/default_container.py +122 -43
- aethergraph/services/continuations/continuation.py +6 -0
- aethergraph/services/continuations/stores/fs_store.py +19 -0
- aethergraph/services/eventhub/event_hub.py +76 -0
- aethergraph/services/kv/__init__.py +0 -0
- aethergraph/services/kv/ephemeral.py +244 -0
- aethergraph/services/llm/__init__.py +0 -0
- aethergraph/services/llm/generic_client copy.py +691 -0
- aethergraph/services/llm/generic_client.py +1288 -187
- aethergraph/services/llm/providers.py +3 -1
- aethergraph/services/llm/types.py +47 -0
- aethergraph/services/llm/utils.py +284 -0
- aethergraph/services/logger/std.py +3 -0
- aethergraph/services/mcp/__init__.py +9 -0
- aethergraph/services/mcp/http_client.py +38 -0
- aethergraph/services/mcp/service.py +225 -1
- aethergraph/services/mcp/stdio_client.py +41 -6
- aethergraph/services/mcp/ws_client.py +44 -2
- aethergraph/services/memory/__init__.py +0 -0
- aethergraph/services/memory/distillers/llm_long_term.py +234 -0
- aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
- aethergraph/services/memory/distillers/long_term.py +225 -0
- aethergraph/services/memory/facade/__init__.py +3 -0
- aethergraph/services/memory/facade/chat.py +440 -0
- aethergraph/services/memory/facade/core.py +447 -0
- aethergraph/services/memory/facade/distillation.py +424 -0
- aethergraph/services/memory/facade/rag.py +410 -0
- aethergraph/services/memory/facade/results.py +315 -0
- aethergraph/services/memory/facade/retrieval.py +139 -0
- aethergraph/services/memory/facade/types.py +77 -0
- aethergraph/services/memory/facade/utils.py +43 -0
- aethergraph/services/memory/facade_dep.py +1539 -0
- aethergraph/services/memory/factory.py +9 -3
- aethergraph/services/memory/utils.py +10 -0
- aethergraph/services/metering/eventlog_metering.py +470 -0
- aethergraph/services/metering/noop.py +25 -4
- aethergraph/services/rag/__init__.py +0 -0
- aethergraph/services/rag/facade.py +279 -23
- aethergraph/services/rag/index_factory.py +2 -2
- aethergraph/services/rag/node_rag.py +317 -0
- aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
- aethergraph/services/registry/__init__.py +0 -0
- aethergraph/services/registry/agent_app_meta.py +419 -0
- aethergraph/services/registry/registry_key.py +1 -1
- aethergraph/services/registry/unified_registry.py +74 -6
- aethergraph/services/scope/scope.py +159 -0
- aethergraph/services/scope/scope_factory.py +164 -0
- aethergraph/services/state_stores/serialize.py +5 -0
- aethergraph/services/state_stores/utils.py +2 -1
- aethergraph/services/viz/__init__.py +0 -0
- aethergraph/services/viz/facade.py +413 -0
- aethergraph/services/viz/viz_service.py +69 -0
- aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
- aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
- aethergraph/storage/artifacts/cas_store.py +422 -0
- aethergraph/storage/artifacts/fs_cas.py +18 -0
- aethergraph/storage/artifacts/s3_cas.py +14 -0
- aethergraph/storage/artifacts/utils.py +124 -0
- aethergraph/storage/blob/fs_blob.py +86 -0
- aethergraph/storage/blob/s3_blob.py +115 -0
- aethergraph/storage/continuation_store/fs_cont.py +283 -0
- aethergraph/storage/continuation_store/inmem_cont.py +146 -0
- aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
- aethergraph/storage/docstore/fs_doc.py +63 -0
- aethergraph/storage/docstore/sqlite_doc.py +31 -0
- aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
- aethergraph/storage/eventlog/fs_event.py +136 -0
- aethergraph/storage/eventlog/sqlite_event.py +47 -0
- aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
- aethergraph/storage/factory.py +432 -0
- aethergraph/storage/fs_utils.py +28 -0
- aethergraph/storage/graph_state_store/state_store.py +64 -0
- aethergraph/storage/kv/inmem_kv.py +103 -0
- aethergraph/storage/kv/layered_kv.py +52 -0
- aethergraph/storage/kv/sqlite_kv.py +39 -0
- aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
- aethergraph/storage/memory/event_persist.py +68 -0
- aethergraph/storage/memory/fs_persist.py +118 -0
- aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
- aethergraph/{services → storage}/memory/indices.py +31 -7
- aethergraph/storage/metering/meter_event.py +55 -0
- aethergraph/storage/runs/doc_store.py +280 -0
- aethergraph/storage/runs/inmen_store.py +82 -0
- aethergraph/storage/runs/sqlite_run_store.py +403 -0
- aethergraph/storage/sessions/doc_store.py +183 -0
- aethergraph/storage/sessions/inmem_store.py +110 -0
- aethergraph/storage/sessions/sqlite_session_store.py +399 -0
- aethergraph/storage/vector_index/chroma_index.py +138 -0
- aethergraph/storage/vector_index/faiss_index.py +179 -0
- aethergraph/storage/vector_index/sqlite_index.py +187 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a3.dist-info}/METADATA +138 -31
- aethergraph-0.1.0a3.dist-info/RECORD +356 -0
- aethergraph-0.1.0a3.dist-info/entry_points.txt +3 -0
- aethergraph/services/artifacts/factory.py +0 -35
- aethergraph/services/artifacts/fs_store.py +0 -656
- aethergraph/services/artifacts/jsonl_index.py +0 -123
- aethergraph/services/artifacts/sqlite_index.py +0 -209
- aethergraph/services/memory/distillers/episode.py +0 -116
- aethergraph/services/memory/distillers/rolling.py +0 -74
- aethergraph/services/memory/facade.py +0 -633
- aethergraph/services/memory/persist_fs.py +0 -40
- aethergraph/services/rag/index/base.py +0 -27
- aethergraph/services/rag/index/faiss_index.py +0 -121
- aethergraph/services/rag/index/sqlite_index.py +0 -134
- aethergraph-0.1.0a1.dist-info/RECORD +0 -182
- aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a3.dist-info}/WHEEL +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a3.dist-info}/licenses/LICENSE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a3.dist-info}/licenses/NOTICE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a3.dist-info}/top_level.txt +0 -0
aethergraph/services/llm/generic_client copy.py

@@ -0,0 +1,691 @@
from __future__ import annotations

import asyncio
import logging
import os

# from time import time
import time
from typing import Any

import httpx

from aethergraph.config.config import RateLimitSettings
from aethergraph.contracts.services.llm import LLMClientProtocol
from aethergraph.contracts.services.metering import MeteringService
from aethergraph.core.runtime.runtime_metering import current_meter_context, current_metering


# ---- Helpers --------------------------------------------------------------
class _Retry:
    def __init__(self, tries=4, base=0.5, cap=8.0):
        self.tries, self.base, self.cap = tries, base, cap

    async def run(self, fn, *a, **k):
        exc = None
        for i in range(self.tries):
            try:
                return await fn(*a, **k)
            except (httpx.ReadTimeout, httpx.ConnectError, httpx.HTTPStatusError) as e:
                exc = e
                await asyncio.sleep(min(self.cap, self.base * (2**i)))
        raise exc


def _first_text(choices):
    """Extract text from an OpenAI-style choices list (usage is parsed separately)."""
    if not choices:
        return "", {}
    c = choices[0]
    text = (c.get("message", {}) or {}).get("content") or c.get("text") or ""
    usage = {}
    return text, usage


# ---- Generic client -------------------------------------------------------
class GenericLLMClient(LLMClientProtocol):
    """
    provider: one of {"openai","azure","anthropic","google","openrouter","lmstudio","ollama"}
    Configuration (read from env by default, but you can pass it in):
      - OPENAI_API_KEY / OPENAI_BASE_URL
      - AZURE_OPENAI_KEY / AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_DEPLOYMENT
      - ANTHROPIC_API_KEY
      - GOOGLE_API_KEY
      - OPENROUTER_API_KEY
      - LMSTUDIO_BASE_URL (defaults to http://localhost:1234/v1)
      - OLLAMA_BASE_URL (defaults to http://localhost:11434/v1)
    """

    def __init__(
        self,
        provider: str | None = None,
        model: str | None = None,
        embed_model: str | None = None,
        *,
        base_url: str | None = None,
        api_key: str | None = None,
        azure_deployment: str | None = None,
        timeout: float = 60.0,
        # metering
        metering: MeteringService | None = None,
        # rate limit
        rate_limit_cfg: RateLimitSettings | None = None,
    ):
        self.provider = (provider or os.getenv("LLM_PROVIDER") or "openai").lower()
        self.model = model or os.getenv("LLM_MODEL") or "gpt-4o-mini"
        self.embed_model = embed_model or os.getenv("EMBED_MODEL") or "text-embedding-3-small"
        self._retry = _Retry()
        self._timeout = timeout  # kept so the client can be rebuilt on a new event loop
        self._client = httpx.AsyncClient(timeout=timeout)
        self._bound_loop = None

        # Resolve creds/base
        self.api_key = (
            api_key
            or os.getenv("OPENAI_API_KEY")
            or os.getenv("ANTHROPIC_API_KEY")
            or os.getenv("GOOGLE_API_KEY")
            or os.getenv("OPENROUTER_API_KEY")
        )

        self.base_url = (
            base_url
            or {
                "openai": os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
                "azure": os.getenv("AZURE_OPENAI_ENDPOINT", "").rstrip("/"),
                "anthropic": "https://api.anthropic.com",
                "google": "https://generativelanguage.googleapis.com",
                "openrouter": "https://openrouter.ai/api/v1",
                "lmstudio": os.getenv("LMSTUDIO_BASE_URL", "http://localhost:1234/v1"),
                "ollama": os.getenv("OLLAMA_BASE_URL", "http://localhost:11434/v1"),
            }[self.provider]
        )
        self.azure_deployment = azure_deployment or os.getenv("AZURE_OPENAI_DEPLOYMENT")

        self.metering = metering

        # Rate limit settings
        self._rate_limit_cfg = rate_limit_cfg
        self._per_run_calls: dict[str, int] = {}
        self._per_run_tokens: dict[str, int] = {}

    # ---------------- internal helpers for metering ----------------
    @staticmethod
    def _normalize_usage(usage: dict[str, Any]) -> tuple[int, int]:
        """Normalize a provider usage dict into a (prompt_tokens, completion_tokens) pair."""
        if not usage:
            return 0, 0

        prompt = usage.get("prompt_tokens") or usage.get("input_tokens")
        completion = usage.get("completion_tokens") or usage.get("output_tokens")

        try:
            prompt_i = int(prompt) if prompt is not None else 0
        except (ValueError, TypeError):
            prompt_i = 0
        try:
            completion_i = int(completion) if completion is not None else 0
        except (ValueError, TypeError):
            completion_i = 0

        return prompt_i, completion_i

    def _get_rate_limit_cfg(self) -> RateLimitSettings | None:
        if self._rate_limit_cfg is not None:
            return self._rate_limit_cfg
        # Lazy-load from container if available
        try:
            from aethergraph.core.runtime.runtime_services import (
                current_services,  # local import to avoid cycles
            )

            container = current_services()
            settings = getattr(container, "settings", None)
            if settings is not None and getattr(settings, "rate_limit", None) is not None:
                self._rate_limit_cfg = settings.rate_limit
                return self._rate_limit_cfg
        except Exception:
            pass
        return None

    def _enforce_llm_limits_for_run(self, *, usage: dict[str, Any]) -> None:
        cfg = self._get_rate_limit_cfg()
        if cfg is None or not cfg.enabled:
            return

        # get current run_id from context
        ctx = current_meter_context.get()
        run_id = ctx.get("run_id")
        if not run_id:
            # no run_id context; cannot enforce per-run limits
            return

        prompt_tokens, completion_tokens = self._normalize_usage(usage)
        total_tokens = prompt_tokens + completion_tokens

        calls = self._per_run_calls.get(run_id, 0) + 1
        tokens = self._per_run_tokens.get(run_id, 0) + total_tokens

        # store updated counts
        self._per_run_calls[run_id] = calls
        self._per_run_tokens[run_id] = tokens

        if cfg.max_llm_calls_per_run and calls > cfg.max_llm_calls_per_run:
            raise RuntimeError(
                f"LLM call limit exceeded for this run "
                f"({calls} > {cfg.max_llm_calls_per_run}). "
                "Consider simplifying the graph or raising the limit."
            )

        if cfg.max_llm_tokens_per_run and tokens > cfg.max_llm_tokens_per_run:
            raise RuntimeError(
                f"LLM token limit exceeded for this run "
                f"({tokens} > {cfg.max_llm_tokens_per_run}). "
                "Consider simplifying the graph or raising the limit."
            )

    async def _record_llm_usage(
        self,
        *,
        model: str,
        usage: dict[str, Any],
        latency_ms: int | None = None,
    ) -> None:
        self.metering = self.metering or current_metering()
        prompt_tokens, completion_tokens = self._normalize_usage(usage)
        ctx = current_meter_context.get()
        user_id = ctx.get("user_id")
        org_id = ctx.get("org_id")
        run_id = ctx.get("run_id")

        try:
            await self.metering.record_llm(
                user_id=user_id,
                org_id=org_id,
                run_id=run_id,
                model=model,
                provider=self.provider,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
                latency_ms=latency_ms,
            )
        except Exception as e:
            # Never fail the LLM call due to metering issues
            logger = logging.getLogger("aethergraph.services.llm.generic_client")
            logger.warning(f"llm_metering_failed: {e}")

    async def _ensure_client(self):
        """Ensure the httpx client is bound to the current event loop.

        This allows safe usage across multiple async contexts.
        """
        loop = asyncio.get_running_loop()
        if self._client is None or self._bound_loop != loop:
            # close old client if any
            if self._client is not None:
                try:
                    await self._client.aclose()
                except Exception:
                    logger = logging.getLogger("aethergraph.services.llm.generic_client")
                    logger.warning("llm_client_close_failed")
            self._client = httpx.AsyncClient(timeout=self._timeout)
            self._bound_loop = loop

    async def chat(
        self,
        messages: list[dict[str, Any]],
        *,
        reasoning_effort: str | None = None,
        max_output_tokens: int | None = None,
        **kw: Any,
    ) -> tuple[str, dict[str, int]]:
        await self._ensure_client()
        model = kw.get("model", self.model)

        if self.provider != "openai":
            # Make sure _chat_by_provider ALSO returns (str, usage),
            # or wraps provider-specific structures into text.
            start = time.perf_counter()
            text, usage = await self._chat_by_provider(messages, **kw)
            latency_ms = int((time.perf_counter() - start) * 1000)

            # Enforce rate limits
            self._enforce_llm_limits_for_run(usage=usage)

            # Record metering
            await self._record_llm_usage(
                model=model,
                usage=usage,
                latency_ms=latency_ms,
            )
            return text, usage

        body: dict[str, Any] = {
            "model": model,
            "input": messages,
        }
        if reasoning_effort is not None:
            body["reasoning"] = {"effort": reasoning_effort}
        if max_output_tokens is not None:
            body["max_output_tokens"] = max_output_tokens

        temperature = kw.get("temperature")
        top_p = kw.get("top_p")
        if temperature is not None:
            body["temperature"] = temperature
        if top_p is not None:
            body["top_p"] = top_p

        async def _call():
            r = await self._client.post(
                f"{self.base_url}/responses",
                headers=self._headers_openai_like(),
                json=body,
            )

            try:
                r.raise_for_status()
            except httpx.HTTPStatusError as e:
                raise RuntimeError(f"OpenAI Responses API error: {e.response.text}") from e

            data = r.json()
            output = data.get("output")
            txt = ""

            # NEW: handle list-of-messages shape
            if isinstance(output, list) and output:
                first = output[0]
                if isinstance(first, dict) and first.get("type") == "message":
                    parts = first.get("content") or []
                    chunks: list[str] = []
                    for p in parts:
                        if "text" in p:
                            chunks.append(p["text"])
                    txt = "".join(chunks)

            elif isinstance(output, dict) and output.get("type") == "message":
                msg = output.get("message") or output
                parts = msg.get("content") or []
                chunks: list[str] = []
                for p in parts:
                    if "text" in p:
                        chunks.append(p["text"])
                txt = "".join(chunks)

            elif isinstance(output, str):
                txt = output

            else:
                txt = str(output) if output is not None else ""

            usage = data.get("usage", {}) or {}
            return txt, usage

        # Measure latency for metering
        start = time.perf_counter()
        text, usage = await self._retry.run(_call)
        latency_ms = int((time.perf_counter() - start) * 1000)

        # Enforce rate limits
        self._enforce_llm_limits_for_run(usage=usage)

        # Record metering
        await self._record_llm_usage(
            model=model,
            usage=usage,
            latency_ms=latency_ms,
        )

        return text, usage

    # ---------------- Chat ----------------
    async def _chat_by_provider(
        self, messages: list[dict[str, Any]], **kw
    ) -> tuple[str, dict[str, int]]:
        await self._ensure_client()

        temperature = kw.get("temperature", 0.5)
        top_p = kw.get("top_p", 1.0)
        model = kw.get("model", self.model)

        if self.provider in {"openrouter", "lmstudio", "ollama"}:

            async def _call():
                body = {
                    "model": model,
                    "messages": messages,
                }

                r = await self._client.post(
                    f"{self.base_url}/chat/completions",
                    headers=self._headers_openai_like(),
                    json=body,
                )

                try:
                    r.raise_for_status()
                except httpx.HTTPStatusError as e:
                    raise RuntimeError(f"Chat completions API error: {e.response.text}") from e
                data = r.json()
                txt, _ = _first_text(data.get("choices", []))
                return txt, data.get("usage", {}) or {}

            return await self._retry.run(_call)

        if self.provider == "azure":
            if not (self.base_url and self.azure_deployment):
                raise RuntimeError(
                    "Azure OpenAI requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_DEPLOYMENT"
                )

            async def _call():
                r = await self._client.post(
                    f"{self.base_url}/openai/deployments/{self.azure_deployment}/chat/completions?api-version=2024-08-01-preview",
                    headers={"api-key": self.api_key, "Content-Type": "application/json"},
                    json={"messages": messages, "temperature": temperature, "top_p": top_p},
                )
                try:
                    r.raise_for_status()
                except httpx.HTTPStatusError as e:
                    raise RuntimeError(f"Azure OpenAI API error: {e.response.text}") from e

                data = r.json()
                txt, _ = _first_text(data.get("choices", []))
                return txt, data.get("usage", {}) or {}

            return await self._retry.run(_call)

        if self.provider == "anthropic":
            # Convert OpenAI-style messages -> Anthropic Messages API format
            # 1) Collect system messages (as strings)
            sys_msgs = [m["content"] for m in messages if m["role"] == "system"]

            # 2) Convert non-system messages into Anthropic blocks
            conv = []
            for m in messages:
                role = m["role"]
                if role == "system":
                    continue  # handled via `system` field

                # Anthropic only accepts "user" or "assistant"
                anthro_role = "assistant" if role == "assistant" else "user"

                content = m["content"]
                # Wrap string content into text blocks; if the caller is already passing blocks, pass them through.
                if isinstance(content, str):
                    content_blocks = [{"type": "text", "text": content}]
                else:
                    # Assume the caller knows what they're doing for multimodal content
                    content_blocks = content

                conv.append({"role": anthro_role, "content": content_blocks})

            # 3) Build payload
            payload = {
                "model": model,
                "max_tokens": kw.get("max_tokens", 1024),
                "messages": conv,
                "temperature": temperature,
                "top_p": top_p,
            }

            # The Messages API accepts a plain string for `system`, so join the collected messages
            if sys_msgs:
                payload["system"] = "\n\n".join(sys_msgs)

            async def _call():
                r = await self._client.post(
                    f"{self.base_url}/v1/messages",
                    headers={
                        "x-api-key": self.api_key,
                        "anthropic-version": "2023-06-01",
                        "Content-Type": "application/json",
                    },
                    json=payload,
                )
                try:
                    r.raise_for_status()
                except httpx.HTTPStatusError as e:
                    # keep the nice debugging message
                    raise RuntimeError(f"Anthropic API error: {e.response.text}") from e

                data = r.json()
                # data["content"] is a list of blocks
                blocks = data.get("content") or []
                txt = "".join(b.get("text", "") for b in blocks if b.get("type") == "text")
                return txt, data.get("usage", {}) or {}

            return await self._retry.run(_call)

        if self.provider == "google":
            # Merge system messages into a single preamble
            system = "\n".join([m["content"] for m in messages if m["role"] == "system"])

            # Non-system messages
            turns = [
                {
                    "role": "user" if m["role"] == "user" else "model",
                    "parts": [{"text": m["content"]}],
                }
                for m in messages
                if m["role"] != "system"
            ]

            if system:
                turns.insert(
                    0,
                    {
                        "role": "user",
                        "parts": [{"text": f"System instructions: {system}"}],
                    },
                )

            async def _call():
                payload = {
                    "contents": turns,
                    "generationConfig": {
                        "temperature": temperature,
                        "topP": top_p,
                    },
                }

                r = await self._client.post(
                    f"{self.base_url}/v1/models/{model}:generateContent?key={self.api_key}",
                    headers={"Content-Type": "application/json"},
                    json=payload,
                )
                try:
                    r.raise_for_status()
                except httpx.HTTPStatusError as e:
                    raise RuntimeError(
                        f"Gemini generateContent failed ({e.response.status_code}): {e.response.text}"
                    ) from e

                data = r.json()
                cand = (data.get("candidates") or [{}])[0]
                txt = "".join(
                    p.get("text", "") for p in (cand.get("content", {}).get("parts") or [])
                )
                return txt, {}  # usage parsing optional

            return await self._retry.run(_call)

        if self.provider == "openai":
            raise RuntimeError(
                "Internal error: OpenAI provider should use chat() or responses_chat() directly."
            )

        raise NotImplementedError(f"provider {self.provider}")

    # ---------------- Embeddings ----------------
    async def embed(self, texts: list[str], **kw) -> list[list[float]]:
        # model override order: kw > self.embed_model > ENV > default
        await self._ensure_client()

        model = (
            kw.get("model")
            or self.embed_model
            or os.getenv("EMBED_MODEL")
            or "text-embedding-3-small"
        )

        if self.provider in {"openai", "openrouter", "lmstudio", "ollama"}:

            async def _call():
                r = await self._client.post(
                    f"{self.base_url}/embeddings",
                    headers=self._headers_openai_like(),
                    json={"model": model, "input": texts},
                )
                try:
                    r.raise_for_status()
                except httpx.HTTPStatusError as e:
                    # Re-raise with more context
                    msg = f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
                    raise RuntimeError(msg) from e

                data = r.json()
                return [d["embedding"] for d in data.get("data", [])]

            return await self._retry.run(_call)

        if self.provider == "azure":

            async def _call():
                r = await self._client.post(
                    f"{self.base_url}/openai/deployments/{self.azure_deployment}/embeddings?api-version=2024-08-01-preview",
                    headers={"api-key": self.api_key, "Content-Type": "application/json"},
                    json={"input": texts},
                )
                try:
                    r.raise_for_status()
                except httpx.HTTPStatusError as e:
                    # Re-raise with more context
                    msg = f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
                    raise RuntimeError(msg) from e

                data = r.json()
                return [d["embedding"] for d in data.get("data", [])]

            return await self._retry.run(_call)

        if self.provider == "google":

            async def _call():
                # Note: the texts are joined and embedded as one input, so this
                # returns a single embedding rather than one vector per text.
                r = await self._client.post(
                    f"{self.base_url}/v1/models/{model}:embedContent?key={self.api_key}",
                    headers={"Content-Type": "application/json"},
                    json={"content": {"parts": [{"text": "\n".join(texts)}]}},
                )
                try:
                    r.raise_for_status()
                except httpx.HTTPStatusError as e:
                    raise RuntimeError(
                        f"Gemini embedContent failed ({e.response.status_code}): {e.response.text}"
                    ) from e

                data = r.json()
                return [data.get("embedding", {}).get("values", [])]

            return await self._retry.run(_call)

        # Anthropic: no embeddings endpoint
        raise NotImplementedError(f"Embeddings not supported for {self.provider}")

    # ---------------- Internals ----------------
    def _headers_openai_like(self):
        hdr = {"Content-Type": "application/json"}
        if self.provider in {"openai", "openrouter"}:
            hdr["Authorization"] = f"Bearer {self.api_key}"
        return hdr

    async def aclose(self):
        await self._client.aclose()

    def _default_headers_for_raw(self) -> dict[str, str]:
        hdr = {"Content-Type": "application/json"}

        if self.provider in {"openai", "openrouter"}:
            if self.api_key:
                hdr["Authorization"] = f"Bearer {self.api_key}"
            else:
                raise RuntimeError("OpenAI/OpenRouter requires an API key for raw() calls.")

        elif self.provider == "anthropic":
            if self.api_key:
                hdr.update(
                    {
                        "x-api-key": self.api_key,
                        "anthropic-version": "2023-06-01",
                    }
                )
            else:
                raise RuntimeError("Anthropic requires an API key for raw() calls.")

        elif self.provider == "azure":
            if self.api_key:
                hdr["api-key"] = self.api_key
            else:
                raise RuntimeError("Azure OpenAI requires an API key for raw() calls.")

        # For google, lmstudio, ollama we usually put keys in the URL or
        # they're local; leave headers minimal unless the user overrides.
        return hdr

    async def raw(
        self,
        *,
        method: str = "POST",
        path: str | None = None,
        url: str | None = None,
        json: Any | None = None,
        params: dict[str, Any] | None = None,
        headers: dict[str, str] | None = None,
        return_response: bool = False,
    ) -> Any:
        """
        Low-level escape hatch: send a raw HTTP request using this client's
        base_url, auth, and retry logic.

        - If `url` is provided, it is used as-is.
        - Otherwise, `path` is joined to `self.base_url`.
        - `json` and `params` are forwarded to httpx.
        - Provider-specific default headers (auth, version, etc.) are applied,
          then overridden by `headers` if provided.

        Returns:
            - r.json() by default
            - or the raw `httpx.Response` if `return_response=True`
        """
        await self._ensure_client()

        if not url and not path:
            raise ValueError("Either `url` or `path` must be provided to raw().")

        if not url:
            url = f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

        base_headers = self._default_headers_for_raw()
        if headers:
            base_headers.update(headers)

        async def _call():
            r = await self._client.request(
                method=method,
                url=url,
                headers=base_headers,
                json=json,
                params=params,
            )
            try:
                r.raise_for_status()
            except httpx.HTTPStatusError as e:
                raise RuntimeError(
                    f"{self.provider} raw API error ({e.response.status_code}): {e.response.text}"
                ) from e

            return r if return_response else r.json()

        return await self._retry.run(_call)


# Convenience factory
def llm_from_env() -> GenericLLMClient:
    return GenericLLMClient()