aethergraph 0.1.0a1-py3-none-any.whl → 0.1.0a2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__init__.py +4 -10
- aethergraph/__main__.py +293 -0
- aethergraph/api/v1/__init__.py +0 -0
- aethergraph/api/v1/agents.py +46 -0
- aethergraph/api/v1/apps.py +70 -0
- aethergraph/api/v1/artifacts.py +415 -0
- aethergraph/api/v1/channels.py +89 -0
- aethergraph/api/v1/deps.py +168 -0
- aethergraph/api/v1/graphs.py +259 -0
- aethergraph/api/v1/identity.py +25 -0
- aethergraph/api/v1/memory.py +353 -0
- aethergraph/api/v1/misc.py +47 -0
- aethergraph/api/v1/pagination.py +29 -0
- aethergraph/api/v1/runs.py +568 -0
- aethergraph/api/v1/schemas.py +535 -0
- aethergraph/api/v1/session.py +323 -0
- aethergraph/api/v1/stats.py +201 -0
- aethergraph/api/v1/viz.py +152 -0
- aethergraph/config/config.py +22 -0
- aethergraph/config/loader.py +3 -2
- aethergraph/config/storage.py +209 -0
- aethergraph/contracts/__init__.py +0 -0
- aethergraph/contracts/services/__init__.py +0 -0
- aethergraph/contracts/services/artifacts.py +27 -14
- aethergraph/contracts/services/memory.py +45 -17
- aethergraph/contracts/services/metering.py +129 -0
- aethergraph/contracts/services/runs.py +50 -0
- aethergraph/contracts/services/sessions.py +87 -0
- aethergraph/contracts/services/state_stores.py +3 -0
- aethergraph/contracts/services/viz.py +44 -0
- aethergraph/contracts/storage/artifact_index.py +88 -0
- aethergraph/contracts/storage/artifact_store.py +99 -0
- aethergraph/contracts/storage/async_kv.py +34 -0
- aethergraph/contracts/storage/blob_store.py +50 -0
- aethergraph/contracts/storage/doc_store.py +35 -0
- aethergraph/contracts/storage/event_log.py +31 -0
- aethergraph/contracts/storage/vector_index.py +48 -0
- aethergraph/core/__init__.py +0 -0
- aethergraph/core/execution/forward_scheduler.py +13 -2
- aethergraph/core/execution/global_scheduler.py +21 -15
- aethergraph/core/execution/step_forward.py +10 -1
- aethergraph/core/graph/__init__.py +0 -0
- aethergraph/core/graph/graph_builder.py +8 -4
- aethergraph/core/graph/graph_fn.py +156 -15
- aethergraph/core/graph/graph_spec.py +8 -0
- aethergraph/core/graph/graphify.py +146 -27
- aethergraph/core/graph/node_spec.py +0 -2
- aethergraph/core/graph/node_state.py +3 -0
- aethergraph/core/graph/task_graph.py +39 -1
- aethergraph/core/runtime/__init__.py +0 -0
- aethergraph/core/runtime/ad_hoc_context.py +64 -4
- aethergraph/core/runtime/base_service.py +28 -4
- aethergraph/core/runtime/execution_context.py +13 -15
- aethergraph/core/runtime/graph_runner.py +222 -37
- aethergraph/core/runtime/node_context.py +510 -6
- aethergraph/core/runtime/node_services.py +12 -5
- aethergraph/core/runtime/recovery.py +15 -1
- aethergraph/core/runtime/run_manager.py +783 -0
- aethergraph/core/runtime/run_manager_local.py +204 -0
- aethergraph/core/runtime/run_registration.py +2 -2
- aethergraph/core/runtime/run_types.py +89 -0
- aethergraph/core/runtime/runtime_env.py +136 -7
- aethergraph/core/runtime/runtime_metering.py +71 -0
- aethergraph/core/runtime/runtime_registry.py +36 -13
- aethergraph/core/runtime/runtime_services.py +194 -6
- aethergraph/core/tools/builtins/toolset.py +1 -1
- aethergraph/core/tools/toolkit.py +5 -0
- aethergraph/plugins/agents/default_chat_agent copy.py +90 -0
- aethergraph/plugins/agents/default_chat_agent.py +171 -0
- aethergraph/plugins/agents/shared.py +81 -0
- aethergraph/plugins/channel/adapters/webui.py +112 -112
- aethergraph/plugins/channel/routes/webui_routes.py +367 -102
- aethergraph/plugins/channel/utils/slack_utils.py +115 -59
- aethergraph/plugins/channel/utils/telegram_utils.py +88 -47
- aethergraph/plugins/channel/websockets/weibui_ws.py +172 -0
- aethergraph/runtime/__init__.py +15 -0
- aethergraph/server/app_factory.py +190 -34
- aethergraph/server/clients/channel_client.py +202 -0
- aethergraph/server/http/channel_http_routes.py +116 -0
- aethergraph/server/http/channel_ws_routers.py +45 -0
- aethergraph/server/loading.py +117 -0
- aethergraph/server/server.py +131 -0
- aethergraph/server/server_state.py +240 -0
- aethergraph/server/start.py +227 -66
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
- aethergraph/server/ui_static/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
- aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +1 -0
- aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +400 -0
- aethergraph/server/ui_static/index.html +15 -0
- aethergraph/server/ui_static/logo.png +0 -0
- aethergraph/services/artifacts/__init__.py +0 -0
- aethergraph/services/artifacts/facade.py +1239 -132
- aethergraph/services/auth/{dev.py → authn.py} +0 -8
- aethergraph/services/auth/authz.py +100 -0
- aethergraph/services/channel/__init__.py +0 -0
- aethergraph/services/channel/channel_bus.py +19 -1
- aethergraph/services/channel/factory.py +13 -1
- aethergraph/services/channel/ingress.py +311 -0
- aethergraph/services/channel/queue_adapter.py +75 -0
- aethergraph/services/channel/session.py +502 -19
- aethergraph/services/container/default_container.py +122 -43
- aethergraph/services/continuations/continuation.py +6 -0
- aethergraph/services/continuations/stores/fs_store.py +19 -0
- aethergraph/services/eventhub/event_hub.py +76 -0
- aethergraph/services/kv/__init__.py +0 -0
- aethergraph/services/kv/ephemeral.py +244 -0
- aethergraph/services/llm/__init__.py +0 -0
- aethergraph/services/llm/generic_client copy.py +691 -0
- aethergraph/services/llm/generic_client.py +1288 -187
- aethergraph/services/llm/providers.py +3 -1
- aethergraph/services/llm/types.py +47 -0
- aethergraph/services/llm/utils.py +284 -0
- aethergraph/services/logger/std.py +3 -0
- aethergraph/services/mcp/__init__.py +9 -0
- aethergraph/services/mcp/http_client.py +38 -0
- aethergraph/services/mcp/service.py +225 -1
- aethergraph/services/mcp/stdio_client.py +41 -6
- aethergraph/services/mcp/ws_client.py +44 -2
- aethergraph/services/memory/__init__.py +0 -0
- aethergraph/services/memory/distillers/llm_long_term.py +234 -0
- aethergraph/services/memory/distillers/llm_meta_summary.py +398 -0
- aethergraph/services/memory/distillers/long_term.py +225 -0
- aethergraph/services/memory/facade/__init__.py +3 -0
- aethergraph/services/memory/facade/chat.py +440 -0
- aethergraph/services/memory/facade/core.py +447 -0
- aethergraph/services/memory/facade/distillation.py +424 -0
- aethergraph/services/memory/facade/rag.py +410 -0
- aethergraph/services/memory/facade/results.py +315 -0
- aethergraph/services/memory/facade/retrieval.py +139 -0
- aethergraph/services/memory/facade/types.py +77 -0
- aethergraph/services/memory/facade/utils.py +43 -0
- aethergraph/services/memory/facade_dep.py +1539 -0
- aethergraph/services/memory/factory.py +9 -3
- aethergraph/services/memory/utils.py +10 -0
- aethergraph/services/metering/eventlog_metering.py +470 -0
- aethergraph/services/metering/noop.py +25 -4
- aethergraph/services/rag/__init__.py +0 -0
- aethergraph/services/rag/facade.py +279 -23
- aethergraph/services/rag/index_factory.py +2 -2
- aethergraph/services/rag/node_rag.py +317 -0
- aethergraph/services/rate_limit/inmem_rate_limit.py +24 -0
- aethergraph/services/registry/__init__.py +0 -0
- aethergraph/services/registry/agent_app_meta.py +419 -0
- aethergraph/services/registry/registry_key.py +1 -1
- aethergraph/services/registry/unified_registry.py +74 -6
- aethergraph/services/scope/scope.py +159 -0
- aethergraph/services/scope/scope_factory.py +164 -0
- aethergraph/services/state_stores/serialize.py +5 -0
- aethergraph/services/state_stores/utils.py +2 -1
- aethergraph/services/viz/__init__.py +0 -0
- aethergraph/services/viz/facade.py +413 -0
- aethergraph/services/viz/viz_service.py +69 -0
- aethergraph/storage/artifacts/artifact_index_jsonl.py +180 -0
- aethergraph/storage/artifacts/artifact_index_sqlite.py +426 -0
- aethergraph/storage/artifacts/cas_store.py +422 -0
- aethergraph/storage/artifacts/fs_cas.py +18 -0
- aethergraph/storage/artifacts/s3_cas.py +14 -0
- aethergraph/storage/artifacts/utils.py +124 -0
- aethergraph/storage/blob/fs_blob.py +86 -0
- aethergraph/storage/blob/s3_blob.py +115 -0
- aethergraph/storage/continuation_store/fs_cont.py +283 -0
- aethergraph/storage/continuation_store/inmem_cont.py +146 -0
- aethergraph/storage/continuation_store/kvdoc_cont.py +261 -0
- aethergraph/storage/docstore/fs_doc.py +63 -0
- aethergraph/storage/docstore/sqlite_doc.py +31 -0
- aethergraph/storage/docstore/sqlite_doc_sync.py +90 -0
- aethergraph/storage/eventlog/fs_event.py +136 -0
- aethergraph/storage/eventlog/sqlite_event.py +47 -0
- aethergraph/storage/eventlog/sqlite_event_sync.py +178 -0
- aethergraph/storage/factory.py +432 -0
- aethergraph/storage/fs_utils.py +28 -0
- aethergraph/storage/graph_state_store/state_store.py +64 -0
- aethergraph/storage/kv/inmem_kv.py +103 -0
- aethergraph/storage/kv/layered_kv.py +52 -0
- aethergraph/storage/kv/sqlite_kv.py +39 -0
- aethergraph/storage/kv/sqlite_kv_sync.py +98 -0
- aethergraph/storage/memory/event_persist.py +68 -0
- aethergraph/storage/memory/fs_persist.py +118 -0
- aethergraph/{services/memory/hotlog_kv.py → storage/memory/hotlog.py} +8 -2
- aethergraph/{services → storage}/memory/indices.py +31 -7
- aethergraph/storage/metering/meter_event.py +55 -0
- aethergraph/storage/runs/doc_store.py +280 -0
- aethergraph/storage/runs/inmen_store.py +82 -0
- aethergraph/storage/runs/sqlite_run_store.py +403 -0
- aethergraph/storage/sessions/doc_store.py +183 -0
- aethergraph/storage/sessions/inmem_store.py +110 -0
- aethergraph/storage/sessions/sqlite_session_store.py +399 -0
- aethergraph/storage/vector_index/chroma_index.py +138 -0
- aethergraph/storage/vector_index/faiss_index.py +179 -0
- aethergraph/storage/vector_index/sqlite_index.py +187 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/METADATA +138 -31
- aethergraph-0.1.0a2.dist-info/RECORD +356 -0
- aethergraph-0.1.0a2.dist-info/entry_points.txt +3 -0
- aethergraph/services/artifacts/factory.py +0 -35
- aethergraph/services/artifacts/fs_store.py +0 -656
- aethergraph/services/artifacts/jsonl_index.py +0 -123
- aethergraph/services/artifacts/sqlite_index.py +0 -209
- aethergraph/services/memory/distillers/episode.py +0 -116
- aethergraph/services/memory/distillers/rolling.py +0 -74
- aethergraph/services/memory/facade.py +0 -633
- aethergraph/services/memory/persist_fs.py +0 -40
- aethergraph/services/rag/index/base.py +0 -27
- aethergraph/services/rag/index/faiss_index.py +0 -121
- aethergraph/services/rag/index/sqlite_index.py +0 -134
- aethergraph-0.1.0a1.dist-info/RECORD +0 -182
- aethergraph-0.1.0a1.dist-info/entry_points.txt +0 -2
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/WHEEL +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/LICENSE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/licenses/NOTICE +0 -0
- {aethergraph-0.1.0a1.dist-info → aethergraph-0.1.0a2.dist-info}/top_level.txt +0 -0
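
The largest single change in this release is `aethergraph/services/llm/generic_client.py` (+1288 -187), whose diff is reproduced below: `GenericLLMClient` gains structured output (`output_format`, `json_schema`), per-run rate limiting, usage metering, and image generation. As a rough, non-authoritative sketch of how the new `chat()` signature might be exercised, based only on the parameters and docstring examples visible in the diff (the `context.llm()` accessor is taken from those docstring examples; everything else here is illustrative):

```python
# Illustrative only: parameter names follow the chat() signature shown in the diff below.
# `context` is assumed to be a node context exposing .llm(), as in the docstring examples.
schema = {"type": "object", "properties": {"summary": {"type": "string"}}}

text, usage = await context.llm().chat(
    messages=[{"role": "user", "content": "Summarize this text."}],
    output_format="json_schema",  # new in 0.1.0a2; "text" and "json_object" are also accepted
    json_schema=schema,
    strict_schema=True,           # validate the returned JSON against the schema
)
# `text` comes back as a canonical JSON string; `usage` carries prompt/completion token counts.
```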
@@ -1,13 +1,41 @@
 from __future__ import annotations
 
 import asyncio
+import json
 import logging
 import os
+
+# from time import time
+import time
 from typing import Any
 
 import httpx
 
+from aethergraph.config.config import RateLimitSettings
 from aethergraph.contracts.services.llm import LLMClientProtocol
+from aethergraph.contracts.services.metering import MeteringService
+from aethergraph.core.runtime.runtime_metering import current_meter_context, current_metering
+from aethergraph.services.llm.types import (
+    ChatOutputFormat,
+    GeneratedImage,
+    ImageFormat,
+    ImageGenerationResult,
+    ImageResponseFormat,
+    LLMUnsupportedFeatureError,
+)
+from aethergraph.services.llm.utils import (
+    _azure_images_generations_url,
+    _data_url_to_b64_and_mime,
+    _ensure_system_json_directive,
+    _extract_json_text,
+    _guess_mime_from_format,
+    _is_data_url,
+    _normalize_base_url_no_trailing_slash,
+    _normalize_openai_responses_input,
+    _to_anthropic_blocks,
+    _to_gemini_parts,
+    _validate_json_schema,
+)
 
 
 # ---- Helpers --------------------------------------------------------------
@@ -60,6 +88,10 @@ class GenericLLMClient(LLMClientProtocol):
         api_key: str | None = None,
         azure_deployment: str | None = None,
         timeout: float = 60.0,
+        # metering
+        metering: MeteringService | None = None,
+        # rate limit
+        rate_limit_cfg: RateLimitSettings | None = None,
     ):
         self.provider = (provider or os.getenv("LLM_PROVIDER") or "openai").lower()
         self.model = model or os.getenv("LLM_MODEL") or "gpt-4o-mini"
@@ -87,10 +119,122 @@ class GenericLLMClient(LLMClientProtocol):
                 "openrouter": "https://openrouter.ai/api/v1",
                 "lmstudio": os.getenv("LMSTUDIO_BASE_URL", "http://localhost:1234/v1"),
                 "ollama": os.getenv("OLLAMA_BASE_URL", "http://localhost:11434/v1"),
+                "dummy": "http://localhost:8745",  # for testing with a dummy server
             }[self.provider]
         )
         self.azure_deployment = azure_deployment or os.getenv("AZURE_OPENAI_DEPLOYMENT")
 
+        self.metering = metering
+
+        # Rate limit settings
+        self._rate_limit_cfg = rate_limit_cfg
+        self._per_run_calls: dict[str, int] = {}
+        self._per_run_tokens: dict[str, int] = {}
+
+    # ---------------- internal helpers for metering ----------------
+    @staticmethod
+    def _normalize_usage(usage: dict[str, Any]) -> tuple[int, int]:
+        """Normalize usage dict to standard keys: prompt_tokens, completion_tokens."""
+        if not usage:
+            return 0, 0
+
+        prompt = usage.get("prompt_tokens") or usage.get("input_tokens")
+        completion = usage.get("completion_tokens") or usage.get("output_tokens")
+
+        try:
+            prompt_i = int(prompt) if prompt is not None else 0
+        except (ValueError, TypeError):
+            prompt_i = 0
+        try:
+            completion_i = int(completion) if completion is not None else 0
+        except (ValueError, TypeError):
+            completion_i = 0
+
+        return prompt_i, completion_i
+
+    def _get_rate_limit_cfg(self) -> RateLimitSettings | None:
+        if self._rate_limit_cfg is not None:
+            return self._rate_limit_cfg
+        # Lazy-load from container if available
+        try:
+            from aethergraph.core.runtime.runtime_services import (
+                current_services,  # local import to avoid cycles
+            )
+
+            container = current_services()
+            settings = getattr(container, "settings", None)
+            if settings is not None and getattr(settings, "rate_limit", None) is not None:
+                self._rate_limit_cfg = settings.rate_limit
+            return self._rate_limit_cfg
+        except Exception:
+            pass
+
+    def _enforce_llm_limits_for_run(self, *, usage: dict[str, Any]) -> None:
+        cfg = self._get_rate_limit_cfg()
+        if cfg is None or not cfg.enabled:
+            return
+
+        # get current run_id from context
+        ctx = current_meter_context.get()
+        run_id = ctx.get("run_id")
+        if not run_id:
+            # no run_id context; cannot enforce per-run limits
+            return
+
+        prompt_tokens, completion_tokens = self._normalize_usage(usage)
+        total_tokens = prompt_tokens + completion_tokens
+
+        calls = self._per_run_calls.get(run_id, 0) + 1
+        tokens = self._per_run_tokens.get(run_id, 0) + total_tokens
+
+        # store updated counts
+        self._per_run_calls[run_id] = calls
+        self._per_run_tokens[run_id] = tokens
+
+        if cfg.max_llm_calls_per_run and calls > cfg.max_llm_calls_per_run:
+            raise RuntimeError(
+                f"LLM call limit exceeded for this run "
+                f"({calls} > {cfg.max_llm_calls_per_run}). "
+                "Consider simplifying the graph or raising the limit."
+            )
+
+        if cfg.max_llm_tokens_per_run and tokens > cfg.max_llm_tokens_per_run:
+            raise RuntimeError(
+                f"LLM token limit exceeded for this run "
+                f"({tokens} > {cfg.max_llm_tokens_per_run}). "
+                "Consider simplifying the graph or raising the limit."
+            )
+
+    async def _record_llm_usage(
+        self,
+        *,
+        model: str,
+        usage: dict[str, Any],
+        latency_ms: int | None = None,
+    ) -> None:
+        self.metering = self.metering or current_metering()
+        prompt_tokens, completion_tokens = self._normalize_usage(usage)
+        ctx = current_meter_context.get()
+        user_id = ctx.get("user_id")
+        org_id = ctx.get("org_id")
+        run_id = ctx.get("run_id")
+
+        try:
+            await self.metering.record_llm(
+                user_id=user_id,
+                org_id=org_id,
+                run_id=run_id,
+                model=model,
+                provider=self.provider,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                latency_ms=latency_ms,
+            )
+        except Exception as e:
+            # Never fail the LLM call due to metering issues
+            logger = logging.getLogger("aethergraph.services.llm.generic_client")
+            logger.warning(f"llm_metering_failed: {e}")
+
     async def _ensure_client(self):
         """Ensure the httpx client is bound to the current event loop.
         This allows safe usage across multiple async contexts.
@@ -113,260 +257,954 @@ class GenericLLMClient(LLMClientProtocol):
|
|
|
113
257
|
*,
|
|
114
258
|
reasoning_effort: str | None = None,
|
|
115
259
|
max_output_tokens: int | None = None,
|
|
260
|
+
output_format: ChatOutputFormat = "text",
|
|
261
|
+
json_schema: dict[str, Any] | None = None,
|
|
262
|
+
schema_name: str = "output",
|
|
263
|
+
strict_schema: bool = True,
|
|
264
|
+
validate_json: bool = True,
|
|
265
|
+
fail_on_unsupported: bool = True,
|
|
116
266
|
**kw: Any,
|
|
117
267
|
) -> tuple[str, dict[str, int]]:
|
|
268
|
+
"""
|
|
269
|
+
Send a chat request to the LLM provider and return the response in a normalized format.
|
|
270
|
+
This method handles provider-specific dispatch, output postprocessing,
|
|
271
|
+
rate limiting, and usage metering. It supports structured output via JSON schema
|
|
272
|
+
validation and flexible output formats.
|
|
273
|
+
|
|
274
|
+
Examples:
|
|
275
|
+
Basic usage with a list of messages:
|
|
276
|
+
```python
|
|
277
|
+
response, usage = await context.llm().chat([
|
|
278
|
+
{"role": "user", "content": "Hello, assistant!"}
|
|
279
|
+
])
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
Requesting structured output with a JSON schema:
|
|
283
|
+
```python
|
|
284
|
+
response, usage = await context.llm().chat(
|
|
285
|
+
messages=[{"role": "user", "content": "Summarize this text."}],
|
|
286
|
+
output_format="json",
|
|
287
|
+
json_schema={"type": "object", "properties": {"summary": {"type": "string"}}}
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Args:
|
|
291
|
+
messages: List of message dicts, each with "role" and "content" keys.
|
|
292
|
+
reasoning_effort: Optional string to control model reasoning depth.
|
|
293
|
+
max_output_tokens: Optional maximum number of output tokens.
|
|
294
|
+
output_format: Output format, e.g., "text" or "json".
|
|
295
|
+
json_schema: Optional JSON schema for validating structured output.
|
|
296
|
+
schema_name: Name for the root schema object (default: "output").
|
|
297
|
+
strict_schema: If True, enforce strict schema validation.
|
|
298
|
+
validate_json: If True, validate JSON output against schema.
|
|
299
|
+
fail_on_unsupported: If True, raise error for unsupported features.
|
|
300
|
+
**kw: Additional provider-specific keyword arguments.
|
|
301
|
+
|
|
302
|
+
Returns:
|
|
303
|
+
tuple[str, dict[str, int]]: The model response (text or structured output) and usage statistics.
|
|
304
|
+
|
|
305
|
+
Raises:
|
|
306
|
+
NotImplementedError: If the provider is not supported.
|
|
307
|
+
RuntimeError: For various errors including invalid JSON output or rate limit violations.
|
|
308
|
+
LLMUnsupportedFeatureError: If a requested feature is unsupported by the provider.
|
|
309
|
+
|
|
310
|
+
Notes:
|
|
311
|
+
- This method centralizes handling of different LLM providers, ensuring consistent behavior.
|
|
312
|
+
- Structured output support allows for robust integration with downstream systems.
|
|
313
|
+
- Rate limiting and metering help manage resource usage effectively.
|
|
314
|
+
"""
|
|
118
315
|
await self._ensure_client()
|
|
119
316
|
model = kw.get("model", self.model)
|
|
120
317
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
318
|
+
start = time.perf_counter()
|
|
319
|
+
|
|
320
|
+
# Provider-specific call (now symmetric)
|
|
321
|
+
text, usage = await self._chat_dispatch(
|
|
322
|
+
messages,
|
|
323
|
+
model=model,
|
|
324
|
+
reasoning_effort=reasoning_effort,
|
|
325
|
+
max_output_tokens=max_output_tokens,
|
|
326
|
+
output_format=output_format,
|
|
327
|
+
json_schema=json_schema,
|
|
328
|
+
schema_name=schema_name,
|
|
329
|
+
strict_schema=strict_schema,
|
|
330
|
+
validate_json=validate_json,
|
|
331
|
+
fail_on_unsupported=fail_on_unsupported,
|
|
332
|
+
**kw,
|
|
333
|
+
)
|
|
125
334
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
335
|
+
# JSON postprocessing/validation is centralized here (consistent behavior)
|
|
336
|
+
text = self._postprocess_structured_output(
|
|
337
|
+
text=text,
|
|
338
|
+
output_format=output_format,
|
|
339
|
+
json_schema=json_schema,
|
|
340
|
+
strict_schema=strict_schema,
|
|
341
|
+
validate_json=validate_json,
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
latency_ms = int((time.perf_counter() - start) * 1000)
|
|
345
|
+
|
|
346
|
+
# Enforce rate limits (existing)
|
|
347
|
+
self._enforce_llm_limits_for_run(usage=usage)
|
|
348
|
+
|
|
349
|
+
# Metering (existing)
|
|
350
|
+
await self._record_llm_usage(
|
|
351
|
+
model=model,
|
|
352
|
+
usage=usage,
|
|
353
|
+
latency_ms=latency_ms,
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
return text, usage
|
|
357
|
+
|
|
358
|
+
async def _chat_dispatch(
|
|
359
|
+
self,
|
|
360
|
+
messages: list[dict[str, Any]],
|
|
361
|
+
*,
|
|
362
|
+
model: str,
|
|
363
|
+
reasoning_effort: str | None,
|
|
364
|
+
max_output_tokens: int | None,
|
|
365
|
+
output_format: ChatOutputFormat,
|
|
366
|
+
json_schema: dict[str, Any] | None,
|
|
367
|
+
schema_name: str,
|
|
368
|
+
strict_schema: bool,
|
|
369
|
+
validate_json: bool,
|
|
370
|
+
fail_on_unsupported: bool,
|
|
371
|
+
**kw: Any,
|
|
372
|
+
) -> tuple[str, dict[str, int]]:
|
|
373
|
+
# OpenAI is now symmetric too
|
|
374
|
+
if self.provider == "openai":
|
|
375
|
+
return await self._chat_openai_responses(
|
|
376
|
+
messages,
|
|
377
|
+
model=model,
|
|
378
|
+
reasoning_effort=reasoning_effort,
|
|
379
|
+
max_output_tokens=max_output_tokens,
|
|
380
|
+
output_format=output_format,
|
|
381
|
+
json_schema=json_schema,
|
|
382
|
+
schema_name=schema_name,
|
|
383
|
+
strict_schema=strict_schema,
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
# Everyone else
|
|
387
|
+
if self.provider in {"openrouter", "lmstudio", "ollama"}:
|
|
388
|
+
return await self._chat_openai_like_chat_completions(
|
|
389
|
+
messages,
|
|
390
|
+
model=model,
|
|
391
|
+
output_format=output_format,
|
|
392
|
+
json_schema=json_schema,
|
|
393
|
+
fail_on_unsupported=fail_on_unsupported,
|
|
394
|
+
**kw,
|
|
395
|
+
)
|
|
396
|
+
|
|
397
|
+
if self.provider == "azure":
|
|
398
|
+
return await self._chat_azure_chat_completions(
|
|
399
|
+
messages,
|
|
400
|
+
model=model,
|
|
401
|
+
output_format=output_format,
|
|
402
|
+
json_schema=json_schema,
|
|
403
|
+
fail_on_unsupported=fail_on_unsupported,
|
|
404
|
+
**kw,
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
if self.provider == "anthropic":
|
|
408
|
+
return await self._chat_anthropic_messages(
|
|
409
|
+
messages,
|
|
410
|
+
model=model,
|
|
411
|
+
output_format=output_format,
|
|
412
|
+
json_schema=json_schema,
|
|
413
|
+
**kw,
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
if self.provider == "google":
|
|
417
|
+
return await self._chat_gemini_generate_content(
|
|
418
|
+
messages,
|
|
419
|
+
model=model,
|
|
420
|
+
output_format=output_format,
|
|
421
|
+
json_schema=json_schema,
|
|
422
|
+
fail_on_unsupported=fail_on_unsupported,
|
|
423
|
+
**kw,
|
|
424
|
+
)
|
|
425
|
+
|
|
426
|
+
raise NotImplementedError(f"provider {self.provider}")
|
|
427
|
+
|
|
428
|
+
def _postprocess_structured_output(
|
|
429
|
+
self,
|
|
430
|
+
*,
|
|
431
|
+
text: str,
|
|
432
|
+
output_format: ChatOutputFormat,
|
|
433
|
+
json_schema: dict[str, Any] | None,
|
|
434
|
+
strict_schema: bool,
|
|
435
|
+
validate_json: bool,
|
|
436
|
+
) -> str:
|
|
437
|
+
if output_format not in ("json_object", "json_schema"):
|
|
438
|
+
return text
|
|
439
|
+
|
|
440
|
+
if not validate_json:
|
|
441
|
+
return text
|
|
442
|
+
|
|
443
|
+
json_text = _extract_json_text(text)
|
|
444
|
+
try:
|
|
445
|
+
obj = json.loads(json_text)
|
|
446
|
+
except Exception as e:
|
|
447
|
+
raise RuntimeError(f"Model did not return valid JSON. Raw output:\n{text}") from e
|
|
448
|
+
|
|
449
|
+
if output_format == "json_schema" and json_schema is not None and strict_schema:
|
|
450
|
+
_validate_json_schema(obj, json_schema)
|
|
451
|
+
|
|
452
|
+
# Canonical JSON string output (makes downstream robust)
|
|
453
|
+
return json.dumps(obj, ensure_ascii=False)
|
|
454
|
+
|
|
455
|
+
async def _chat_openai_responses(
|
|
456
|
+
self,
|
|
457
|
+
messages: list[dict[str, Any]],
|
|
458
|
+
*,
|
|
459
|
+
model: str,
|
|
460
|
+
reasoning_effort: str | None,
|
|
461
|
+
max_output_tokens: int | None,
|
|
462
|
+
output_format: ChatOutputFormat,
|
|
463
|
+
json_schema: dict[str, Any] | None,
|
|
464
|
+
schema_name: str,
|
|
465
|
+
strict_schema: bool,
|
|
466
|
+
) -> tuple[str, dict[str, int]]:
|
|
467
|
+
await self._ensure_client()
|
|
468
|
+
assert self._client is not None
|
|
469
|
+
|
|
470
|
+
url = f"{self.base_url}/responses"
|
|
471
|
+
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
|
|
472
|
+
|
|
473
|
+
# Normalize input so vision works if caller used image_url parts
|
|
474
|
+
input_messages = _normalize_openai_responses_input(messages)
|
|
475
|
+
|
|
476
|
+
body: dict[str, Any] = {"model": model, "input": input_messages}
|
|
130
477
|
if reasoning_effort is not None:
|
|
131
478
|
body["reasoning"] = {"effort": reasoning_effort}
|
|
132
479
|
if max_output_tokens is not None:
|
|
133
480
|
body["max_output_tokens"] = max_output_tokens
|
|
134
481
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
482
|
+
# Structured output (Responses API style)
|
|
483
|
+
if output_format == "json_object":
|
|
484
|
+
body["text"] = {"format": {"type": "json_object"}}
|
|
485
|
+
elif output_format == "json_schema":
|
|
486
|
+
if json_schema is None:
|
|
487
|
+
raise ValueError("output_format='json_schema' requires json_schema")
|
|
488
|
+
body["text"] = {
|
|
489
|
+
"format": {
|
|
490
|
+
"type": "json_schema",
|
|
491
|
+
"name": schema_name,
|
|
492
|
+
"schema": json_schema,
|
|
493
|
+
"strict": bool(strict_schema),
|
|
494
|
+
}
|
|
495
|
+
}
|
|
141
496
|
|
|
142
497
|
async def _call():
|
|
143
|
-
r = await self._client.post(
|
|
144
|
-
f"{self.base_url}/responses",
|
|
145
|
-
headers=self._headers_openai_like(),
|
|
146
|
-
json=body,
|
|
147
|
-
)
|
|
148
|
-
|
|
498
|
+
r = await self._client.post(url, headers=headers, json=body)
|
|
149
499
|
try:
|
|
150
500
|
r.raise_for_status()
|
|
151
|
-
except httpx.
|
|
501
|
+
except httpx.HTTPStatusError as e:
|
|
152
502
|
raise RuntimeError(f"OpenAI Responses API error: {e.response.text}") from e
|
|
153
503
|
|
|
154
504
|
data = r.json()
|
|
155
505
|
output = data.get("output")
|
|
156
506
|
txt = ""
|
|
157
507
|
|
|
158
|
-
#
|
|
508
|
+
# Your existing parsing logic, but robust for list shape
|
|
159
509
|
if isinstance(output, list) and output:
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
510
|
+
# concat all message outputs if multiple
|
|
511
|
+
chunks: list[str] = []
|
|
512
|
+
for item in output:
|
|
513
|
+
if isinstance(item, dict) and item.get("type") == "message":
|
|
514
|
+
parts = item.get("content") or []
|
|
515
|
+
for p in parts:
|
|
516
|
+
if isinstance(p, dict) and "text" in p:
|
|
517
|
+
chunks.append(p["text"])
|
|
518
|
+
txt = "".join(chunks)
|
|
168
519
|
|
|
169
520
|
elif isinstance(output, dict) and output.get("type") == "message":
|
|
170
521
|
msg = output.get("message") or output
|
|
171
522
|
parts = msg.get("content") or []
|
|
172
523
|
chunks: list[str] = []
|
|
173
524
|
for p in parts:
|
|
174
|
-
if "text" in p:
|
|
525
|
+
if isinstance(p, dict) and "text" in p:
|
|
175
526
|
chunks.append(p["text"])
|
|
176
527
|
txt = "".join(chunks)
|
|
177
528
|
|
|
178
529
|
elif isinstance(output, str):
|
|
179
530
|
txt = output
|
|
180
|
-
|
|
181
531
|
else:
|
|
182
|
-
txt =
|
|
532
|
+
txt = ""
|
|
183
533
|
|
|
184
534
|
usage = data.get("usage", {}) or {}
|
|
185
535
|
return txt, usage
|
|
186
536
|
|
|
187
537
|
return await self._retry.run(_call)
|
|
188
538
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
539
|
+
async def _chat_openai_like_chat_completions(
|
|
540
|
+
self,
|
|
541
|
+
messages: list[dict[str, Any]],
|
|
542
|
+
*,
|
|
543
|
+
model: str,
|
|
544
|
+
output_format: ChatOutputFormat,
|
|
545
|
+
json_schema: dict[str, Any] | None,
|
|
546
|
+
fail_on_unsupported: bool,
|
|
547
|
+
**kw: Any,
|
|
192
548
|
) -> tuple[str, dict[str, int]]:
|
|
549
|
+
"""
|
|
550
|
+
Docstring for _chat_openai_like_chat_completions
|
|
551
|
+
|
|
552
|
+
:param self: Description
|
|
553
|
+
:param messages: Description
|
|
554
|
+
:type messages: list[dict[str, Any]]
|
|
555
|
+
:param model: Description
|
|
556
|
+
:type model: str
|
|
557
|
+
:param output_format: Description
|
|
558
|
+
:type output_format: ChatOutputFormat
|
|
559
|
+
:param json_schema: Description
|
|
560
|
+
:type json_schema: dict[str, Any] | None
|
|
561
|
+
:param fail_on_unsupported: Description
|
|
562
|
+
:type fail_on_unsupported: bool
|
|
563
|
+
:param kw: Description
|
|
564
|
+
:type kw: Any
|
|
565
|
+
:return: Description
|
|
566
|
+
:rtype: tuple[str, dict[str, int]]
|
|
567
|
+
|
|
568
|
+
Call OpenAI-like /chat/completions endpoint.
|
|
569
|
+
"""
|
|
193
570
|
await self._ensure_client()
|
|
571
|
+
assert self._client is not None
|
|
194
572
|
|
|
195
573
|
temperature = kw.get("temperature", 0.5)
|
|
196
574
|
top_p = kw.get("top_p", 1.0)
|
|
197
|
-
model = kw.get("model", self.model)
|
|
198
575
|
|
|
199
|
-
|
|
576
|
+
msg_for_provider = messages
|
|
577
|
+
response_format = None
|
|
200
578
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
579
|
+
if output_format == "json_object":
|
|
580
|
+
response_format = {"type": "json_object"}
|
|
581
|
+
msg_for_provider = _ensure_system_json_directive(messages, schema=None)
|
|
582
|
+
elif output_format == "json_schema":
|
|
583
|
+
# not truly native in most openai-like providers
|
|
584
|
+
if fail_on_unsupported:
|
|
585
|
+
raise RuntimeError(f"provider {self.provider} does not support native json_schema")
|
|
586
|
+
msg_for_provider = _ensure_system_json_directive(messages, schema=json_schema)
|
|
206
587
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
588
|
+
async def _call():
|
|
589
|
+
body: dict[str, Any] = {
|
|
590
|
+
"model": model,
|
|
591
|
+
"messages": msg_for_provider,
|
|
592
|
+
"temperature": temperature,
|
|
593
|
+
"top_p": top_p,
|
|
594
|
+
}
|
|
595
|
+
if response_format is not None:
|
|
596
|
+
body["response_format"] = response_format
|
|
212
597
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
598
|
+
r = await self._client.post(
|
|
599
|
+
f"{self.base_url}/chat/completions",
|
|
600
|
+
headers=self._headers_openai_like(),
|
|
601
|
+
json=body,
|
|
602
|
+
)
|
|
603
|
+
try:
|
|
604
|
+
r.raise_for_status()
|
|
605
|
+
except httpx.HTTPError as e:
|
|
606
|
+
raise RuntimeError(f"OpenAI-like chat/completions error: {e.response.text}") from e
|
|
220
607
|
|
|
221
|
-
|
|
608
|
+
data = r.json()
|
|
609
|
+
txt, _ = _first_text(data.get("choices", [])) # you already have _first_text in file
|
|
610
|
+
usage = data.get("usage", {}) or {}
|
|
611
|
+
return txt, usage
|
|
222
612
|
|
|
223
|
-
|
|
224
|
-
|
|
613
|
+
return await self._retry.run(_call)
|
|
614
|
+
|
|
615
|
+
async def _chat_azure_chat_completions(
|
|
616
|
+
self,
|
|
617
|
+
messages: list[dict[str, Any]],
|
|
618
|
+
*,
|
|
619
|
+
model: str,
|
|
620
|
+
output_format: ChatOutputFormat,
|
|
621
|
+
json_schema: dict[str, Any] | None,
|
|
622
|
+
fail_on_unsupported: bool,
|
|
623
|
+
**kw: Any,
|
|
624
|
+
) -> tuple[str, dict[str, int]]:
|
|
625
|
+
await self._ensure_client()
|
|
626
|
+
assert self._client is not None
|
|
627
|
+
|
|
628
|
+
if not (self.base_url and self.azure_deployment):
|
|
629
|
+
raise RuntimeError(
|
|
630
|
+
"Azure OpenAI requires AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_DEPLOYMENT"
|
|
631
|
+
)
|
|
632
|
+
|
|
633
|
+
temperature = kw.get("temperature", 0.5)
|
|
634
|
+
top_p = kw.get("top_p", 1.0)
|
|
635
|
+
|
|
636
|
+
msg_for_provider = messages
|
|
637
|
+
payload: dict[str, Any] = {
|
|
638
|
+
"messages": msg_for_provider,
|
|
639
|
+
"temperature": temperature,
|
|
640
|
+
"top_p": top_p,
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
if output_format == "json_object":
|
|
644
|
+
payload["response_format"] = {"type": "json_object"}
|
|
645
|
+
payload["messages"] = _ensure_system_json_directive(messages, schema=None)
|
|
646
|
+
elif output_format == "json_schema":
|
|
647
|
+
if fail_on_unsupported:
|
|
225
648
|
raise RuntimeError(
|
|
226
|
-
"Azure
|
|
649
|
+
"Azure native json_schema not guaranteed; set fail_on_unsupported=False for best-effort"
|
|
227
650
|
)
|
|
651
|
+
payload["messages"] = _ensure_system_json_directive(messages, schema=json_schema)
|
|
228
652
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
653
|
+
async def _call():
|
|
654
|
+
r = await self._client.post(
|
|
655
|
+
f"{self.base_url}/openai/deployments/{self.azure_deployment}/chat/completions?api-version=2024-08-01-preview",
|
|
656
|
+
headers={"api-key": self.api_key, "Content-Type": "application/json"},
|
|
657
|
+
json=payload,
|
|
658
|
+
)
|
|
659
|
+
try:
|
|
660
|
+
r.raise_for_status()
|
|
661
|
+
except httpx.HTTPError as e:
|
|
662
|
+
raise RuntimeError(f"Azure chat/completions error: {e.response.text}") from e
|
|
663
|
+
|
|
664
|
+
data = r.json()
|
|
665
|
+
txt, _ = _first_text(data.get("choices", []))
|
|
666
|
+
usage = data.get("usage", {}) or {}
|
|
667
|
+
return txt, usage
|
|
668
|
+
|
|
669
|
+
return await self._retry.run(_call)
|
|
670
|
+
|
|
671
|
+
async def _chat_anthropic_messages(
|
|
672
|
+
self,
|
|
673
|
+
messages: list[dict[str, Any]],
|
|
674
|
+
*,
|
|
675
|
+
model: str,
|
|
676
|
+
output_format: ChatOutputFormat,
|
|
677
|
+
json_schema: dict[str, Any] | None,
|
|
678
|
+
**kw: Any,
|
|
679
|
+
) -> tuple[str, dict[str, int]]:
|
|
680
|
+
await self._ensure_client()
|
|
681
|
+
assert self._client is not None
|
|
682
|
+
|
|
683
|
+
temperature = kw.get("temperature", 0.5)
|
|
684
|
+
top_p = kw.get("top_p", 1.0)
|
|
685
|
+
|
|
686
|
+
# System text aggregation
|
|
687
|
+
sys_msgs: list[str] = []
|
|
688
|
+
for m in messages:
|
|
689
|
+
if m.get("role") == "system":
|
|
690
|
+
c = m.get("content")
|
|
691
|
+
sys_msgs.append(c if isinstance(c, str) else str(c))
|
|
692
|
+
|
|
693
|
+
if output_format in ("json_object", "json_schema"):
|
|
694
|
+
sys_msgs.insert(0, "Return ONLY valid JSON. No markdown, no commentary.")
|
|
695
|
+
if output_format == "json_schema" and json_schema is not None:
|
|
696
|
+
sys_msgs.insert(
|
|
697
|
+
1,
|
|
698
|
+
"JSON MUST conform to this schema:\n"
|
|
699
|
+
+ json.dumps(json_schema, ensure_ascii=False),
|
|
234
700
|
)
|
|
235
|
-
try:
|
|
236
|
-
r.raise_for_status()
|
|
237
|
-
except httpx.HTTPError as e:
|
|
238
|
-
raise RuntimeError(f"OpenAI Responses API error: {e.response.text}") from e
|
|
239
701
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
702
|
+
# Convert messages to Anthropic format (blocks)
|
|
703
|
+
conv: list[dict[str, Any]] = []
|
|
704
|
+
for m in messages:
|
|
705
|
+
role = m.get("role")
|
|
706
|
+
if role == "system":
|
|
707
|
+
continue
|
|
708
|
+
anthro_role = "assistant" if role == "assistant" else "user"
|
|
709
|
+
content_blocks = _to_anthropic_blocks(m.get("content"))
|
|
710
|
+
conv.append({"role": anthro_role, "content": content_blocks})
|
|
711
|
+
|
|
712
|
+
payload: dict[str, Any] = {
|
|
713
|
+
"model": model,
|
|
714
|
+
"max_tokens": kw.get("max_tokens", 1024),
|
|
715
|
+
"messages": conv,
|
|
716
|
+
"temperature": temperature,
|
|
717
|
+
"top_p": top_p,
|
|
718
|
+
}
|
|
719
|
+
if sys_msgs:
|
|
720
|
+
payload["system"] = "\n\n".join(sys_msgs)
|
|
243
721
|
|
|
244
|
-
|
|
722
|
+
async def _call():
|
|
723
|
+
r = await self._client.post(
|
|
724
|
+
f"{self.base_url}/v1/messages",
|
|
725
|
+
headers={
|
|
726
|
+
"x-api-key": self.api_key,
|
|
727
|
+
"anthropic-version": "2023-06-01",
|
|
728
|
+
"Content-Type": "application/json",
|
|
729
|
+
},
|
|
730
|
+
json=payload,
|
|
731
|
+
)
|
|
732
|
+
try:
|
|
733
|
+
r.raise_for_status()
|
|
734
|
+
except httpx.HTTPStatusError as e:
|
|
735
|
+
body = e.response.text or ""
|
|
736
|
+
if e.response.status_code == 404:
|
|
737
|
+
# Often model not found, or wrong base URL.
|
|
738
|
+
hint = (
|
|
739
|
+
"Anthropic returned 404. Common causes:\n"
|
|
740
|
+
"1) base_url should be https://api.anthropic.com (no /v1 suffix)\n"
|
|
741
|
+
"2) model id is invalid / unavailable for your key\n"
|
|
742
|
+
f"Request URL: {e.request.url}\n"
|
|
743
|
+
)
|
|
744
|
+
raise RuntimeError(hint + "Response body:\n" + body) from e
|
|
745
|
+
|
|
746
|
+
raise RuntimeError(f"Anthropic API error ({e.response.status_code}): {body}") from e
|
|
245
747
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
748
|
+
data = r.json()
|
|
749
|
+
blocks = data.get("content") or []
|
|
750
|
+
txt = "".join(b.get("text", "") for b in blocks if b.get("type") == "text")
|
|
751
|
+
usage = data.get("usage", {}) or {}
|
|
752
|
+
return txt, usage
|
|
753
|
+
|
|
754
|
+
return await self._retry.run(_call)
|
|
755
|
+
|
|
756
|
+
async def _chat_gemini_generate_content(
|
|
757
|
+
self,
|
|
758
|
+
messages: list[dict[str, Any]],
|
|
759
|
+
*,
|
|
760
|
+
model: str,
|
|
761
|
+
output_format: ChatOutputFormat,
|
|
762
|
+
json_schema: dict[str, Any] | None,
|
|
763
|
+
fail_on_unsupported: bool,
|
|
764
|
+
**kw: Any,
|
|
765
|
+
) -> tuple[str, dict[str, int]]:
|
|
766
|
+
await self._ensure_client()
|
|
767
|
+
assert self._client is not None
|
|
768
|
+
|
|
769
|
+
temperature = kw.get("temperature", 0.5)
|
|
770
|
+
top_p = kw.get("top_p", 1.0)
|
|
771
|
+
|
|
772
|
+
# Merge system messages into preamble
|
|
773
|
+
system_parts: list[str] = []
|
|
774
|
+
for m in messages:
|
|
775
|
+
if m.get("role") == "system":
|
|
776
|
+
c = m.get("content")
|
|
777
|
+
system_parts.append(c if isinstance(c, str) else str(c))
|
|
778
|
+
system = "\n".join(system_parts)
|
|
779
|
+
|
|
780
|
+
turns: list[dict[str, Any]] = []
|
|
781
|
+
for m in messages:
|
|
782
|
+
if m.get("role") == "system":
|
|
783
|
+
continue
|
|
784
|
+
role = "user" if m.get("role") == "user" else "model"
|
|
785
|
+
parts = _to_gemini_parts(m.get("content"))
|
|
786
|
+
turns.append({"role": role, "parts": parts})
|
|
787
|
+
|
|
788
|
+
if system:
|
|
789
|
+
turns.insert(0, {"role": "user", "parts": [{"text": f"System instructions: {system}"}]})
|
|
790
|
+
|
|
791
|
+
async def _call():
|
|
792
|
+
gen_cfg: dict[str, Any] = {"temperature": temperature, "topP": top_p}
|
|
793
|
+
|
|
794
|
+
# Gemini native structured outputs
|
|
795
|
+
if output_format == "json_object":
|
|
796
|
+
gen_cfg["responseMimeType"] = "application/json"
|
|
797
|
+
elif output_format == "json_schema":
|
|
798
|
+
if json_schema is None:
|
|
799
|
+
raise ValueError("output_format='json_schema' requires json_schema")
|
|
800
|
+
gen_cfg["responseMimeType"] = "application/json"
|
|
801
|
+
gen_cfg["responseJsonSchema"] = json_schema
|
|
802
|
+
|
|
803
|
+
payload = {"contents": turns, "generationConfig": gen_cfg}
|
|
804
|
+
|
|
805
|
+
r = await self._client.post(
|
|
806
|
+
f"{self.base_url}/v1/models/{model}:generateContent?key={self.api_key}",
|
|
807
|
+
headers={"Content-Type": "application/json"},
|
|
808
|
+
json=payload,
|
|
809
|
+
)
|
|
810
|
+
try:
|
|
811
|
+
r.raise_for_status()
|
|
812
|
+
except httpx.HTTPStatusError as e:
|
|
813
|
+
raise RuntimeError(
|
|
814
|
+
f"Gemini generateContent failed ({e.response.status_code}): {e.response.text}"
|
|
815
|
+
) from e
|
|
816
|
+
|
|
817
|
+
data = r.json()
|
|
818
|
+
cand = (data.get("candidates") or [{}])[0]
|
|
819
|
+
txt = "".join(p.get("text", "") for p in (cand.get("content", {}).get("parts") or []))
|
|
820
|
+
|
|
821
|
+
um = data.get("usageMetadata") or {}
|
|
822
|
+
usage = {
|
|
823
|
+
"input_tokens": int(um.get("promptTokenCount", 0) or 0),
|
|
824
|
+
"output_tokens": int(um.get("candidatesTokenCount", 0) or 0),
|
|
278
825
|
}
|
|
826
|
+
return txt, usage
|
|
279
827
|
|
|
280
|
-
|
|
281
|
-
if sys_msgs:
|
|
282
|
-
payload["system"] = "\n\n".join(sys_msgs)
|
|
828
|
+
return await self._retry.run(_call)
|
|
283
829
|
|
|
284
|
-
|
|
285
|
-
r = await self._client.post(
|
|
286
|
-
f"{self.base_url}/v1/messages",
|
|
287
|
-
headers={
|
|
288
|
-
"x-api-key": self.api_key,
|
|
289
|
-
"anthropic-version": "2023-06-01",
|
|
290
|
-
"Content-Type": "application/json",
|
|
291
|
-
},
|
|
292
|
-
json=payload,
|
|
293
|
-
)
|
|
294
|
-
try:
|
|
295
|
-
r.raise_for_status()
|
|
296
|
-
except httpx.HTTPStatusError as e:
|
|
297
|
-
# keep the nice debugging message
|
|
298
|
-
raise RuntimeError(f"Anthropic API error: {e.response.text}") from e
|
|
830
|
+
# ---------------- Image Generation ----------------
|
|
299
831
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
832
|
+
async def generate_image(
|
|
833
|
+
self,
|
|
834
|
+
prompt: str,
|
|
835
|
+
*,
|
|
836
|
+
model: str | None = None,
|
|
837
|
+
n: int = 1,
|
|
838
|
+
size: str | None = None, # e.g. "1024x1024"
|
|
839
|
+
quality: str | None = None, # OpenAI: "high|medium|low|auto" or dall-e: "hd|standard"
|
|
840
|
+
style: str | None = None, # dall-e-3: "vivid|natural"
|
|
841
|
+
output_format: ImageFormat | None = None, # OpenAI GPT image models: png|jpeg|webp
|
|
842
|
+
response_format: ImageResponseFormat | None = None, # dall-e: url|b64_json (OpenAI/azure)
|
|
843
|
+
background: str | None = None, # OpenAI GPT image models: "transparent|opaque|auto"
|
|
844
|
+
# Optional image inputs for providers that can do edit-style generation via "prompt + image(s)"
|
|
845
|
+
input_images: list[str] | None = None, # data: URLs (base64) for now
|
|
846
|
+
# Provider-specific knobs
|
|
847
|
+
azure_api_version: str | None = None,
|
|
848
|
+
**kw: Any,
|
|
849
|
+
) -> ImageGenerationResult:
|
|
850
|
+
"""
|
|
851
|
+
Generate images from a text prompt using the configured LLM provider.
|
|
852
|
+
|
|
853
|
+
This method supports provider-agnostic image generation, including OpenAI, Azure, and Google Gemini.
|
|
854
|
+
It automatically handles rate limiting, usage metering, and provider-specific options.
|
|
855
|
+
|
|
856
|
+
Examples:
|
|
857
|
+
Basic usage with a prompt:
|
|
858
|
+
```python
|
|
859
|
+
result = await context.llm().generate_image("A cat riding a bicycle")
|
|
860
|
+
```
|
|
861
|
+
|
|
862
|
+
Requesting multiple images with custom size and style:
|
|
863
|
+
```python
|
|
864
|
+
result = await context.llm().generate_image(
|
|
865
|
+
"A futuristic cityscape",
|
|
866
|
+
n=3,
|
|
867
|
+
size="1024x1024",
|
|
868
|
+
style="vivid"
|
|
869
|
+
)
|
|
870
|
+
```
|
|
305
871
|
|
|
306
|
-
|
|
872
|
+
Supplying input images for edit-style generation (Gemini):
|
|
873
|
+
```python
|
|
874
|
+
result = await context.llm().generate_image(
|
|
875
|
+
"Make this image brighter",
|
|
876
|
+
input_images=[my_data_url]
|
|
877
|
+
)
|
|
878
|
+
```
|
|
879
|
+
|
|
880
|
+
Args:
|
|
881
|
+
prompt: The text prompt describing the desired image(s).
|
|
882
|
+
model: Optional model name to override the default.
|
|
883
|
+
n: Number of images to generate (default: 1).
|
|
884
|
+
size: Image size, e.g., "1024x1024".
|
|
885
|
+
quality: Image quality setting (provider-specific).
|
|
886
|
+
style: Artistic style (provider-specific).
|
|
887
|
+
output_format: Desired image format, e.g., "png", "jpeg".
|
|
888
|
+
response_format: Response format, e.g., "url" or "b64_json".
|
|
889
|
+
background: Background setting, e.g., "transparent".
|
|
890
|
+
input_images: List of input images (as data URLs) for edit-style generation.
|
|
891
|
+
azure_api_version: Azure-specific API version override.
|
|
892
|
+
**kw: Additional provider-specific keyword arguments.
|
|
893
|
+
|
|
894
|
+
Returns:
|
|
895
|
+
ImageGenerationResult: An object containing generated images, usage statistics, and raw response data.
|
|
896
|
+
|
|
897
|
+
Raises:
|
|
898
|
+
LLMUnsupportedFeatureError: If the provider does not support image generation.
|
|
899
|
+
RuntimeError: For provider-specific errors or invalid configuration.
|
|
900
|
+
|
|
901
|
+
Notes:
|
|
902
|
+
- This method is accessed via `context.llm().generate_image(...)`.
|
|
903
|
+
- Usage metering and rate limits are enforced automatically. However, token usage is typically not reported for image generation.
|
|
904
|
+
- The returned `ImageGenerationResult` includes both images and metadata.
|
|
905
|
+
"""
|
|
906
|
+
await self._ensure_client()
|
|
907
|
+
model = model or self.model
|
|
908
|
+
|
|
909
|
+
start = time.perf_counter()
|
|
910
|
+
|
|
911
|
+
result = await self._image_dispatch(
|
|
912
|
+
prompt,
|
|
913
|
+
model=model,
|
|
914
|
+
n=n,
|
|
915
|
+
size=size,
|
|
916
|
+
quality=quality,
|
|
917
|
+
style=style,
|
|
918
|
+
output_format=output_format,
|
|
919
|
+
response_format=response_format,
|
|
920
|
+
background=background,
|
|
921
|
+
input_images=input_images,
|
|
922
|
+
azure_api_version=azure_api_version,
|
|
923
|
+
**kw,
|
|
924
|
+
)
|
|
925
|
+
|
|
926
|
+
# Rate limits: count as a call; tokens are typically not reported for images
|
|
927
|
+
self._enforce_llm_limits_for_run(usage=result.usage or {})
|
|
928
|
+
|
|
929
|
+
latency_ms = int((time.perf_counter() - start) * 1000)
|
|
930
|
+
await self._record_llm_usage(model=model, usage=result.usage or {}, latency_ms=latency_ms)
|
|
931
|
+
|
|
932
|
+
return result
|
|
933
|
+
|
|
934
|
+
async def _image_dispatch(
|
|
935
|
+
self,
|
|
936
|
+
prompt: str,
|
|
937
|
+
*,
|
|
938
|
+
model: str,
|
|
939
|
+
n: int,
|
|
940
|
+
size: str | None,
|
|
941
|
+
quality: str | None,
|
|
942
|
+
style: str | None,
|
|
943
|
+
output_format: ImageFormat | None,
|
|
944
|
+
response_format: ImageResponseFormat | None,
|
|
945
|
+
background: str | None,
|
|
946
|
+
input_images: list[str] | None,
|
|
947
|
+
azure_api_version: str | None,
|
|
948
|
+
**kw: Any,
|
|
949
|
+
) -> ImageGenerationResult:
|
|
950
|
+
if self.provider == "openai":
|
|
951
|
+
return await self._image_openai_generate(
|
|
952
|
+
prompt,
|
|
953
|
+
model=model,
|
|
954
|
+
n=n,
|
|
955
|
+
size=size,
|
|
956
|
+
quality=quality,
|
|
957
|
+
style=style,
|
|
958
|
+
output_format=output_format,
|
|
959
|
+
response_format=response_format,
|
|
960
|
+
background=background,
|
|
961
|
+
**kw,
|
|
962
|
+
)
|
|
963
|
+
|
|
964
|
+
if self.provider == "azure":
|
|
965
|
+
return await self._image_azure_generate(
|
|
966
|
+
prompt,
|
|
967
|
+
model=model,
|
|
968
|
+
n=n,
|
|
969
|
+
size=size,
|
|
970
|
+
quality=quality,
|
|
971
|
+
style=style,
|
|
972
|
+
output_format=output_format,
|
|
973
|
+
response_format=response_format,
|
|
974
|
+
background=background,
|
|
975
|
+
azure_api_version=azure_api_version,
|
|
976
|
+
**kw,
|
|
977
|
+
)
|
|
307
978
|
|
|
308
979
|
if self.provider == "google":
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
"role": "user" if m["role"] == "user" else "model",
|
|
316
|
-
"parts": [{"text": m["content"]}],
|
|
317
|
-
}
|
|
318
|
-
for m in messages
|
|
319
|
-
if m["role"] != "system"
|
|
320
|
-
]
|
|
980
|
+
return await self._image_gemini_generate(
|
|
981
|
+
prompt,
|
|
982
|
+
model=model,
|
|
983
|
+
input_images=input_images,
|
|
984
|
+
**kw,
|
|
985
|
+
)
|
|
321
986
|
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
"role": "user",
|
|
327
|
-
"parts": [{"text": f"System instructions: {system}"}],
|
|
328
|
-
},
|
|
329
|
-
)
|
|
987
|
+
if self.provider == "anthropic":
|
|
988
|
+
raise LLMUnsupportedFeatureError(
|
|
989
|
+
"Anthropic does not support image generation via Claude API (vision is input-only)."
|
|
990
|
+
)
|
|
330
991
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
"temperature": temperature,
|
|
336
|
-
"topP": top_p,
|
|
337
|
-
},
|
|
338
|
-
}
|
|
992
|
+
# openrouter/lmstudio/ollama: no single standard image endpoint
|
|
993
|
+
raise LLMUnsupportedFeatureError(
|
|
994
|
+
f"provider '{self.provider}' does not support generate_image() in this client."
|
|
995
|
+
)
|
|
339
996
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
+    async def _image_openai_generate(
+        self,
+        prompt: str,
+        *,
+        model: str,
+        n: int,
+        size: str | None,
+        quality: str | None,
+        style: str | None,
+        output_format: ImageFormat | None,
+        response_format: ImageResponseFormat | None,
+        background: str | None,
+        **kw: Any,
+    ) -> ImageGenerationResult:
+        assert self._client is not None

-
-
-
-
+        url = f"{_normalize_base_url_no_trailing_slash(self.base_url)}/images/generations"
+        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+
+        body: dict[str, Any] = {
+            "model": model,
+            "prompt": prompt,
+            "n": n,
+        }
+        if size is not None:
+            body["size"] = size
+        if quality is not None:
+            body["quality"] = quality
+        if style is not None:
+            body["style"] = style
+        if output_format is not None:
+            body["output_format"] = output_format
+        if background is not None:
+            body["background"] = background
+
+        # For dall-e models, response_format can be url|b64_json.
+        # GPT image models generally return base64 and may ignore response_format. :contentReference[oaicite:4]{index=4}
+        if response_format is not None:
+            body["response_format"] = response_format
+
+        async def _call():
+            r = await self._client.post(url, headers=headers, json=body)
+            try:
+                r.raise_for_status()
+            except Exception as e:
+                raise RuntimeError(f"OpenAI image generation error: {r.text}") from e
+
+            data = r.json()
+            imgs: list[GeneratedImage] = []
+            for item in data.get("data", []) or []:
+                imgs.append(
+                    GeneratedImage(
+                        b64=item.get("b64_json"),
+                        url=item.get("url"),
+                        mime_type=_guess_mime_from_format(output_format or "png")
+                        if item.get("b64_json")
+                        else None,
+                        revised_prompt=item.get("revised_prompt"),
+                    )
                 )
-        return txt, {} # usage parsing optional

-        return
+            # OpenAI images endpoints often don't return token usage; keep empty usage.
+            return ImageGenerationResult(images=imgs, usage=data.get("usage", {}) or {}, raw=data)

-
+        return await self._retry.run(_call)
+
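For reference, the request body that `_image_openai_generate()` assembles for `POST {base_url}/images/generations` looks roughly like the dict below. The model name and option values are examples only; which quality/background values are accepted depends on the target model (e.g. gpt-image-1 versus dall-e-3), and `response_format` is only included when explicitly passed.

```python
# Illustrative body produced by _image_openai_generate(), assuming size, quality,
# and background were provided and response_format was left as None.
body = {
    "model": "gpt-image-1",   # example model name, not mandated by the client
    "prompt": "a watercolor fox",
    "n": 1,
    "size": "1024x1024",
    "quality": "high",        # valid values vary per model family
    "background": "transparent",
}
```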
+    async def _image_azure_generate(
+        self,
+        prompt: str,
+        *,
+        model: str,
+        n: int,
+        size: str | None,
+        quality: str | None,
+        style: str | None,
+        output_format: ImageFormat | None,
+        response_format: ImageResponseFormat | None,
+        background: str | None,
+        azure_api_version: str | None,
+        **kw: Any,
+    ) -> ImageGenerationResult:
+        assert self._client is not None
+
+        if not self.base_url or not self.azure_deployment:
             raise RuntimeError(
-                "
+                "Azure generate_image requires base_url=<resource endpoint> and azure_deployment=<deployment name>"
             )

-
+        api_version = (
+            azure_api_version or "2025-04-01-preview"
+        )  # doc example for GPT-image-1 series :contentReference[oaicite:6]{index=6}
+        url = _azure_images_generations_url(self.base_url, self.azure_deployment, api_version)
+
+        headers = {"api-key": self.api_key, "Content-Type": "application/json"}
+
+        body: dict[str, Any] = {"prompt": prompt, "n": n}
+
+        # For GPT-image-1 series Azure expects "model" in body (per docs). :contentReference[oaicite:7]{index=7}
+        if model:
+            body["model"] = model
+
+        if size is not None:
+            body["size"] = size
+        if quality is not None:
+            body["quality"] = quality
+        if style is not None:
+            body["style"] = style
+
+        # Azure docs: GPT-image-1 series returns base64; DALL-E supports url/b64_json. :contentReference[oaicite:8]{index=8}
+        if response_format is not None:
+            body["response_format"] = response_format
+        if output_format is not None:
+            # Azure uses output_format like PNG/JPEG for some image models; you can pass through as-is.
+            body["output_format"] = output_format.upper()
+        if background is not None:
+            body["background"] = background
+
+        async def _call():
+            r = await self._client.post(url, headers=headers, json=body)
+            try:
+                r.raise_for_status()
+            except Exception as e:
+                raise RuntimeError(f"Azure image generation error: {r.text}") from e
+
+            data = r.json()
+            imgs: list[GeneratedImage] = []
+            for item in data.get("data", []) or []:
+                imgs.append(
+                    GeneratedImage(
+                        b64=item.get("b64_json"),
+                        url=item.get("url"),
+                        mime_type=_guess_mime_from_format((output_format or "png").lower())
+                        if item.get("b64_json")
+                        else None,
+                        revised_prompt=item.get("revised_prompt"),
+                    )
+                )
+
+            return ImageGenerationResult(images=imgs, usage=data.get("usage", {}) or {}, raw=data)
+
+        return await self._retry.run(_call)
+
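The URL helper `_azure_images_generations_url()` is referenced above but not part of this hunk. A plausible shape, inferred from how the api-version and deployment are used here and from Azure OpenAI's deployment-scoped image endpoint, is sketched below; this is an assumption, not the packaged implementation.

```python
# Assumption: _azure_images_generations_url() is defined elsewhere in this module.
# Sketch of a deployment-scoped Azure OpenAI images/generations URL builder.
def _azure_images_generations_url(base_url: str, deployment: str, api_version: str) -> str:
    base = base_url.rstrip("/")
    return f"{base}/openai/deployments/{deployment}/images/generations?api-version={api_version}"
```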
+    async def _image_gemini_generate(
+        self,
+        prompt: str,
+        *,
+        model: str,
+        input_images: list[str] | None,
+        **kw: Any,
+    ) -> ImageGenerationResult:
+        assert self._client is not None
+
+        # Gemini REST endpoint uses generativelanguage.googleapis.com and API key header. :contentReference[oaicite:10]{index=10}
+        # Your self.base_url should already be something like: https://generativelanguage.googleapis.com
+        base = (
+            _normalize_base_url_no_trailing_slash(self.base_url)
+            or "https://generativelanguage.googleapis.com"
+        )
+        url = f"{base}/v1beta/models/{model}:generateContent"
+
+        parts: list[dict[str, Any]] = []
+        if input_images:
+            for img in input_images:
+                if not _is_data_url(img):
+                    raise ValueError("Gemini input_images must be data: URLs (base64) for now.")
+                b64, mime = _data_url_to_b64_and_mime(img)
+                parts.append({"inline_data": {"mime_type": mime, "data": b64}})
+
+        parts.append({"text": prompt})
+
+        payload: dict[str, Any] = {
+            "contents": [{"parts": parts}],
+        }
+        # Optional: ImageConfig etc. could be added here later per Gemini docs. :contentReference[oaicite:11]{index=11}
+
+        async def _call():
+            r = await self._client.post(
+                url,
+                headers={"x-goog-api-key": self.api_key, "Content-Type": "application/json"},
+                json=payload,
+            )
+            try:
+                r.raise_for_status()
+            except Exception as e:
+                raise RuntimeError(f"Gemini image generation error: {r.text}") from e
+
+            data = r.json()
+            cand = (data.get("candidates") or [{}])[0]
+            content = cand.get("content") or {}
+            out_parts = content.get("parts") or []
+
+            imgs: list[GeneratedImage] = []
+            for p in out_parts:
+                inline = p.get("inlineData") or p.get("inline_data")
+                if inline and inline.get("data"):
+                    mime = inline.get("mimeType") or inline.get("mime_type")
+                    imgs.append(GeneratedImage(b64=inline["data"], mime_type=mime))
+
+            # Usage shape varies; keep best-effort.
+            um = data.get("usageMetadata") or {}
+            usage = {
+                "input_tokens": int(um.get("promptTokenCount", 0) or 0),
+                "output_tokens": int(um.get("candidatesTokenCount", 0) or 0),
+            }
+
+            return ImageGenerationResult(images=imgs, usage=usage, raw=data)
+
+        return await self._retry.run(_call)
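The Gemini `generateContent` payload built above interleaves `inline_data` parts (from data: URL inputs) with the text prompt. An illustrative payload, assuming one PNG reference image was supplied, is shown below; the prompt text and placeholder data are examples only.

```python
# Illustrative payload for the generateContent call above, assuming one input image
# supplied as a data: URL that the helper decoded into inline_data.
payload = {
    "contents": [
        {
            "parts": [
                {"inline_data": {"mime_type": "image/png", "data": "<base64 bytes>"}},
                {"text": "Restyle this sketch as a watercolor painting"},
            ]
        }
    ]
}
```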

     # ---------------- Embeddings ----------------
-    async def
+    async def embed_deprecated(self, texts: list[str], **kw) -> list[list[float]]:
         # model override order: kw > self.embed_model > ENV > default
         await self._ensure_client()

@@ -440,6 +1278,224 @@ class GenericLLMClient(LLMClientProtocol):
         # Anthropic: no embeddings endpoint
         raise NotImplementedError(f"Embeddings not supported for {self.provider}")

+    async def embed(self, texts: list[str], **kw) -> list[list[float]]:
+        """
+        Generate vector embeddings for a batch of texts using the configured LLM provider.
+
+        This method provides a provider-agnostic interface for embedding text, automatically
+        handling model selection, batching, and provider-specific API quirks. It ensures the
+        output shape matches the input and raises informative errors for configuration issues.
+
+        Examples:
+            Basic usage with a list of texts:
+            ```python
+            embeddings = await context.llm().embed([
+                "The quick brown fox.",
+                "Jumped over the lazy dog."
+            ])
+            ```
+
+            Specifying a custom embedding model:
+            ```python
+            embeddings = await context.llm().embed(
+                ["Hello world!"],
+                model="text-embedding-3-large"
+            )
+            ```
+
+        Args:
+            texts: List of input strings to embed.
+            model: Optional model name to override the default embedding model.
+            azure_api_version: Optional Azure API version override.
+            extra_body: Optional dict of extra fields to pass to the provider.
+            **kw: Additional provider-specific keyword arguments.
+
+        Returns:
+            list[list[float]]: List of embedding vectors, one per input text.
+
+        Raises:
+            TypeError: If `texts` is not a list of strings.
+            RuntimeError: For provider/model/configuration errors or shape mismatches.
+            NotImplementedError: If embeddings are not supported for the provider.
+
+        Notes:
+            - For Google Gemini, uses batch embedding if available, otherwise falls back to per-item embedding.
+            - For Azure, requires `azure_deployment` to be set.
+            - The returned list always matches the length of `texts`.
+        """
+        await self._ensure_client()
+        assert self._client is not None
+
+        # ---- validate input ----
+        if not isinstance(texts, list) or any(not isinstance(t, str) for t in texts):
+            raise TypeError("embed(texts) expects list[str]")
+        if len(texts) == 0:
+            return []
+
+        # ---- resolve model ----
+        # model override order: kw > self.embed_model > ENV > default
+        model = (
+            kw.get("model")
+            or self.embed_model
+            or os.getenv("EMBED_MODEL")
+            or "text-embedding-3-small"
+        )
+
+        # ---- capability + config checks ----
+        if self.provider == "anthropic":
+            raise NotImplementedError("Embeddings not supported for anthropic")
+
+        if self.provider == "azure" and not self.azure_deployment:
+            raise RuntimeError(
+                "Azure embeddings requires AZURE_OPENAI_DEPLOYMENT (azure_deployment)"
+            )
+
+        # Optional knobs
+        azure_api_version = kw.get("azure_api_version") or "2024-08-01-preview"
+        # For OpenAI-like, some providers support extra fields like dimensions/user; pass-through if present
+        extra_body = kw.get("extra_body") or {}
+
+        # ---- build request spec (within one function) ----
+        # spec = (url, headers, json_body, parser_fn)
+        if self.provider in {"openai", "openrouter", "lmstudio", "ollama"}:
+            url = f"{self.base_url}/embeddings"
+            headers = self._headers_openai_like()
+            body: dict[str, object] = {"model": model, "input": texts}
+            if isinstance(extra_body, dict):
+                body.update(extra_body)
+
+            def parse(data: dict) -> list[list[float]]:
+                items = data.get("data", []) or []
+                embs = [d.get("embedding") for d in items]
+                # Ensure shape consistency
+                if len(embs) != len(texts) or any(e is None for e in embs):
+                    raise RuntimeError(
+                        f"Embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
+                    )
+                return embs  # type: ignore[return-value]
+
+            async def _call():
+                r = await self._client.post(url, headers=headers, json=body)
+                try:
+                    r.raise_for_status()
+                except httpx.HTTPStatusError as e:
+                    raise RuntimeError(
+                        f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                    ) from e
+                return parse(r.json())
+
+            return await self._retry.run(_call)
+
+        if self.provider == "azure":
+            # Azure embeddings are typically per-deployment; model sometimes optional/ignored
+            url = f"{self.base_url}/openai/deployments/{self.azure_deployment}/embeddings?api-version={azure_api_version}"
+            headers = {"api-key": self.api_key, "Content-Type": "application/json"}
+            body: dict[str, object] = {"input": texts}
+            # Some Azure variants also accept "model" or dimensions; keep pass-through flexible
+            if model:
+                body["model"] = model
+            if isinstance(extra_body, dict):
+                body.update(extra_body)
+
+            def parse(data: dict) -> list[list[float]]:
+                items = data.get("data", []) or []
+                embs = [d.get("embedding") for d in items]
+                if len(embs) != len(texts) or any(e is None for e in embs):
+                    raise RuntimeError(
+                        f"Azure embeddings response shape mismatch: got {len(embs)} items for {len(texts)} inputs"
+                    )
+                return embs  # type: ignore[return-value]
+
+            async def _call():
+                r = await self._client.post(url, headers=headers, json=body)
+                try:
+                    r.raise_for_status()
+                except httpx.HTTPStatusError as e:
+                    raise RuntimeError(
+                        f"Embeddings request failed ({e.response.status_code}): {e.response.text}"
+                    ) from e
+                return parse(r.json())
+
+            return await self._retry.run(_call)
+
+        if self.provider == "google":
+            # Goal: return one embedding per input.
+            # Preferred: batchEmbedContents if supported by your endpoint/model.
+            # If it 404s/400s, fallback to per-item embedContent.
+            base = self.base_url.rstrip("/")
+            # Newer APIs often live under v1beta; your current code uses v1. Keep v1 but fallback to v1beta if needed.
+            batch_url_v1 = f"{base}/v1/models/{model}:batchEmbedContents?key={self.api_key}"
+            embed_url_v1 = f"{base}/v1/models/{model}:embedContent?key={self.api_key}"
+            batch_url_v1beta = f"{base}/v1beta/models/{model}:batchEmbedContents?key={self.api_key}"
+            embed_url_v1beta = f"{base}/v1beta/models/{model}:embedContent?key={self.api_key}"
+
+            headers = {"Content-Type": "application/json"}
+
+            def parse_single(data: dict) -> list[float]:
+                return (data.get("embedding") or {}).get("values") or []
+
+            def parse_batch(data: dict) -> list[list[float]]:
+                # Typical shape: {"embeddings":[{"values":[...]} , ...]}
+                embs = []
+                for e in data.get("embeddings") or []:
+                    embs.append((e or {}).get("values") or [])
+                if len(embs) != len(texts):
+                    raise RuntimeError(
+                        f"Gemini batch embeddings mismatch: got {len(embs)} for {len(texts)}"
+                    )
+                return embs
+
+            async def try_batch(url: str) -> list[list[float]] | None:
+                body = {"requests": [{"content": {"parts": [{"text": t}]}} for t in texts]}
+                r = await self._client.post(url, headers=headers, json=body)
+                if r.status_code in (404, 400):
+                    return None
+                try:
+                    r.raise_for_status()
+                except httpx.HTTPStatusError as e:
+                    raise RuntimeError(
+                        f"Gemini batchEmbedContents failed ({e.response.status_code}): {e.response.text}"
+                    ) from e
+                return parse_batch(r.json())
+
+            async def call_single(url: str) -> list[list[float]]:
+                out: list[list[float]] = []
+                for t in texts:
+                    r = await self._client.post(
+                        url, headers=headers, json={"content": {"parts": [{"text": t}]}}
+                    )
+                    try:
+                        r.raise_for_status()
+                    except httpx.HTTPStatusError as e:
+                        raise RuntimeError(
+                            f"Gemini embedContent failed ({e.response.status_code}): {e.response.text}"
+                        ) from e
+                    out.append(parse_single(r.json()))
+                if len(out) != len(texts):
+                    raise RuntimeError(
+                        f"Gemini embeddings mismatch: got {len(out)} for {len(texts)}"
+                    )
+                return out
+
+            async def _call():
+                # Try v1 batch, then v1beta batch, then fallback to v1 single, then v1beta single
+                res = await try_batch(batch_url_v1)
+                if res is not None:
+                    return res
+                res = await try_batch(batch_url_v1beta)
+                if res is not None:
+                    return res
+
+                # fallback loop
+                try:
+                    return await call_single(embed_url_v1)
+                except RuntimeError:
+                    return await call_single(embed_url_v1beta)
+
+            return await self._retry.run(_call)
+
+        raise NotImplementedError(f"Embeddings not supported for {self.provider}")
+
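As a companion to the docstring examples, the sketch below exercises the model-resolution order (`kw` > `embed_model` > `EMBED_MODEL` env var > `"text-embedding-3-small"`) and the `extra_body` pass-through used by the OpenAI-like branch. The wrapper name and the `dimensions` field are illustrative; whether extra fields are honored depends on the provider and model.

```python
# Sketch: explicit model keyword wins over the client's embed_model and the
# EMBED_MODEL environment variable; extra_body is merged into the request body
# as-is for OpenAI-compatible providers.
async def embed_short(llm, texts: list[str]) -> list[list[float]]:
    return await llm.embed(
        texts,
        model="text-embedding-3-small",   # explicit kw override
        extra_body={"dimensions": 256},   # forwarded verbatim; provider-dependent
    )
```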
     # ---------------- Internals ----------------
     def _headers_openai_like(self):
         hdr = {"Content-Type": "application/json"}

@@ -492,18 +1548,63 @@ class GenericLLMClient(LLMClientProtocol):
         return_response: bool = False,
     ) -> Any:
         """
-
-
-
-            -
-
-
-
-
+        Send a low-level HTTP request using the configured LLM provider’s client.
+
+        This method provides direct access to the underlying HTTP transport, automatically
+        applying provider-specific authentication, base URL resolution, and retry logic.
+        It is intended for advanced use cases where you need to call custom endpoints
+        or experiment with provider APIs not covered by higher-level methods.
+
+        Examples:
+            Basic usage with a relative path:
+            ```python
+            result = await context.llm().raw(
+                method="POST",
+                path="/custom/endpoint",
+                json={"foo": "bar"}
+            )
+            ```
+
+            Sending a GET request to an absolute URL:
+            ```python
+            response = await context.llm().raw(
+                method="GET",
+                url="https://api.openai.com/v1/models",
+                return_response=True
+            )
+            ```
+
+            Overriding headers and query parameters:
+            ```python
+            result = await context.llm().raw(
+                path="/v1/special",
+                headers={"X-Custom": "123"},
+                params={"q": "search"}
+            )
+            ```
+
+        Args:
+            method: HTTP method to use (e.g., "POST", "GET").
+            path: Relative path to append to the provider’s base URL.
+            url: Absolute URL to call (overrides `path` and `base_url`).
+            json: JSON-serializable body to send with the request.
+            params: Dictionary of query parameters.
+            headers: Dictionary of HTTP headers to override defaults.
+            return_response: If True, return the raw `httpx.Response` object;
+                otherwise, return the parsed JSON response.

         Returns:
-
-
+            Any: The parsed JSON response by default, or the raw `httpx.Response`
+                if `return_response=True`.
+
+        Raises:
+            ValueError: If neither `url` nor `path` is provided.
+            RuntimeError: For HTTP errors or provider-specific failures.
+
+        Notes:
+            - This method is accessed via `context.llm().raw(...)`.
+            - Provider authentication and retry logic are handled automatically.
+            - Use with caution; malformed requests may result in provider errors.
         """
         await self._ensure_client()