aethergraph 0.1.0a2__py3-none-any.whl → 0.1.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aethergraph/__main__.py +3 -0
- aethergraph/api/v1/artifacts.py +23 -4
- aethergraph/api/v1/schemas.py +7 -0
- aethergraph/api/v1/session.py +123 -4
- aethergraph/config/config.py +2 -0
- aethergraph/config/search.py +49 -0
- aethergraph/contracts/services/channel.py +18 -1
- aethergraph/contracts/services/execution.py +58 -0
- aethergraph/contracts/services/llm.py +26 -0
- aethergraph/contracts/services/memory.py +10 -4
- aethergraph/contracts/services/planning.py +53 -0
- aethergraph/contracts/storage/event_log.py +8 -0
- aethergraph/contracts/storage/search_backend.py +47 -0
- aethergraph/contracts/storage/vector_index.py +73 -0
- aethergraph/core/graph/action_spec.py +76 -0
- aethergraph/core/graph/graph_fn.py +75 -2
- aethergraph/core/graph/graphify.py +74 -2
- aethergraph/core/runtime/graph_runner.py +2 -1
- aethergraph/core/runtime/node_context.py +66 -3
- aethergraph/core/runtime/node_services.py +8 -0
- aethergraph/core/runtime/run_manager.py +263 -271
- aethergraph/core/runtime/run_types.py +54 -1
- aethergraph/core/runtime/runtime_env.py +35 -14
- aethergraph/core/runtime/runtime_services.py +308 -18
- aethergraph/plugins/agents/default_chat_agent.py +266 -74
- aethergraph/plugins/agents/default_chat_agent_v2.py +487 -0
- aethergraph/plugins/channel/adapters/webui.py +69 -21
- aethergraph/plugins/channel/routes/webui_routes.py +8 -48
- aethergraph/runtime/__init__.py +12 -0
- aethergraph/server/app_factory.py +10 -1
- aethergraph/server/ui_static/assets/index-CFktGdbW.js +4913 -0
- aethergraph/server/ui_static/assets/index-DcfkFlTA.css +1 -0
- aethergraph/server/ui_static/index.html +2 -2
- aethergraph/services/artifacts/facade.py +157 -21
- aethergraph/services/artifacts/types.py +35 -0
- aethergraph/services/artifacts/utils.py +42 -0
- aethergraph/services/channel/channel_bus.py +3 -1
- aethergraph/services/channel/event_hub copy.py +55 -0
- aethergraph/services/channel/event_hub.py +81 -0
- aethergraph/services/channel/factory.py +3 -2
- aethergraph/services/channel/session.py +709 -74
- aethergraph/services/container/default_container.py +69 -7
- aethergraph/services/execution/__init__.py +0 -0
- aethergraph/services/execution/local_python.py +118 -0
- aethergraph/services/indices/__init__.py +0 -0
- aethergraph/services/indices/global_indices.py +21 -0
- aethergraph/services/indices/scoped_indices.py +292 -0
- aethergraph/services/llm/generic_client.py +342 -46
- aethergraph/services/llm/generic_embed_client.py +359 -0
- aethergraph/services/llm/types.py +3 -1
- aethergraph/services/memory/distillers/llm_long_term.py +60 -109
- aethergraph/services/memory/distillers/llm_long_term_v1.py +180 -0
- aethergraph/services/memory/distillers/llm_meta_summary.py +57 -266
- aethergraph/services/memory/distillers/llm_meta_summary_v1.py +342 -0
- aethergraph/services/memory/distillers/long_term.py +48 -131
- aethergraph/services/memory/distillers/long_term_v1.py +170 -0
- aethergraph/services/memory/facade/chat.py +18 -8
- aethergraph/services/memory/facade/core.py +159 -19
- aethergraph/services/memory/facade/distillation.py +86 -31
- aethergraph/services/memory/facade/retrieval.py +100 -1
- aethergraph/services/memory/factory.py +4 -1
- aethergraph/services/planning/__init__.py +0 -0
- aethergraph/services/planning/action_catalog.py +271 -0
- aethergraph/services/planning/bindings.py +56 -0
- aethergraph/services/planning/dependency_index.py +65 -0
- aethergraph/services/planning/flow_validator.py +263 -0
- aethergraph/services/planning/graph_io_adapter.py +150 -0
- aethergraph/services/planning/input_parser.py +312 -0
- aethergraph/services/planning/missing_inputs.py +28 -0
- aethergraph/services/planning/node_planner.py +613 -0
- aethergraph/services/planning/orchestrator.py +112 -0
- aethergraph/services/planning/plan_executor.py +506 -0
- aethergraph/services/planning/plan_types.py +321 -0
- aethergraph/services/planning/planner.py +617 -0
- aethergraph/services/planning/planner_service.py +369 -0
- aethergraph/services/planning/planning_context_builder.py +43 -0
- aethergraph/services/planning/quick_actions.py +29 -0
- aethergraph/services/planning/routers/__init__.py +0 -0
- aethergraph/services/planning/routers/simple_router.py +26 -0
- aethergraph/services/rag/facade.py +0 -3
- aethergraph/services/scope/scope.py +30 -30
- aethergraph/services/scope/scope_factory.py +15 -7
- aethergraph/services/skills/__init__.py +0 -0
- aethergraph/services/skills/skill_registry.py +465 -0
- aethergraph/services/skills/skills.py +220 -0
- aethergraph/services/skills/utils.py +194 -0
- aethergraph/storage/artifacts/artifact_index_jsonl.py +16 -10
- aethergraph/storage/artifacts/artifact_index_sqlite.py +12 -2
- aethergraph/storage/docstore/sqlite_doc_sync.py +1 -1
- aethergraph/storage/memory/event_persist.py +42 -2
- aethergraph/storage/memory/fs_persist.py +32 -2
- aethergraph/storage/search_backend/__init__.py +0 -0
- aethergraph/storage/search_backend/generic_vector_backend.py +230 -0
- aethergraph/storage/search_backend/null_backend.py +34 -0
- aethergraph/storage/search_backend/sqlite_lexical_backend.py +387 -0
- aethergraph/storage/search_backend/utils.py +31 -0
- aethergraph/storage/search_factory.py +75 -0
- aethergraph/storage/vector_index/faiss_index.py +72 -4
- aethergraph/storage/vector_index/sqlite_index.py +521 -52
- aethergraph/storage/vector_index/sqlite_index_vanila.py +311 -0
- aethergraph/storage/vector_index/utils.py +22 -0
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/METADATA +1 -1
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/RECORD +108 -64
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/WHEEL +1 -1
- aethergraph/plugins/agents/default_chat_agent copy.py +0 -90
- aethergraph/server/ui_static/assets/index-BR5GtXcZ.css +0 -1
- aethergraph/server/ui_static/assets/index-CQ0HZZ83.js +0 -400
- aethergraph/services/eventhub/event_hub.py +0 -76
- aethergraph/services/llm/generic_client copy.py +0 -691
- aethergraph/services/prompts/file_store.py +0 -41
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/entry_points.txt +0 -0
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/licenses/NOTICE +0 -0
- {aethergraph-0.1.0a2.dist-info → aethergraph-0.1.0a4.dist-info}/top_level.txt +0 -0
aethergraph/services/llm/generic_client.py

@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+from collections.abc import Awaitable, Callable
 import json
 import logging
 import os
@@ -37,6 +38,8 @@ from aethergraph.services.llm.utils import (
     _validate_json_schema,
 )
 
+DeltaCallback = Callable[[str], Awaitable[None]]
+
 
 # ---- Helpers --------------------------------------------------------------
 class _Retry:
@@ -99,6 +102,7 @@ class GenericLLMClient(LLMClientProtocol):
         self._retry = _Retry()
         self._client = httpx.AsyncClient(timeout=timeout)
         self._bound_loop = None
+        self._timeout = timeout
 
         # Resolve creds/base
         self.api_key = (
@@ -236,19 +240,16 @@ class GenericLLMClient(LLMClientProtocol):
             logger.warning(f"llm_metering_failed: {e}")
 
     async def _ensure_client(self):
-        """Ensure the httpx client is bound to the current event loop.
-        This allows safe usage across multiple async contexts.
-        """
         loop = asyncio.get_running_loop()
-
-
-
-
-
-
-
-
-        self._client = httpx.AsyncClient(timeout=self.
+
+        if self._client is None:
+            self._client = httpx.AsyncClient(timeout=self.timeout)
+            self._bound_loop = loop
+            return
+
+        if self._bound_loop is not loop:
+            # Don't attempt to close the old client here; it belongs to the old loop.
+            self._client = httpx.AsyncClient(timeout=self._timeout)
         self._bound_loop = loop
 
     async def chat(
@@ -298,6 +299,10 @@ class GenericLLMClient(LLMClientProtocol):
             validate_json: If True, validate JSON output against schema.
             fail_on_unsupported: If True, raise error for unsupported features.
             **kw: Additional provider-specific keyword arguments.
+                Common cross-provider options include:
+                - model: override default model name.
+                - tools: OpenAI-style tools / functions description.
+                - tool_choice: tool selection strategy (e.g., "auto", "none", or provider-specific dict).
 
         Returns:
             tuple[str, dict[str, int]]: The model response (text or structured output) and usage statistics.
@@ -313,7 +318,7 @@ class GenericLLMClient(LLMClientProtocol):
             - Rate limiting and metering help manage resource usage effectively.
         """
         await self._ensure_client()
-        model = kw.
+        model = kw.pop("model", self.model)
 
         start = time.perf_counter()
 
@@ -355,6 +360,250 @@ class GenericLLMClient(LLMClientProtocol):
 
         return text, usage
 
+    async def chat_stream(
+        self,
+        messages: list[dict[str, Any]],
+        *,
+        reasoning_effort: str | None = None,
+        max_output_tokens: int | None = None,
+        output_format: ChatOutputFormat = "text",
+        json_schema: dict[str, Any] | None = None,
+        schema_name: str = "output",
+        strict_schema: bool = True,
+        validate_json: bool = True,
+        fail_on_unsupported: bool = True,
+        on_delta: DeltaCallback | None = None,
+        **kw: Any,
+    ) -> tuple[str, dict[str, int]]:
+        """
+        Stream a chat request to the LLM provider and return the accumulated response.
+
+        This method handles provider-specific streaming paths, falling back to non-streaming
+        chat() if streaming is not implemented. It supports real-time delta updates via
+        a callback function and returns the full response text and usage statistics at the end.
+
+        Examples:
+            Basic usage with a list of messages:
+            ```python
+            response, usage = await context.llm().chat_stream(
+                messages=[{"role": "user", "content": "Hello, assistant!"}]
+            )
+            ```
+
+            Using a delta callback for real-time updates:
+            ```python
+            async def on_delta(delta):
+                print(delta, end="")
+
+            response, usage = await context.llm().chat_stream(
+                messages=[{"role": "user", "content": "Tell me a joke."}],
+                on_delta=on_delta
+            )
+            ```
+
+        Args:
+            messages: List of message dicts, each with "role" and "content" keys.
+            reasoning_effort: Optional string to control model reasoning depth.
+            max_output_tokens: Optional maximum number of output tokens.
+            output_format: Output format, e.g., "text" or "json".
+            json_schema: Optional JSON schema for validating structured output.
+            schema_name: Name for the root schema object (default: "output").
+            strict_schema: If True, enforce strict schema validation.
+            validate_json: If True, validate JSON output against schema.
+            fail_on_unsupported: If True, raise error for unsupported features.
+            on_delta: Optional callback function to handle real-time text deltas.
+            **kw: Additional provider-specific keyword arguments.
+
+        Returns:
+            tuple[str, dict[str, int]]: The accumulated response text and usage statistics.
+
+        Raises:
+            NotImplementedError: If the provider is not supported.
+            RuntimeError: For various errors including invalid JSON output or rate limit violations.
+            LLMUnsupportedFeatureError: If a requested feature is unsupported by the provider.
+
+        Notes:
+            - This method centralizes handling of streaming and non-streaming paths for LLM providers.
+            - The `on_delta` callback allows for real-time updates, making it suitable for interactive applications.
+            - Rate limiting and usage metering are applied consistently across providers.
+            - Currently, only OpenAI's Responses API streaming is implemented; other providers will fall back to the non-streaming `chat()` method.
+        """
+
+        await self._ensure_client()
+        model = kw.pop("model", self.model)
+        start = time.perf_counter()
+
+        # For now, only OpenAI Responses streaming is implemented.
+        if self.provider == "openai":
+            text, usage = await self._chat_openai_responses_stream(
+                messages,
+                model=model,
+                reasoning_effort=reasoning_effort,
+                max_output_tokens=max_output_tokens,
+                output_format=output_format,
+                json_schema=json_schema,
+                schema_name=schema_name,
+                strict_schema=strict_schema,
+                fail_on_unsupported=fail_on_unsupported,
+                on_delta=on_delta,
+                **kw,
+            )
+        else:
+            # Fallback: just call normal chat() and send a single delta.
+            text, usage = await self.chat(
+                messages,
+                reasoning_effort=reasoning_effort,
+                max_output_tokens=max_output_tokens,
+                output_format=output_format,
+                json_schema=json_schema,
+                schema_name=schema_name,
+                strict_schema=strict_schema,
+                validate_json=validate_json,
+                fail_on_unsupported=fail_on_unsupported,
+                **kw,
+            )
+            if on_delta is not None and text:
+                await on_delta(text)
+
+        # Postprocess (JSON modes etc.)
+        text = self._postprocess_structured_output(
+            text=text,
+            output_format=output_format,
+            json_schema=json_schema,
+            strict_schema=strict_schema,
+            validate_json=validate_json,
+        )
+
+        latency_ms = int((time.perf_counter() - start) * 1000)
+
+        # Rate limits + metering as usual
+        self._enforce_llm_limits_for_run(usage=usage)
+        await self._record_llm_usage(model=model, usage=usage, latency_ms=latency_ms)
+
+        return text, usage
+
+    async def _chat_openai_responses_stream(
+        self,
+        messages: list[dict[str, Any]],
+        *,
+        model: str,
+        reasoning_effort: str | None,
+        max_output_tokens: int | None,
+        output_format: ChatOutputFormat,
+        json_schema: dict[str, Any] | None,
+        schema_name: str,
+        strict_schema: bool,
+        fail_on_unsupported: bool,
+        on_delta: DeltaCallback | None = None,
+        **kw: Any,
+    ) -> tuple[str, dict[str, int]]:
+        """
+        Stream text using OpenAI Responses API.
+
+        - We only support text / json_object / json_schema here.
+        - We look for `response.output_text.delta` events and call on_delta(delta).
+        - We accumulate full text and best-effort usage from the final event.
+        """
+        await self._ensure_client()
+        assert self._client is not None
+
+        url = f"{self.base_url}/responses"
+        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+
+        input_messages = _normalize_openai_responses_input(messages)
+
+        body: dict[str, Any] = {
+            "model": model,
+            "input": input_messages,
+            "stream": True,
+        }
+
+        if reasoning_effort is not None:
+            body["reasoning"] = {"effort": reasoning_effort}
+        if max_output_tokens is not None:
+            body["max_output_tokens"] = max_output_tokens
+
+        # Structured output config (same as non-streaming path)
+        if output_format == "json_object":
+            body["text"] = {"format": {"type": "json_object"}}
+        elif output_format == "json_schema":
+            if json_schema is None:
+                raise ValueError("output_format='json_schema' requires json_schema")
+            body["text"] = {
+                "format": {
+                    "type": "json_schema",
+                    "name": schema_name,
+                    "schema": json_schema,
+                    "strict": bool(strict_schema),
+                }
+            }
+        # else: default "text" format
+
+        full_chunks: list[str] = []
+        usage: dict[str, int] = {}
+
+        async def _handle_event(evt: dict[str, Any]):
+            nonlocal usage
+
+            etype = evt.get("type")
+
+            # Main text deltas
+            if etype == "response.output_text.delta":
+                delta = evt.get("delta") or ""
+                if delta:
+                    full_chunks.append(delta)
+                    if on_delta is not None:
+                        await on_delta(delta)
+
+            # Finalization – grab usage from completed response if present
+            elif etype in ("response.completed", "response.incomplete", "response.failed"):
+                resp = evt.get("response") or {}
+                # Usage may or may not be present, keep best-effort
+                usage = resp.get("usage") or usage
+
+            # Optional: basic error surface
+            elif etype == "error":
+                # in practice `error` may be structured differently; this is just a guardrail
+                msg = evt.get("message") or "Unknown streaming error"
+                raise RuntimeError(f"OpenAI streaming error: {msg}")
+
+        async def _call():
+            async with self._client.stream(
+                "POST",
+                url,
+                headers=headers,
+                json=body,
+            ) as r:
+                try:
+                    r.raise_for_status()
+                except httpx.HTTPStatusError as e:
+                    text = await r.aread()
+                    raise RuntimeError(f"OpenAI Responses streaming error: {text!r}") from e
+
+                # SSE: each event line is "data: {...}" + blank lines between events
+                async for line in r.aiter_lines():
+                    if not line:
+                        continue
+                    if not line.startswith("data:"):
+                        continue
+
+                    data_str = line[len("data:") :].strip()
+                    if not data_str or data_str == "[DONE]":
+                        # OpenAI ends stream with `data: [DONE]`
+                        break
+
+                    try:
+                        evt = json.loads(data_str)
+                    except Exception:
+                        # best-effort: ignore malformed chunks
+                        continue
+
+                    await _handle_event(evt)
+
+        await self._retry.run(_call)
+
+        return "".join(full_chunks), usage
+
     async def _chat_dispatch(
         self,
         messages: list[dict[str, Any]],
@@ -370,6 +619,10 @@ class GenericLLMClient(LLMClientProtocol):
         fail_on_unsupported: bool,
         **kw: Any,
     ) -> tuple[str, dict[str, int]]:
+        # Extract cross-provider extras if any
+        tools = kw.pop("tools", None)
+        tool_choice = kw.pop("tool_choice", None)
+
         # OpenAI is now symmetric too
         if self.provider == "openai":
             return await self._chat_openai_responses(
@@ -381,6 +634,9 @@ class GenericLLMClient(LLMClientProtocol):
                 json_schema=json_schema,
                 schema_name=schema_name,
                 strict_schema=strict_schema,
+                tools=tools,
+                tool_choice=tool_choice,
+                **kw,
             )
 
         # Everyone else
@@ -391,6 +647,8 @@ class GenericLLMClient(LLMClientProtocol):
                 output_format=output_format,
                 json_schema=json_schema,
                 fail_on_unsupported=fail_on_unsupported,
+                tools=tools,
+                tool_choice=tool_choice,
                 **kw,
             )
 
@@ -401,6 +659,8 @@ class GenericLLMClient(LLMClientProtocol):
                 output_format=output_format,
                 json_schema=json_schema,
                 fail_on_unsupported=fail_on_unsupported,
+                tools=tools,
+                tool_choice=tool_choice,
                 **kw,
             )
 
@@ -410,6 +670,8 @@ class GenericLLMClient(LLMClientProtocol):
                 model=model,
                 output_format=output_format,
                 json_schema=json_schema,
+                fail_on_unsupported=fail_on_unsupported,
+                tools=tools,
                 **kw,
             )
 
@@ -420,6 +682,7 @@ class GenericLLMClient(LLMClientProtocol):
                 output_format=output_format,
                 json_schema=json_schema,
                 fail_on_unsupported=fail_on_unsupported,
+                tools=tools,
                 **kw,
             )
 
@@ -463,6 +726,9 @@ class GenericLLMClient(LLMClientProtocol):
         json_schema: dict[str, Any] | None,
         schema_name: str,
         strict_schema: bool,
+        tools: list[dict[str, Any]] | None = None,
+        tool_choice: Any = None,
+        **kw: Any,
     ) -> tuple[str, dict[str, int]]:
         await self._ensure_client()
         assert self._client is not None
@@ -470,16 +736,16 @@ class GenericLLMClient(LLMClientProtocol):
         url = f"{self.base_url}/responses"
         headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
 
-        # Normalize input so vision works if caller used image_url parts
         input_messages = _normalize_openai_responses_input(messages)
 
         body: dict[str, Any] = {"model": model, "input": input_messages}
+
         if reasoning_effort is not None:
             body["reasoning"] = {"effort": reasoning_effort}
         if max_output_tokens is not None:
             body["max_output_tokens"] = max_output_tokens
 
-        # Structured output
+        # Structured output
         if output_format == "json_object":
             body["text"] = {"format": {"type": "json_object"}}
         elif output_format == "json_schema":
@@ -494,6 +760,12 @@ class GenericLLMClient(LLMClientProtocol):
                 }
             }
 
+        # Tools (Responses API style)
+        if tools is not None:
+            body["tools"] = tools
+        if tool_choice is not None:
+            body["tool_choice"] = tool_choice
+
         async def _call():
             r = await self._client.post(url, headers=headers, json=body)
             try:
@@ -502,12 +774,18 @@ class GenericLLMClient(LLMClientProtocol):
                 raise RuntimeError(f"OpenAI Responses API error: {e.response.text}") from e
 
             data = r.json()
+            usage = data.get("usage", {}) or {}
+
+            # If caller asked for raw provider payload, just return it as a JSON string
+            if output_format == "raw":
+                txt = json.dumps(data, ensure_ascii=False)
+                return txt, usage
+
+            # Existing parsing logic for message-only flows
             output = data.get("output")
             txt = ""
 
-            # Your existing parsing logic, but robust for list shape
             if isinstance(output, list) and output:
-                # concat all message outputs if multiple
                 chunks: list[str] = []
                 for item in output:
                     if isinstance(item, dict) and item.get("type") == "message":
@@ -531,7 +809,6 @@ class GenericLLMClient(LLMClientProtocol):
             else:
                 txt = ""
 
-            usage = data.get("usage", {}) or {}
             return txt, usage
 
         return await self._retry.run(_call)
@@ -544,29 +821,10 @@ class GenericLLMClient(LLMClientProtocol):
         output_format: ChatOutputFormat,
         json_schema: dict[str, Any] | None,
         fail_on_unsupported: bool,
+        tools: list[dict[str, Any]] | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
         **kw: Any,
     ) -> tuple[str, dict[str, int]]:
-        """
-        Docstring for _chat_openai_like_chat_completions
-
-        :param self: Description
-        :param messages: Description
-        :type messages: list[dict[str, Any]]
-        :param model: Description
-        :type model: str
-        :param output_format: Description
-        :type output_format: ChatOutputFormat
-        :param json_schema: Description
-        :type json_schema: dict[str, Any] | None
-        :param fail_on_unsupported: Description
-        :type fail_on_unsupported: bool
-        :param kw: Description
-        :type kw: Any
-        :return: Description
-        :rtype: tuple[str, dict[str, int]]
-
-        Call OpenAI-like /chat/completions endpoint.
-        """
         await self._ensure_client()
         assert self._client is not None
 
@@ -580,7 +838,6 @@ class GenericLLMClient(LLMClientProtocol):
             response_format = {"type": "json_object"}
             msg_for_provider = _ensure_system_json_directive(messages, schema=None)
         elif output_format == "json_schema":
-            # not truly native in most openai-like providers
             if fail_on_unsupported:
                 raise RuntimeError(f"provider {self.provider} does not support native json_schema")
             msg_for_provider = _ensure_system_json_directive(messages, schema=json_schema)
@@ -594,6 +851,10 @@ class GenericLLMClient(LLMClientProtocol):
         }
         if response_format is not None:
             body["response_format"] = response_format
+        if tools is not None:
+            body["tools"] = tools
+        if tool_choice is not None:
+            body["tool_choice"] = tool_choice
 
             r = await self._client.post(
                 f"{self.base_url}/chat/completions",
@@ -606,8 +867,13 @@ class GenericLLMClient(LLMClientProtocol):
                 raise RuntimeError(f"OpenAI-like chat/completions error: {e.response.text}") from e
 
             data = r.json()
-            txt, _ = _first_text(data.get("choices", []))  # you already have _first_text in file
             usage = data.get("usage", {}) or {}
+
+            if output_format == "raw":
+                txt = json.dumps(data, ensure_ascii=False)
+                return txt, usage
+
+            txt, _ = _first_text(data.get("choices", []))
             return txt, usage
 
         return await self._retry.run(_call)
@@ -620,6 +886,8 @@ class GenericLLMClient(LLMClientProtocol):
         output_format: ChatOutputFormat,
         json_schema: dict[str, Any] | None,
         fail_on_unsupported: bool,
+        tools: list[dict[str, Any]] | None = None,
+        tool_choice: str | dict[str, Any] | None = None,
         **kw: Any,
     ) -> tuple[str, dict[str, int]]:
         await self._ensure_client()
@@ -650,6 +918,11 @@ class GenericLLMClient(LLMClientProtocol):
         )
         payload["messages"] = _ensure_system_json_directive(messages, schema=json_schema)
 
+        if tools is not None:
+            payload["tools"] = tools
+        if tool_choice is not None:
+            payload["tool_choice"] = tool_choice
+
         async def _call():
            r = await self._client.post(
                f"{self.base_url}/openai/deployments/{self.azure_deployment}/chat/completions?api-version=2024-08-01-preview",
@@ -662,8 +935,13 @@ class GenericLLMClient(LLMClientProtocol):
                 raise RuntimeError(f"Azure chat/completions error: {e.response.text}") from e
 
             data = r.json()
-            txt, _ = _first_text(data.get("choices", []))
             usage = data.get("usage", {}) or {}
+
+            if output_format == "raw":
+                txt = json.dumps(data, ensure_ascii=False)
+                return txt, usage
+
+            txt, _ = _first_text(data.get("choices", []))
             return txt, usage
 
         return await self._retry.run(_call)
@@ -675,11 +953,16 @@ class GenericLLMClient(LLMClientProtocol):
         model: str,
         output_format: ChatOutputFormat,
         json_schema: dict[str, Any] | None,
+        fail_on_unsupported: bool,
+        tools: list[dict[str, Any]] | None = None,
         **kw: Any,
     ) -> tuple[str, dict[str, int]]:
         await self._ensure_client()
         assert self._client is not None
 
+        if tools is not None and fail_on_unsupported:
+            raise RuntimeError("Anthropic tools/function calling not wired yet in this client")
+
         temperature = kw.get("temperature", 0.5)
         top_p = kw.get("top_p", 1.0)
 
@@ -746,9 +1029,14 @@ class GenericLLMClient(LLMClientProtocol):
                 raise RuntimeError(f"Anthropic API error ({e.response.status_code}): {body}") from e
 
             data = r.json()
+            usage = data.get("usage", {}) or {}
+
+            if output_format == "raw":
+                txt = json.dumps(data, ensure_ascii=False)
+                return txt, usage
+
             blocks = data.get("content") or []
             txt = "".join(b.get("text", "") for b in blocks if b.get("type") == "text")
-            usage = data.get("usage", {}) or {}
             return txt, usage
 
         return await self._retry.run(_call)
@@ -761,6 +1049,7 @@ class GenericLLMClient(LLMClientProtocol):
         output_format: ChatOutputFormat,
         json_schema: dict[str, Any] | None,
         fail_on_unsupported: bool,
+        tools: list[dict[str, Any]] | None = None,
         **kw: Any,
     ) -> tuple[str, dict[str, int]]:
         await self._ensure_client()
@@ -769,6 +1058,9 @@ class GenericLLMClient(LLMClientProtocol):
         temperature = kw.get("temperature", 0.5)
         top_p = kw.get("top_p", 1.0)
 
+        if tools is not None and fail_on_unsupported:
+            raise RuntimeError("Gemini tools/function calling not wired yet in this client")
+
         # Merge system messages into preamble
         system_parts: list[str] = []
         for m in messages:
@@ -815,14 +1107,18 @@ class GenericLLMClient(LLMClientProtocol):
                 ) from e
 
             data = r.json()
-            cand = (data.get("candidates") or [{}])[0]
-            txt = "".join(p.get("text", "") for p in (cand.get("content", {}).get("parts") or []))
-
             um = data.get("usageMetadata") or {}
             usage = {
                 "input_tokens": int(um.get("promptTokenCount", 0) or 0),
                 "output_tokens": int(um.get("candidatesTokenCount", 0) or 0),
             }
+
+            if output_format == "raw":
+                txt = json.dumps(data, ensure_ascii=False)
+                return txt, usage
+
+            cand = (data.get("candidates") or [{}])[0]
+            txt = "".join(p.get("text", "") for p in (cand.get("content", {}).get("parts") or []))
             return txt, usage
 
         return await self._retry.run(_call)
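Taken together, the generic_client.py changes add a streaming entry point (`chat_stream` with an `on_delta` callback), a `tools` / `tool_choice` passthrough in `_chat_dispatch`, and a raw provider-payload mode. The sketch below is based only on the signatures shown in this diff and is not taken from the package: the `llm` object stands in for a configured `GenericLLMClient` (its constructor is not shown here), and both the tool schema shape and the presence of `"raw"` in `ChatOutputFormat` are assumptions.

```python
# Illustrative sketch only; `llm` is assumed to be a configured GenericLLMClient.
from typing import Any


async def stream_demo(llm: Any) -> None:
    async def on_delta(delta: str) -> None:
        # chat_stream invokes this once per text delta; non-OpenAI providers
        # fall back to chat() and deliver the whole reply as a single delta.
        print(delta, end="", flush=True)

    text, usage = await llm.chat_stream(
        messages=[{"role": "user", "content": "Tell me a joke."}],
        on_delta=on_delta,
    )
    print("\n", usage)

    # tools / tool_choice are popped from **kw in _chat_dispatch and forwarded
    # to providers that accept them; output_format="raw" (assumed to be a valid
    # ChatOutputFormat value) returns the provider payload as a JSON string.
    raw_payload, usage = await llm.chat(
        messages=[{"role": "user", "content": "What's the weather in Oslo?"}],
        tools=[{  # hypothetical OpenAI Responses-style function tool
            "type": "function",
            "name": "get_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        }],
        tool_choice="auto",
        output_format="raw",
    )
```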