donkit-llm 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
@@ -1,6 +1,6 @@
  import json
  import base64
- from typing import AsyncIterator
+ from typing import Any, AsyncIterator

  import google.genai as genai
  from google.genai.types import Blob, Content, FunctionDeclaration, Part
@@ -32,6 +32,8 @@ class VertexAIModel(LLMModelAbstract):
      - Claude models via Vertex AI (claude-3-5-sonnet-v2@20241022, etc.)
      """

+     name = "vertex"
+
      def __init__(
          self,
          project_id: str,
@@ -104,9 +106,9 @@ class VertexAIModel(LLMModelAbstract):
          else:
              # Multimodal content
              for part in msg.content:
-                 if part.type == ContentType.TEXT:
+                 if part.content_type == ContentType.TEXT:
                      parts.append(Part(text=part.content))
-                 elif part.type == ContentType.IMAGE_URL:
+                 elif part.content_type == ContentType.IMAGE_URL:
                      # For URLs, we'd need to fetch and convert to inline data
                      parts.append(
                          Part(
@@ -116,7 +118,7 @@ class VertexAIModel(LLMModelAbstract):
                              )
                          )
                      )
-                 elif part.type == ContentType.IMAGE_BASE64:
+                 elif part.content_type == ContentType.IMAGE_BASE64:
                      # part.content is base64 string; Vertex needs raw bytes
                      raw = base64.b64decode(part.content, validate=True)
                      parts.append(
@@ -127,7 +129,7 @@ class VertexAIModel(LLMModelAbstract):
                              )
                          )
                      )
-                 elif part.type == ContentType.AUDIO_BASE64:
+                 elif part.content_type == ContentType.AUDIO_BASE64:
                      raw = base64.b64decode(part.content, validate=True)
                      parts.append(
                          Part(
@@ -137,7 +139,7 @@ class VertexAIModel(LLMModelAbstract):
                              )
                          )
                      )
-                 elif part.type == ContentType.FILE_BASE64:
+                 elif part.content_type == ContentType.FILE_BASE64:
                      raw = base64.b64decode(part.content, validate=True)
                      parts.append(
                          Part(
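Reviewer note on the four hunks above: 0.1.3 renames the content-part discriminator from `part.type` to `part.content_type`. A minimal sketch of a request that exercises the text and image branches; only `ContentType`, `GenerateRequest`, and the `content_type` field are confirmed by this diff, while the `Message`/`ContentPart` model names and import path are assumptions:

```python
import base64

# Assumed import path and model names; adjust to the real donkit-llm API.
from donkit_llm import ContentPart, ContentType, GenerateRequest, Message

with open("chart.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("ascii")

request = GenerateRequest(
    messages=[
        Message(
            role="user",
            content=[
                # 0.1.3 dispatches on content_type, not type
                ContentPart(content_type=ContentType.TEXT, content="Describe this chart."),
                ContentPart(content_type=ContentType.IMAGE_BASE64, content=image_b64),
            ],
        )
    ],
)
```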
@@ -167,130 +169,342 @@ class VertexAIModel(LLMModelAbstract):

          return [GeminiTool(function_declarations=function_declarations)]

-     def _clean_json_schema(self, schema: dict) -> dict:
+     def _parse_response(self, response) -> tuple[str | None, list[ToolCall] | None]:
+         """Parse a genai response (or stream chunk) into plain text and tool calls."""
+         calls: list[ToolCall] = []
+
+         try:
+             cand_list = response.candidates
+         except AttributeError:
+             cand_list = None
+         if not cand_list:
+             return None, None
+
+         cand = cand_list[0]
+
+         try:
+             parts = cand.content.parts or []
+         except AttributeError:
+             parts = []
+
+         # Collect text and tool calls in a single pass
+         collected_text: list[str] = []
+         for p in parts:
+             # Try to get text from this part
+             try:
+                 t = p.text
+                 if t:
+                     collected_text.append(t)
+             except AttributeError:
+                 pass
+
+             # Try to get function_call from this part
+             try:
+                 fc = p.function_call
+                 if fc:
+                     # Extract function name and arguments
+                     try:
+                         name = fc.name
+                     except AttributeError:
+                         name = ""
+
+                     if not name:
+                         continue
+
+                     try:
+                         args = dict(fc.args) if fc.args else {}
+                     except (AttributeError, TypeError):
+                         args = {}
+
+                     calls.append(
+                         ToolCall(
+                             id=name,
+                             type="function",
+                             function=FunctionCall(
+                                 name=name,
+                                 arguments=json.dumps(args),
+                             ),
+                         )
+                     )
+             except AttributeError:
+                 pass
+
+         text = "".join(collected_text)
+         return text or None, calls or None
+
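The new `_parse_response` helper centralizes response parsing for both `generate` and `generate_stream`. Its contract: a `(text, calls)` tuple where each element is `None` when empty, and the tool-call `id` falls back to the function name. A stub with the attribute shape of a genai response illustrates this; the stub and the `model` instance are illustrative, not from the package:

```python
import json
from types import SimpleNamespace

# Stub mimicking the attribute shape of a google-genai response.
fc = SimpleNamespace(name="get_weather", args={"city": "Oslo"})
part = SimpleNamespace(text=None, function_call=fc)
stub = SimpleNamespace(
    candidates=[SimpleNamespace(content=SimpleNamespace(parts=[part]))]
)

text, calls = model._parse_response(stub)  # `model`: an assumed VertexAIModel instance
assert text is None                         # no text parts were collected
assert calls[0].id == "get_weather"         # id falls back to the function name
assert json.loads(calls[0].function.arguments) == {"city": "Oslo"}
```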
+     def _clean_json_schema(self, schema: dict | None) -> dict:
          """
-         Remove $ref and $defs from JSON Schema as Vertex AI doesn't support them.
+         Transform an arbitrary JSON Schema-like dict (possibly produced by Pydantic)
+         into a format compatible with google.genai by:
+         - Inlining $ref by replacing references with actual schemas from $defs
+         - Removing $defs after inlining all references
+         - Renaming unsupported keys to the SDK's expected snake_case
+         - Recursively converting nested schemas (properties, items, anyOf)
+         - Preserving fields supported by the SDK Schema model
          """
          if not isinstance(schema, dict):
-             return schema
+             return {}
+
+         # Step 1: Inline $ref references before any conversion
+         defs = schema.get("$defs", {})
+
+         def inline_refs(obj, definitions):
+             """Recursively inline $ref references."""
+             if isinstance(obj, dict):
+                 # If this object has a $ref, replace it with the referenced schema
+                 if "$ref" in obj:
+                     ref_path = obj["$ref"]
+                     if ref_path.startswith("#/$defs/"):
+                         ref_name = ref_path.replace("#/$defs/", "")
+                         if ref_name in definitions:
+                             # Get the referenced schema and inline it recursively
+                             referenced = definitions[ref_name].copy()
+                             # Preserve description and default from the referencing object
+                             if "description" in obj and "description" not in referenced:
+                                 referenced["description"] = obj["description"]
+                             if "default" in obj:
+                                 referenced["default"] = obj["default"]
+                             return inline_refs(referenced, definitions)
+                     # If can't resolve, remove the $ref
+                     return {k: v for k, v in obj.items() if k != "$ref"}
+
+                 # Recursively process all properties
+                 result = {}
+                 for key, value in obj.items():
+                     if key == "$defs":
+                         continue  # Remove $defs after inlining
+                     # Skip additionalProperties: true as it's not well supported
+                     if key == "additionalProperties" and value is True:
+                         continue
+                     result[key] = inline_refs(value, definitions)
+                 return result
+             elif isinstance(obj, list):
+                 return [inline_refs(item, definitions) for item in obj]
+             else:
+                 return obj
+
+         # Inline all references
+         schema = inline_refs(schema, defs)
+
+         # Step 2: Convert to SDK schema format
+         # Mapping from common JSON Schema/OpenAPI keys to google-genai Schema fields
+         key_map = {
+             "anyOf": "any_of",
+             "additionalProperties": "additional_properties",
+             "maxItems": "max_items",
+             "maxLength": "max_length",
+             "maxProperties": "max_properties",
+             "minItems": "min_items",
+             "minLength": "min_length",
+             "minProperties": "min_properties",
+             "propertyOrdering": "property_ordering",
+         }

-         cleaned = {}
-         for key, value in schema.items():
-             if key in ("$ref", "$defs", "definitions"):
-                 continue
-             if isinstance(value, dict):
-                 cleaned[key] = self._clean_json_schema(value)
-             elif isinstance(value, list):
-                 cleaned[key] = [
-                     self._clean_json_schema(item) if isinstance(item, dict) else item
-                     for item in value
-                 ]
+         def convert(obj):
+             if isinstance(obj, dict):
+                 out: dict[str, object] = {}
+                 for k, v in obj.items():
+                     if k == "const":
+                         out["enum"] = [v]
+                         continue
+
+                     kk = key_map.get(k, k)
+                     if kk == "properties" and isinstance(v, dict):
+                         # properties: dict[str, Schema]
+                         out[kk] = {pk: convert(pv) for pk, pv in v.items()}
+                     elif kk == "items":
+                         # items: Schema (treat list as first item schema)
+                         if isinstance(v, list) and v:
+                             out[kk] = convert(v[0])
+                         else:
+                             out[kk] = convert(v)
+                     elif kk == "any_of" and isinstance(v, list):
+                         out[kk] = [convert(iv) for iv in v]
+                     else:
+                         out[kk] = convert(v)
+                 return out
+             elif isinstance(obj, list):
+                 return [convert(i) for i in obj]
              else:
-                 cleaned[key] = value
+                 return obj

-         return cleaned
+         return convert(schema)

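A worked example of the two-step cleaning, with the behavior read directly off the code above (the output dict is my paraphrase, not captured from a test run):

```python
pydantic_like = {
    "$defs": {"Unit": {"type": "string", "enum": ["C", "F"]}},
    "type": "object",
    "properties": {
        "unit": {"$ref": "#/$defs/Unit", "description": "Temperature unit"},
        "tags": {"type": "array", "items": {"type": "string"}, "maxItems": 5},
        "mode": {"const": "forecast"},
    },
}

cleaned = model._clean_json_schema(pydantic_like)  # `model`: an assumed instance
# Step 1 inlines #/$defs/Unit (keeping the outer description) and drops $defs;
# step 2 renames maxItems -> max_items and rewrites const -> enum:
# {
#     "type": "object",
#     "properties": {
#         "unit": {"type": "string", "enum": ["C", "F"], "description": "Temperature unit"},
#         "tags": {"type": "array", "items": {"type": "string"}, "max_items": 5},
#         "mode": {"enum": ["forecast"]},
#     },
# }
```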
      async def generate(self, request: GenerateRequest) -> GenerateResponse:
          """Generate a response using Vertex AI."""
          await self.validate_request(request)

-         # Separate system message from conversation
-         system_instruction = None
-         messages = []
-         for msg in request.messages:
-             if msg.role == "system":
-                 system_instruction = msg.content if isinstance(msg.content, str) else ""
+         def _safe_text(text: str) -> str:
+             try:
+                 return text.encode("utf-8", errors="replace").decode(
+                     "utf-8", errors="replace"
+                 )
+             except Exception:
+                 return ""
+
+         contents: list[Content] = []
+         system_instruction = ""
+
+         # Group consecutive tool messages into single Content
+         i = 0
+         while i < len(request.messages):
+             m = request.messages[i]
+
+             if m.role == "tool":
+                 # Collect all consecutive tool messages
+                 tool_parts = []
+                 while i < len(request.messages) and request.messages[i].role == "tool":
+                     tool_msg = request.messages[i]
+                     content_str = (
+                         tool_msg.content
+                         if isinstance(tool_msg.content, str)
+                         else str(tool_msg.content)
+                     )
+                     part = Part.from_function_response(
+                         name=getattr(tool_msg, "name", "") or "",
+                         response={"result": _safe_text(content_str)},
+                     )
+                     tool_parts.append(part)
+                     i += 1
+                 # Add all tool responses as a single Content
+                 if tool_parts:
+                     contents.append(Content(role="function", parts=tool_parts))
+                 continue
+             elif m.role == "system":
+                 content_str = (
+                     m.content if isinstance(m.content, str) else str(m.content)
+                 )
+                 system_instruction += _safe_text(content_str).strip()
+                 i += 1
+             elif m.role == "assistant":
+                 # Check if message has tool_calls attribute
+                 if hasattr(m, "tool_calls") and m.tool_calls:
+                     # Assistant message with tool calls
+                     parts_list = []
+                     for tc in m.tool_calls:
+                         if not tc.function.name:
+                             continue
+                         args = (
+                             json.loads(tc.function.arguments)
+                             if isinstance(tc.function.arguments, str)
+                             else tc.function.arguments
+                         )
+                         if not isinstance(args, dict):
+                             args = {}
+                         part = Part.from_function_call(name=tc.function.name, args=args)
+                         parts_list.append(part)
+                     if parts_list:
+                         contents.append(Content(role="model", parts=parts_list))
+                 else:
+                     # Regular assistant text response
+                     content_str = (
+                         m.content if isinstance(m.content, str) else str(m.content)
+                     )
+                     if content_str:
+                         part = Part(text=_safe_text(content_str))
+                         contents.append(Content(role="model", parts=[part]))
+                 i += 1
              else:
-                 messages.append(self._convert_message(msg))
-
-         config_kwargs = {}
-         if request.temperature is not None:
-             config_kwargs["temperature"] = request.temperature
-         if request.max_tokens is not None:
-             config_kwargs["max_output_tokens"] = request.max_tokens
-         if request.top_p is not None:
-             config_kwargs["top_p"] = request.top_p
+                 # User message - use _convert_message to handle multimodal content
+                 user_content = self._convert_message(m)
+                 contents.append(user_content)
+                 i += 1
+
+         config_kwargs = {
+             "temperature": request.temperature
+             if request.temperature is not None
+             else 0.2,
+             "top_p": request.top_p if request.top_p is not None else 0.95,
+             "max_output_tokens": request.max_tokens
+             if request.max_tokens is not None
+             else 8192,
+         }
+         if system_instruction:
+             config_kwargs["system_instruction"] = system_instruction
          if request.stop:
              config_kwargs["stop_sequences"] = request.stop
          if request.response_format:
-             # Vertex AI uses response_mime_type and response_schema
              config_kwargs["response_mime_type"] = "application/json"
              if "schema" in request.response_format:
                  config_kwargs["response_schema"] = self._clean_json_schema(
                      request.response_format["schema"]
                  )

-         # Build config object
-         config = (
-             genai.types.GenerateContentConfig(**config_kwargs)
-             if config_kwargs
-             else None
-         )
+         config = genai.types.GenerateContentConfig(**config_kwargs)

-         # Add tools to config if present
          if request.tools:
-             if config is None:
-                 config = genai.types.GenerateContentConfig()
-             config.tools = self._convert_tools(request.tools)
-
-         # Add system instruction to config if present
-         if system_instruction:
-             if config is None:
-                 config = genai.types.GenerateContentConfig()
-             config.system_instruction = system_instruction
-
-         response = await self.client.aio.models.generate_content(
-             model=self._model_name,
-             contents=messages,
-             config=config,
-         )
-         # Extract content
-         content = None
-         if response.text:
-             content = response.text
-
-         # Extract tool calls
-         tool_calls = None
-         if response.candidates and response.candidates[0].content.parts:
-             function_calls = []
-             for part in response.candidates[0].content.parts:
-                 if not hasattr(part, "function_call") or not part.function_call:
-                     continue
-                 fc = part.function_call
-                 args_dict = dict(fc.args) if fc.args else {}
-                 function_calls.append(
-                     ToolCall(
-                         id=fc.name,
-                         type="function",
-                         function=FunctionCall(
-                             name=fc.name,
-                             arguments=json.dumps(args_dict),
-                         ),
+             function_declarations: list[FunctionDeclaration] = []
+             for t in request.tools:
+                 schema_obj = self._clean_json_schema(t.function.parameters or {})
+                 function_declarations.append(
+                     FunctionDeclaration(
+                         name=t.function.name,
+                         description=t.function.description or "",
+                         parameters=schema_obj,
                      )
                  )
-             if function_calls:
-                 tool_calls = function_calls
-
-         # Extract finish reason
-         finish_reason = None
-         if response.candidates:
-             finish_reason = str(response.candidates[0].finish_reason)
-
-         # Extract usage
-         usage = None
-         if response.usage_metadata:
-             usage = {
-                 "prompt_tokens": response.usage_metadata.prompt_token_count,
-                 "completion_tokens": response.usage_metadata.candidates_token_count,
-                 "total_tokens": response.usage_metadata.total_token_count,
-             }
-
-         return GenerateResponse(
-             content=content,
-             tool_calls=tool_calls,
-             finish_reason=finish_reason,
-             usage=usage,
-         )
+             gen_tools = [GeminiTool(function_declarations=function_declarations)]
+             config.tools = gen_tools
+
+         try:
+             response = await self.client.aio.models.generate_content(
+                 model=self._model_name,
+                 contents=contents,
+                 config=config,
+             )
+             text, tool_calls = self._parse_response(response)
+
+             # If no text and no tool calls, check for errors in response
+             if not text and not tool_calls:
+                 try:
+                     # Check for blocking reasons or errors
+                     if hasattr(response, "candidates") and response.candidates:
+                         cand = response.candidates[0]
+                         if hasattr(cand, "finish_reason") and cand.finish_reason:
+                             finish_reason = cand.finish_reason
+                             if finish_reason not in ("STOP", None):
+                                 error_msg = (
+                                     f"Model finished with reason: {finish_reason}"
+                                 )
+                                 return GenerateResponse(content=f"Warning: {error_msg}")
+                     # Check for safety ratings that might block content
+                     if hasattr(response, "candidates") and response.candidates:
+                         cand = response.candidates[0]
+                         if hasattr(cand, "safety_ratings"):
+                             blocked = any(
+                                 getattr(r, "blocked", False)
+                                 for r in getattr(cand, "safety_ratings", [])
+                             )
+                             if blocked:
+                                 error_msg = "Response was blocked by safety filters"
+                                 return GenerateResponse(content=f"Warning: {error_msg}")
+                 except Exception:
+                     pass  # If we can't check, just return empty
+
+             # Extract finish reason
+             finish_reason = None
+             if response.candidates:
+                 finish_reason = str(response.candidates[0].finish_reason)
+
+             # Extract usage
+             usage = None
+             if response.usage_metadata:
+                 usage = {
+                     "prompt_tokens": response.usage_metadata.prompt_token_count,
+                     "completion_tokens": response.usage_metadata.candidates_token_count,
+                     "total_tokens": response.usage_metadata.total_token_count,
+                 }
+
+             return GenerateResponse(
+                 content=text,
+                 tool_calls=tool_calls,
+                 finish_reason=finish_reason,
+                 usage=usage,
+             )
+         except Exception as e:
+             error_msg = str(e)
+             # Return error message instead of empty response
+             return GenerateResponse(content=f"Error: {error_msg}")

      async def generate_stream(
          self, request: GenerateRequest
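Note the changed failure contract in the hunk above: the rewritten `generate` no longer raises on SDK errors, safety blocks, or non-STOP finish reasons; it folds them into `content` as `"Error: ..."` or `"Warning: ..."` strings. A hedged usage sketch (constructor arguments beyond `project_id` are elided, and `Message` is an assumed model name):

```python
import asyncio

async def main() -> None:
    model = VertexAIModel(project_id="my-project")  # remaining kwargs assumed
    request = GenerateRequest(messages=[Message(role="user", content="Hi")])
    resp = await model.generate(request)
    if resp.content and resp.content.startswith(("Error:", "Warning:")):
        # 0.1.3 reports failures in-band rather than raising
        raise RuntimeError(resp.content)
    if resp.tool_calls:
        print([tc.function.name for tc in resp.tool_calls])
    else:
        print(resp.content)

asyncio.run(main())
```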
@@ -298,22 +512,89 @@ class VertexAIModel(LLMModelAbstract):
          """Generate a streaming response using Vertex AI."""
          await self.validate_request(request)

-         # Separate system message from conversation
-         system_instruction = None
-         messages = []
-         for msg in request.messages:
-             if msg.role == "system":
-                 system_instruction = msg.content if isinstance(msg.content, str) else ""
+         def _safe_text(text: str) -> str:
+             try:
+                 return text.encode("utf-8", errors="replace").decode(
+                     "utf-8", errors="replace"
+                 )
+             except Exception:
+                 return ""
+
+         contents: list[Content] = []
+         system_instruction = ""
+
+         # Convert messages to genai format (same logic as generate())
+         i = 0
+         while i < len(request.messages):
+             m = request.messages[i]
+
+             if m.role == "tool":
+                 # Collect all consecutive tool messages
+                 tool_parts = []
+                 while i < len(request.messages) and request.messages[i].role == "tool":
+                     tool_msg = request.messages[i]
+                     content_str = (
+                         tool_msg.content
+                         if isinstance(tool_msg.content, str)
+                         else str(tool_msg.content)
+                     )
+                     part = Part.from_function_response(
+                         name=getattr(tool_msg, "name", "") or "",
+                         response={"result": _safe_text(content_str)},
+                     )
+                     tool_parts.append(part)
+                     i += 1
+                 if tool_parts:
+                     contents.append(Content(role="function", parts=tool_parts))
+                 continue
+             elif m.role == "system":
+                 content_str = (
+                     m.content if isinstance(m.content, str) else str(m.content)
+                 )
+                 system_instruction += _safe_text(content_str).strip()
+                 i += 1
+             elif m.role == "assistant":
+                 if hasattr(m, "tool_calls") and m.tool_calls:
+                     parts_list = []
+                     for tc in m.tool_calls:
+                         if not tc.function.name:
+                             continue
+                         args = (
+                             json.loads(tc.function.arguments)
+                             if isinstance(tc.function.arguments, str)
+                             else tc.function.arguments
+                         )
+                         if not isinstance(args, dict):
+                             args = {}
+                         part = Part.from_function_call(name=tc.function.name, args=args)
+                         parts_list.append(part)
+                     if parts_list:
+                         contents.append(Content(role="model", parts=parts_list))
+                 else:
+                     content_str = (
+                         m.content if isinstance(m.content, str) else str(m.content)
+                     )
+                     if content_str:
+                         part = Part(text=_safe_text(content_str))
+                         contents.append(Content(role="model", parts=[part]))
+                 i += 1
              else:
-                 messages.append(self._convert_message(msg))
-
-         config_kwargs = {}
-         if request.temperature is not None:
-             config_kwargs["temperature"] = request.temperature
-         if request.max_tokens is not None:
-             config_kwargs["max_output_tokens"] = request.max_tokens
-         if request.top_p is not None:
-             config_kwargs["top_p"] = request.top_p
+                 # User message - use _convert_message to handle multimodal content
+                 user_content = self._convert_message(m)
+                 contents.append(user_content)
+                 i += 1
+
+         config_kwargs: dict[str, Any] = {
+             "temperature": request.temperature
+             if request.temperature is not None
+             else 0.2,
+             "top_p": request.top_p if request.top_p is not None else 0.95,
+             "max_output_tokens": request.max_tokens
+             if request.max_tokens is not None
+             else 8192,
+         }
+         if system_instruction:
+             config_kwargs["system_instruction"] = system_instruction
          if request.stop:
              config_kwargs["stop_sequences"] = request.stop
          if request.response_format:
@@ -322,70 +603,49 @@ class VertexAIModel(LLMModelAbstract):
                  config_kwargs["response_schema"] = self._clean_json_schema(
                      request.response_format["schema"]
                  )
-
-         # Build config object
-         config = (
-             genai.types.GenerateContentConfig(**config_kwargs)
-             if config_kwargs
-             else None
+         config_kwargs["automatic_function_calling"] = (
+             genai.types.AutomaticFunctionCallingConfig(maximum_remote_calls=100)
          )

-         # Add tools to config if present
+         config = genai.types.GenerateContentConfig(**config_kwargs)
+
          if request.tools:
-             if config is None:
-                 config = genai.types.GenerateContentConfig()
-             config.tools = self._convert_tools(request.tools)
+             function_declarations: list[FunctionDeclaration] = []
+             for t in request.tools:
+                 schema_obj = self._clean_json_schema(t.function.parameters or {})
+                 function_declarations.append(
+                     FunctionDeclaration(
+                         name=t.function.name,
+                         description=t.function.description or "",
+                         parameters=schema_obj,
+                     )
+                 )
+             gen_tools = [GeminiTool(function_declarations=function_declarations)]
+             config.tools = gen_tools
+
+         try:
+             # Use generate_content_stream for streaming
+             stream = await self.client.aio.models.generate_content_stream(
+                 model=self._model_name,
+                 contents=contents,
+                 config=config,
+             )

-         # Add system instruction to config if present
-         if system_instruction:
-             if config is None:
-                 config = genai.types.GenerateContentConfig()
-             config.system_instruction = system_instruction
-
-         model_name = self._model_name
-         stream = await self.client.aio.models.generate_content_stream(
-             model=model_name,
-             contents=messages,
-             config=config,
-         )
+             async for chunk in stream:
+                 text, tool_calls = self._parse_response(chunk)

-         async for chunk in stream:
-             content = None
-             if chunk.text:
-                 content = chunk.text
-
-             # Extract tool calls from chunk
-             tool_calls = None
-             if chunk.candidates and chunk.candidates[0].content.parts:
-                 function_calls = []
-                 for part in chunk.candidates[0].content.parts:
-                     if not hasattr(part, "function_call") or not part.function_call:
-                         continue
-                     fc = part.function_call
-                     args_dict = dict(fc.args) if fc.args else {}
-                     function_calls.append(
-                         ToolCall(
-                             id=fc.name,
-                             type="function",
-                             function=FunctionCall(
-                                 name=fc.name,
-                                 arguments=json.dumps(args_dict),
-                             ),
-                         )
-                     )
-                 if function_calls:
-                     tool_calls = function_calls
+                 # Yield text chunks as they come
+                 if text:
+                     yield StreamChunk(content=text, tool_calls=None)

-             finish_reason = None
-             if chunk.candidates:
-                 finish_reason = str(chunk.candidates[0].finish_reason)
-
-             if content or tool_calls or finish_reason:
-                 yield StreamChunk(
-                     content=content,
-                     tool_calls=tool_calls,
-                     finish_reason=finish_reason,
-                 )
+                 # Tool calls come in final chunk - yield them separately
+                 if tool_calls:
+                     yield StreamChunk(content=None, tool_calls=tool_calls)
+
+         except Exception as e:
+             error_msg = str(e)
+             # Yield error message instead of empty response
+             yield StreamChunk(content=f"Error: {error_msg}")


  class VertexEmbeddingModel(LLMModelAbstract):
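Streaming contract after the hunk above: text arrives as content-only chunks as it streams in, tool calls arrive in a separate chunk with `content=None` (per the code comment, typically the final one), and `finish_reason` is no longer set on chunks. A consumption sketch under those assumptions:

```python
async def run_stream(model: VertexAIModel, request: GenerateRequest) -> None:
    pending = []
    async for chunk in model.generate_stream(request):
        if chunk.content:
            print(chunk.content, end="", flush=True)  # incremental text
        if chunk.tool_calls:
            pending.extend(chunk.tool_calls)          # usually the last chunk
    for call in pending:
        print(f"\ntool requested: {call.function.name}({call.function.arguments})")
```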
@@ -428,6 +688,10 @@ class VertexEmbeddingModel(LLMModelAbstract):
      def model_name(self) -> str:
          return self._model_name

+     @model_name.setter
+     def model_name(self, model_name: str) -> None:
+         self._model_name = model_name
+
      @property
      def capabilities(self) -> ModelCapability:
          return ModelCapability.EMBEDDINGS
@@ -475,4 +739,8 @@ class VertexEmbeddingModel(LLMModelAbstract):
          return EmbeddingResponse(
              embeddings=all_embeddings,
              usage=None,
+             metadata={
+                 "dimensions": len(all_embeddings[0]) if len(all_embeddings) > 0 else 0,
+                 "batch_size": self._batch_size,
+             },
          )
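The embedding response now carries a `metadata` dict with the vector width and the configured batch size. A reading sketch; the `embed` entry-point name is an assumption, since this diff only shows the response construction:

```python
async def report(model: VertexEmbeddingModel, texts: list[str]) -> None:
    resp = await model.embed(texts)  # assumed method name; not shown in this diff
    print(
        f"{len(resp.embeddings)} vectors, "
        f"{resp.metadata['dimensions']} dims, "
        f"batch_size={resp.metadata['batch_size']}"
    )
```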