PyPI - julee - Versions diffs - 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl - Mend

julee 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

julee/domain/use_cases/tests/test_pointable_json_schema.py ADDED Viewed

@@ -0,0 +1,451 @@
+"""
+Unit tests for PointableJSONSchema utility class.
+These tests verify that the PointableJSONSchema class correctly generates
+standalone schemas from JSON pointer targets while preserving important
+root metadata needed for proper JSON Schema validation.
+"""
+import pytest
+from julee.domain.use_cases.pointable_json_schema import PointableJSONSchema
+class TestPointableJSONSchema:
+    """Test cases for PointableJSONSchema class."""
+    def test_simple_property_extraction(self) -> None:
+        """Test extracting a simple property schema."""
+        root_schema = {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+                "count": {"type": "integer"},
+            },
+            "required": ["title"],
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/title")
+        expected = {
+            "type": "object",
+            "properties": {"title": {"type": "string"}},
+            "required": ["title"],
+            "additionalProperties": False,
+        }
+        assert result == expected
+    def test_complex_property_extraction(self) -> None:
+        """Test extracting a complex property schema."""
+        root_schema = {
+            "type": "object",
+            "properties": {
+                "user": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                    "required": ["name"],
+                },
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/user")
+        expected = {
+            "type": "object",
+            "properties": {
+                "user": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "age": {"type": "integer"},
+                    },
+                    "required": ["name"],
+                }
+            },
+            "required": ["user"],
+            "additionalProperties": False,
+        }
+        assert result == expected
+    def test_primitive_value_wrapping(self) -> None:
+        """Test that primitive values are used directly with proper property name."""
+        root_schema = {
+            "type": "object",
+            "properties": {
+                "title": "some string value",  # Not a proper schema
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/title")
+        expected = {
+            "type": "object",
+            "properties": {"title": "some string value"},
+            "required": ["title"],
+            "additionalProperties": False,
+        }
+        assert result == expected
+    def test_preserves_schema_metadata(self) -> None:
+        """Test that important root metadata is preserved."""
+        root_schema = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "$id": "https://example.com/schema.json",
+            "title": "Test Schema",
+            "description": "A test schema for validation",
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/name")
+        expected = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "$id": "https://example.com/schema.json",
+            "title": "Test Schema - /properties/name",
+            "description": "A test schema for validation",
+            "type": "object",
+            "properties": {"name": {"type": "string"}},
+            "required": ["name"],
+            "additionalProperties": False,
+        }
+        assert result == expected
+    def test_preserves_definitions(self) -> None:
+        """Test that definitions are preserved for $ref resolution."""
+        root_schema = {
+            "type": "object",
+            "definitions": {
+                "timestamp": {"type": "string", "format": "date-time"},
+                "person": {
+                    "type": "object",
+                    "properties": {"name": {"type": "string"}},
+                },
+            },
+            "properties": {
+                "created_at": {"$ref": "#/definitions/timestamp"},
+                "author": {"$ref": "#/definitions/person"},
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/created_at")
+        expected = {
+            "type": "object",
+            "additionalProperties": False,
+            "definitions": {
+                "timestamp": {"type": "string", "format": "date-time"},
+                "person": {
+                    "type": "object",
+                    "properties": {"name": {"type": "string"}},
+                },
+            },
+            "properties": {"created_at": {"$ref": "#/definitions/timestamp"}},
+            "required": ["created_at"],
+        }
+        assert result == expected
+    def test_preserves_defs(self) -> None:
+        """Test that $defs (newer JSON Schema) are preserved."""
+        root_schema = {
+            "type": "object",
+            "$defs": {
+                "timestamp": {"type": "string", "format": "date-time"},
+            },
+            "properties": {
+                "created_at": {"$ref": "#/$defs/timestamp"},
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/created_at")
+        expected = {
+            "type": "object",
+            "additionalProperties": False,
+            "$defs": {
+                "timestamp": {"type": "string", "format": "date-time"},
+            },
+            "properties": {"created_at": {"$ref": "#/$defs/timestamp"}},
+            "required": ["created_at"],
+        }
+        assert result == expected
+    def test_empty_pointer_returns_root_schema(self) -> None:
+        """Test that empty pointer returns the complete root schema."""
+        root_schema = {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("")
+        assert result == root_schema
+    def test_nested_pointer_extraction(self) -> None:
+        """Test extracting deeply nested properties."""
+        root_schema = {
+            "type": "object",
+            "properties": {
+                "user": {
+                    "type": "object",
+                    "properties": {
+                        "profile": {
+                            "type": "object",
+                            "properties": {
+                                "email": {"type": "string", "format": "email"},
+                            },
+                        },
+                    },
+                },
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/user/properties/profile")
+        expected = {
+            "type": "object",
+            "properties": {
+                "profile": {
+                    "type": "object",
+                    "properties": {
+                        "email": {"type": "string", "format": "email"},
+                    },
+                }
+            },
+            "required": ["profile"],
+            "additionalProperties": False,
+        }
+        assert result == expected
+    def test_invalid_pointer_raises_error(self) -> None:
+        """Test that invalid JSON pointers raise ValueError."""
+        root_schema = {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        with pytest.raises(ValueError, match="Invalid JSON pointer"):
+            pointable.schema_for_pointer("/properties/nonexistent")
+    def test_malformed_pointer_raises_error(self) -> None:
+        """Test that malformed JSON pointers raise ValueError."""
+        root_schema = {
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        with pytest.raises(ValueError, match="Invalid JSON pointer"):
+            pointable.schema_for_pointer("not/a/valid/pointer")
+    def test_array_items_extraction(self) -> None:
+        """Test extracting array item schemas."""
+        root_schema = {
+            "type": "object",
+            "properties": {
+                "tags": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                },
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/tags/items")
+        expected = {
+            "type": "object",
+            "properties": {"items": {"type": "string"}},
+            "required": ["items"],
+            "additionalProperties": False,
+        }
+        assert result == expected
+    def test_preserves_all_metadata(self) -> None:
+        """Test that all root metadata is preserved."""
+        root_schema = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "$id": "https://example.com/schema.json",
+            "title": "Test Schema",
+            "description": "A test schema",
+            "version": "1.0.0",  # Non-standard key; should still be preserved
+            "custom_field": "value",  # Non-standard key; should still be preserved
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/name")
+        # Should preserve all root metadata
+        expected = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "$id": "https://example.com/schema.json",
+            "title": "Test Schema - /properties/name",
+            "description": "A test schema",
+            "version": "1.0.0",
+            "custom_field": "value",
+            "type": "object",
+            "properties": {"name": {"type": "string"}},
+            "required": ["name"],
+            "additionalProperties": False,
+        }
+        assert result == expected
+    def test_handles_schema_without_metadata(self) -> None:
+        """Test schemas that don't have any root metadata."""
+        root_schema = {
+            "type": "object",
+            "properties": {
+                "count": {"type": "integer"},
+            },
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/count")
+        expected = {
+            "type": "object",
+            "properties": {"count": {"type": "integer"}},
+            "required": ["count"],
+            "additionalProperties": False,
+        }
+        assert result == expected
+    def test_properties_pointer_extraction(self) -> None:
+        """Test extracting the entire properties object - this reveals the double-wrapping issue."""
+        root_schema = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "properties": {
+                "type": ["DigitalProductPassport", "VerifiableCredential"],
+                "@context": [
+                    "https://www.w3.org/ns/credentials/v2",
+                    "https://test.uncefact.org/vocabulary/untp/dpp/0.6.0/",
+                ],
+                "id": "https://bondor.com.au/credentials/bondorpanel-dpp-2024",
+                "issuer": {
+                    "type": "object",
+                    "properties": {
+                        "id": {"type": "string"},
+                        "name": {"type": "string"},
+                    },
+                },
+            },
+            "required": ["type", "@context", "id", "issuer"],
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties")
+        # This should return a schema that validates the properties DIRECTLY,
+        # NOT wrapped in another "properties" object
+        expected = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "type": "object",
+            "additionalProperties": False,
+            "properties": {
+                "type": ["DigitalProductPassport", "VerifiableCredential"],
+                "@context": [
+                    "https://www.w3.org/ns/credentials/v2",
+                    "https://test.uncefact.org/vocabulary/untp/dpp/0.6.0/",
+                ],
+                "id": "https://bondor.com.au/credentials/bondorpanel-dpp-2024",
+                "issuer": {
+                    "type": "object",
+                    "properties": {
+                        "id": {"type": "string"},
+                        "name": {"type": "string"},
+                    },
+                },
+            },
+            "required": ["type", "@context", "id", "issuer"],
+        }
+        assert result == expected
+    def test_complex_schema_with_all_features(self) -> None:
+        """Test a complex schema with multiple features."""
+        root_schema = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "$id": "https://example.com/assembly-spec.json",
+            "title": "Production Assembly Specification",
+            "description": "Schema for production data assembly",
+            "definitions": {
+                "timestamp": {"type": "string", "format": "date-time"},
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "id": {"type": "string"},
+                    },
+                    "required": ["name", "id"],
+                },
+            },
+            "type": "object",
+            "properties": {
+                "title": {"type": "string"},
+                "created_at": {"$ref": "#/definitions/timestamp"},
+                "author": {"$ref": "#/definitions/person"},
+                "metadata": {
+                    "type": "object",
+                    "properties": {
+                        "version": {"type": "string"},
+                        "tags": {
+                            "type": "array",
+                            "items": {"type": "string"},
+                        },
+                    },
+                },
+            },
+            "required": ["title", "created_at", "author"],
+        }
+        pointable = PointableJSONSchema(root_schema)
+        result = pointable.schema_for_pointer("/properties/author")
+        expected = {
+            "$schema": "http://json-schema.org/draft-07/schema#",
+            "$id": "https://example.com/assembly-spec.json",
+            "title": "Production Assembly Specification - /properties/author",
+            "description": "Schema for production data assembly",
+            "definitions": {
+                "timestamp": {"type": "string", "format": "date-time"},
+                "person": {
+                    "type": "object",
+                    "properties": {
+                        "name": {"type": "string"},
+                        "id": {"type": "string"},
+                    },
+                    "required": ["name", "id"],
+                },
+            },
+            "type": "object",
+            "additionalProperties": False,
+            "properties": {"author": {"$ref": "#/definitions/person"}},
+            "required": ["author"],
+        }
+        assert result == expected

julee/domain/use_cases/validate_document.py CHANGED Viewed

@@ -507,6 +507,7 @@ class ValidateDocumentUseCase:
             query_result = await self.knowledge_service.execute_query(
                 config,
                 query.prompt,
+                None,  # output_schema
                 [service_file_id],
                 query.query_metadata,
                 query.assistant_prompt,
@@ -654,6 +655,7 @@ class ValidateDocumentUseCase:
             transformation_result = await self.knowledge_service.execute_query(
                 config,
                 query.prompt,
+                None,  # output_schema
                 [service_file_id],
                 query.query_metadata,
                 query.assistant_prompt,

julee/fixtures/knowledge_service_queries.yaml CHANGED Viewed

@@ -3,7 +3,7 @@ knowledge_service_queries:
     name: "Generate Digital Product Passport"
     knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
     prompt: "From this product specification sheet, extract the product information to generate a Digital Product Passport, that conforms to the provided schema, including the issuer, the credential subject and the validation dates. Please make sure that the DPP conforms to the provided schema and types and that you don't add any other fields."
-    assistant_prompt: "Looking at the product specification sheet, here's the digital product passport that conforms to the provided schema, without surrounding ```json ... ``` markers:"
+    assistant_prompt: "{"
     query_metadata:
       max_tokens: 3000
       temperature: 0.1
@@ -12,7 +12,7 @@ knowledge_service_queries:
     name: "Extract Meeting Information"
     knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
     prompt: "Extract the basic meeting information from this transcript including title, date, times, and attendees with their roles."
-    assistant_prompt: "Looking at the meeting transcript, here's the extracted meeting information that conforms to the provided schema, without surrounding ```json ... ``` markers:"
+    assistant_prompt: "{"
     query_metadata:
       max_tokens: 1000
       temperature: 0.1
@@ -21,7 +21,7 @@ knowledge_service_queries:
     name: "Extract Agenda Items"
     knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
     prompt: "Analyze the meeting transcript and extract the main agenda items discussed, including the topic, key discussion points, and any decisions made for each item."
-    assistant_prompt: "Analyzing the meeting transcript, here are the agenda items with discussion points and decisions that conform to the provided schema, without surrounding ```json ... ``` markers:"
+    assistant_prompt: "{"
     query_metadata:
       max_tokens: 2000
       temperature: 0.1
@@ -30,7 +30,7 @@ knowledge_service_queries:
     name: "Extract Action Items"
     knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
     prompt: "Identify and extract action items from the meeting transcript, including the specific task, who it's assigned to, any mentioned due dates, and the priority level."
-    assistant_prompt: "From the meeting transcript, here are the identified action items formatted according to the provided schema, without surrounding ```json ... ``` markers:"
+    assistant_prompt: "{"
     query_metadata:
       max_tokens: 1500
       temperature: 0.1

julee/services/knowledge_service/anthropic/knowledge_service.py CHANGED Viewed

@@ -10,6 +10,7 @@ Requirements:
     - ANTHROPIC_API_KEY environment variable must be set
 """
+import json
 import logging
 import os
 import time
@@ -33,7 +34,7 @@ from ..knowledge_service import (
 logger = logging.getLogger(__name__)
 # Default configuration constants
-DEFAULT_MODEL = "claude-sonnet-4-20250514"
+DEFAULT_MODEL = "claude-sonnet-4-5"
 DEFAULT_MAX_TOKENS = 4000
@@ -172,6 +173,7 @@ class AnthropicKnowledgeService(KnowledgeService):
         self,
         config: KnowledgeServiceConfig,
         query_text: str,
+        output_schema: dict[str, Any] | None = None,
         service_file_ids: list[str] | None = None,
         query_metadata: dict[str, Any] | None = None,
         assistant_prompt: str | None = None,
@@ -181,12 +183,13 @@ class AnthropicKnowledgeService(KnowledgeService):
         Args:
             config: KnowledgeServiceConfig for this operation
             query_text: The query to execute
+            output_schema: Optional JSON schema for inclusion in prompt (not used for structured outputs)
             service_file_ids: Optional list of Anthropic file IDs to provide
                              as context for the query
             query_metadata: Optional Anthropic-specific configuration such as
                            model, temperature, max_tokens, etc.
             assistant_prompt: Optional assistant message content to constrain
-                             or prime the model's response
+                             or prime the model's response.
         Returns:
             QueryResult with Anthropic query results
@@ -227,8 +230,22 @@ class AnthropicKnowledgeService(KnowledgeService):
                         }
                     )
+            # Handle schema embedding if provided
+            if output_schema:
+                # Build query with embedded schema
+                schema_json = json.dumps(output_schema, indent=2)
+                enhanced_query_text = f"""{query_text}
+Please structure your response according to this JSON schema:
+{schema_json}
+Return only valid JSON that conforms to this schema, without any surrounding
+text or markdown formatting."""
+            else:
+                enhanced_query_text = query_text
             # Add the text query
-            content_parts.append({"type": "text", "text": query_text})
+            content_parts.append({"type": "text", "text": enhanced_query_text})
             # Prepare messages for the API
             messages = [{"role": "user", "content": content_parts}]
@@ -255,7 +272,7 @@ class AnthropicKnowledgeService(KnowledgeService):
             # Validate response has exactly one content block of type 'text'
             if len(response.content) != 1:
                 raise ValueError(
-                    f"Expected exactly 1 content block, got " f"{len(response.content)}"
+                    f"Expected exactly 1 content block, got {len(response.content)}"
                 )
             content_block = response.content[0]
@@ -280,9 +297,39 @@ class AnthropicKnowledgeService(KnowledgeService):
                 },
             )
-            # Structure the result with single text content
+            # Handle JSON parsing if schema was provided
+            if output_schema:
+                # Determine the text to parse
+                if assistant_prompt and assistant_prompt.strip().startswith("{"):
+                    # Concatenate assistant prompt with response for JSON parsing
+                    json_text_to_parse = assistant_prompt + response_text
+                else:
+                    json_text_to_parse = response_text
+                try:
+                    response_value = json.loads(json_text_to_parse.strip())
+                except json.JSONDecodeError as e:
+                    logger.error(
+                        f"Failed to parse JSON response when output schema was provided. "
+                        f"JSON text to parse: {json_text_to_parse[:500]}... "
+                        f"Parse error: {str(e)}",
+                        extra={
+                            "knowledge_service_id": config.knowledge_service_id,
+                            "query_id": query_id,
+                            "assistant_prompt": assistant_prompt,
+                            "response_text_preview": response_text[:100],
+                        },
+                    )
+                    raise ValueError(
+                        f"Expected valid JSON response when output schema provided, "
+                        f"but failed to parse: {str(e)}"
+                    )
+            else:
+                response_value = response_text
+            # Structure the result with parsed or text content
             result_data = {
-                "response": response_text,
+                "response": response_value,
                 "model": model,
                 "service": "anthropic",
                 "sources": service_file_ids or [],

julee 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

julee 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl