julee 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,451 @@
1
+ """
2
+ Unit tests for PointableJSONSchema utility class.
3
+
4
+ These tests verify that the PointableJSONSchema class correctly generates
5
+ standalone schemas from JSON pointer targets while preserving important
6
+ root metadata needed for proper JSON Schema validation.
7
+ """
8
+
9
+ import pytest
10
+
11
+ from julee.domain.use_cases.pointable_json_schema import PointableJSONSchema
12
+
13
+
14
+ class TestPointableJSONSchema:
15
+ """Test cases for PointableJSONSchema class."""
16
+
17
+ def test_simple_property_extraction(self) -> None:
18
+ """Test extracting a simple property schema."""
19
+ root_schema = {
20
+ "type": "object",
21
+ "properties": {
22
+ "title": {"type": "string"},
23
+ "count": {"type": "integer"},
24
+ },
25
+ "required": ["title"],
26
+ }
27
+
28
+ pointable = PointableJSONSchema(root_schema)
29
+ result = pointable.schema_for_pointer("/properties/title")
30
+
31
+ expected = {
32
+ "type": "object",
33
+ "properties": {"title": {"type": "string"}},
34
+ "required": ["title"],
35
+ "additionalProperties": False,
36
+ }
37
+ assert result == expected
38
+
39
+ def test_complex_property_extraction(self) -> None:
40
+ """Test extracting a complex property schema."""
41
+ root_schema = {
42
+ "type": "object",
43
+ "properties": {
44
+ "user": {
45
+ "type": "object",
46
+ "properties": {
47
+ "name": {"type": "string"},
48
+ "age": {"type": "integer"},
49
+ },
50
+ "required": ["name"],
51
+ },
52
+ },
53
+ }
54
+
55
+ pointable = PointableJSONSchema(root_schema)
56
+ result = pointable.schema_for_pointer("/properties/user")
57
+
58
+ expected = {
59
+ "type": "object",
60
+ "properties": {
61
+ "user": {
62
+ "type": "object",
63
+ "properties": {
64
+ "name": {"type": "string"},
65
+ "age": {"type": "integer"},
66
+ },
67
+ "required": ["name"],
68
+ }
69
+ },
70
+ "required": ["user"],
71
+ "additionalProperties": False,
72
+ }
73
+ assert result == expected
74
+
75
+ def test_primitive_value_wrapping(self) -> None:
76
+ """Test that primitive values are used directly with proper property name."""
77
+ root_schema = {
78
+ "type": "object",
79
+ "properties": {
80
+ "title": "some string value", # Not a proper schema
81
+ },
82
+ }
83
+
84
+ pointable = PointableJSONSchema(root_schema)
85
+ result = pointable.schema_for_pointer("/properties/title")
86
+
87
+ expected = {
88
+ "type": "object",
89
+ "properties": {"title": "some string value"},
90
+ "required": ["title"],
91
+ "additionalProperties": False,
92
+ }
93
+ assert result == expected
94
+
95
+ def test_preserves_schema_metadata(self) -> None:
96
+ """Test that important root metadata is preserved."""
97
+ root_schema = {
98
+ "$schema": "http://json-schema.org/draft-07/schema#",
99
+ "$id": "https://example.com/schema.json",
100
+ "title": "Test Schema",
101
+ "description": "A test schema for validation",
102
+ "type": "object",
103
+ "properties": {
104
+ "name": {"type": "string"},
105
+ },
106
+ }
107
+
108
+ pointable = PointableJSONSchema(root_schema)
109
+ result = pointable.schema_for_pointer("/properties/name")
110
+
111
+ expected = {
112
+ "$schema": "http://json-schema.org/draft-07/schema#",
113
+ "$id": "https://example.com/schema.json",
114
+ "title": "Test Schema - /properties/name",
115
+ "description": "A test schema for validation",
116
+ "type": "object",
117
+ "properties": {"name": {"type": "string"}},
118
+ "required": ["name"],
119
+ "additionalProperties": False,
120
+ }
121
+ assert result == expected
122
+
123
+ def test_preserves_definitions(self) -> None:
124
+ """Test that definitions are preserved for $ref resolution."""
125
+ root_schema = {
126
+ "type": "object",
127
+ "definitions": {
128
+ "timestamp": {"type": "string", "format": "date-time"},
129
+ "person": {
130
+ "type": "object",
131
+ "properties": {"name": {"type": "string"}},
132
+ },
133
+ },
134
+ "properties": {
135
+ "created_at": {"$ref": "#/definitions/timestamp"},
136
+ "author": {"$ref": "#/definitions/person"},
137
+ },
138
+ }
139
+
140
+ pointable = PointableJSONSchema(root_schema)
141
+ result = pointable.schema_for_pointer("/properties/created_at")
142
+
143
+ expected = {
144
+ "type": "object",
145
+ "additionalProperties": False,
146
+ "definitions": {
147
+ "timestamp": {"type": "string", "format": "date-time"},
148
+ "person": {
149
+ "type": "object",
150
+ "properties": {"name": {"type": "string"}},
151
+ },
152
+ },
153
+ "properties": {"created_at": {"$ref": "#/definitions/timestamp"}},
154
+ "required": ["created_at"],
155
+ }
156
+ assert result == expected
157
+
158
+ def test_preserves_defs(self) -> None:
159
+ """Test that $defs (newer JSON Schema) are preserved."""
160
+ root_schema = {
161
+ "type": "object",
162
+ "$defs": {
163
+ "timestamp": {"type": "string", "format": "date-time"},
164
+ },
165
+ "properties": {
166
+ "created_at": {"$ref": "#/$defs/timestamp"},
167
+ },
168
+ }
169
+
170
+ pointable = PointableJSONSchema(root_schema)
171
+ result = pointable.schema_for_pointer("/properties/created_at")
172
+
173
+ expected = {
174
+ "type": "object",
175
+ "additionalProperties": False,
176
+ "$defs": {
177
+ "timestamp": {"type": "string", "format": "date-time"},
178
+ },
179
+ "properties": {"created_at": {"$ref": "#/$defs/timestamp"}},
180
+ "required": ["created_at"],
181
+ }
182
+ assert result == expected
183
+
184
+ def test_empty_pointer_returns_root_schema(self) -> None:
185
+ """Test that empty pointer returns the complete root schema."""
186
+ root_schema = {
187
+ "type": "object",
188
+ "properties": {
189
+ "title": {"type": "string"},
190
+ },
191
+ }
192
+
193
+ pointable = PointableJSONSchema(root_schema)
194
+ result = pointable.schema_for_pointer("")
195
+
196
+ assert result == root_schema
197
+
198
+ def test_nested_pointer_extraction(self) -> None:
199
+ """Test extracting deeply nested properties."""
200
+ root_schema = {
201
+ "type": "object",
202
+ "properties": {
203
+ "user": {
204
+ "type": "object",
205
+ "properties": {
206
+ "profile": {
207
+ "type": "object",
208
+ "properties": {
209
+ "email": {"type": "string", "format": "email"},
210
+ },
211
+ },
212
+ },
213
+ },
214
+ },
215
+ }
216
+
217
+ pointable = PointableJSONSchema(root_schema)
218
+ result = pointable.schema_for_pointer("/properties/user/properties/profile")
219
+
220
+ expected = {
221
+ "type": "object",
222
+ "properties": {
223
+ "profile": {
224
+ "type": "object",
225
+ "properties": {
226
+ "email": {"type": "string", "format": "email"},
227
+ },
228
+ }
229
+ },
230
+ "required": ["profile"],
231
+ "additionalProperties": False,
232
+ }
233
+ assert result == expected
234
+
235
+ def test_invalid_pointer_raises_error(self) -> None:
236
+ """Test that invalid JSON pointers raise ValueError."""
237
+ root_schema = {
238
+ "type": "object",
239
+ "properties": {
240
+ "title": {"type": "string"},
241
+ },
242
+ }
243
+
244
+ pointable = PointableJSONSchema(root_schema)
245
+
246
+ with pytest.raises(ValueError, match="Invalid JSON pointer"):
247
+ pointable.schema_for_pointer("/properties/nonexistent")
248
+
249
+ def test_malformed_pointer_raises_error(self) -> None:
250
+ """Test that malformed JSON pointers raise ValueError."""
251
+ root_schema = {
252
+ "type": "object",
253
+ "properties": {
254
+ "title": {"type": "string"},
255
+ },
256
+ }
257
+
258
+ pointable = PointableJSONSchema(root_schema)
259
+
260
+ with pytest.raises(ValueError, match="Invalid JSON pointer"):
261
+ pointable.schema_for_pointer("not/a/valid/pointer")
262
+
263
+ def test_array_items_extraction(self) -> None:
264
+ """Test extracting array item schemas."""
265
+ root_schema = {
266
+ "type": "object",
267
+ "properties": {
268
+ "tags": {
269
+ "type": "array",
270
+ "items": {"type": "string"},
271
+ },
272
+ },
273
+ }
274
+
275
+ pointable = PointableJSONSchema(root_schema)
276
+ result = pointable.schema_for_pointer("/properties/tags/items")
277
+
278
+ expected = {
279
+ "type": "object",
280
+ "properties": {"items": {"type": "string"}},
281
+ "required": ["items"],
282
+ "additionalProperties": False,
283
+ }
284
+ assert result == expected
285
+
286
+ def test_preserves_all_metadata(self) -> None:
287
+ """Test that all root metadata is preserved."""
288
+ root_schema = {
289
+ "$schema": "http://json-schema.org/draft-07/schema#",
290
+ "$id": "https://example.com/schema.json",
291
+ "title": "Test Schema",
292
+ "description": "A test schema",
293
+ "version": "1.0.0", # Non-standard root key; expected to be preserved
294
+ "custom_field": "value", # Non-standard root key; expected to be preserved
295
+ "type": "object",
296
+ "properties": {
297
+ "name": {"type": "string"},
298
+ },
299
+ }
300
+
301
+ pointable = PointableJSONSchema(root_schema)
302
+ result = pointable.schema_for_pointer("/properties/name")
303
+
304
+ # Should preserve all root metadata
305
+ expected = {
306
+ "$schema": "http://json-schema.org/draft-07/schema#",
307
+ "$id": "https://example.com/schema.json",
308
+ "title": "Test Schema - /properties/name",
309
+ "description": "A test schema",
310
+ "version": "1.0.0",
311
+ "custom_field": "value",
312
+ "type": "object",
313
+ "properties": {"name": {"type": "string"}},
314
+ "required": ["name"],
315
+ "additionalProperties": False,
316
+ }
317
+ assert result == expected
318
+
319
+ def test_handles_schema_without_metadata(self) -> None:
320
+ """Test schemas that don't have any root metadata."""
321
+ root_schema = {
322
+ "type": "object",
323
+ "properties": {
324
+ "count": {"type": "integer"},
325
+ },
326
+ }
327
+
328
+ pointable = PointableJSONSchema(root_schema)
329
+ result = pointable.schema_for_pointer("/properties/count")
330
+
331
+ expected = {
332
+ "type": "object",
333
+ "properties": {"count": {"type": "integer"}},
334
+ "required": ["count"],
335
+ "additionalProperties": False,
336
+ }
337
+ assert result == expected
338
+
339
+ def test_properties_pointer_extraction(self) -> None:
340
+ """Test extracting the entire properties object - this reveals the double-wrapping issue."""
341
+ root_schema = {
342
+ "$schema": "http://json-schema.org/draft-07/schema#",
343
+ "type": "object",
344
+ "properties": {
345
+ "type": ["DigitalProductPassport", "VerifiableCredential"],
346
+ "@context": [
347
+ "https://www.w3.org/ns/credentials/v2",
348
+ "https://test.uncefact.org/vocabulary/untp/dpp/0.6.0/",
349
+ ],
350
+ "id": "https://bondor.com.au/credentials/bondorpanel-dpp-2024",
351
+ "issuer": {
352
+ "type": "object",
353
+ "properties": {
354
+ "id": {"type": "string"},
355
+ "name": {"type": "string"},
356
+ },
357
+ },
358
+ },
359
+ "required": ["type", "@context", "id", "issuer"],
360
+ }
361
+
362
+ pointable = PointableJSONSchema(root_schema)
363
+ result = pointable.schema_for_pointer("/properties")
364
+
365
+ # This should return a schema that validates the properties DIRECTLY,
366
+ # NOT wrapped in another "properties" object
367
+ expected = {
368
+ "$schema": "http://json-schema.org/draft-07/schema#",
369
+ "type": "object",
370
+ "additionalProperties": False,
371
+ "properties": {
372
+ "type": ["DigitalProductPassport", "VerifiableCredential"],
373
+ "@context": [
374
+ "https://www.w3.org/ns/credentials/v2",
375
+ "https://test.uncefact.org/vocabulary/untp/dpp/0.6.0/",
376
+ ],
377
+ "id": "https://bondor.com.au/credentials/bondorpanel-dpp-2024",
378
+ "issuer": {
379
+ "type": "object",
380
+ "properties": {
381
+ "id": {"type": "string"},
382
+ "name": {"type": "string"},
383
+ },
384
+ },
385
+ },
386
+ "required": ["type", "@context", "id", "issuer"],
387
+ }
388
+ assert result == expected
389
+
390
+ def test_complex_schema_with_all_features(self) -> None:
391
+ """Test a complex schema with multiple features."""
392
+ root_schema = {
393
+ "$schema": "http://json-schema.org/draft-07/schema#",
394
+ "$id": "https://example.com/assembly-spec.json",
395
+ "title": "Production Assembly Specification",
396
+ "description": "Schema for production data assembly",
397
+ "definitions": {
398
+ "timestamp": {"type": "string", "format": "date-time"},
399
+ "person": {
400
+ "type": "object",
401
+ "properties": {
402
+ "name": {"type": "string"},
403
+ "id": {"type": "string"},
404
+ },
405
+ "required": ["name", "id"],
406
+ },
407
+ },
408
+ "type": "object",
409
+ "properties": {
410
+ "title": {"type": "string"},
411
+ "created_at": {"$ref": "#/definitions/timestamp"},
412
+ "author": {"$ref": "#/definitions/person"},
413
+ "metadata": {
414
+ "type": "object",
415
+ "properties": {
416
+ "version": {"type": "string"},
417
+ "tags": {
418
+ "type": "array",
419
+ "items": {"type": "string"},
420
+ },
421
+ },
422
+ },
423
+ },
424
+ "required": ["title", "created_at", "author"],
425
+ }
426
+
427
+ pointable = PointableJSONSchema(root_schema)
428
+ result = pointable.schema_for_pointer("/properties/author")
429
+
430
+ expected = {
431
+ "$schema": "http://json-schema.org/draft-07/schema#",
432
+ "$id": "https://example.com/assembly-spec.json",
433
+ "title": "Production Assembly Specification - /properties/author",
434
+ "description": "Schema for production data assembly",
435
+ "definitions": {
436
+ "timestamp": {"type": "string", "format": "date-time"},
437
+ "person": {
438
+ "type": "object",
439
+ "properties": {
440
+ "name": {"type": "string"},
441
+ "id": {"type": "string"},
442
+ },
443
+ "required": ["name", "id"],
444
+ },
445
+ },
446
+ "type": "object",
447
+ "additionalProperties": False,
448
+ "properties": {"author": {"$ref": "#/definitions/person"}},
449
+ "required": ["author"],
450
+ }
451
+ assert result == expected
@@ -507,6 +507,7 @@ class ValidateDocumentUseCase:
507
507
  query_result = await self.knowledge_service.execute_query(
508
508
  config,
509
509
  query.prompt,
510
+ None, # output_schema
510
511
  [service_file_id],
511
512
  query.query_metadata,
512
513
  query.assistant_prompt,
@@ -654,6 +655,7 @@ class ValidateDocumentUseCase:
654
655
  transformation_result = await self.knowledge_service.execute_query(
655
656
  config,
656
657
  query.prompt,
658
+ None, # output_schema
657
659
  [service_file_id],
658
660
  query.query_metadata,
659
661
  query.assistant_prompt,
@@ -3,7 +3,7 @@ knowledge_service_queries:
3
3
  name: "Generate Digital Product Passport"
4
4
  knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
5
5
  prompt: "From this product specification sheet, extract the product information to generate a Digital Product Passport, that conforms to the provided schema, including the issuer, the credential subject and the validation dates. Please make sure that the DPP conforms to the provided schema and types and that you don't add any other fields."
6
- assistant_prompt: "Looking at the product specification sheet, here's the digital product passport that conforms to the provided schema, without surrounding ```json ... ``` markers:"
6
+ assistant_prompt: "{"
7
7
  query_metadata:
8
8
  max_tokens: 3000
9
9
  temperature: 0.1
@@ -12,7 +12,7 @@ knowledge_service_queries:
12
12
  name: "Extract Meeting Information"
13
13
  knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
14
14
  prompt: "Extract the basic meeting information from this transcript including title, date, times, and attendees with their roles."
15
- assistant_prompt: "Looking at the meeting transcript, here's the extracted meeting information that conforms to the provided schema, without surrounding ```json ... ``` markers:"
15
+ assistant_prompt: "{"
16
16
  query_metadata:
17
17
  max_tokens: 1000
18
18
  temperature: 0.1
@@ -21,7 +21,7 @@ knowledge_service_queries:
21
21
  name: "Extract Agenda Items"
22
22
  knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
23
23
  prompt: "Analyze the meeting transcript and extract the main agenda items discussed, including the topic, key discussion points, and any decisions made for each item."
24
- assistant_prompt: "Analyzing the meeting transcript, here are the agenda items with discussion points and decisions that conform to the provided schema, without surrounding ```json ... ``` markers:"
24
+ assistant_prompt: "{"
25
25
  query_metadata:
26
26
  max_tokens: 2000
27
27
  temperature: 0.1
@@ -30,7 +30,7 @@ knowledge_service_queries:
30
30
  name: "Extract Action Items"
31
31
  knowledge_service_id: "anthropic-4.5-as-a-knowledge-service"
32
32
  prompt: "Identify and extract action items from the meeting transcript, including the specific task, who it's assigned to, any mentioned due dates, and the priority level."
33
- assistant_prompt: "From the meeting transcript, here are the identified action items formatted according to the provided schema, without surrounding ```json ... ``` markers:"
33
+ assistant_prompt: "{"
34
34
  query_metadata:
35
35
  max_tokens: 1500
36
36
  temperature: 0.1
@@ -10,6 +10,7 @@ Requirements:
10
10
  - ANTHROPIC_API_KEY environment variable must be set
11
11
  """
12
12
 
13
+ import json
13
14
  import logging
14
15
  import os
15
16
  import time
@@ -33,7 +34,7 @@ from ..knowledge_service import (
33
34
  logger = logging.getLogger(__name__)
34
35
 
35
36
  # Default configuration constants
36
- DEFAULT_MODEL = "claude-sonnet-4-20250514"
37
+ DEFAULT_MODEL = "claude-sonnet-4-5"
37
38
  DEFAULT_MAX_TOKENS = 4000
38
39
 
39
40
 
@@ -172,6 +173,7 @@ class AnthropicKnowledgeService(KnowledgeService):
172
173
  self,
173
174
  config: KnowledgeServiceConfig,
174
175
  query_text: str,
176
+ output_schema: dict[str, Any] | None = None,
175
177
  service_file_ids: list[str] | None = None,
176
178
  query_metadata: dict[str, Any] | None = None,
177
179
  assistant_prompt: str | None = None,
@@ -181,12 +183,13 @@ class AnthropicKnowledgeService(KnowledgeService):
181
183
  Args:
182
184
  config: KnowledgeServiceConfig for this operation
183
185
  query_text: The query to execute
186
+ output_schema: Optional JSON schema embedded in the prompt text; when provided, the response is parsed as JSON (the API's structured-output feature is not used)
184
187
  service_file_ids: Optional list of Anthropic file IDs to provide
185
188
  as context for the query
186
189
  query_metadata: Optional Anthropic-specific configuration such as
187
190
  model, temperature, max_tokens, etc.
188
191
  assistant_prompt: Optional assistant message content to constrain
189
- or prime the model's response
192
+ or prime the model's response.
190
193
 
191
194
  Returns:
192
195
  QueryResult with Anthropic query results
@@ -227,8 +230,22 @@ class AnthropicKnowledgeService(KnowledgeService):
227
230
  }
228
231
  )
229
232
 
233
+ # Handle schema embedding if provided
234
+ if output_schema:
235
+ # Build query with embedded schema
236
+ schema_json = json.dumps(output_schema, indent=2)
237
+ enhanced_query_text = f"""{query_text}
238
+
239
+ Please structure your response according to this JSON schema:
240
+ {schema_json}
241
+
242
+ Return only valid JSON that conforms to this schema, without any surrounding
243
+ text or markdown formatting."""
244
+ else:
245
+ enhanced_query_text = query_text
246
+
230
247
  # Add the text query
231
- content_parts.append({"type": "text", "text": query_text})
248
+ content_parts.append({"type": "text", "text": enhanced_query_text})
232
249
 
233
250
  # Prepare messages for the API
234
251
  messages = [{"role": "user", "content": content_parts}]
@@ -255,7 +272,7 @@ class AnthropicKnowledgeService(KnowledgeService):
255
272
  # Validate response has exactly one content block of type 'text'
256
273
  if len(response.content) != 1:
257
274
  raise ValueError(
258
- f"Expected exactly 1 content block, got " f"{len(response.content)}"
275
+ f"Expected exactly 1 content block, got {len(response.content)}"
259
276
  )
260
277
 
261
278
  content_block = response.content[0]
@@ -280,9 +297,39 @@ class AnthropicKnowledgeService(KnowledgeService):
280
297
  },
281
298
  )
282
299
 
283
- # Structure the result with single text content
300
+ # Handle JSON parsing if schema was provided
301
+ if output_schema:
302
+ # Determine the text to parse
303
+ if assistant_prompt and assistant_prompt.strip().startswith("{"):
304
+ # Concatenate assistant prompt with response for JSON parsing
305
+ json_text_to_parse = assistant_prompt + response_text
306
+ else:
307
+ json_text_to_parse = response_text
308
+
309
+ try:
310
+ response_value = json.loads(json_text_to_parse.strip())
311
+ except json.JSONDecodeError as e:
312
+ logger.error(
313
+ f"Failed to parse JSON response when output schema was provided. "
314
+ f"JSON text to parse: {json_text_to_parse[:500]}... "
315
+ f"Parse error: {str(e)}",
316
+ extra={
317
+ "knowledge_service_id": config.knowledge_service_id,
318
+ "query_id": query_id,
319
+ "assistant_prompt": assistant_prompt,
320
+ "response_text_preview": response_text[:100],
321
+ },
322
+ )
323
+ raise ValueError(
324
+ f"Expected valid JSON response when output schema provided, "
325
+ f"but failed to parse: {str(e)}"
326
+ )
327
+ else:
328
+ response_value = response_text
329
+
330
+ # Structure the result with parsed or text content
284
331
  result_data = {
285
- "response": response_text,
332
+ "response": response_value,
286
333
  "model": model,
287
334
  "service": "anthropic",
288
335
  "sources": service_file_ids or [],