json-repair 0.55.2__py3-none-any.whl → 0.56.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,63 @@
1
- from typing import TYPE_CHECKING
1
+ import re
2
+ from typing import TYPE_CHECKING, Any
2
3
 
3
- from .utils.constants import STRING_DELIMITERS, JSONReturnType
4
+ from .utils.constants import MISSING_VALUE, STRING_DELIMITERS, JSONReturnType
4
5
  from .utils.json_context import ContextValues
5
6
 
6
7
  if TYPE_CHECKING:
7
8
  from .json_parser import JSONParser
9
+ from .schema_repair import SchemaRepairer
8
10
 
9
11
 
10
- def parse_object(self: "JSONParser") -> JSONReturnType:
12
+ def parse_object(
13
+ self: "JSONParser",
14
+ schema: dict[str, Any] | bool | None = None,
15
+ path: str = "$",
16
+ ) -> JSONReturnType:
11
17
  # <object> ::= '{' [ <member> *(', ' <member>) ] '}' ; A sequence of 'members'
12
18
  obj: dict[str, JSONReturnType] = {}
13
19
  start_index = self.index
20
+
21
+ # Only activate schema-guided parsing if a repairer is available and schema looks object-like.
22
+ schema_repairer: SchemaRepairer | None = None
23
+ properties: dict[str, Any] = {}
24
+ pattern_properties: dict[str, Any] = {}
25
+ additional_properties: object | None = None
26
+ required: set[str] = set()
27
+
28
+ if schema is not None and schema is not True:
29
+ repairer = self.schema_repairer
30
+ if repairer is not None:
31
+ schema = repairer.resolve_schema(schema)
32
+ if schema is False:
33
+ raise ValueError("Schema does not allow any values.")
34
+ if schema is not True and repairer.is_object_schema(schema):
35
+ schema_repairer = repairer
36
+ properties = schema.get("properties", {})
37
+ if not isinstance(properties, dict):
38
+ properties = {}
39
+ pattern_properties = schema.get("patternProperties", {})
40
+ if not isinstance(pattern_properties, dict):
41
+ pattern_properties = {}
42
+ additional_properties = schema.get("additionalProperties", None)
43
+ required = set(schema.get("required", []))
44
+
45
+ def finalize_obj() -> dict[str, JSONReturnType]:
46
+ if schema_repairer is None:
47
+ return obj
48
+ schema_repairer_local = schema_repairer
49
+ # Enforce required fields and insert defaults for optional properties.
50
+ missing_required = [key for key in required if key not in obj]
51
+ if missing_required:
52
+ raise ValueError(f"Missing required properties at {path}: {', '.join(missing_required)}")
53
+ for key, prop_schema in properties.items():
54
+ if key in obj or key in required:
55
+ continue
56
+ if isinstance(prop_schema, dict) and "default" in prop_schema:
57
+ obj[key] = schema_repairer_local._copy_json_value(prop_schema["default"], f"{path}.{key}", "default")
58
+ schema_repairer_local._log("Inserted default value for missing property", f"{path}.{key}")
59
+ return obj
60
+
14
61
  # Stop when you either find the closing parentheses or you have iterated over the entire string
15
62
  while (self.get_char_at() or "}") != "}":
16
63
  # This is what we expect to find:
@@ -145,21 +192,71 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
145
192
  self.skip_whitespaces()
146
193
  # Corner case, a lone comma
147
194
  value: JSONReturnType = ""
195
+ prop_schema: dict[str, Any] | bool | None = None
196
+ extra_schemas: list[dict[str, Any] | bool | None] = []
197
+ drop_property = False
198
+
199
+ if schema_repairer is not None:
200
+ if key in properties:
201
+ schema_value = properties[key]
202
+ # Schema entries must be dict/bool; reject invalid metadata early.
203
+ if schema_value is not None and not isinstance(schema_value, (dict, bool)):
204
+ raise ValueError("Schema must be an object.")
205
+ prop_schema = schema_value
206
+ else:
207
+ matched = [
208
+ schema_value for pattern, schema_value in pattern_properties.items() if re.search(pattern, key)
209
+ ]
210
+ if matched:
211
+ # patternProperties can stack: apply the first schema, then any extras in order.
212
+ primary_schema = matched[0]
213
+ if primary_schema is not None and not isinstance(primary_schema, (dict, bool)):
214
+ raise ValueError("Schema must be an object.")
215
+ prop_schema = primary_schema
216
+ for extra_schema in matched[1:]:
217
+ if extra_schema is not None and not isinstance(extra_schema, (dict, bool)):
218
+ raise ValueError("Schema must be an object.")
219
+ extra_schemas.append(extra_schema)
220
+ else:
221
+ if additional_properties is False:
222
+ # Schema forbids unknown keys: parse but drop this property.
223
+ drop_property = True
224
+ elif isinstance(additional_properties, dict):
225
+ prop_schema = additional_properties
226
+ else:
227
+ prop_schema = True
228
+
148
229
  char = self.get_char_at()
230
+ key_path = f"{path}.{key}"
149
231
  if char in [",", "}"]:
150
232
  self.log(
151
233
  f"While parsing an object value we found a stray {char}, ignoring it",
152
234
  )
235
+ if schema_repairer is not None:
236
+ # Missing value: fill according to schema (defaults/const/enum/type).
237
+ value = schema_repairer.repair_value(MISSING_VALUE, prop_schema, key_path)
153
238
  else:
154
- value = self.parse_json()
155
- if value == "" and self.strict and self.get_char_at(-1) not in STRING_DELIMITERS:
239
+ # Schema-aware parsing guides repairs inside nested values.
240
+ value = self.parse_json(prop_schema, key_path) if schema_repairer is not None else self.parse_json()
241
+
242
+ if schema_repairer is not None and extra_schemas:
243
+ # Apply any additional pattern schemas in order.
244
+ for extra_schema in extra_schemas:
245
+ value = schema_repairer.repair_value(value, extra_schema, key_path)
246
+
247
+ if schema_repairer is None and value == "" and self.strict and self.get_char_at(-1) not in STRING_DELIMITERS:
156
248
  self.log(
157
249
  "Parsed value is empty in strict mode while parsing object, raising an error",
158
250
  )
159
251
  raise ValueError("Parsed value is empty in strict mode while parsing object.")
252
+
160
253
  # Reset context since our job is done
161
254
  self.context.reset()
162
- obj[key] = value
255
+ if schema_repairer is None or not drop_property:
256
+ obj[key] = value
257
+ else:
258
+ # Keep parsing but omit forbidden properties to respect the schema.
259
+ schema_repairer._log("Dropped extra property not covered by schema", key_path)
163
260
 
164
261
  if self.get_char_at() in [",", "'", '"']:
165
262
  self.index += 1
@@ -204,17 +301,17 @@ def parse_object(self: "JSONParser") -> JSONReturnType:
204
301
 
205
302
  self.skip_whitespaces()
206
303
  if self.get_char_at() != ",":
207
- return obj
304
+ return finalize_obj()
208
305
  self.index += 1
209
306
  self.skip_whitespaces()
210
307
  if self.get_char_at() not in STRING_DELIMITERS:
211
- return obj
308
+ return finalize_obj()
212
309
  if not self.strict:
213
310
  self.log(
214
311
  "Found a comma and string delimiter after object closing brace, checking for additional key-value pairs",
215
312
  )
216
- additional_obj = self.parse_object()
313
+ additional_obj = self.parse_object(schema, path)
217
314
  if isinstance(additional_obj, dict):
218
315
  obj.update(additional_obj)
219
316
 
220
- return obj
317
+ return finalize_obj()
@@ -0,0 +1,508 @@
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import importlib
5
+ import re
6
+ from types import ModuleType
7
+ from typing import Any
8
+
9
+ from .utils.constants import MISSING_VALUE, JSONReturnType, MissingValueType
10
+
11
+
12
def _require_jsonschema() -> Any:
    """Import and return the optional ``jsonschema`` package.

    Returns:
        The imported ``jsonschema`` module object.

    Raises:
        ValueError: If ``jsonschema`` cannot be imported; the original
            ``ImportError`` is chained as the cause.
    """
    package_name = "jsonschema"
    try:
        module = importlib.import_module(package_name)
    except ImportError as exc:  # pragma: no cover - optional dependency
        raise ValueError("jsonschema is required when using schema-aware repair.") from exc
    return module
17
+
18
+
19
def _require_pydantic() -> Any:
    """Import and return the optional ``pydantic`` package.

    Returns:
        The imported ``pydantic`` module object.

    Raises:
        ValueError: If ``pydantic`` cannot be imported; the original
            ``ImportError`` is chained as the cause.
    """
    package_name = "pydantic"
    try:
        module = importlib.import_module(package_name)
    except ImportError as exc:  # pragma: no cover - optional dependency
        raise ValueError("pydantic is required when using schema models.") from exc
    return module
24
+
25
+
26
def load_schema_model(path: str) -> type[Any]:
    """Load a class from a ``'module:ClassName'`` specification.

    Args:
        path: Dotted module name and class name separated by the first ``:``.

    Returns:
        The class object found in the module's namespace.

    Raises:
        ValueError: If ``path`` has no ``:`` separator, or the named attribute
            is absent from the module namespace or is not a class.
    """
    module_name, separator, class_name = path.partition(":")
    if not separator:
        raise ValueError("Schema model must be in the form 'module:ClassName'.")
    # Look the name up in the module's own namespace dict rather than via getattr.
    namespace = importlib.import_module(module_name).__dict__
    candidate = namespace.get(class_name)
    if isinstance(candidate, type):
        return candidate
    raise ValueError(f"Schema model '{class_name}' not found in module '{module_name}'.")
35
+
36
+
37
def normalize_missing_values(value: object) -> JSONReturnType:
    """Return a JSON-compatible copy of ``value`` with missing-value sentinels replaced by ``""``.

    Dicts and lists are rebuilt recursively; ``None``, strings, numbers and
    booleans are returned unchanged.

    Raises:
        ValueError: If a dict key is not a string, or a value is of a type
            that is not JSON compatible.
    """
    if value is MISSING_VALUE or isinstance(value, MissingValueType):
        return ""
    if value is None or isinstance(value, (str, int, float, bool)):
        return value
    if isinstance(value, list):
        return [normalize_missing_values(element) for element in value]
    if not isinstance(value, dict):
        raise ValueError("Value is not JSON compatible.")

    cleaned: dict[str, JSONReturnType] = {}
    for name, member in value.items():
        # Validate each key before descending so errors surface in iteration order.
        if not isinstance(name, str):
            raise ValueError("Object keys must be strings.")
        cleaned[name] = normalize_missing_values(member)
    return cleaned
52
+
53
+
54
def schema_from_input(schema: Any) -> dict[str, Any] | bool:
    """Turn user input into a JSON Schema dict or boolean schema.

    Dicts and booleans are returned as they are. Any object exposing
    ``model_json_schema`` is treated as a pydantic model: its generated schema
    is returned, with a ``default`` added to every non-required field listed in
    ``model_fields`` whose property schema does not already carry one.

    Raises:
        ValueError: If the input is none of the supported kinds, or the
            installed pydantic reports a major version below 2.
    """
    if isinstance(schema, dict) or schema is True or schema is False:
        return schema
    if not hasattr(schema, "model_json_schema"):
        raise ValueError("Schema must be a JSON Schema dict, boolean schema, or pydantic v2 model.")

    pydantic_module = _require_pydantic()
    fallback_version = getattr(pydantic_module, "__version__", "0")
    version_text = getattr(pydantic_module, "VERSION", fallback_version)
    major = int(version_text.split(".")[0])
    if major < 2:
        raise ValueError("pydantic v2 is required for schema models.")

    generated: dict[str, Any] = schema.model_json_schema()
    if not hasattr(schema, "model_fields"):
        return generated

    props = generated.setdefault("properties", {})
    if not isinstance(props, dict):
        # Replace a malformed "properties" entry so defaults have somewhere to go.
        props = {}
        generated["properties"] = props

    for field_name, field_info in schema.model_fields.items():
        if field_info.is_required():
            continue
        entry = props.setdefault(field_name, {})
        if not isinstance(entry, dict):
            entry = {}
            props[field_name] = entry
        if "default" in entry:
            continue
        factory = field_info.default_factory
        entry["default"] = field_info.default if factory is None else factory()
    return generated
85
+
86
+
87
+ class SchemaRepairer:
88
+ def __init__(self, schema: dict[str, Any] | bool, log: list[dict[str, str]] | None) -> None:
89
+ self.root_schema = schema
90
+ self.log = log
91
+
92
+ def _log(self, text: str, path: str) -> None:
93
+ if self.log is not None:
94
+ self.log.append({"text": text, "context": path})
95
+
96
+ def validate(self, value: JSONReturnType, schema: dict[str, Any] | bool) -> None:
97
+ schema = self.resolve_schema(schema)
98
+ if schema is True:
99
+ return
100
+ if schema is False:
101
+ raise ValueError("Schema does not allow any values.")
102
+ schema_for_validation = self._prepare_schema_for_validation(schema)
103
+ jsonschema = _require_jsonschema()
104
+ validator_cls = jsonschema.validators.validator_for(schema_for_validation)
105
+ validator = validator_cls(schema_for_validation)
106
+ errors = sorted(validator.iter_errors(value), key=lambda e: e.path)
107
+ if errors:
108
+ raise ValueError(errors[0].message)
109
+
110
+ def resolve_schema(self, schema: object | None) -> dict[str, Any] | bool:
111
+ if schema is None:
112
+ return True
113
+ if isinstance(schema, bool):
114
+ return schema
115
+ if not isinstance(schema, dict):
116
+ raise ValueError("Schema must be an object.")
117
+ schema_dict: dict[str, Any] = {}
118
+ for key, value in schema.items():
119
+ if not isinstance(key, str):
120
+ raise ValueError("Schema keys must be strings.")
121
+ schema_dict[key] = value
122
+ while "$ref" in schema_dict:
123
+ ref = schema_dict["$ref"]
124
+ resolved = self._resolve_ref(ref)
125
+ if isinstance(resolved, bool):
126
+ return resolved
127
+ schema_dict = resolved
128
+ return schema_dict
129
+
130
+ def is_object_schema(self, schema: dict[str, Any] | bool | None) -> bool:
131
+ schema = self.resolve_schema(schema)
132
+ if not isinstance(schema, dict):
133
+ return False
134
+ schema_type = schema.get("type")
135
+ if schema_type == "object":
136
+ return True
137
+ if isinstance(schema_type, list) and "object" in schema_type:
138
+ return True
139
+ return any(key in schema for key in ("properties", "patternProperties", "additionalProperties", "required"))
140
+
141
+ def is_array_schema(self, schema: dict[str, Any] | bool | None) -> bool:
142
+ schema = self.resolve_schema(schema)
143
+ if not isinstance(schema, dict):
144
+ return False
145
+ schema_type = schema.get("type")
146
+ if schema_type == "array":
147
+ return True
148
+ if isinstance(schema_type, list) and "array" in schema_type:
149
+ return True
150
+ return "items" in schema
151
+
152
+ def repair_value(self, value: Any, schema: dict[str, Any] | bool | None, path: str) -> JSONReturnType:
153
+ """Apply schema rules to a parsed value, including unions, coercions, and defaults."""
154
+ schema = self.resolve_schema(schema)
155
+ if schema is True:
156
+ return normalize_missing_values(value)
157
+ if schema is False:
158
+ raise ValueError("Schema does not allow any values.")
159
+ if not schema:
160
+ return normalize_missing_values(value)
161
+
162
+ if value is MISSING_VALUE:
163
+ return self._fill_missing(schema, path)
164
+
165
+ if "allOf" in schema:
166
+ subschemas = schema["allOf"]
167
+ if not subschemas:
168
+ return normalize_missing_values(value)
169
+ repaired = self.repair_value(value, subschemas[0], path)
170
+ for subschema in subschemas[1:]:
171
+ repaired = self.repair_value(repaired, subschema, path)
172
+ return repaired
173
+
174
+ if "oneOf" in schema:
175
+ return self._repair_union(value, schema["oneOf"], path)
176
+ if "anyOf" in schema:
177
+ return self._repair_union(value, schema["anyOf"], path)
178
+
179
+ expected_type = schema.get("type")
180
+ if expected_type is None:
181
+ if self.is_object_schema(schema):
182
+ expected_type = "object"
183
+ elif self.is_array_schema(schema):
184
+ expected_type = "array"
185
+
186
+ if isinstance(expected_type, list):
187
+ return self._repair_type_union(value, expected_type, schema, path)
188
+
189
+ if expected_type == "object":
190
+ repaired = self._repair_object(value, schema, path)
191
+ elif expected_type == "array":
192
+ repaired = self._repair_array(value, schema, path)
193
+ elif isinstance(expected_type, str):
194
+ repaired = self._coerce_scalar(value, expected_type, path)
195
+ else:
196
+ repaired = normalize_missing_values(value)
197
+
198
+ return self._apply_enum_const(repaired, schema, path)
199
+
200
+ def _repair_union(self, value: Any, schemas: list[dict[str, Any] | bool], path: str) -> JSONReturnType:
201
+ last_error: Exception | None = None
202
+ for subschema in schemas:
203
+ try:
204
+ candidate = self.repair_value(copy.deepcopy(value), subschema, path)
205
+ self.validate(candidate, subschema)
206
+ return candidate
207
+ except ValueError as exc:
208
+ last_error = exc
209
+ if last_error:
210
+ raise ValueError(str(last_error)) from last_error
211
+ raise ValueError("No schema matched the value.")
212
+
213
+ def _repair_type_union(
214
+ self,
215
+ value: Any,
216
+ types: list[str],
217
+ schema: dict[str, Any],
218
+ path: str,
219
+ ) -> JSONReturnType:
220
+ last_error: Exception | None = None
221
+ for schema_type in types:
222
+ try:
223
+ candidate = self._repair_by_type(value, schema_type, schema, path)
224
+ return self._apply_enum_const(candidate, schema, path)
225
+ except ValueError as exc:
226
+ last_error = exc
227
+ if last_error:
228
+ raise ValueError(str(last_error)) from last_error
229
+ raise ValueError("No schema type matched the value.")
230
+
231
+ def _repair_by_type(self, value: Any, schema_type: str, schema: dict[str, Any], path: str) -> JSONReturnType:
232
+ if schema_type == "array":
233
+ return self._repair_array(value, schema, path)
234
+ if schema_type == "object":
235
+ return self._repair_object(value, schema, path)
236
+ return self._coerce_scalar(value, schema_type, path)
237
+
238
+ def _repair_array(self, value: Any, schema: dict[str, Any], path: str) -> JSONReturnType:
239
+ if isinstance(value, list):
240
+ items: list[JSONReturnType] = value
241
+ else:
242
+ self._log("Wrapped value in array to match schema", path)
243
+ items = [normalize_missing_values(value)]
244
+ items_schema = schema.get("items")
245
+ if items_schema is not None:
246
+ if isinstance(items_schema, list):
247
+ repaired_items: list[JSONReturnType] = []
248
+ for idx, item_schema in enumerate(items_schema):
249
+ if idx >= len(items):
250
+ break
251
+ repaired_items.append(self.repair_value(items[idx], item_schema, f"{path}[{idx}]"))
252
+ additional_items = schema.get("additionalItems")
253
+ if len(items) > len(items_schema):
254
+ tail = items[len(items_schema) :]
255
+ if isinstance(additional_items, dict):
256
+ for offset, item in enumerate(tail, start=len(items_schema)):
257
+ repaired_items.append(self.repair_value(item, additional_items, f"{path}[{offset}]"))
258
+ elif additional_items is True or additional_items is None:
259
+ repaired_items.extend(normalize_missing_values(item) for item in tail)
260
+ else:
261
+ for offset, _item in enumerate(tail, start=len(items_schema)):
262
+ self._log("Dropped extra array item not covered by schema", f"{path}[{offset}]")
263
+ items = repaired_items
264
+ else:
265
+ items = [self.repair_value(item, items_schema, f"{path}[{idx}]") for idx, item in enumerate(items)]
266
+ min_items = schema.get("minItems")
267
+ if min_items is not None and len(items) < min_items:
268
+ raise ValueError(f"Array at {path} does not meet minItems.")
269
+ return items
270
+
271
+ def _repair_object(self, value: Any, schema: dict[str, Any], path: str) -> JSONReturnType:
272
+ if not isinstance(value, dict):
273
+ raise ValueError(f"Expected object at {path}, got {type(value).__name__}.")
274
+
275
+ properties = schema.get("properties", {})
276
+ if not isinstance(properties, dict):
277
+ properties = {}
278
+ required = set(schema.get("required", []))
279
+ pattern_properties = schema.get("patternProperties", {})
280
+ if not isinstance(pattern_properties, dict):
281
+ pattern_properties = {}
282
+ additional_properties = schema.get("additionalProperties")
283
+
284
+ missing_required = [key for key in required if key not in value]
285
+ if missing_required:
286
+ raise ValueError(f"Missing required properties at {path}: {', '.join(missing_required)}")
287
+
288
+ repaired: dict[str, JSONReturnType] = {}
289
+
290
+ for key, prop_schema in properties.items():
291
+ key_path = f"{path}.{key}"
292
+ if key in value:
293
+ repaired[key] = self.repair_value(value[key], prop_schema, key_path)
294
+ elif isinstance(prop_schema, dict) and "default" in prop_schema and key not in required:
295
+ repaired[key] = self._copy_json_value(prop_schema["default"], key_path, "default")
296
+ self._log("Inserted default value for missing property", key_path)
297
+
298
+ for key, raw_value in value.items():
299
+ if key in properties:
300
+ continue
301
+ key_path = f"{path}.{key}"
302
+ matched = [prop_schema for pattern, prop_schema in pattern_properties.items() if re.search(pattern, key)]
303
+ if matched:
304
+ repaired_value = self.repair_value(raw_value, matched[0], key_path)
305
+ for prop_schema in matched[1:]:
306
+ repaired_value = self.repair_value(repaired_value, prop_schema, key_path)
307
+ repaired[key] = repaired_value
308
+ continue
309
+ if isinstance(additional_properties, dict):
310
+ repaired[key] = self.repair_value(raw_value, additional_properties, key_path)
311
+ continue
312
+ if additional_properties is True or additional_properties is None:
313
+ repaired[key] = normalize_missing_values(raw_value)
314
+ continue
315
+ self._log("Dropped extra property not covered by schema", key_path)
316
+
317
+ min_properties = schema.get("minProperties")
318
+ if min_properties is not None and len(repaired) < min_properties:
319
+ raise ValueError(f"Object at {path} does not meet minProperties.")
320
+ return repaired
321
+
322
+ def _fill_missing(self, schema: dict[str, Any], path: str) -> JSONReturnType:
323
+ if "const" in schema:
324
+ # Const/enum/default have priority over type inference.
325
+ self._log("Filled missing value with const", path)
326
+ return self._copy_json_value(schema["const"], path, "const")
327
+ if "enum" in schema:
328
+ enum_values = schema["enum"]
329
+ if not enum_values:
330
+ raise ValueError(f"Enum at {path} has no values.")
331
+ self._log("Filled missing value with first enum value", path)
332
+ return self._copy_json_value(enum_values[0], path, "enum")
333
+ if "default" in schema:
334
+ self._log("Filled missing value with default", path)
335
+ return self._copy_json_value(schema["default"], path, "default")
336
+
337
+ expected_type = schema.get("type")
338
+ if isinstance(expected_type, list):
339
+ for schema_type in expected_type:
340
+ try:
341
+ return self._fill_missing({**schema, "type": schema_type}, path)
342
+ except ValueError:
343
+ continue
344
+ raise ValueError(f"Cannot infer missing value at {path}.")
345
+
346
+ if expected_type is None:
347
+ # Infer container types based on schema shape if type is omitted.
348
+ if self.is_object_schema(schema):
349
+ expected_type = "object"
350
+ elif self.is_array_schema(schema):
351
+ expected_type = "array"
352
+
353
+ if expected_type == "string":
354
+ self._log("Filled missing value with empty string", path)
355
+ return ""
356
+ if expected_type in ("integer", "number"):
357
+ self._log("Filled missing value with 0", path)
358
+ return 0
359
+ if expected_type == "boolean":
360
+ self._log("Filled missing value with false", path)
361
+ return False
362
+ if expected_type == "array":
363
+ min_items = schema.get("minItems")
364
+ if min_items:
365
+ raise ValueError(f"Array at {path} requires at least {min_items} items.")
366
+ self._log("Filled missing value with empty array", path)
367
+ return []
368
+ if expected_type == "object":
369
+ min_properties = schema.get("minProperties")
370
+ if min_properties:
371
+ raise ValueError(f"Object at {path} requires at least {min_properties} properties.")
372
+ self._log("Filled missing value with empty object", path)
373
+ return {}
374
+ if expected_type == "null":
375
+ self._log("Filled missing value with null", path)
376
+ return None
377
+
378
+ raise ValueError(f"Cannot infer missing value at {path}.")
379
+
380
+ def _coerce_scalar(self, value: Any, schema_type: str, path: str) -> JSONReturnType:
381
+ if schema_type == "string":
382
+ if isinstance(value, str):
383
+ return value
384
+ if isinstance(value, (int, float)) and not isinstance(value, bool):
385
+ self._log("Coerced number to string", path)
386
+ return str(value)
387
+ raise ValueError(f"Expected string at {path}.")
388
+
389
+ if schema_type == "integer":
390
+ if isinstance(value, bool):
391
+ raise ValueError(f"Expected integer at {path}.")
392
+ if isinstance(value, int):
393
+ return value
394
+ if isinstance(value, float):
395
+ if value.is_integer():
396
+ self._log("Coerced number to integer", path)
397
+ return int(value)
398
+ raise ValueError(f"Expected integer at {path}.")
399
+ if isinstance(value, str):
400
+ try:
401
+ int_value = int(value)
402
+ except ValueError:
403
+ int_value = None
404
+ if int_value is not None:
405
+ self._log("Coerced string to integer", path)
406
+ return int_value
407
+ try:
408
+ num = float(value)
409
+ except ValueError as exc:
410
+ raise ValueError(f"Expected integer at {path}.") from exc
411
+ if not num.is_integer():
412
+ raise ValueError(f"Expected integer at {path}.")
413
+ self._log("Coerced number to integer", path)
414
+ return int(num)
415
+ raise ValueError(f"Expected integer at {path}.")
416
+
417
+ if schema_type == "number":
418
+ if isinstance(value, bool):
419
+ raise ValueError(f"Expected number at {path}.")
420
+ if isinstance(value, (int, float)):
421
+ return value
422
+ if isinstance(value, str):
423
+ try:
424
+ float_value = float(value)
425
+ except ValueError as exc:
426
+ raise ValueError(f"Expected number at {path}.") from exc
427
+ self._log("Coerced string to number", path)
428
+ return float_value
429
+ raise ValueError(f"Expected number at {path}.")
430
+
431
+ if schema_type == "boolean":
432
+ if isinstance(value, bool):
433
+ return value
434
+ if isinstance(value, str):
435
+ lowered = value.lower()
436
+ if lowered in ("true", "false"):
437
+ self._log("Coerced string to boolean", path)
438
+ return lowered == "true"
439
+ raise ValueError(f"Expected boolean at {path}.")
440
+
441
+ if schema_type == "null":
442
+ if value is None:
443
+ return None
444
+ raise ValueError(f"Expected null at {path}.")
445
+
446
+ raise ValueError(f"Unsupported schema type {schema_type} at {path}.")
447
+
448
+ def _apply_enum_const(self, value: JSONReturnType, schema: dict[str, Any], path: str) -> JSONReturnType:
449
+ if "const" in schema and value != schema["const"]:
450
+ raise ValueError(f"Value at {path} does not match const.")
451
+ if "enum" in schema and value not in schema["enum"]:
452
+ raise ValueError(f"Value at {path} does not match enum.")
453
+ return value
454
+
455
+ def _resolve_ref(self, ref: str) -> dict[str, Any] | bool:
456
+ if not ref.startswith("#/"):
457
+ raise ValueError(f"Unsupported $ref: {ref}")
458
+ parts = ref.lstrip("#/").split("/")
459
+ current: Any = self.root_schema
460
+ for part in parts:
461
+ resolved_part = part.replace("~1", "/").replace("~0", "~")
462
+ if not isinstance(current, dict) or resolved_part not in current:
463
+ raise ValueError(f"Unresolvable $ref: {ref}")
464
+ current = current[resolved_part]
465
+ if isinstance(current, dict):
466
+ return current
467
+ if current is True:
468
+ return True
469
+ if current is False:
470
+ return False
471
+ raise ValueError(f"Unresolvable $ref: {ref}")
472
+
473
+ def _copy_json_value(self, value: Any, path: str, label: str) -> JSONReturnType:
474
+ if value is None or isinstance(value, (str, int, float, bool)):
475
+ return value
476
+ if isinstance(value, list):
477
+ return [self._copy_json_value(item, f"{path}[{idx}]", label) for idx, item in enumerate(value)]
478
+ if isinstance(value, dict):
479
+ copied: dict[str, JSONReturnType] = {}
480
+ for key, item in value.items():
481
+ if not isinstance(key, str):
482
+ raise ValueError(f"{label.capitalize()} value at {path} contains a non-string key.")
483
+ copied[key] = self._copy_json_value(item, f"{path}.{key}", label)
484
+ return copied
485
+ raise ValueError(f"{label.capitalize()} value at {path} is not JSON compatible.")
486
+
487
+ def _prepare_schema_for_validation(self, schema: object) -> dict[str, Any]:
488
+ def normalize(node: Any) -> Any:
489
+ if isinstance(node, dict):
490
+ normalized = {key: normalize(value) for key, value in node.items()}
491
+ items = normalized.get("items")
492
+ if isinstance(items, list):
493
+ normalized.pop("items", None)
494
+ normalized["prefixItems"] = items
495
+ additional_items = normalized.pop("additionalItems", None)
496
+ if additional_items is False:
497
+ normalized["items"] = False
498
+ elif isinstance(additional_items, dict):
499
+ normalized["items"] = additional_items
500
+ return normalized
501
+ if isinstance(node, list):
502
+ return [normalize(item) for item in node]
503
+ return node
504
+
505
+ normalized = normalize(schema)
506
+ if not isinstance(normalized, dict):
507
+ raise ValueError("Schema must be an object.")
508
+ return normalized
@@ -1,4 +1,15 @@
1
1
  from typing import Any
2
2
 
3
+
4
+ class MissingValueType:
5
+ def __repr__(self) -> str:
6
+ return "<MISSING_VALUE>"
7
+
8
+ def __deepcopy__(self, memo: dict[int, Any]) -> "MissingValueType":
9
+ return self
10
+
11
+
12
+ MISSING_VALUE = MissingValueType()
13
+
3
14
  JSONReturnType = dict[str, Any] | list[Any] | str | float | int | bool | None
4
15
  STRING_DELIMITERS: list[str] = ['"', "'", "“", "”"]