lionagi 0.16.2__py3-none-any.whl → 0.16.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. lionagi/adapters/_utils.py +0 -14
  2. lionagi/ln/__init__.py +4 -0
  3. lionagi/ln/fuzzy/__init__.py +4 -1
  4. lionagi/ln/fuzzy/_fuzzy_validate.py +109 -0
  5. lionagi/ln/fuzzy/_to_dict.py +388 -0
  6. lionagi/models/__init__.py +0 -2
  7. lionagi/operations/communicate/communicate.py +1 -1
  8. lionagi/operations/parse/parse.py +1 -1
  9. lionagi/protocols/generic/pile.py +1 -1
  10. lionagi/protocols/operatives/operative.py +2 -2
  11. lionagi/service/connections/match_endpoint.py +2 -10
  12. lionagi/service/connections/providers/types.py +1 -3
  13. lionagi/service/hooks/hook_event.py +1 -1
  14. lionagi/service/hooks/hook_registry.py +1 -1
  15. lionagi/service/rate_limited_processor.py +1 -1
  16. lionagi/utils.py +3 -335
  17. lionagi/version.py +1 -1
  18. {lionagi-0.16.2.dist-info → lionagi-0.16.3.dist-info}/METADATA +3 -12
  19. {lionagi-0.16.2.dist-info → lionagi-0.16.3.dist-info}/RECORD +21 -43
  20. lionagi/adapters/postgres_model_adapter.py +0 -131
  21. lionagi/libs/concurrency.py +0 -1
  22. lionagi/libs/nested/__init__.py +0 -3
  23. lionagi/libs/nested/flatten.py +0 -172
  24. lionagi/libs/nested/nfilter.py +0 -59
  25. lionagi/libs/nested/nget.py +0 -45
  26. lionagi/libs/nested/ninsert.py +0 -104
  27. lionagi/libs/nested/nmerge.py +0 -158
  28. lionagi/libs/nested/npop.py +0 -69
  29. lionagi/libs/nested/nset.py +0 -94
  30. lionagi/libs/nested/unflatten.py +0 -83
  31. lionagi/libs/nested/utils.py +0 -189
  32. lionagi/libs/parse.py +0 -31
  33. lionagi/libs/schema/json_schema.py +0 -231
  34. lionagi/libs/unstructured/__init__.py +0 -0
  35. lionagi/libs/unstructured/pdf_to_image.py +0 -45
  36. lionagi/libs/unstructured/read_image_to_base64.py +0 -33
  37. lionagi/libs/validate/fuzzy_match_keys.py +0 -7
  38. lionagi/libs/validate/fuzzy_validate_mapping.py +0 -144
  39. lionagi/libs/validate/string_similarity.py +0 -7
  40. lionagi/libs/validate/xml_parser.py +0 -203
  41. lionagi/models/note.py +0 -387
  42. lionagi/service/connections/providers/claude_code_.py +0 -299
  43. {lionagi-0.16.2.dist-info → lionagi-0.16.3.dist-info}/WHEEL +0 -0
  44. {lionagi-0.16.2.dist-info → lionagi-0.16.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,69 +0,0 @@
1
- # Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
- from collections.abc import Sequence
6
- from typing import Any
7
-
8
- from lionagi.utils import UNDEFINED, to_list
9
-
10
-
11
- def npop(
12
- input_: dict[str, Any] | list[Any],
13
- /,
14
- indices: str | int | Sequence[str | int],
15
- default: Any = UNDEFINED,
16
- ) -> Any:
17
- """
18
- Perform a nested pop operation on the input structure.
19
-
20
- This function navigates through the nested structure using the provided
21
- indices and removes and returns the value at the final location.
22
-
23
- Args:
24
- input_: The input nested structure (dict or list) to pop from.
25
- indices: A single index or a sequence of indices to navigate the
26
- nested structure.
27
- default: The value to return if the key is not found. If not
28
- provided, a KeyError will be raised.
29
-
30
- Returns:
31
- The value at the specified nested location.
32
-
33
- Raises:
34
- ValueError: If the indices list is empty.
35
- KeyError: If a key is not found in a dictionary.
36
- IndexError: If an index is out of range for a list.
37
- TypeError: If an operation is not supported on the current data type.
38
- """
39
- if not indices:
40
- raise ValueError("Indices list cannot be empty")
41
-
42
- indices = to_list(indices)
43
-
44
- current = input_
45
- for key in indices[:-1]:
46
- if isinstance(current, dict):
47
- if current.get(key):
48
- current = current[key]
49
- else:
50
- raise KeyError(f"{key} is not found in {current}")
51
- elif isinstance(current, list) and isinstance(key, int):
52
- if key >= len(current):
53
- raise KeyError(
54
- f"{key} exceeds the length of the list {current}"
55
- )
56
- elif key < 0:
57
- raise ValueError("list index cannot be negative")
58
- current = current[key]
59
-
60
- last_key = indices[-1]
61
- try:
62
- return current.pop(
63
- last_key,
64
- )
65
- except Exception as e:
66
- if default is not UNDEFINED:
67
- return default
68
- else:
69
- raise KeyError(f"Invalid npop. Error: {e}")
@@ -1,94 +0,0 @@
1
- # Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
- from collections.abc import Sequence
6
- from typing import Any
7
-
8
- from lionagi.utils import to_list
9
-
10
- from .utils import ensure_list_index
11
-
12
-
13
- def nset(
14
- nested_structure: dict[str, Any] | list[Any],
15
- /,
16
- indices: str | int | Sequence[str | int],
17
- value: Any,
18
- ) -> None:
19
- """Set a value within a nested structure at the specified path.
20
-
21
- This method allows setting a value deep within a nested dictionary or list
22
- by specifying a path to the target location using a sequence of indices.
23
- Each index in the sequence represents a level in the nested structure,
24
- with integers used for list indices and strings for dictionary keys.
25
-
26
- Args:
27
- nested_structure: The nested structure to modify.
28
- indices: The path of indices leading to the target location.
29
- value: The value to set at the specified location.
30
-
31
- Raises:
32
- ValueError: If the indices sequence is empty.
33
- TypeError: If the target container is not a list or dictionary,
34
- or if the index type is incorrect.
35
-
36
- Examples:
37
- >>> data = {'a': {'b': [10, 20]}}
38
- >>> nset(data, ['a', 'b', 1], 99)
39
- >>> assert data == {'a': {'b': [10, 99]}}
40
-
41
- >>> data = [0, [1, 2], 3]
42
- >>> nset(data, [1, 1], 99)
43
- >>> assert data == [0, [1, 99], 3]
44
- """
45
-
46
- if not indices:
47
- raise ValueError(
48
- "Indices list is empty, cannot determine target container"
49
- )
50
-
51
- _indices = to_list(indices)
52
- target_container = nested_structure
53
-
54
- for i, index in enumerate(_indices[:-1]):
55
- if isinstance(target_container, list):
56
- if not isinstance(index, int):
57
- raise TypeError("Cannot use non-integer index on a list")
58
- ensure_list_index(target_container, index)
59
- if target_container[index] is None:
60
- next_index = _indices[i + 1]
61
- target_container[index] = (
62
- [] if isinstance(next_index, int) else {}
63
- )
64
- elif isinstance(target_container, dict):
65
- if isinstance(index, int):
66
- raise TypeError(
67
- f"Unsupported key type: {type(index).__name__}. "
68
- "Only string keys are acceptable."
69
- )
70
- if index not in target_container:
71
- next_index = _indices[i + 1]
72
- target_container[index] = (
73
- [] if isinstance(next_index, int) else {}
74
- )
75
- else:
76
- raise TypeError("Target container is not a list or dictionary")
77
-
78
- target_container = target_container[index]
79
-
80
- last_index = _indices[-1]
81
- if isinstance(target_container, list):
82
- if not isinstance(last_index, int):
83
- raise TypeError("Cannot use non-integer index on a list")
84
- ensure_list_index(target_container, last_index)
85
- target_container[last_index] = value
86
- elif isinstance(target_container, dict):
87
- if not isinstance(last_index, str):
88
- raise TypeError(
89
- f"Unsupported key type: {type(last_index).__name__}. "
90
- "Only string keys are acceptable."
91
- )
92
- target_container[last_index] = value
93
- else:
94
- raise TypeError("Cannot set value on non-list/dict element")
@@ -1,83 +0,0 @@
1
- # Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
- from typing import Any
6
-
7
-
8
- def unflatten(
9
- flat_dict: dict[str, Any], sep: str = "|", inplace: bool = False
10
- ) -> dict[str, Any] | list[Any]:
11
- """
12
- Unflatten a single-level dictionary into a nested dictionary or list.
13
-
14
- Args:
15
- flat_dict: The flattened dictionary to unflatten.
16
- sep: The separator used for joining keys.
17
- inplace: Whether to modify the input dictionary in place.
18
-
19
- Returns:
20
- The unflattened nested dictionary or list.
21
-
22
- Examples:
23
- >>> unflatten({"a|b|c": 1, "a|b|d": 2})
24
- {'a': {'b': {'c': 1, 'd': 2}}}
25
-
26
- >>> unflatten({"0": "a", "1": "b", "2": "c"})
27
- ['a', 'b', 'c']
28
- """
29
-
30
- def _unflatten(data: dict) -> dict | list:
31
- result = {}
32
- for key, value in data.items():
33
- parts = key.split(sep)
34
- current = result
35
- for part in parts[:-1]:
36
- if part not in current:
37
- current[part] = {}
38
- current = current[part]
39
- if isinstance(value, dict):
40
- current[parts[-1]] = _unflatten(value)
41
- else:
42
- current[parts[-1]] = value
43
-
44
- # Convert dictionary to list if keys are consecutive integers
45
- if result and all(
46
- isinstance(key, str) and key.isdigit() for key in result
47
- ):
48
- return [result[str(i)] for i in range(len(result))]
49
- return result
50
-
51
- if inplace:
52
- unflattened_dict = {}
53
- for key, value in flat_dict.items():
54
- parts = key.split(sep)
55
- current = unflattened_dict
56
- for part in parts[:-1]:
57
- if part not in current:
58
- current[part] = {}
59
- current = current[part]
60
- current[parts[-1]] = value
61
-
62
- unflattened_result = _unflatten(unflattened_dict)
63
- flat_dict.clear()
64
- if isinstance(unflattened_result, list):
65
- flat_dict.update(
66
- {str(i): v for i, v in enumerate(unflattened_result)}
67
- )
68
- else:
69
- flat_dict.update(unflattened_result)
70
- return flat_dict
71
-
72
- else:
73
- unflattened_dict = {}
74
- for key, value in flat_dict.items():
75
- parts = key.split(sep)
76
- current = unflattened_dict
77
- for part in parts[:-1]:
78
- if part not in current:
79
- current[part] = {}
80
- current = current[part]
81
- current[parts[-1]] = value
82
-
83
- return _unflatten(unflattened_dict)
@@ -1,189 +0,0 @@
1
- # Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
- from typing import Any
6
-
7
- from lionagi.utils import UNDEFINED
8
-
9
-
10
- def is_homogeneous(
11
- iterables: list[Any] | dict[Any, Any], type_check: type | tuple[type, ...]
12
- ) -> bool:
13
- """
14
- Check if all elements in a list or all values in a dict are of same type.
15
-
16
- Args:
17
- iterables: The list or dictionary to check.
18
- type_check: The type to check against.
19
-
20
- Returns:
21
- True if all elements/values are of the same type, False otherwise.
22
- """
23
- if isinstance(iterables, list):
24
- return all(isinstance(it, type_check) for it in iterables)
25
-
26
- elif isinstance(iterables, dict):
27
- return all(isinstance(val, type_check) for val in iterables.values())
28
-
29
- else:
30
- return isinstance(iterables, type_check)
31
-
32
-
33
- def is_same_dtype(
34
- input_: list[Any] | dict[Any, Any],
35
- dtype: type | None = None,
36
- return_dtype: bool = False,
37
- ) -> bool | tuple[bool, type | None]:
38
- """
39
- Check if all elements in a list or dict values are of the same data type.
40
-
41
- Args:
42
- input_: The input list or dictionary to check.
43
- dtype: The data type to check against. If None, uses the type of the
44
- first element.
45
- return_dtype: If True, return the data type with the check result.
46
-
47
- Returns:
48
- If return_dtype is False, returns True if all elements are of the
49
- same type (or if the input is empty), False otherwise.
50
- If return_dtype is True, returns a tuple (bool, type | None).
51
- """
52
- if not input_:
53
- return True
54
-
55
- iterable = input_.values() if isinstance(input_, dict) else input_
56
- first_element_type = type(next(iter(iterable), None))
57
-
58
- dtype = dtype or first_element_type
59
-
60
- result = all(isinstance(element, dtype) for element in iterable)
61
- return (result, dtype) if return_dtype else result
62
-
63
-
64
- def is_structure_homogeneous(
65
- structure: Any, return_structure_type: bool = False
66
- ) -> bool | tuple[bool, type | None]:
67
- """
68
- Check if a nested structure is homogeneous (no mix of lists and dicts).
69
-
70
- Args:
71
- structure: The nested structure to check.
72
- return_structure_type: If True, return the type of the homogeneous
73
- structure.
74
-
75
- Returns:
76
- If return_structure_type is False, returns True if the structure is
77
- homogeneous, False otherwise.
78
- If True, returns a tuple (bool, type | None).
79
-
80
- Examples:
81
- >>> is_structure_homogeneous({'a': {'b': 1}, 'c': {'d': 2}})
82
- True
83
- >>> is_structure_homogeneous({'a': {'b': 1}, 'c': [1, 2]})
84
- False
85
- """
86
-
87
- def _check_structure(substructure):
88
- structure_type = None
89
- if isinstance(substructure, list):
90
- structure_type = list
91
- for item in substructure:
92
- if not isinstance(item, structure_type) and isinstance(
93
- item, list | dict
94
- ):
95
- return False, None
96
- result, _ = _check_structure(item)
97
- if not result:
98
- return False, None
99
- elif isinstance(substructure, dict):
100
- structure_type = dict
101
- for item in substructure.values():
102
- if not isinstance(item, structure_type) and isinstance(
103
- item, list | dict
104
- ):
105
- return False, None
106
- result, _ = _check_structure(item)
107
- if not result:
108
- return False, None
109
- return True, structure_type
110
-
111
- is_homogeneous, structure_type = _check_structure(structure)
112
- return (
113
- (is_homogeneous, structure_type)
114
- if return_structure_type
115
- else is_homogeneous
116
- )
117
-
118
-
119
- def deep_update(
120
- original: dict[Any, Any], update: dict[Any, Any]
121
- ) -> dict[Any, Any]:
122
- """
123
- Recursively merge two dicts, updating nested dicts instead of overwriting.
124
-
125
- Args:
126
- original: The dictionary to update.
127
- update: The dictionary containing updates to apply to `original`.
128
-
129
- Returns:
130
- The `original` dictionary after applying updates from `update`.
131
-
132
- Note:
133
- This method modifies the `original` dictionary in place.
134
- """
135
- for key, value in update.items():
136
- if isinstance(value, dict) and key in original:
137
- original[key] = deep_update(original.get(key, {}), value)
138
- else:
139
- original[key] = value
140
- return original
141
-
142
-
143
- def get_target_container(
144
- nested: list[Any] | dict[Any, Any], indices: list[int | str]
145
- ) -> list[Any] | dict[Any, Any]:
146
- """
147
- Retrieve the target container in a nested structure using indices.
148
-
149
- Args:
150
- nested: The nested structure to navigate.
151
- indices: A list of indices to navigate through the nested structure.
152
-
153
- Returns:
154
- The target container at the specified path.
155
-
156
- Raises:
157
- IndexError: If a list index is out of range.
158
- KeyError: If a dictionary key is not found.
159
- TypeError: If the current element is neither a list nor a dictionary.
160
- """
161
- current_element = nested
162
- for index in indices:
163
- if isinstance(current_element, list):
164
- if isinstance(index, str) and index.isdigit():
165
- index = int(index)
166
-
167
- if isinstance(index, int) and 0 <= index < len(current_element):
168
- current_element = current_element[index]
169
-
170
- else:
171
- raise IndexError("List index is invalid or out of range")
172
-
173
- elif isinstance(current_element, dict):
174
- if index in current_element:
175
- current_element = current_element.get(index, None)
176
- else:
177
- raise KeyError("Key not found in dictionary")
178
- else:
179
- raise TypeError(
180
- "Current element is neither a list nor a dictionary"
181
- )
182
- return current_element
183
-
184
-
185
- def ensure_list_index(
186
- lst: list[Any], index: int, default: Any = UNDEFINED
187
- ) -> None:
188
- while len(lst) <= index:
189
- lst.append(default if default is not UNDEFINED else None)
lionagi/libs/parse.py DELETED
@@ -1,31 +0,0 @@
1
- # Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
- from lionagi.libs.schema.as_readable import as_readable
6
- from lionagi.libs.schema.extract_code_block import extract_code_block
7
- from lionagi.libs.schema.function_to_schema import function_to_schema
8
- from lionagi.libs.validate.fuzzy_match_keys import fuzzy_match_keys
9
- from lionagi.libs.validate.fuzzy_validate_mapping import fuzzy_validate_mapping
10
- from lionagi.libs.validate.string_similarity import string_similarity
11
- from lionagi.libs.validate.to_num import to_num
12
- from lionagi.utils import fuzzy_parse_json, to_dict, to_json
13
-
14
- validate_keys = fuzzy_match_keys # for backward compatibility
15
- validate_mapping = fuzzy_validate_mapping # for backward compatibility
16
-
17
-
18
- __all__ = (
19
- "as_readable",
20
- "extract_code_block",
21
- "function_to_schema",
22
- "fuzzy_match_keys",
23
- "fuzzy_validate_mapping",
24
- "string_similarity",
25
- "validate_keys",
26
- "validate_mapping",
27
- "to_dict",
28
- "to_json",
29
- "to_num",
30
- "fuzzy_parse_json",
31
- )
@@ -1,231 +0,0 @@
1
- # Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
2
- #
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
- from typing import Any, Literal
6
-
7
- from ..nested.flatten import flatten
8
-
9
-
10
- def extract_json_schema(
11
- data: Any,
12
- *,
13
- sep: str = "|",
14
- coerce_keys: bool = True,
15
- dynamic: bool = True,
16
- coerce_sequence: Literal["dict", "list"] | None = None,
17
- max_depth: int | None = None,
18
- ) -> dict[str, Any]:
19
- """
20
- Extract a JSON schema from JSON data.
21
-
22
- This function uses the flatten function to create a flat representation
23
- of the JSON data, then builds a schema based on the flattened structure.
24
-
25
- Args:
26
- data: The JSON data to extract the schema from.
27
- sep: Separator used in flattened keys.
28
- coerce_keys: Whether to coerce keys to strings.
29
- dynamic: Whether to use dynamic flattening.
30
- coerce_sequence: How to coerce sequences ("dict", "list", or None).
31
- max_depth: Maximum depth to flatten.
32
-
33
- Returns:
34
- A dictionary representing the JSON schema.
35
- """
36
- flattened = flatten(
37
- data,
38
- sep=sep,
39
- coerce_keys=coerce_keys,
40
- dynamic=dynamic,
41
- coerce_sequence=coerce_sequence,
42
- max_depth=max_depth,
43
- )
44
-
45
- schema = {}
46
- for key, value in flattened.items():
47
- key_parts = key.split(sep) if isinstance(key, str) else key
48
- current = schema
49
- for part in key_parts[:-1]:
50
- if part not in current:
51
- current[part] = {}
52
- current = current[part]
53
-
54
- current[key_parts[-1]] = _get_type(value)
55
-
56
- return {"type": "object", "properties": _consolidate_schema(schema)}
57
-
58
-
59
- def _get_type(value: Any) -> dict[str, Any]:
60
- if isinstance(value, str):
61
- return {"type": "string"}
62
- elif isinstance(value, bool):
63
- return {"type": "boolean"}
64
- elif isinstance(value, int):
65
- return {"type": "integer"}
66
- elif isinstance(value, float):
67
- return {"type": "number"}
68
- elif isinstance(value, list):
69
- if not value:
70
- return {"type": "array", "items": {}}
71
- item_types = [_get_type(item) for item in value]
72
- if all(item_type == item_types[0] for item_type in item_types):
73
- return {"type": "array", "items": item_types[0]}
74
- else:
75
- return {"type": "array", "items": {"oneOf": item_types}}
76
- elif isinstance(value, dict):
77
- return {
78
- "type": "object",
79
- "properties": _consolidate_schema(
80
- {k: _get_type(v) for k, v in value.items()}
81
- ),
82
- }
83
- elif value is None:
84
- return {"type": "null"}
85
- else:
86
- return {"type": "any"}
87
-
88
-
89
- def _consolidate_schema(schema: dict) -> dict:
90
- """
91
- Consolidate the schema to handle lists and nested structures.
92
- """
93
- consolidated = {}
94
- for key, value in schema.items():
95
- if isinstance(value, dict) and all(k.isdigit() for k in value.keys()):
96
- # This is likely a list
97
- item_types = list(value.values())
98
- if all(item_type == item_types[0] for item_type in item_types):
99
- consolidated[key] = {"type": "array", "items": item_types[0]}
100
- else:
101
- consolidated[key] = {
102
- "type": "array",
103
- "items": {"oneOf": item_types},
104
- }
105
- elif isinstance(value, dict) and "type" in value:
106
- consolidated[key] = value
107
- else:
108
- consolidated[key] = _consolidate_schema(value)
109
- return consolidated
110
-
111
-
112
- def json_schema_to_cfg(
113
- schema: dict[str, Any], start_symbol: str = "S"
114
- ) -> list[tuple[str, list[str]]]:
115
- productions = []
116
- visited = set()
117
- symbol_counter = 0
118
-
119
- def generate_symbol(base: str) -> str:
120
- nonlocal symbol_counter
121
- symbol = f"{base}@{symbol_counter}"
122
- symbol_counter += 1
123
- return symbol
124
-
125
- def generate_rules(s: dict[str, Any], symbol: str):
126
- if symbol in visited:
127
- return
128
- visited.add(symbol)
129
-
130
- if s.get("type") == "object":
131
- properties = s.get("properties", {})
132
- if properties:
133
- props_symbol = generate_symbol("PROPS")
134
- productions.append((symbol, ["{", props_symbol, "}"]))
135
-
136
- productions.append((props_symbol, [])) # Empty object
137
- for i, prop in enumerate(properties):
138
- prop_symbol = generate_symbol(prop)
139
- if i == 0:
140
- productions.append((props_symbol, [prop_symbol]))
141
- else:
142
- productions.append(
143
- (props_symbol, [props_symbol, ",", prop_symbol])
144
- )
145
-
146
- for prop, prop_schema in properties.items():
147
- prop_symbol = generate_symbol(prop)
148
- value_symbol = generate_symbol("VALUE")
149
- productions.append(
150
- (prop_symbol, [f'"{prop}"', ":", value_symbol])
151
- )
152
- generate_rules(prop_schema, value_symbol)
153
- else:
154
- productions.append((symbol, ["{", "}"]))
155
-
156
- elif s.get("type") == "array":
157
- items = s.get("items", {})
158
- items_symbol = generate_symbol("ITEMS")
159
- value_symbol = generate_symbol("VALUE")
160
- productions.append((symbol, ["[", "]"]))
161
- productions.append((symbol, ["[", items_symbol, "]"]))
162
- productions.append((items_symbol, [value_symbol]))
163
- productions.append(
164
- (items_symbol, [value_symbol, ",", items_symbol])
165
- )
166
- generate_rules(items, value_symbol)
167
-
168
- elif s.get("type") == "string":
169
- productions.append((symbol, ["STRING"]))
170
-
171
- elif s.get("type") == "number":
172
- productions.append((symbol, ["NUMBER"]))
173
-
174
- elif s.get("type") == "integer":
175
- productions.append((symbol, ["INTEGER"]))
176
-
177
- elif s.get("type") == "boolean":
178
- productions.append((symbol, ["BOOLEAN"]))
179
-
180
- elif s.get("type") == "null":
181
- productions.append((symbol, ["NULL"]))
182
-
183
- generate_rules(schema, start_symbol)
184
- return productions
185
-
186
-
187
- def json_schema_to_regex(schema: dict[str, Any]) -> str:
188
- def schema_to_regex(s):
189
- if s.get("type") == "object":
190
- properties = s.get("properties", {})
191
- prop_patterns = [
192
- rf'"{prop}"\s*:\s*{schema_to_regex(prop_schema)}'
193
- for prop, prop_schema in properties.items()
194
- ]
195
- return (
196
- r"\{"
197
- + r"\s*("
198
- + r"|".join(prop_patterns)
199
- + r")"
200
- + r"(\s*,\s*("
201
- + r"|".join(prop_patterns)
202
- + r"))*\s*\}"
203
- )
204
- elif s.get("type") == "array":
205
- items = s.get("items", {})
206
- return (
207
- r"\[\s*("
208
- + schema_to_regex(items)
209
- + r"(\s*,\s*"
210
- + schema_to_regex(items)
211
- + r")*)?\s*\]"
212
- )
213
- elif s.get("type") == "string":
214
- return r'"[^"]*"'
215
- elif s.get("type") == "integer":
216
- return r"-?\d+"
217
- elif s.get("type") == "number":
218
- return r"-?\d+(\.\d+)?"
219
- elif s.get("type") == "boolean":
220
- return r"(true|false)"
221
- elif s.get("type") == "null":
222
- return r"null"
223
- else:
224
- return r".*"
225
-
226
- return "^" + schema_to_regex(schema) + "$"
227
-
228
-
229
- def print_cfg(productions: list[tuple[str, list[str]]]):
230
- for lhs, rhs in productions:
231
- print(f"{lhs} -> {' '.join(rhs)}")
File without changes