lionagi 0.16.1__py3-none-any.whl → 0.16.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/adapters/_utils.py +0 -14
- lionagi/libs/file/save.py +8 -1
- lionagi/ln/__init__.py +10 -0
- lionagi/ln/_json_dump.py +322 -49
- lionagi/ln/fuzzy/__init__.py +4 -1
- lionagi/ln/fuzzy/_fuzzy_validate.py +109 -0
- lionagi/ln/fuzzy/_to_dict.py +388 -0
- lionagi/models/__init__.py +0 -2
- lionagi/operations/brainstorm/brainstorm.py +10 -10
- lionagi/operations/communicate/communicate.py +1 -1
- lionagi/operations/parse/parse.py +1 -1
- lionagi/protocols/generic/element.py +5 -14
- lionagi/protocols/generic/log.py +2 -2
- lionagi/protocols/generic/pile.py +1 -1
- lionagi/protocols/messages/message.py +8 -1
- lionagi/protocols/operatives/operative.py +2 -2
- lionagi/service/connections/endpoint.py +7 -0
- lionagi/service/connections/match_endpoint.py +2 -10
- lionagi/service/connections/providers/types.py +1 -3
- lionagi/service/hooks/hook_event.py +1 -1
- lionagi/service/hooks/hook_registry.py +1 -1
- lionagi/service/rate_limited_processor.py +1 -1
- lionagi/session/branch.py +1 -101
- lionagi/session/session.py +9 -14
- lionagi/utils.py +3 -334
- lionagi/version.py +1 -1
- {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/METADATA +3 -13
- {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/RECORD +30 -78
- lionagi/adapters/postgres_model_adapter.py +0 -131
- lionagi/libs/concurrency.py +0 -1
- lionagi/libs/file/params.py +0 -175
- lionagi/libs/nested/__init__.py +0 -3
- lionagi/libs/nested/flatten.py +0 -172
- lionagi/libs/nested/nfilter.py +0 -59
- lionagi/libs/nested/nget.py +0 -45
- lionagi/libs/nested/ninsert.py +0 -104
- lionagi/libs/nested/nmerge.py +0 -158
- lionagi/libs/nested/npop.py +0 -69
- lionagi/libs/nested/nset.py +0 -94
- lionagi/libs/nested/unflatten.py +0 -83
- lionagi/libs/nested/utils.py +0 -189
- lionagi/libs/parse.py +0 -31
- lionagi/libs/schema/json_schema.py +0 -231
- lionagi/libs/token_transform/__init__.py +0 -0
- lionagi/libs/token_transform/base.py +0 -54
- lionagi/libs/token_transform/llmlingua.py +0 -1
- lionagi/libs/token_transform/perplexity.py +0 -450
- lionagi/libs/token_transform/symbolic_compress_context.py +0 -152
- lionagi/libs/token_transform/synthlang.py +0 -9
- lionagi/libs/token_transform/synthlang_/base.py +0 -128
- lionagi/libs/token_transform/synthlang_/resources/frameworks/abstract_algebra.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/category_theory.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/complex_analysis.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/framework_options.json +0 -52
- lionagi/libs/token_transform/synthlang_/resources/frameworks/group_theory.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/math_logic.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/reflective_patterns.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/set_theory.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/topology_fundamentals.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/mapping/lion_emoji_mapping.toml +0 -61
- lionagi/libs/token_transform/synthlang_/resources/mapping/python_math_mapping.toml +0 -41
- lionagi/libs/token_transform/synthlang_/resources/mapping/rust_chinese_mapping.toml +0 -60
- lionagi/libs/token_transform/synthlang_/resources/utility/base_synthlang_system_prompt.toml +0 -11
- lionagi/libs/token_transform/synthlang_/translate_to_synthlang.py +0 -140
- lionagi/libs/token_transform/types.py +0 -15
- lionagi/libs/unstructured/__init__.py +0 -0
- lionagi/libs/unstructured/pdf_to_image.py +0 -45
- lionagi/libs/unstructured/read_image_to_base64.py +0 -33
- lionagi/libs/validate/fuzzy_match_keys.py +0 -7
- lionagi/libs/validate/fuzzy_validate_mapping.py +0 -144
- lionagi/libs/validate/string_similarity.py +0 -7
- lionagi/libs/validate/xml_parser.py +0 -203
- lionagi/models/note.py +0 -383
- lionagi/operations/translate/__init__.py +0 -0
- lionagi/operations/translate/translate.py +0 -47
- lionagi/service/connections/providers/claude_code_.py +0 -294
- lionagi/tools/memory/tools.py +0 -495
- {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/WHEEL +0 -0
- {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/licenses/LICENSE +0 -0
lionagi/libs/nested/unflatten.py
DELETED
@@ -1,83 +0,0 @@
|
|
1
|
-
# Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
|
2
|
-
#
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
4
|
-
|
5
|
-
from typing import Any
|
6
|
-
|
7
|
-
|
8
|
-
def unflatten(
|
9
|
-
flat_dict: dict[str, Any], sep: str = "|", inplace: bool = False
|
10
|
-
) -> dict[str, Any] | list[Any]:
|
11
|
-
"""
|
12
|
-
Unflatten a single-level dictionary into a nested dictionary or list.
|
13
|
-
|
14
|
-
Args:
|
15
|
-
flat_dict: The flattened dictionary to unflatten.
|
16
|
-
sep: The separator used for joining keys.
|
17
|
-
inplace: Whether to modify the input dictionary in place.
|
18
|
-
|
19
|
-
Returns:
|
20
|
-
The unflattened nested dictionary or list.
|
21
|
-
|
22
|
-
Examples:
|
23
|
-
>>> unflatten({"a|b|c": 1, "a|b|d": 2})
|
24
|
-
{'a': {'b': {'c': 1, 'd': 2}}}
|
25
|
-
|
26
|
-
>>> unflatten({"0": "a", "1": "b", "2": "c"})
|
27
|
-
['a', 'b', 'c']
|
28
|
-
"""
|
29
|
-
|
30
|
-
def _unflatten(data: dict) -> dict | list:
|
31
|
-
result = {}
|
32
|
-
for key, value in data.items():
|
33
|
-
parts = key.split(sep)
|
34
|
-
current = result
|
35
|
-
for part in parts[:-1]:
|
36
|
-
if part not in current:
|
37
|
-
current[part] = {}
|
38
|
-
current = current[part]
|
39
|
-
if isinstance(value, dict):
|
40
|
-
current[parts[-1]] = _unflatten(value)
|
41
|
-
else:
|
42
|
-
current[parts[-1]] = value
|
43
|
-
|
44
|
-
# Convert dictionary to list if keys are consecutive integers
|
45
|
-
if result and all(
|
46
|
-
isinstance(key, str) and key.isdigit() for key in result
|
47
|
-
):
|
48
|
-
return [result[str(i)] for i in range(len(result))]
|
49
|
-
return result
|
50
|
-
|
51
|
-
if inplace:
|
52
|
-
unflattened_dict = {}
|
53
|
-
for key, value in flat_dict.items():
|
54
|
-
parts = key.split(sep)
|
55
|
-
current = unflattened_dict
|
56
|
-
for part in parts[:-1]:
|
57
|
-
if part not in current:
|
58
|
-
current[part] = {}
|
59
|
-
current = current[part]
|
60
|
-
current[parts[-1]] = value
|
61
|
-
|
62
|
-
unflattened_result = _unflatten(unflattened_dict)
|
63
|
-
flat_dict.clear()
|
64
|
-
if isinstance(unflattened_result, list):
|
65
|
-
flat_dict.update(
|
66
|
-
{str(i): v for i, v in enumerate(unflattened_result)}
|
67
|
-
)
|
68
|
-
else:
|
69
|
-
flat_dict.update(unflattened_result)
|
70
|
-
return flat_dict
|
71
|
-
|
72
|
-
else:
|
73
|
-
unflattened_dict = {}
|
74
|
-
for key, value in flat_dict.items():
|
75
|
-
parts = key.split(sep)
|
76
|
-
current = unflattened_dict
|
77
|
-
for part in parts[:-1]:
|
78
|
-
if part not in current:
|
79
|
-
current[part] = {}
|
80
|
-
current = current[part]
|
81
|
-
current[parts[-1]] = value
|
82
|
-
|
83
|
-
return _unflatten(unflattened_dict)
|
lionagi/libs/nested/utils.py
DELETED
@@ -1,189 +0,0 @@
|
|
1
|
-
# Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
|
2
|
-
#
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
4
|
-
|
5
|
-
from typing import Any
|
6
|
-
|
7
|
-
from lionagi.utils import UNDEFINED
|
8
|
-
|
9
|
-
|
10
|
-
def is_homogeneous(
|
11
|
-
iterables: list[Any] | dict[Any, Any], type_check: type | tuple[type, ...]
|
12
|
-
) -> bool:
|
13
|
-
"""
|
14
|
-
Check if all elements in a list or all values in a dict are of same type.
|
15
|
-
|
16
|
-
Args:
|
17
|
-
iterables: The list or dictionary to check.
|
18
|
-
type_check: The type to check against.
|
19
|
-
|
20
|
-
Returns:
|
21
|
-
True if all elements/values are of the same type, False otherwise.
|
22
|
-
"""
|
23
|
-
if isinstance(iterables, list):
|
24
|
-
return all(isinstance(it, type_check) for it in iterables)
|
25
|
-
|
26
|
-
elif isinstance(iterables, dict):
|
27
|
-
return all(isinstance(val, type_check) for val in iterables.values())
|
28
|
-
|
29
|
-
else:
|
30
|
-
return isinstance(iterables, type_check)
|
31
|
-
|
32
|
-
|
33
|
-
def is_same_dtype(
|
34
|
-
input_: list[Any] | dict[Any, Any],
|
35
|
-
dtype: type | None = None,
|
36
|
-
return_dtype: bool = False,
|
37
|
-
) -> bool | tuple[bool, type | None]:
|
38
|
-
"""
|
39
|
-
Check if all elements in a list or dict values are of the same data type.
|
40
|
-
|
41
|
-
Args:
|
42
|
-
input_: The input list or dictionary to check.
|
43
|
-
dtype: The data type to check against. If None, uses the type of the
|
44
|
-
first element.
|
45
|
-
return_dtype: If True, return the data type with the check result.
|
46
|
-
|
47
|
-
Returns:
|
48
|
-
If return_dtype is False, returns True if all elements are of the
|
49
|
-
same type (or if the input is empty), False otherwise.
|
50
|
-
If return_dtype is True, returns a tuple (bool, type | None).
|
51
|
-
"""
|
52
|
-
if not input_:
|
53
|
-
return True
|
54
|
-
|
55
|
-
iterable = input_.values() if isinstance(input_, dict) else input_
|
56
|
-
first_element_type = type(next(iter(iterable), None))
|
57
|
-
|
58
|
-
dtype = dtype or first_element_type
|
59
|
-
|
60
|
-
result = all(isinstance(element, dtype) for element in iterable)
|
61
|
-
return (result, dtype) if return_dtype else result
|
62
|
-
|
63
|
-
|
64
|
-
def is_structure_homogeneous(
|
65
|
-
structure: Any, return_structure_type: bool = False
|
66
|
-
) -> bool | tuple[bool, type | None]:
|
67
|
-
"""
|
68
|
-
Check if a nested structure is homogeneous (no mix of lists and dicts).
|
69
|
-
|
70
|
-
Args:
|
71
|
-
structure: The nested structure to check.
|
72
|
-
return_structure_type: If True, return the type of the homogeneous
|
73
|
-
structure.
|
74
|
-
|
75
|
-
Returns:
|
76
|
-
If return_structure_type is False, returns True if the structure is
|
77
|
-
homogeneous, False otherwise.
|
78
|
-
If True, returns a tuple (bool, type | None).
|
79
|
-
|
80
|
-
Examples:
|
81
|
-
>>> is_structure_homogeneous({'a': {'b': 1}, 'c': {'d': 2}})
|
82
|
-
True
|
83
|
-
>>> is_structure_homogeneous({'a': {'b': 1}, 'c': [1, 2]})
|
84
|
-
False
|
85
|
-
"""
|
86
|
-
|
87
|
-
def _check_structure(substructure):
|
88
|
-
structure_type = None
|
89
|
-
if isinstance(substructure, list):
|
90
|
-
structure_type = list
|
91
|
-
for item in substructure:
|
92
|
-
if not isinstance(item, structure_type) and isinstance(
|
93
|
-
item, list | dict
|
94
|
-
):
|
95
|
-
return False, None
|
96
|
-
result, _ = _check_structure(item)
|
97
|
-
if not result:
|
98
|
-
return False, None
|
99
|
-
elif isinstance(substructure, dict):
|
100
|
-
structure_type = dict
|
101
|
-
for item in substructure.values():
|
102
|
-
if not isinstance(item, structure_type) and isinstance(
|
103
|
-
item, list | dict
|
104
|
-
):
|
105
|
-
return False, None
|
106
|
-
result, _ = _check_structure(item)
|
107
|
-
if not result:
|
108
|
-
return False, None
|
109
|
-
return True, structure_type
|
110
|
-
|
111
|
-
is_homogeneous, structure_type = _check_structure(structure)
|
112
|
-
return (
|
113
|
-
(is_homogeneous, structure_type)
|
114
|
-
if return_structure_type
|
115
|
-
else is_homogeneous
|
116
|
-
)
|
117
|
-
|
118
|
-
|
119
|
-
def deep_update(
    original: dict[Any, Any], update: dict[Any, Any]
) -> dict[Any, Any]:
    """
    Recursively merge two dicts, updating nested dicts instead of overwriting.

    Args:
        original: The dictionary to update.
        update: The dictionary containing updates to apply to `original`.

    Returns:
        The `original` dictionary after applying updates from `update`.

    Note:
        This method modifies the `original` dictionary in place. Merging
        only happens when both the existing value and the incoming value are
        dicts; in every other case the incoming value replaces the existing
        one (including a dict replacing a non-dict).
    """
    for key, value in update.items():
        existing = original.get(key)
        if isinstance(value, dict) and isinstance(existing, dict):
            # Both sides are dicts: merge into the existing nested dict.
            deep_update(existing, value)
        else:
            # Recursing into a non-dict would fail on item assignment, so a
            # type mismatch is resolved by plain replacement.
            original[key] = value
    return original
|
141
|
-
|
142
|
-
|
143
|
-
def get_target_container(
|
144
|
-
nested: list[Any] | dict[Any, Any], indices: list[int | str]
|
145
|
-
) -> list[Any] | dict[Any, Any]:
|
146
|
-
"""
|
147
|
-
Retrieve the target container in a nested structure using indices.
|
148
|
-
|
149
|
-
Args:
|
150
|
-
nested: The nested structure to navigate.
|
151
|
-
indices: A list of indices to navigate through the nested structure.
|
152
|
-
|
153
|
-
Returns:
|
154
|
-
The target container at the specified path.
|
155
|
-
|
156
|
-
Raises:
|
157
|
-
IndexError: If a list index is out of range.
|
158
|
-
KeyError: If a dictionary key is not found.
|
159
|
-
TypeError: If the current element is neither a list nor a dictionary.
|
160
|
-
"""
|
161
|
-
current_element = nested
|
162
|
-
for index in indices:
|
163
|
-
if isinstance(current_element, list):
|
164
|
-
if isinstance(index, str) and index.isdigit():
|
165
|
-
index = int(index)
|
166
|
-
|
167
|
-
if isinstance(index, int) and 0 <= index < len(current_element):
|
168
|
-
current_element = current_element[index]
|
169
|
-
|
170
|
-
else:
|
171
|
-
raise IndexError("List index is invalid or out of range")
|
172
|
-
|
173
|
-
elif isinstance(current_element, dict):
|
174
|
-
if index in current_element:
|
175
|
-
current_element = current_element.get(index, None)
|
176
|
-
else:
|
177
|
-
raise KeyError("Key not found in dictionary")
|
178
|
-
else:
|
179
|
-
raise TypeError(
|
180
|
-
"Current element is neither a list nor a dictionary"
|
181
|
-
)
|
182
|
-
return current_element
|
183
|
-
|
184
|
-
|
185
|
-
def ensure_list_index(
    lst: list[Any], index: int, default: Any = UNDEFINED
) -> None:
    """
    Grow ``lst`` in place until ``lst[index]`` is a valid position.

    Args:
        lst: The list to extend.
        index: The position that must exist afterwards.
        default: Filler for the appended slots; when left as ``UNDEFINED``
            the slots are filled with None.
    """
    filler = None if default is UNDEFINED else default
    while index >= len(lst):
        lst.append(filler)
|
lionagi/libs/parse.py
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
# Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
|
2
|
-
#
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
4
|
-
|
5
|
-
from lionagi.libs.schema.as_readable import as_readable
|
6
|
-
from lionagi.libs.schema.extract_code_block import extract_code_block
|
7
|
-
from lionagi.libs.schema.function_to_schema import function_to_schema
|
8
|
-
from lionagi.libs.validate.fuzzy_match_keys import fuzzy_match_keys
|
9
|
-
from lionagi.libs.validate.fuzzy_validate_mapping import fuzzy_validate_mapping
|
10
|
-
from lionagi.libs.validate.string_similarity import string_similarity
|
11
|
-
from lionagi.libs.validate.to_num import to_num
|
12
|
-
from lionagi.utils import fuzzy_parse_json, to_dict, to_json
|
13
|
-
|
14
|
-
# Aliases kept so imports of the old names keep working
# (backward compatibility).
validate_keys = fuzzy_match_keys
validate_mapping = fuzzy_validate_mapping


# Names re-exported by this convenience module.
__all__ = (
    "as_readable",
    "extract_code_block",
    "function_to_schema",
    "fuzzy_match_keys",
    "fuzzy_validate_mapping",
    "string_similarity",
    "validate_keys",
    "validate_mapping",
    "to_dict",
    "to_json",
    "to_num",
    "fuzzy_parse_json",
)
|
@@ -1,231 +0,0 @@
|
|
1
|
-
# Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
|
2
|
-
#
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
4
|
-
|
5
|
-
from typing import Any, Literal
|
6
|
-
|
7
|
-
from ..nested.flatten import flatten
|
8
|
-
|
9
|
-
|
10
|
-
def extract_json_schema(
    data: Any,
    *,
    sep: str = "|",
    coerce_keys: bool = True,
    dynamic: bool = True,
    coerce_sequence: Literal["dict", "list"] | None = None,
    max_depth: int | None = None,
) -> dict[str, Any]:
    """
    Derive a JSON schema from a JSON-like value.

    The data is flattened with ``flatten``, each flattened key is expanded
    back into a tree of nested dicts whose leaves are type descriptors, and
    the tree is then consolidated into the schema's ``properties``.

    Args:
        data: The JSON data to extract the schema from.
        sep: Separator used in flattened keys.
        coerce_keys: Forwarded to ``flatten``.
        dynamic: Forwarded to ``flatten``.
        coerce_sequence: Forwarded to ``flatten`` ("dict", "list", or None).
        max_depth: Forwarded to ``flatten``.

    Returns:
        A dictionary of the form ``{"type": "object", "properties": ...}``.
    """
    flat = flatten(
        data,
        sep=sep,
        coerce_keys=coerce_keys,
        dynamic=dynamic,
        coerce_sequence=coerce_sequence,
        max_depth=max_depth,
    )

    tree: dict = {}
    for flat_key, leaf in flat.items():
        # String keys are split on the separator; any other key is used
        # as the path as-is.
        path = flat_key.split(sep) if isinstance(flat_key, str) else flat_key
        node = tree
        for segment in path[:-1]:
            node = node.setdefault(segment, {})
        node[path[-1]] = _get_type(leaf)

    return {"type": "object", "properties": _consolidate_schema(tree)}
|
57
|
-
|
58
|
-
|
59
|
-
def _get_type(value: Any) -> dict[str, Any]:
|
60
|
-
if isinstance(value, str):
|
61
|
-
return {"type": "string"}
|
62
|
-
elif isinstance(value, bool):
|
63
|
-
return {"type": "boolean"}
|
64
|
-
elif isinstance(value, int):
|
65
|
-
return {"type": "integer"}
|
66
|
-
elif isinstance(value, float):
|
67
|
-
return {"type": "number"}
|
68
|
-
elif isinstance(value, list):
|
69
|
-
if not value:
|
70
|
-
return {"type": "array", "items": {}}
|
71
|
-
item_types = [_get_type(item) for item in value]
|
72
|
-
if all(item_type == item_types[0] for item_type in item_types):
|
73
|
-
return {"type": "array", "items": item_types[0]}
|
74
|
-
else:
|
75
|
-
return {"type": "array", "items": {"oneOf": item_types}}
|
76
|
-
elif isinstance(value, dict):
|
77
|
-
return {
|
78
|
-
"type": "object",
|
79
|
-
"properties": _consolidate_schema(
|
80
|
-
{k: _get_type(v) for k, v in value.items()}
|
81
|
-
),
|
82
|
-
}
|
83
|
-
elif value is None:
|
84
|
-
return {"type": "null"}
|
85
|
-
else:
|
86
|
-
return {"type": "any"}
|
87
|
-
|
88
|
-
|
89
|
-
def _consolidate_schema(schema: dict) -> dict:
|
90
|
-
"""
|
91
|
-
Consolidate the schema to handle lists and nested structures.
|
92
|
-
"""
|
93
|
-
consolidated = {}
|
94
|
-
for key, value in schema.items():
|
95
|
-
if isinstance(value, dict) and all(k.isdigit() for k in value.keys()):
|
96
|
-
# This is likely a list
|
97
|
-
item_types = list(value.values())
|
98
|
-
if all(item_type == item_types[0] for item_type in item_types):
|
99
|
-
consolidated[key] = {"type": "array", "items": item_types[0]}
|
100
|
-
else:
|
101
|
-
consolidated[key] = {
|
102
|
-
"type": "array",
|
103
|
-
"items": {"oneOf": item_types},
|
104
|
-
}
|
105
|
-
elif isinstance(value, dict) and "type" in value:
|
106
|
-
consolidated[key] = value
|
107
|
-
else:
|
108
|
-
consolidated[key] = _consolidate_schema(value)
|
109
|
-
return consolidated
|
110
|
-
|
111
|
-
|
112
|
-
def json_schema_to_cfg(
    schema: dict[str, Any], start_symbol: str = "S"
) -> list[tuple[str, list[str]]]:
    """
    Translate a JSON schema into context-free grammar productions.

    Args:
        schema: Schema dict using "type", "properties" and "items".
        start_symbol: Name of the grammar's start symbol.

    Returns:
        A list of ``(lhs, rhs)`` productions, where ``rhs`` is a list of
        terminals and non-terminals. Generated non-terminals are named
        ``"<base>@<n>"`` with a counter that is unique per call. Schemas
        with an unrecognised or missing "type" add no production.
    """
    productions: list[tuple[str, list[str]]] = []
    visited: set[str] = set()
    symbol_counter = 0

    # Scalar schema types map straight to a single terminal.
    terminals = {
        "string": "STRING",
        "number": "NUMBER",
        "integer": "INTEGER",
        "boolean": "BOOLEAN",
        "null": "NULL",
    }

    def generate_symbol(base: str) -> str:
        nonlocal symbol_counter
        symbol = f"{base}@{symbol_counter}"
        symbol_counter += 1
        return symbol

    def generate_rules(s: dict[str, Any], symbol: str) -> None:
        if symbol in visited:
            return
        visited.add(symbol)

        schema_type = s.get("type")

        if schema_type == "object":
            properties = s.get("properties", {})
            if not properties:
                productions.append((symbol, ["{", "}"]))
                return

            props_symbol = generate_symbol("PROPS")
            productions.append((symbol, ["{", props_symbol, "}"]))
            productions.append((props_symbol, []))  # Empty object

            # Allocate ONE symbol per property and reuse it both where the
            # property list refers to it and where it is defined; creating a
            # fresh symbol in each place leaves the grammar disconnected.
            prop_symbols = {prop: generate_symbol(prop) for prop in properties}

            for i, prop_symbol in enumerate(prop_symbols.values()):
                if i == 0:
                    productions.append((props_symbol, [prop_symbol]))
                else:
                    productions.append(
                        (props_symbol, [props_symbol, ",", prop_symbol])
                    )

            for prop, prop_schema in properties.items():
                value_symbol = generate_symbol("VALUE")
                productions.append(
                    (prop_symbols[prop], [f'"{prop}"', ":", value_symbol])
                )
                generate_rules(prop_schema, value_symbol)

        elif schema_type == "array":
            items = s.get("items", {})
            items_symbol = generate_symbol("ITEMS")
            value_symbol = generate_symbol("VALUE")
            productions.append((symbol, ["[", "]"]))
            productions.append((symbol, ["[", items_symbol, "]"]))
            productions.append((items_symbol, [value_symbol]))
            productions.append(
                (items_symbol, [value_symbol, ",", items_symbol])
            )
            generate_rules(items, value_symbol)

        elif schema_type in terminals:
            productions.append((symbol, [terminals[schema_type]]))

    generate_rules(schema, start_symbol)
    return productions
|
185
|
-
|
186
|
-
|
187
|
-
def json_schema_to_regex(schema: dict[str, Any]) -> str:
    """
    Build an anchored regular expression from a JSON schema.

    Args:
        schema: Schema dict using "type", "properties" and "items".

    Returns:
        A pattern string wrapped in ``^...$``. Property names are matched
        literally. An object without properties matches only an empty
        ``{}`` (optionally containing whitespace). A schema with an
        unrecognised or missing "type" contributes ``.*``.
    """
    # Imported locally so the block does not depend on a module-level import.
    import re

    def schema_to_regex(s):
        schema_type = s.get("type")

        if schema_type == "object":
            properties = s.get("properties", {})
            if not properties:
                # No alternatives to offer; joining an empty list would
                # produce empty groups that also accept stray commas.
                return r"\{\s*\}"
            prop_patterns = [
                # Escape the name so characters such as "." or "(" in a
                # property name are matched literally, not as regex syntax.
                rf'"{re.escape(str(prop))}"\s*:\s*{schema_to_regex(prop_schema)}'
                for prop, prop_schema in properties.items()
            ]
            alternatives = "|".join(prop_patterns)
            return (
                r"\{\s*("
                + alternatives
                + r")(\s*,\s*("
                + alternatives
                + r"))*\s*\}"
            )

        if schema_type == "array":
            item_pattern = schema_to_regex(s.get("items", {}))
            return (
                r"\[\s*("
                + item_pattern
                + r"(\s*,\s*"
                + item_pattern
                + r")*)?\s*\]"
            )

        if schema_type == "string":
            return r'"[^"]*"'
        if schema_type == "integer":
            return r"-?\d+"
        if schema_type == "number":
            return r"-?\d+(\.\d+)?"
        if schema_type == "boolean":
            return r"(true|false)"
        if schema_type == "null":
            return r"null"
        return r".*"

    return "^" + schema_to_regex(schema) + "$"
|
227
|
-
|
228
|
-
|
229
|
-
def print_cfg(productions: list[tuple[str, list[str]]]):
    """
    Print each production as ``LHS -> sym1 sym2 ...``, one per line.

    Args:
        productions: ``(lhs, rhs)`` pairs where ``rhs`` is a list of strings.
    """
    for production in productions:
        head, body = production
        print("{} -> {}".format(head, " ".join(body)))
|
File without changes
|
@@ -1,54 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from enum import Enum
|
4
|
-
from pathlib import Path
|
5
|
-
|
6
|
-
from pydantic import Field
|
7
|
-
|
8
|
-
from lionagi.tools.base import Resource, ResourceCategory
|
9
|
-
|
10
|
-
here = Path(__file__).parent.resolve()
|
11
|
-
MAPPING_PATH = "synthlang_/resources/mapping"
|
12
|
-
|
13
|
-
|
14
|
-
class TokenMappingTemplate(str, Enum):
    """Names of the bundled token-mapping templates."""

    RUST_CHINESE = "rust_chinese"
    LION_EMOJI = "lion_emoji"
    PYTHON_MATH = "python_math"

    @property
    def fp(self) -> Path:
        """Path of this template's ``<value>_mapping.toml`` file."""
        file_name = f"{self.value}_mapping.toml"
        return here.joinpath(MAPPING_PATH, file_name)
|
22
|
-
|
23
|
-
|
24
|
-
class TokenMapping(Resource):
    # Resource whose `content` is a token-mapping dict; the category is
    # fixed to UTILITY and frozen.
    category: ResourceCategory = Field(
        default=ResourceCategory.UTILITY, frozen=True
    )
    content: dict

    @classmethod
    def load_from_template(
        cls, template: TokenMappingTemplate | str
    ) -> TokenMapping:
        """
        Load a mapping from one of the bundled templates.

        Args:
            template: A ``TokenMappingTemplate`` member, or a string naming
                one. Strings are normalised first: lower-cased, ".toml"
                removed, spaces and hyphens turned into underscores, a
                trailing "_mapping" dropped, and only the part after the
                last "/" kept.

        Returns:
            Whatever ``cls.adapt_from`` produces for the template's TOML
            file (presumably a ``TokenMapping`` - confirm against
            ``Resource.adapt_from``).

        Raises:
            ValueError: If ``template`` is neither a string nor a
                ``TokenMappingTemplate``, or if the normalised string is not
                a valid template name.
        """
        if isinstance(template, str):
            name = template.lower().strip()
            name = name.replace(".toml", "")
            name = name.replace(" ", "_")
            name = name.replace("-", "_")
            name = name.strip()
            if name.endswith("_mapping"):
                name = name[: -len("_mapping")]
            # Keep only the final path component, if a path was given.
            name = name.rpartition("/")[2]
            template = TokenMappingTemplate(name)

        if not isinstance(template, TokenMappingTemplate):
            raise ValueError(
                f"Invalid template: {template}. Must be a TokenMappingTemplate or a valid path."
            )

        return cls.adapt_from(template.fp, ".toml", many=False)
|
@@ -1 +0,0 @@
|
|
1
|
-
# TODO
|