lionagi 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +60 -5
- lionagi/core/__init__.py +0 -25
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/base_agent.py +27 -13
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/{generic/data_logger.py → collections/_logger.py} +69 -55
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/{execute/branch_executor.py → engine/branch_engine.py} +134 -97
- lionagi/core/{execute/instruction_map_executor.py → engine/instruction_map_engine.py} +80 -55
- lionagi/{experimental/directive/evaluator → core/engine}/script_engine.py +17 -1
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/{execute/structure_executor.py → executor/graph_executor.py} +62 -66
- lionagi/core/{execute → executor}/neo4j_executor.py +70 -67
- lionagi/core/generic/__init__.py +3 -33
- lionagi/core/generic/edge.py +29 -79
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +156 -221
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +12 -0
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +139 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +229 -903
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/work.py +76 -0
- lionagi/core/work/work_function.py +101 -0
- lionagi/core/work/work_queue.py +103 -0
- lionagi/core/work/worker.py +258 -0
- lionagi/core/work/worklog.py +120 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/base_parser.py +69 -2
- lionagi/experimental/directive/{template_ → template}/base_template.py +17 -1
- lionagi/{libs/ln_tokenizer.py → experimental/directive/tokenizer.py} +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/ast_evaluator.py +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/base_evaluator.py +16 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/chunker/chunk.py +161 -24
- lionagi/integrations/config/oai_configs.py +34 -3
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/load.py +122 -21
- lionagi/integrations/loader/load_util.py +6 -77
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +6 -5
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -3
- lionagi/integrations/storage/neo4j.py +52 -60
- lionagi/integrations/storage/storage_util.py +44 -46
- lionagi/integrations/storage/structure_excel.py +43 -26
- lionagi/integrations/storage/to_excel.py +11 -4
- lionagi/libs/__init__.py +22 -1
- lionagi/libs/ln_api.py +75 -20
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +82 -23
- lionagi/libs/ln_queue.py +16 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/libs/ln_validate.py +16 -0
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +95 -24
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +50 -3
- lionagi/lions/coder/util.py +30 -25
- lionagi/tests/libs/test_func_call.py +23 -21
- lionagi/tests/libs/test_nested.py +36 -21
- lionagi/tests/libs/test_parse.py +1 -1
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -294
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- lionagi-0.2.1.dist-info/LICENSE +202 -0
- lionagi-0.2.1.dist-info/METADATA +272 -0
- lionagi-0.2.1.dist-info/RECORD +240 -0
- lionagi/core/branch/base.py +0 -653
- lionagi/core/branch/branch.py +0 -474
- lionagi/core/branch/flow_mixin.py +0 -96
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -19
- lionagi/core/direct/cot.py +0 -123
- lionagi/core/direct/plan.py +0 -164
- lionagi/core/direct/predict.py +0 -166
- lionagi/core/direct/react.py +0 -171
- lionagi/core/direct/score.py +0 -279
- lionagi/core/direct/select.py +0 -170
- lionagi/core/direct/sentiment.py +0 -1
- lionagi/core/direct/utils.py +0 -110
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/execute/base_executor.py +0 -47
- lionagi/core/flow/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -240
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -253
- lionagi/core/flow/monoflow/followup.py +0 -215
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -251
- lionagi/core/form/action_form.py +0 -26
- lionagi/core/form/field_validator.py +0 -287
- lionagi/core/form/form.py +0 -302
- lionagi/core/form/mixin.py +0 -214
- lionagi/core/form/scored_form.py +0 -13
- lionagi/core/generic/action.py +0 -26
- lionagi/core/generic/component.py +0 -532
- lionagi/core/generic/condition.py +0 -46
- lionagi/core/generic/mail.py +0 -90
- lionagi/core/generic/mailbox.py +0 -36
- lionagi/core/generic/relation.py +0 -70
- lionagi/core/generic/signal.py +0 -22
- lionagi/core/generic/structure.py +0 -362
- lionagi/core/generic/transfer.py +0 -20
- lionagi/core/generic/work.py +0 -40
- lionagi/core/graph/graph.py +0 -126
- lionagi/core/graph/tree.py +0 -190
- lionagi/core/mail/schema.py +0 -63
- lionagi/core/messages/schema.py +0 -325
- lionagi/core/tool/__init__.py +0 -5
- lionagi/core/tool/tool.py +0 -28
- lionagi/core/tool/tool_manager.py +0 -283
- lionagi/experimental/report/form.py +0 -64
- lionagi/experimental/report/report.py +0 -138
- lionagi/experimental/report/util.py +0 -47
- lionagi/experimental/tool/function_calling.py +0 -43
- lionagi/experimental/tool/manual.py +0 -66
- lionagi/experimental/tool/schema.py +0 -59
- lionagi/experimental/tool/tool_manager.py +0 -138
- lionagi/experimental/tool/util.py +0 -16
- lionagi/experimental/validator/rule.py +0 -139
- lionagi/experimental/validator/validator.py +0 -56
- lionagi/experimental/work/__init__.py +0 -10
- lionagi/experimental/work/async_queue.py +0 -54
- lionagi/experimental/work/schema.py +0 -73
- lionagi/experimental/work/work_function.py +0 -67
- lionagi/experimental/work/worker.py +0 -56
- lionagi/experimental/work2/form.py +0 -371
- lionagi/experimental/work2/report.py +0 -289
- lionagi/experimental/work2/schema.py +0 -30
- lionagi/experimental/work2/tests.py +0 -72
- lionagi/experimental/work2/work_function.py +0 -89
- lionagi/experimental/work2/worker.py +0 -12
- lionagi/integrations/bridge/llamaindex_/get_index.py +0 -294
- lionagi/tests/test_core/generic/test_component.py +0 -89
- lionagi/tests/test_core/test_base_branch.py +0 -426
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -313
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.1.2.dist-info/LICENSE +0 -9
- lionagi-0.1.2.dist-info/METADATA +0 -174
- lionagi-0.1.2.dist-info/RECORD +0 -206
- /lionagi/core/{branch → _setting}/__init__.py +0 -0
- /lionagi/core/{execute → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow → agent/learn}/__init__.py +0 -0
- /lionagi/core/{form → agent/plan}/__init__.py +0 -0
- /lionagi/core/{branch/executable_branch.py → agent/plan/plan.py} +0 -0
- /lionagi/core/{graph → director}/__init__.py +0 -0
- /lionagi/core/{messages → engine}/__init__.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/engine}/sandbox_.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/executor}/__init__.py +0 -0
- /lionagi/{experimental/directive/template_ → core/rule}/__init__.py +0 -0
- /lionagi/{experimental/report → core/unit/template}/__init__.py +0 -0
- /lionagi/{experimental/tool → core/validator}/__init__.py +0 -0
- /lionagi/{experimental/validator → core/work}/__init__.py +0 -0
- /lionagi/experimental/{work2 → compressor}/__init__.py +0 -0
- /lionagi/{core/flow/mono_chat_mixin.py → experimental/directive/template/__init__.py} +0 -0
- /lionagi/experimental/directive/{schema.py → template/schema.py} +0 -0
- /lionagi/experimental/{work2/util.py → evaluator/__init__.py} +0 -0
- /lionagi/experimental/{work2/work.py → knowledge/__init__.py} +0 -0
- /lionagi/{tests/libs/test_async.py → experimental/knowledge/graph.py} +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
lionagi/libs/ln_image.py
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
import base64
|
2
|
+
import numpy as np
|
3
|
+
from typing import Optional
|
4
|
+
from .sys_util import SysUtil
|
5
|
+
|
6
|
+
|
7
|
+
class ImageUtil:
    """Utility helpers for reading, encoding, and token-costing images.

    OpenCV (``cv2``) and Pillow are imported lazily inside each method so the
    module itself can be imported without those optional dependencies
    installed; ``SysUtil.check_import`` installs/validates them on first use.
    """

    @staticmethod
    def preprocess_image(
        image: np.ndarray, color_conversion_code: Optional[int] = None
    ) -> np.ndarray:
        """Convert the color space of ``image``.

        Args:
            image: Image array as produced by ``cv2.imread`` (BGR by default).
            color_conversion_code: A ``cv2.COLOR_*`` constant; defaults to
                ``cv2.COLOR_BGR2RGB``.

        Returns:
            The color-converted image array.
        """
        SysUtil.check_import("cv2", pip_name="opencv-python")
        import cv2

        color_conversion_code = color_conversion_code or cv2.COLOR_BGR2RGB
        return cv2.cvtColor(image, color_conversion_code)

    @staticmethod
    def encode_image_to_base64(image: np.ndarray, file_extension: str = ".jpg") -> str:
        """Encode an image array to a base64 string in the given container format.

        Args:
            image: Image array to encode.
            file_extension: Target format understood by ``cv2.imencode``.

        Returns:
            The base64-encoded image bytes as a UTF-8 string.

        Raises:
            ValueError: If ``cv2.imencode`` cannot encode the image.
        """
        SysUtil.check_import("cv2", pip_name="opencv-python")
        import cv2

        success, buffer = cv2.imencode(file_extension, image)
        if not success:
            raise ValueError(f"Could not encode image to {file_extension} format.")
        return base64.b64encode(buffer).decode("utf-8")

    @staticmethod
    def read_image_to_array(
        image_path: str, color_flag: Optional[int] = None
    ) -> np.ndarray:
        """Read an image file into an array.

        Args:
            image_path: Path of the image file.
            color_flag: A ``cv2.IMREAD_*`` flag; defaults to ``cv2.IMREAD_COLOR``.

        Returns:
            The decoded image array.

        Raises:
            ValueError: If the file cannot be read or decoded.
        """
        SysUtil.check_import("cv2", pip_name="opencv-python")
        import cv2

        # BUGFIX: resolve the default *before* calling imread — the original
        # passed a possibly-None flag to cv2.imread (TypeError) and only
        # assigned the default afterwards (dead code).  ``is None`` is used
        # instead of ``or`` so cv2.IMREAD_GRAYSCALE (== 0) is not clobbered.
        color_flag = cv2.IMREAD_COLOR if color_flag is None else color_flag
        image = cv2.imread(image_path, color_flag)
        if image is None:
            raise ValueError(f"Could not read image from path: {image_path}")
        return image

    @staticmethod
    def read_image_to_base64(
        image_path: str,
        color_flag: Optional[int] = None,
    ) -> str:
        """Read an image file and return its contents as a base64 string.

        The image is re-encoded using the same extension as the source file.
        """
        image_path = str(image_path)
        image = ImageUtil.read_image_to_array(image_path, color_flag)

        file_extension = "." + image_path.split(".")[-1]
        return ImageUtil.encode_image_to_base64(image, file_extension)

    @staticmethod
    def calculate_image_token_usage_from_base64(image_base64: str, detail):
        """
        Calculate the token usage for processing OpenAI images from a base64-encoded string.

        Parameters:
            image_base64 (str): The base64-encoded string of the image,
                optionally prefixed with ``data:image/jpeg;base64,``.
            detail (str): The detail level of the image, either 'low' or 'high'.

        Returns:
            int: The total token cost for processing the image.
        """
        from io import BytesIO
        from PIL import Image

        # Strip an optional data-URL prefix and any surrounding braces.
        if "data:image/jpeg;base64," in image_base64:
            image_base64 = image_base64.split("data:image/jpeg;base64,")[1]
        # BUGFIX: str.strip returns a new string; the original discarded
        # the result, so braces were never actually removed.
        image_base64 = image_base64.strip("{}")

        image_data = base64.b64decode(image_base64)
        image = Image.open(BytesIO(image_data))

        width, height = image.size

        # Low-detail images cost a flat 85 tokens regardless of size.
        if detail == "low":
            return 85

        # Scale to fit within a 2048 x 2048 square.
        max_dimension = 2048
        if width > max_dimension or height > max_dimension:
            scale_factor = max_dimension / max(width, height)
            width = int(width * scale_factor)
            height = int(height * scale_factor)

        # Then scale such that the shortest side is at most 768px.
        min_side = 768
        if min(width, height) > min_side:
            scale_factor = min_side / min(width, height)
            width = int(width * scale_factor)
            height = int(height * scale_factor)

        # 170 tokens per full 512px tile, plus a flat 85 base tokens.
        num_squares = (width // 512) * (height // 512)
        token_cost = 170 * num_squares + 85

        return token_cost
|
lionagi/libs/ln_nested.py
CHANGED
@@ -1,3 +1,19 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
from collections import defaultdict
|
2
18
|
from itertools import chain
|
3
19
|
from typing import Any, Generator, Callable
|
@@ -52,7 +68,7 @@ def nset(nested_structure: dict | list, indices: list[int | str], value: Any) ->
|
|
52
68
|
def nget(
|
53
69
|
nested_structure: dict | list,
|
54
70
|
indices: list[int | str],
|
55
|
-
default
|
71
|
+
default=...,
|
56
72
|
) -> Any:
|
57
73
|
"""
|
58
74
|
retrieves a value from a nested list or dictionary structure, with an option to
|
@@ -98,12 +114,12 @@ def nget(
|
|
98
114
|
return target_container[last_index]
|
99
115
|
elif isinstance(target_container, dict) and last_index in target_container:
|
100
116
|
return target_container[last_index]
|
101
|
-
elif default is not
|
117
|
+
elif default is not ...:
|
102
118
|
return default
|
103
119
|
else:
|
104
120
|
raise LookupError("Target not found and no default value provided.")
|
105
121
|
except (IndexError, KeyError, TypeError):
|
106
|
-
if default is not
|
122
|
+
if default is not ...:
|
107
123
|
return default
|
108
124
|
else:
|
109
125
|
raise LookupError("Target not found and no default value provided.")
|
@@ -116,7 +132,7 @@ def nmerge(
|
|
116
132
|
*,
|
117
133
|
overwrite: bool = False,
|
118
134
|
dict_sequence: bool = False,
|
119
|
-
sequence_separator: str = "_",
|
135
|
+
sequence_separator: str = "[^_^]",
|
120
136
|
sort_list: bool = False,
|
121
137
|
custom_sort: Callable[[Any], Any] | None = None,
|
122
138
|
) -> dict | list:
|
@@ -176,7 +192,7 @@ def flatten(
|
|
176
192
|
/,
|
177
193
|
*,
|
178
194
|
parent_key: str = "",
|
179
|
-
sep: str = "_",
|
195
|
+
sep: str = "[^_^]",
|
180
196
|
max_depth: int | None = None,
|
181
197
|
inplace: bool = False,
|
182
198
|
dict_only: bool = False,
|
@@ -238,7 +254,7 @@ def unflatten(
|
|
238
254
|
flat_dict: dict[str, Any],
|
239
255
|
/,
|
240
256
|
*,
|
241
|
-
sep: str = "_",
|
257
|
+
sep: str = "[^_^]",
|
242
258
|
custom_logic: Callable[[str], Any] | None = None,
|
243
259
|
max_depth: int | None = None,
|
244
260
|
) -> dict | list:
|
@@ -330,7 +346,7 @@ def ninsert(
|
|
330
346
|
indices: list[str | int],
|
331
347
|
value: Any,
|
332
348
|
*,
|
333
|
-
sep: str = "_",
|
349
|
+
sep: str = "[^_^]",
|
334
350
|
max_depth: int | None = None,
|
335
351
|
current_depth: int = 0,
|
336
352
|
) -> None:
|
@@ -393,12 +409,11 @@ def ninsert(
|
|
393
409
|
nested_structure[last_part] = value
|
394
410
|
|
395
411
|
|
396
|
-
# noinspection PyDecorator
|
397
412
|
def get_flattened_keys(
|
398
413
|
nested_structure: Any,
|
399
414
|
/,
|
400
415
|
*,
|
401
|
-
sep: str = "_",
|
416
|
+
sep: str = "[^_^]",
|
402
417
|
max_depth: int | None = None,
|
403
418
|
dict_only: bool = False,
|
404
419
|
inplace: bool = False,
|
@@ -448,7 +463,7 @@ def _dynamic_flatten_in_place(
|
|
448
463
|
/,
|
449
464
|
*,
|
450
465
|
parent_key: str = "",
|
451
|
-
sep: str = "_",
|
466
|
+
sep: str = "[^_^]",
|
452
467
|
max_depth: int | None = None,
|
453
468
|
current_depth: int = 0,
|
454
469
|
dict_only: bool = False,
|
@@ -581,7 +596,7 @@ def _deep_update(original: dict, update: dict) -> dict:
|
|
581
596
|
def _dynamic_flatten_generator(
|
582
597
|
nested_structure: Any,
|
583
598
|
parent_key: tuple[str, ...],
|
584
|
-
sep: str = "_",
|
599
|
+
sep: str = "[^_^]",
|
585
600
|
max_depth: int | None = None,
|
586
601
|
current_depth: int = 0,
|
587
602
|
dict_only: bool = False,
|
lionagi/libs/ln_parse.py
CHANGED
@@ -1,9 +1,23 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
from collections.abc import Callable
|
2
18
|
import re
|
3
19
|
import inspect
|
4
20
|
import itertools
|
5
|
-
import contextlib
|
6
|
-
from functools import singledispatchmethod
|
7
21
|
from typing import Any
|
8
22
|
import numpy as np
|
9
23
|
import lionagi.libs.ln_convert as convert
|
@@ -103,12 +117,12 @@ class ParseUtil:
|
|
103
117
|
# inspired by langchain_core.output_parsers.json (MIT License)
|
104
118
|
# https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/output_parsers/json.py
|
105
119
|
@staticmethod
|
106
|
-
def
|
120
|
+
def extract_json_block(
|
107
121
|
str_to_parse: str,
|
108
122
|
language: str | None = None,
|
109
123
|
regex_pattern: str | None = None,
|
110
124
|
*,
|
111
|
-
parser: Callable[[str], Any],
|
125
|
+
parser: Callable[[str], Any] = None,
|
112
126
|
) -> Any:
|
113
127
|
"""
|
114
128
|
Extracts and parses a code block from Markdown content.
|
@@ -148,9 +162,37 @@ class ParseUtil:
|
|
148
162
|
raise ValueError(
|
149
163
|
f"No {language or 'specified'} code block found in the Markdown content."
|
150
164
|
)
|
165
|
+
if not match:
|
166
|
+
str_to_parse = str_to_parse.strip()
|
167
|
+
if str_to_parse.startswith("```json\n") and str_to_parse.endswith("\n```"):
|
168
|
+
str_to_parse = str_to_parse[8:-4].strip()
|
151
169
|
|
170
|
+
parser = parser or ParseUtil.fuzzy_parse_json
|
152
171
|
return parser(code_str)
|
153
172
|
|
173
|
+
@staticmethod
def extract_code_blocks(code):
    """Collect the contents of every fenced (```) block in *code*.

    Fence-delimiter lines themselves are dropped; the bodies of all
    blocks are joined with a blank line between them.  An unterminated
    trailing block is still included.
    """
    collected = []
    buffer = None  # None means we are currently outside a fenced block

    for raw_line in code.split("\n"):
        if raw_line.startswith("```"):
            if buffer is None:
                # Opening fence: start accumulating lines.
                buffer = []
            else:
                # Closing fence: flush the accumulated block.
                collected.append("\n".join(buffer))
                buffer = None
        elif buffer is not None:
            buffer.append(raw_line)

    # A non-empty buffer here means the final block was never closed.
    if buffer:
        collected.append("\n".join(buffer))

    return "\n\n".join(collected)
|
195
|
+
|
154
196
|
@staticmethod
|
155
197
|
def md_to_json(
|
156
198
|
str_to_parse: str,
|
@@ -181,7 +223,7 @@ class ParseUtil:
|
|
181
223
|
>>> md_to_json('```json\\n{"key": "value"}\\n```', expected_keys=['key'])
|
182
224
|
{'key': 'value'}
|
183
225
|
"""
|
184
|
-
json_obj = ParseUtil.
|
226
|
+
json_obj = ParseUtil.extract_json_block(
|
185
227
|
str_to_parse, language="json", parser=parser or ParseUtil.fuzzy_parse_json
|
186
228
|
)
|
187
229
|
|
@@ -385,7 +427,9 @@ class ParseUtil:
|
|
385
427
|
return type_mapping.get(py_type, "object")
|
386
428
|
|
387
429
|
@staticmethod
|
388
|
-
def _func_to_schema(
|
430
|
+
def _func_to_schema(
|
431
|
+
func, style="google", func_description=None, params_description=None
|
432
|
+
):
|
389
433
|
"""
|
390
434
|
Generates a schema description for a given function, using typing hints and
|
391
435
|
docstrings. The schema includes the function's name, description, and parameters.
|
@@ -412,9 +456,11 @@ class ParseUtil:
|
|
412
456
|
"""
|
413
457
|
# Extracting function name and docstring details
|
414
458
|
func_name = func.__name__
|
415
|
-
|
416
|
-
|
417
|
-
|
459
|
+
|
460
|
+
if not func_description:
|
461
|
+
func_description, _ = ParseUtil._extract_docstring_details(func, style)
|
462
|
+
if not params_description:
|
463
|
+
_, params_description = ParseUtil._extract_docstring_details(func, style)
|
418
464
|
|
419
465
|
# Extracting parameters with typing hints
|
420
466
|
sig = inspect.signature(func)
|
@@ -634,7 +680,7 @@ class StringMatch:
|
|
634
680
|
# Calculate Jaro-Winkler similarity scores for each potential match
|
635
681
|
scores = np.array(
|
636
682
|
[
|
637
|
-
score_func(
|
683
|
+
score_func(str(word), str(correct_word))
|
638
684
|
for correct_word in correct_words_list
|
639
685
|
]
|
640
686
|
)
|
@@ -648,26 +694,39 @@ class StringMatch:
|
|
648
694
|
|
649
695
|
if isinstance(out_, str):
|
650
696
|
# first try to parse it straight as a fuzzy json
|
697
|
+
|
651
698
|
try:
|
652
699
|
out_ = ParseUtil.fuzzy_parse_json(out_)
|
653
|
-
|
700
|
+
return StringMatch.correct_dict_keys(keys, out_)
|
701
|
+
|
702
|
+
except:
|
654
703
|
try:
|
655
|
-
# if failed we try to extract the json block and parse it
|
656
704
|
out_ = ParseUtil.md_to_json(out_)
|
705
|
+
return StringMatch.correct_dict_keys(keys, out_)
|
706
|
+
|
657
707
|
except Exception:
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
708
|
+
try:
|
709
|
+
# if failed we try to extract the json block and parse it
|
710
|
+
out_ = ParseUtil.md_to_json(out_)
|
711
|
+
return StringMatch.correct_dict_keys(keys, out_)
|
712
|
+
|
713
|
+
except Exception:
|
714
|
+
# if still failed we try to extract the json block using re and parse it again
|
715
|
+
match = re.search(r"```json\n({.*?})\n```", out_, re.DOTALL)
|
716
|
+
if match:
|
717
|
+
out_ = match.group(1)
|
665
718
|
try:
|
666
|
-
out_ = ParseUtil.fuzzy_parse_json(
|
667
|
-
|
668
|
-
|
719
|
+
out_ = ParseUtil.fuzzy_parse_json(out_)
|
720
|
+
return StringMatch.correct_dict_keys(keys, out_)
|
721
|
+
|
669
722
|
except:
|
670
|
-
|
723
|
+
try:
|
724
|
+
out_ = ParseUtil.fuzzy_parse_json(
|
725
|
+
out_.replace("'", '"')
|
726
|
+
)
|
727
|
+
return StringMatch.correct_dict_keys(keys, out_)
|
728
|
+
except:
|
729
|
+
pass
|
671
730
|
|
672
731
|
if isinstance(out_, dict):
|
673
732
|
try:
|
lionagi/libs/ln_queue.py
CHANGED
@@ -1,3 +1,19 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
"""
|
2
18
|
A class that manages asynchronous task processing with controlled concurrency.
|
3
19
|
"""
|
@@ -0,0 +1,164 @@
|
|
1
|
+
import tiktoken
|
2
|
+
import math
|
3
|
+
from .ln_convert import to_str
|
4
|
+
from .special_tokens import disallowed_tokens
|
5
|
+
|
6
|
+
|
7
|
+
class TokenizeUtil:
    """Helpers for tokenizing text with tiktoken and chunking text or tokens."""

    @staticmethod
    def tokenize(
        text,
        encoding_model=None,
        encoding_name=None,
        return_byte=False,
        disallowed_tokens=disallowed_tokens,
    ):
        """Tokenize ``text`` with a tiktoken encoding.

        Args:
            text: The string to tokenize.
            encoding_model: Optional model name used to look up an encoding.
            encoding_name: Explicit encoding name; falls back to "cl100k_base".
            return_byte: If True, return raw token ids instead of decoded strings.
            disallowed_tokens: Tokens whose ids are dropped from the output.

        Returns:
            A list of token ids (``return_byte=True``) or decoded token strings.
        """
        if encoding_model:
            try:
                encoding_name = tiktoken.encoding_name_for_model(encoding_model)
            except Exception:
                # Unknown model: keep any explicit name or use the default.
                encoding_name = encoding_name or "cl100k_base"

        # BUGFIX: the original only created an encoding for known/absent
        # names, leaving ``encoding = None`` for unknown names and crashing
        # with AttributeError below.  Fall back to cl100k_base instead.
        if not encoding_name or encoding_name not in tiktoken.list_encoding_names():
            encoding_name = "cl100k_base"
        encoding = tiktoken.get_encoding(encoding_name)

        # BUGFIX: the original built a list of id-*lists* (one per disallowed
        # token) and tested single ids against it, so nothing was ever
        # filtered.  Flatten into a set of ids and filter against that.
        disallowed_ids = set()
        for token in disallowed_tokens or []:
            disallowed_ids.update(encoding.encode(token))

        codes = encoding.encode(text)
        if disallowed_ids:
            codes = [code for code in codes if code not in disallowed_ids]

        if return_byte:
            return codes

        return [encoding.decode([code]) for code in codes]

    @staticmethod
    def chunk_by_chars(
        text: str, chunk_size: int, overlap: float, threshold: int
    ) -> list[str | None]:
        """
        Chunks the input text into smaller parts, with optional overlap and threshold for final chunk.

        Parameters:
            text (str): The input text to chunk.
            chunk_size (int): The size of each chunk, in characters.
            overlap (float): The amount of overlap between chunks, in characters
                (split evenly on both sides of each boundary).
            threshold (int): The minimum size of the final chunk.

        Returns:
            List[Union[str, None]]: A list of text chunks.

        Raises:
            ValueError: If an error occurs during chunking.
        """

        def _chunk_n1():
            # Everything fits in a single chunk.
            return [text]

        def _chunk_n2():
            chunks = [text[: chunk_size + overlap_size]]
            if len(text) - chunk_size > threshold:
                chunks.append(text[chunk_size - overlap_size :])
            else:
                # Tail too small to stand alone: emit the whole text instead.
                return _chunk_n1()
            return chunks

        def _chunk_n3():
            chunks = [text[: chunk_size + overlap_size]]
            for i in range(1, n_chunks - 1):
                start_idx = chunk_size * i - overlap_size
                end_idx = chunk_size * (i + 1) + overlap_size
                chunks.append(text[start_idx:end_idx])

            if len(text) - chunk_size * (n_chunks - 1) > threshold:
                chunks.append(text[chunk_size * (n_chunks - 1) - overlap_size :])
            else:
                # Fold a too-small tail into the previous chunk.
                chunks[-1] += text[chunk_size * (n_chunks - 1) + overlap_size :]
            return chunks

        try:
            if not isinstance(text, str):
                text = to_str(text)

            n_chunks = math.ceil(len(text) / chunk_size)
            overlap_size = int(overlap / 2)

            if n_chunks == 1:
                return _chunk_n1()
            elif n_chunks == 2:
                return _chunk_n2()
            elif n_chunks > 2:
                return _chunk_n3()
        except Exception as e:
            raise ValueError(f"An error occurred while chunking the text. {e}")

    @staticmethod
    def chunk_by_tokens(
        text: str,
        chunk_size: int,
        overlap: float,
        threshold: int,  # minimum size of the final chunk in number of tokens
        encoding_model=None,
        encoding_name=None,
        return_tokens=False,
        return_byte=False,
    ) -> list[str | None]:
        """Chunk ``text`` by token count, with overlap between chunks.

        NOTE(review): when the text fits in one chunk (and in the too-small
        two-chunk case) this returns the raw string rather than a one-element
        list, despite the annotation — kept as-is for backward compatibility.
        """
        tokens = TokenizeUtil.tokenize(
            text, encoding_model, encoding_name, return_byte=return_byte
        )

        n_chunks = math.ceil(len(tokens) / chunk_size)
        overlap_size = int(overlap * chunk_size / 2)
        residue = len(tokens) % chunk_size

        if n_chunks == 1:
            return text if not return_tokens else [tokens]

        elif n_chunks == 2:
            chunks = [tokens[: chunk_size + overlap_size]]
            if residue > threshold:
                chunks.append(tokens[chunk_size - overlap_size :])
                return (
                    [" ".join(chunk).strip() for chunk in chunks]
                    if not return_tokens
                    else chunks
                )
            else:
                return text if not return_tokens else [tokens]

        elif n_chunks > 2:
            chunks = [tokens[: chunk_size + overlap_size]]
            for i in range(1, n_chunks - 1):
                start_idx = chunk_size * i - overlap_size
                end_idx = chunk_size * (i + 1) + overlap_size
                chunks.append(tokens[start_idx:end_idx])

            if len(tokens) - chunk_size * (n_chunks - 1) > threshold:
                chunks.append(tokens[chunk_size * (n_chunks - 1) - overlap_size :])
            elif residue:
                # BUGFIX: the original did ``tokens[-residue:]`` without
                # checking residue; with residue == 0 that slice is the
                # *entire* token list, duplicating everything into the
                # last chunk.
                chunks[-1] += tokens[-residue:]

            return (
                [" ".join(chunk) for chunk in chunks] if not return_tokens else chunks
            )
|
lionagi/libs/ln_validate.py
CHANGED
@@ -1,3 +1,19 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
"""
|
2
18
|
This module provides functions for validating and fixing field values based on their data types.
|
3
19
|
|