lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +61 -3
- lionagi/core/__init__.py +0 -14
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/__init__.py +0 -3
- lionagi/core/agent/base_agent.py +45 -36
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/collections/_logger.py +319 -0
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/engine/branch_engine.py +333 -0
- lionagi/core/engine/instruction_map_engine.py +204 -0
- lionagi/core/engine/sandbox_.py +14 -0
- lionagi/core/engine/script_engine.py +99 -0
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/executor/graph_executor.py +330 -0
- lionagi/core/executor/neo4j_executor.py +384 -0
- lionagi/core/generic/__init__.py +7 -0
- lionagi/core/generic/edge.py +112 -0
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +220 -0
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +7 -3
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +142 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/__init__.py +0 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/__init__.py +0 -3
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +230 -902
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/__init__.py +0 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/__init__.py +0 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/__init__.py +0 -0
- lionagi/core/work/work.py +76 -0
- lionagi/core/work/work_function.py +101 -0
- lionagi/core/work/work_queue.py +103 -0
- lionagi/core/work/worker.py +258 -0
- lionagi/core/work/worklog.py +120 -0
- lionagi/experimental/__init__.py +0 -0
- lionagi/experimental/compressor/__init__.py +0 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/__init__.py +0 -0
- lionagi/experimental/directive/parser/base_parser.py +282 -0
- lionagi/experimental/directive/template/__init__.py +0 -0
- lionagi/experimental/directive/template/base_template.py +79 -0
- lionagi/experimental/directive/template/schema.py +36 -0
- lionagi/experimental/directive/tokenizer.py +73 -0
- lionagi/experimental/evaluator/__init__.py +0 -0
- lionagi/experimental/evaluator/ast_evaluator.py +131 -0
- lionagi/experimental/evaluator/base_evaluator.py +218 -0
- lionagi/experimental/knowledge/__init__.py +0 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/knowledge/graph.py +0 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/autogen_/__init__.py +0 -0
- lionagi/integrations/bridge/autogen_/autogen_.py +124 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/bridge/llamaindex_/llama_pack.py +227 -0
- lionagi/integrations/bridge/llamaindex_/node_parser.py +6 -9
- lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +1 -0
- lionagi/integrations/bridge/transformers_/__init__.py +0 -0
- lionagi/integrations/bridge/transformers_/install_.py +36 -0
- lionagi/integrations/chunker/__init__.py +0 -0
- lionagi/integrations/chunker/chunk.py +312 -0
- lionagi/integrations/config/oai_configs.py +38 -7
- lionagi/integrations/config/ollama_configs.py +1 -1
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/__init__.py +0 -0
- lionagi/integrations/loader/load.py +253 -0
- lionagi/integrations/loader/load_util.py +195 -0
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +7 -6
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -0
- lionagi/integrations/storage/neo4j.py +665 -0
- lionagi/integrations/storage/storage_util.py +287 -0
- lionagi/integrations/storage/structure_excel.py +285 -0
- lionagi/integrations/storage/to_csv.py +63 -0
- lionagi/integrations/storage/to_excel.py +83 -0
- lionagi/libs/__init__.py +26 -1
- lionagi/libs/ln_api.py +78 -23
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_knowledge_graph.py +405 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +110 -14
- lionagi/libs/ln_queue.py +117 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/{core/prompt/field_validator.py → libs/ln_validate.py} +79 -14
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +107 -2
- lionagi/lions/__init__.py +0 -0
- lionagi/lions/coder/__init__.py +0 -0
- lionagi/lions/coder/add_feature.py +20 -0
- lionagi/lions/coder/base_prompts.py +22 -0
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +168 -0
- lionagi/lions/coder/util.py +96 -0
- lionagi/lions/researcher/__init__.py +0 -0
- lionagi/lions/researcher/data_source/__init__.py +0 -0
- lionagi/lions/researcher/data_source/finhub_.py +191 -0
- lionagi/lions/researcher/data_source/google_.py +199 -0
- lionagi/lions/researcher/data_source/wiki_.py +96 -0
- lionagi/lions/researcher/data_source/yfinance_.py +21 -0
- lionagi/tests/integrations/__init__.py +0 -0
- lionagi/tests/libs/__init__.py +0 -0
- lionagi/tests/libs/test_field_validators.py +353 -0
- lionagi/tests/{test_libs → libs}/test_func_call.py +23 -21
- lionagi/tests/{test_libs → libs}/test_nested.py +36 -21
- lionagi/tests/{test_libs → libs}/test_parse.py +1 -1
- lionagi/tests/libs/test_queue.py +67 -0
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/__init__.py +0 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -292
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/LICENSE +12 -11
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/METADATA +19 -118
- lionagi-0.2.1.dist-info/RECORD +240 -0
- lionagi/core/branch/__init__.py +0 -4
- lionagi/core/branch/base_branch.py +0 -654
- lionagi/core/branch/branch.py +0 -471
- lionagi/core/branch/branch_flow_mixin.py +0 -96
- lionagi/core/branch/executable_branch.py +0 -347
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -6
- lionagi/core/direct/predict.py +0 -161
- lionagi/core/direct/score.py +0 -278
- lionagi/core/direct/select.py +0 -169
- lionagi/core/direct/utils.py +0 -87
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/flow/base/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -238
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -263
- lionagi/core/flow/monoflow/followup.py +0 -214
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -248
- lionagi/core/mail/schema.py +0 -56
- lionagi/core/messages/__init__.py +0 -3
- lionagi/core/messages/schema.py +0 -533
- lionagi/core/prompt/prompt_template.py +0 -316
- lionagi/core/schema/__init__.py +0 -22
- lionagi/core/schema/action_node.py +0 -29
- lionagi/core/schema/base_mixin.py +0 -296
- lionagi/core/schema/base_node.py +0 -199
- lionagi/core/schema/condition.py +0 -24
- lionagi/core/schema/data_logger.py +0 -354
- lionagi/core/schema/data_node.py +0 -93
- lionagi/core/schema/prompt_template.py +0 -67
- lionagi/core/schema/structure.py +0 -910
- lionagi/core/tool/__init__.py +0 -3
- lionagi/core/tool/tool_manager.py +0 -280
- lionagi/integrations/bridge/pydantic_/base_model.py +0 -7
- lionagi/tests/test_core/test_base_branch.py +0 -427
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -312
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.0.312.dist-info/RECORD +0 -111
- /lionagi/core/{branch/base → _setting}/__init__.py +0 -0
- /lionagi/core/{flow → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow/base → agent/learn}/__init__.py +0 -0
- /lionagi/core/{prompt → agent/plan}/__init__.py +0 -0
- /lionagi/core/{tool/manual.py → agent/plan/plan.py} +0 -0
- /lionagi/{tests/test_integrations → core/director}/__init__.py +0 -0
- /lionagi/{tests/test_libs → core/engine}/__init__.py +0 -0
- /lionagi/{tests/test_libs/test_async.py → core/executor/__init__.py} +0 -0
- /lionagi/tests/{test_libs → libs}/test_api.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_convert.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_sys_util.py +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
lionagi/libs/ln_parse.py
CHANGED
@@ -1,11 +1,28 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
17
|
+
from collections.abc import Callable
|
1
18
|
import re
|
2
19
|
import inspect
|
3
20
|
import itertools
|
4
|
-
from collections.abc import Callable
|
5
21
|
from typing import Any
|
6
22
|
import numpy as np
|
7
23
|
import lionagi.libs.ln_convert as convert
|
8
24
|
|
25
|
+
|
9
26
|
md_json_char_map = {"\n": "\\n", "\r": "\\r", "\t": "\\t", '"': '\\"'}
|
10
27
|
|
11
28
|
|
@@ -100,12 +117,12 @@ class ParseUtil:
|
|
100
117
|
# inspired by langchain_core.output_parsers.json (MIT License)
|
101
118
|
# https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/output_parsers/json.py
|
102
119
|
@staticmethod
|
103
|
-
def
|
120
|
+
def extract_json_block(
|
104
121
|
str_to_parse: str,
|
105
122
|
language: str | None = None,
|
106
123
|
regex_pattern: str | None = None,
|
107
124
|
*,
|
108
|
-
parser: Callable[[str], Any],
|
125
|
+
parser: Callable[[str], Any] = None,
|
109
126
|
) -> Any:
|
110
127
|
"""
|
111
128
|
Extracts and parses a code block from Markdown content.
|
@@ -145,9 +162,37 @@ class ParseUtil:
|
|
145
162
|
raise ValueError(
|
146
163
|
f"No {language or 'specified'} code block found in the Markdown content."
|
147
164
|
)
|
165
|
+
if not match:
|
166
|
+
str_to_parse = str_to_parse.strip()
|
167
|
+
if str_to_parse.startswith("```json\n") and str_to_parse.endswith("\n```"):
|
168
|
+
str_to_parse = str_to_parse[8:-4].strip()
|
148
169
|
|
170
|
+
parser = parser or ParseUtil.fuzzy_parse_json
|
149
171
|
return parser(code_str)
|
150
172
|
|
173
|
+
@staticmethod
|
174
|
+
def extract_code_blocks(code):
|
175
|
+
code_blocks = []
|
176
|
+
lines = code.split("\n")
|
177
|
+
inside_code_block = False
|
178
|
+
current_block = []
|
179
|
+
|
180
|
+
for line in lines:
|
181
|
+
if line.startswith("```"):
|
182
|
+
if inside_code_block:
|
183
|
+
code_blocks.append("\n".join(current_block))
|
184
|
+
current_block = []
|
185
|
+
inside_code_block = False
|
186
|
+
else:
|
187
|
+
inside_code_block = True
|
188
|
+
elif inside_code_block:
|
189
|
+
current_block.append(line)
|
190
|
+
|
191
|
+
if current_block:
|
192
|
+
code_blocks.append("\n".join(current_block))
|
193
|
+
|
194
|
+
return "\n\n".join(code_blocks)
|
195
|
+
|
151
196
|
@staticmethod
|
152
197
|
def md_to_json(
|
153
198
|
str_to_parse: str,
|
@@ -178,7 +223,7 @@ class ParseUtil:
|
|
178
223
|
>>> md_to_json('```json\\n{"key": "value"}\\n```', expected_keys=['key'])
|
179
224
|
{'key': 'value'}
|
180
225
|
"""
|
181
|
-
json_obj = ParseUtil.
|
226
|
+
json_obj = ParseUtil.extract_json_block(
|
182
227
|
str_to_parse, language="json", parser=parser or ParseUtil.fuzzy_parse_json
|
183
228
|
)
|
184
229
|
|
@@ -382,7 +427,9 @@ class ParseUtil:
|
|
382
427
|
return type_mapping.get(py_type, "object")
|
383
428
|
|
384
429
|
@staticmethod
|
385
|
-
def _func_to_schema(
|
430
|
+
def _func_to_schema(
|
431
|
+
func, style="google", func_description=None, params_description=None
|
432
|
+
):
|
386
433
|
"""
|
387
434
|
Generates a schema description for a given function, using typing hints and
|
388
435
|
docstrings. The schema includes the function's name, description, and parameters.
|
@@ -409,9 +456,11 @@ class ParseUtil:
|
|
409
456
|
"""
|
410
457
|
# Extracting function name and docstring details
|
411
458
|
func_name = func.__name__
|
412
|
-
|
413
|
-
|
414
|
-
|
459
|
+
|
460
|
+
if not func_description:
|
461
|
+
func_description, _ = ParseUtil._extract_docstring_details(func, style)
|
462
|
+
if not params_description:
|
463
|
+
_, params_description = ParseUtil._extract_docstring_details(func, style)
|
415
464
|
|
416
465
|
# Extracting parameters with typing hints
|
417
466
|
sig = inspect.signature(func)
|
@@ -590,14 +639,15 @@ class StringMatch:
|
|
590
639
|
return d[m][n]
|
591
640
|
|
592
641
|
@staticmethod
|
593
|
-
def
|
642
|
+
def correct_dict_keys(keys: dict | list[str], dict_, score_func=None):
|
594
643
|
if score_func is None:
|
595
644
|
score_func = StringMatch.jaro_winkler_similarity
|
596
|
-
|
645
|
+
|
646
|
+
fields_set = set(keys if isinstance(keys, list) else keys.keys())
|
597
647
|
corrected_out = {}
|
598
648
|
used_keys = set()
|
599
649
|
|
600
|
-
for k, v in
|
650
|
+
for k, v in dict_.items():
|
601
651
|
if k in fields_set:
|
602
652
|
corrected_out[k] = v
|
603
653
|
fields_set.remove(k) # Remove the matched key
|
@@ -614,8 +664,8 @@ class StringMatch:
|
|
614
664
|
fields_set.remove(best_match) # Remove the matched key
|
615
665
|
used_keys.add(best_match)
|
616
666
|
|
617
|
-
if len(used_keys) < len(
|
618
|
-
for k, v in
|
667
|
+
if len(used_keys) < len(dict_):
|
668
|
+
for k, v in dict_.items():
|
619
669
|
if k not in used_keys:
|
620
670
|
corrected_out[k] = v
|
621
671
|
|
@@ -630,10 +680,56 @@ class StringMatch:
|
|
630
680
|
# Calculate Jaro-Winkler similarity scores for each potential match
|
631
681
|
scores = np.array(
|
632
682
|
[
|
633
|
-
score_func(
|
683
|
+
score_func(str(word), str(correct_word))
|
634
684
|
for correct_word in correct_words_list
|
635
685
|
]
|
636
686
|
)
|
637
687
|
# Find the index of the highest score
|
638
688
|
max_score_index = np.argmax(scores)
|
639
689
|
return correct_words_list[max_score_index]
|
690
|
+
|
691
|
+
@staticmethod
def force_validate_dict(x, keys: dict | list[str]) -> dict:
    """
    Coerce ``x`` into a dict whose keys are corrected against ``keys``.

    A dict input is passed straight to ``StringMatch.correct_dict_keys``.
    A string input is parsed by trying, in order:

    1. ``ParseUtil.fuzzy_parse_json`` on the whole string,
    2. ``ParseUtil.md_to_json`` on the whole string,
    3. ``ParseUtil.fuzzy_parse_json`` on the first json-fenced ``{...}``
       block found with a regular expression,
    4. the same block with single quotes replaced by double quotes.

    The first attempt whose result also survives key correction wins. Every
    attempt starts from the original string, so a failed attempt cannot
    corrupt the input of the next one.

    Args:
        x: A dict, or a string expected to contain a JSON object.
        keys: The expected keys, as a list of names or a dict keyed by them.

    Returns:
        dict: The parsed dict with its keys corrected.

    Raises:
        ValueError: If ``x`` is neither a dict nor a string, or if no
            parsing strategy produces a dict that can be key-corrected.
    """
    if isinstance(x, dict):
        try:
            return StringMatch.correct_dict_keys(keys, x)
        except Exception as e:
            raise ValueError(f"Failed to force_validate_dict for input: {x}") from e

    if not isinstance(x, str):
        raise ValueError(f"Failed to force_validate_dict for input: {x}")

    attempts = [
        (ParseUtil.fuzzy_parse_json, x),
        (ParseUtil.md_to_json, x),
    ]

    # Last resort: pull the JSON object out of a json-fenced block by regex.
    match = re.search(r"```json\n({.*?})\n```", x, re.DOTALL)
    if match:
        block = match.group(1)
        attempts.append((ParseUtil.fuzzy_parse_json, block))
        attempts.append((ParseUtil.fuzzy_parse_json, block.replace("'", '"')))

    last_error = None
    for parse, text in attempts:
        try:
            return StringMatch.correct_dict_keys(keys, parse(text))
        except Exception as e:  # best-effort chain: remember and try the next one
            last_error = e

    raise ValueError(f"Failed to force_validate_dict for input: {x}") from last_error
|
lionagi/libs/ln_queue.py
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
17
|
+
"""
|
18
|
+
A class that manages asynchronous task processing with controlled concurrency.
|
19
|
+
"""
|
20
|
+
|
21
|
+
from typing import Any, Callable
|
22
|
+
import asyncio
|
23
|
+
from lionagi.libs import func_call
|
24
|
+
|
25
|
+
|
26
|
+
class AsyncQueue:
|
27
|
+
"""
|
28
|
+
This class handles the enqueueing and processing of tasks with a limit on
|
29
|
+
how many can run simultaneously, using an asyncio.Queue for task storage and
|
30
|
+
an asyncio.Semaphore to limit concurrency.
|
31
|
+
|
32
|
+
Attributes:
|
33
|
+
queue (asyncio.Queue): The queue to store tasks.
|
34
|
+
_stop_event (asyncio.Event): Event to signal processing should stop.
|
35
|
+
max_concurrent_tasks (int): Maximum number of tasks processed concurrently.
|
36
|
+
semaphore (asyncio.Semaphore): Controls concurrent access to task execution.
|
37
|
+
"""
|
38
|
+
|
39
|
+
def __init__(self, max_concurrent_tasks=5):
|
40
|
+
"""
|
41
|
+
Initializes the AsyncQueue with a concurrency limit.
|
42
|
+
|
43
|
+
Args:
|
44
|
+
max_concurrent_tasks (int): The maximum number of concurrent tasks
|
45
|
+
allowed. Default is 5.
|
46
|
+
"""
|
47
|
+
self.queue = asyncio.Queue()
|
48
|
+
self._stop_event = asyncio.Event()
|
49
|
+
self.max_concurrent_tasks = max_concurrent_tasks
|
50
|
+
self.semaphore = asyncio.Semaphore(max_concurrent_tasks)
|
51
|
+
|
52
|
+
async def enqueue(self, input_: Any) -> None:
|
53
|
+
"""
|
54
|
+
Enqueues an item to be processed asynchronously.
|
55
|
+
|
56
|
+
Args:
|
57
|
+
input_ (Any): The item to be enqueued.
|
58
|
+
"""
|
59
|
+
await self.queue.put(input_)
|
60
|
+
|
61
|
+
async def dequeue(self) -> Any:
|
62
|
+
"""
|
63
|
+
Dequeues an item for processing.
|
64
|
+
|
65
|
+
Returns:
|
66
|
+
Any: The dequeued item.
|
67
|
+
"""
|
68
|
+
return await self.queue.get()
|
69
|
+
|
70
|
+
async def join(self) -> None:
|
71
|
+
"""Waits for all items in the queue to be processed."""
|
72
|
+
await self.queue.join()
|
73
|
+
|
74
|
+
async def stop(self) -> None:
|
75
|
+
"""Signals the queue to stop processing new items."""
|
76
|
+
self._stop_event.set()
|
77
|
+
|
78
|
+
def stopped(self) -> bool:
|
79
|
+
"""
|
80
|
+
Checks if the stop signal has been issued.
|
81
|
+
|
82
|
+
Returns:
|
83
|
+
bool: True if the queue has been stopped, otherwise False.
|
84
|
+
"""
|
85
|
+
return self._stop_event.is_set()
|
86
|
+
|
87
|
+
async def process_requests(self, func: Callable, retry_kwargs: dict = {}) -> None:
|
88
|
+
"""
|
89
|
+
Processes tasks from the queue using the provided function with retries.
|
90
|
+
|
91
|
+
This method continuously processes tasks from the queue using the specified
|
92
|
+
function until a stop event is triggered. Handles concurrency using a
|
93
|
+
semaphore and manages task completion.
|
94
|
+
|
95
|
+
Args:
|
96
|
+
func (Callable): The function to process each task.
|
97
|
+
retry_kwargs (dict): Keyword arguments for retry behavior. Default is
|
98
|
+
an empty dictionary.
|
99
|
+
"""
|
100
|
+
tasks = set()
|
101
|
+
while not self.stopped():
|
102
|
+
if len(tasks) >= self.max_concurrent_tasks:
|
103
|
+
_, done = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
|
104
|
+
tasks.difference_update(done)
|
105
|
+
|
106
|
+
async with self.semaphore:
|
107
|
+
input_ = await self.dequeue()
|
108
|
+
if input_ is None:
|
109
|
+
await self.stop()
|
110
|
+
break
|
111
|
+
task = asyncio.create_task(
|
112
|
+
func_call.rcall(func, input_, **retry_kwargs)
|
113
|
+
)
|
114
|
+
tasks.add(task)
|
115
|
+
|
116
|
+
if tasks:
|
117
|
+
await asyncio.wait(tasks)
|
@@ -0,0 +1,164 @@
|
|
1
|
+
import tiktoken
|
2
|
+
import math
|
3
|
+
from .ln_convert import to_str
|
4
|
+
from .special_tokens import disallowed_tokens
|
5
|
+
|
6
|
+
|
7
|
+
class TokenizeUtil:
    """Static helpers for tokenizing text with tiktoken and chunking text."""

    @staticmethod
    def tokenize(
        text,
        encoding_model=None,
        encoding_name=None,
        return_byte=False,
        disallowed_tokens=disallowed_tokens,
    ):
        """
        Tokenize ``text`` with a tiktoken encoding.

        Args:
            text: The text to encode.
            encoding_model: Optional model name; when given, the encoding
                name is looked up with ``tiktoken.encoding_name_for_model``.
                If that lookup fails, ``encoding_name`` (or "cl100k_base")
                is used instead.
            encoding_name: Optional tiktoken encoding name. Defaults to
                "cl100k_base" when nothing else determines it.
            return_byte: If True, return the integer token ids; otherwise
                return each token decoded back to a string.
            disallowed_tokens: Token strings that are encoded up front.

        Returns:
            list: Token ids if ``return_byte`` is True, else token strings.

        Raises:
            ValueError: Raised by ``tiktoken.get_encoding`` when the
                resolved encoding name is unknown.
        """
        if encoding_model:
            try:
                encoding_name = tiktoken.encoding_name_for_model(encoding_model)
            except Exception:
                # Unknown model: fall back to the explicit name or default.
                encoding_name = encoding_name or "cl100k_base"

        # Let tiktoken reject unknown names explicitly instead of carrying
        # on with no encoding at all.
        encoding = tiktoken.get_encoding(encoding_name or "cl100k_base")

        special_encodings = (
            [encoding.encode(token) for token in disallowed_tokens]
            if disallowed_tokens
            else []
        )
        codes = encoding.encode(text)
        if special_encodings:
            # NOTE(review): each entry of special_encodings is a list of
            # ids while `code` is a single id, so this membership test
            # never removes anything. Kept as-is because the intended
            # filtering rule cannot be determined from here - confirm.
            codes = [code for code in codes if code not in special_encodings]

        if return_byte:
            return codes

        return [encoding.decode([code]) for code in codes]

    @staticmethod
    def chunk_by_chars(
        text: str, chunk_size: int, overlap: float, threshold: int
    ) -> list[str | None]:
        """
        Chunks the input text into smaller parts by character count.

        Parameters:
            text (str): The input text to chunk. Non-string input is
                converted with ``to_str`` first.

            chunk_size (int): The size of each chunk.

            overlap (float): The amount of overlap between chunks; half of
                it is added on each side of a chunk boundary.

            threshold (int): The minimum size of the final chunk. A shorter
                tail is merged into the previous chunk.

        Returns:
            list[str | None]: A list of text chunks. Text that fits in one
                chunk (including empty text) is returned as ``[text]``.

        Raises:
            ValueError: If an error occurs during chunking.
        """
        try:
            if not isinstance(text, str):
                text = to_str(text)

            n_chunks = math.ceil(len(text) / chunk_size)
            # NOTE(review): chunk_by_tokens scales overlap by chunk_size
            # (int(overlap * chunk_size / 2)); here overlap is used as an
            # absolute amount - confirm which is intended.
            overlap_size = int(overlap / 2)

            # Zero chunks (empty text) is treated like a single chunk
            # rather than falling through and returning None.
            if n_chunks <= 1:
                return [text]

            last_start = chunk_size * (n_chunks - 1)
            tail_is_big_enough = len(text) - last_start > threshold

            if n_chunks == 2:
                if not tail_is_big_enough:
                    return [text]
                return [
                    text[: chunk_size + overlap_size],
                    text[chunk_size - overlap_size :],
                ]

            chunks = [text[: chunk_size + overlap_size]]
            for i in range(1, n_chunks - 1):
                start_idx = chunk_size * i - overlap_size
                end_idx = chunk_size * (i + 1) + overlap_size
                chunks.append(text[start_idx:end_idx])

            if tail_is_big_enough:
                chunks.append(text[last_start - overlap_size :])
            else:
                # Previous chunk already ends at last_start + overlap_size.
                chunks[-1] += text[last_start + overlap_size :]

            return chunks

        except Exception as e:
            raise ValueError(f"An error occurred while chunking the text. {e}") from e

    @staticmethod
    def chunk_by_tokens(
        text: str,
        chunk_size: int,
        overlap: float,
        threshold: int,  # minimum size of the final chunk in number of tokens
        encoding_model=None,
        encoding_name=None,
        return_tokens=False,
        return_byte=False,
    ) -> list[str | None]:
        """
        Chunks the input text into smaller parts by token count.

        Parameters:
            text (str): The input text to chunk.

            chunk_size (int): The number of tokens per chunk.

            overlap (float): Overlap as a fraction of ``chunk_size``; half
                of it is added on each side of a chunk boundary.

            threshold (int): The minimum size of the final chunk in tokens.
                A shorter tail is merged into the previous chunk.

            encoding_model: Forwarded to ``TokenizeUtil.tokenize``.

            encoding_name: Forwarded to ``TokenizeUtil.tokenize``.

            return_tokens (bool): If True, return lists of tokens instead
                of space-joined strings.

            return_byte (bool): Forwarded to ``TokenizeUtil.tokenize``.

        Returns:
            list: Always a list - of token lists when ``return_tokens`` is
                True, otherwise of strings. Text that fits in one chunk is
                returned as ``[text]`` (or ``[tokens]``).
        """
        tokens = TokenizeUtil.tokenize(
            text, encoding_model, encoding_name, return_byte=return_byte
        )

        n_chunks = math.ceil(len(tokens) / chunk_size)
        overlap_size = int(overlap * chunk_size / 2)

        # Single (or empty) chunk: always hand back a list.
        if n_chunks <= 1:
            return [tokens] if return_tokens else [text]

        last_start = chunk_size * (n_chunks - 1)
        # Measure the real tail length; len(tokens) % chunk_size is 0 when
        # the tail is a full chunk.
        tail_is_big_enough = len(tokens) - last_start > threshold

        if n_chunks == 2:
            if not tail_is_big_enough:
                return [tokens] if return_tokens else [text]
            chunks = [
                tokens[: chunk_size + overlap_size],
                tokens[chunk_size - overlap_size :],
            ]
            return (
                chunks
                if return_tokens
                else [" ".join(chunk).strip() for chunk in chunks]
            )

        chunks = [tokens[: chunk_size + overlap_size]]
        for i in range(1, n_chunks - 1):
            start_idx = chunk_size * i - overlap_size
            end_idx = chunk_size * (i + 1) + overlap_size
            chunks.append(tokens[start_idx:end_idx])

        if tail_is_big_enough:
            chunks.append(tokens[last_start - overlap_size :])
        else:
            # Append only what the previous chunk does not already cover.
            chunks[-1] += tokens[last_start + overlap_size :]

        return chunks if return_tokens else [" ".join(chunk) for chunk in chunks]
|
@@ -1,3 +1,19 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
1
17
|
"""
|
2
18
|
This module provides functions for validating and fixing field values based on their data types.
|
3
19
|
|
@@ -6,7 +22,34 @@ including numeric, boolean, string, and enum. It also provides a dictionary `val
|
|
6
22
|
maps data types to their corresponding validation functions.
|
7
23
|
"""
|
8
24
|
|
9
|
-
from
|
25
|
+
from .ln_convert import to_str, is_same_dtype, to_list, to_dict, to_num, strip_lower
|
26
|
+
from .ln_parse import StringMatch, ParseUtil
|
27
|
+
|
28
|
+
|
29
|
+
def check_dict_field(x, keys: list[str] | dict, fix_=True, **kwargs):
|
30
|
+
if isinstance(x, dict):
|
31
|
+
return x
|
32
|
+
if fix_:
|
33
|
+
try:
|
34
|
+
x = to_str(x)
|
35
|
+
return StringMatch.force_validate_dict(x, keys=keys, **kwargs)
|
36
|
+
except Exception as e:
|
37
|
+
raise ValueError("Invalid dict field type.") from e
|
38
|
+
raise ValueError(f"Default value for DICT must be a dict, got {type(x).__name__}")
|
39
|
+
|
40
|
+
|
41
|
+
def check_action_field(x, fix_=True, **kwargs):
    """
    Validate that ``x`` is a list of action dicts, repairing it if needed.

    ``x`` is accepted unchanged when it is a list, ``is_same_dtype(x, dict)``
    holds, and every element passes ``_has_action_keys``. Otherwise it is
    handed to ``_fix_action_field``, with ``fix_`` passed as that function's
    second positional argument.

    Args:
        x: The value to validate.
        fix_: Forwarded to ``_fix_action_field``.
        **kwargs: Accepted but not used by this function.

    Returns:
        ``x`` itself when already valid, else the repaired list.

    Raises:
        ValueError: If ``_fix_action_field`` raises.
    """
    already_valid = (
        isinstance(x, list)
        and is_same_dtype(x, dict)
        and all(_has_action_keys(item) for item in x)
    )
    if already_valid:
        return x

    try:
        return _fix_action_field(x, fix_)
    except Exception as e:
        raise ValueError("Invalid action field type.") from e
|
10
53
|
|
11
54
|
|
12
55
|
def check_number_field(x, fix_=True, **kwargs):
|
@@ -109,7 +152,7 @@ def check_enum_field(x, choices, fix_=True, **kwargs):
|
|
109
152
|
Raises:
|
110
153
|
ValueError: If the value is not a valid enum field and cannot be fixed.
|
111
154
|
"""
|
112
|
-
same_dtype, dtype_ =
|
155
|
+
same_dtype, dtype_ = is_same_dtype(choices, return_dtype=True)
|
113
156
|
if not same_dtype:
|
114
157
|
raise ValueError(
|
115
158
|
f"Field type ENUM requires all choices to be of the same type, got {choices}"
|
@@ -133,6 +176,30 @@ def check_enum_field(x, choices, fix_=True, **kwargs):
|
|
133
176
|
return x
|
134
177
|
|
135
178
|
|
179
|
+
def _has_action_keys(dict_):
    """
    Report whether ``dict_`` contains both action keys.

    Args:
        dict_: The mapping to inspect.

    Returns:
        bool: True if both "function" and "arguments" are keys of
            ``dict_``, regardless of key order or extra keys.
    """
    # Subset test on the keys view; comparing key *lists* with >= would be
    # a lexicographic comparison that depends on insertion order.
    return {"function", "arguments"} <= dict_.keys()
|
181
|
+
|
182
|
+
|
183
|
+
def _fix_action_field(x, discard_=True):
    """
    Coerce ``x`` into a list of dicts that pass ``_has_action_keys``.

    A string input is first parsed with ``ParseUtil.fuzzy_parse_json``. The
    value is then converted with ``to_list`` and each element with
    ``to_dict``; elements that pass ``_has_action_keys`` are kept.

    Args:
        x: A string, or a value convertible to a list of dict-like items.
        discard_: If True, elements that fail ``_has_action_keys`` are
            silently dropped; if False, the first such element raises.

    Returns:
        list: The elements that passed ``_has_action_keys``.

    Raises:
        ValueError: If conversion fails, or an element is invalid while
            ``discard_`` is False.
    """
    if isinstance(x, str):
        x = ParseUtil.fuzzy_parse_json(x)

    kept = []
    try:
        for candidate in to_list(x):
            action = to_dict(candidate)
            if _has_action_keys(action):
                kept.append(action)
                continue
            if not discard_:
                raise ValueError(f"Invalid action field: {action}")
    except Exception as e:
        raise ValueError(f"Invalid action field: {e}") from e

    return kept
|
201
|
+
|
202
|
+
|
136
203
|
def _fix_number_field(x, *args, **kwargs):
|
137
204
|
"""
|
138
205
|
Attempts to fix an invalid numeric field value.
|
@@ -149,7 +216,7 @@ def _fix_number_field(x, *args, **kwargs):
|
|
149
216
|
ValueError: If the value cannot be converted into a valid numeric value.
|
150
217
|
"""
|
151
218
|
try:
|
152
|
-
x =
|
219
|
+
x = to_num(x, *args, **kwargs)
|
153
220
|
if isinstance(x, (int, float)):
|
154
221
|
return x
|
155
222
|
raise ValueError(f"Failed to convert {x} into a numeric value")
|
@@ -171,17 +238,13 @@ def _fix_bool_field(x):
|
|
171
238
|
ValueError: If the value cannot be converted into a valid boolean value.
|
172
239
|
"""
|
173
240
|
try:
|
174
|
-
x =
|
175
|
-
if
|
176
|
-
convert.strip_lower(x) in ["true", "1", "correct", "yes"]
|
177
|
-
or convert.to_num(x) == 1
|
178
|
-
):
|
241
|
+
x = strip_lower(to_str(x))
|
242
|
+
if x in ["true", "1", "correct", "yes"]:
|
179
243
|
return True
|
180
|
-
|
181
|
-
|
182
|
-
or convert.to_num(x) == 0
|
183
|
-
):
|
244
|
+
|
245
|
+
elif x in ["false", "0", "incorrect", "no", "none", "n/a"]:
|
184
246
|
return False
|
247
|
+
|
185
248
|
raise ValueError(f"Failed to convert {x} into a boolean value")
|
186
249
|
except Exception as e:
|
187
250
|
raise ValueError(f"Failed to convert {x} into a boolean value") from e
|
@@ -201,7 +264,7 @@ def _fix_str_field(x):
|
|
201
264
|
ValueError: If the value cannot be converted into a valid string value.
|
202
265
|
"""
|
203
266
|
try:
|
204
|
-
x =
|
267
|
+
x = to_str(x)
|
205
268
|
if isinstance(x, str):
|
206
269
|
return x
|
207
270
|
raise ValueError(f"Failed to convert {x} into a string value")
|
@@ -225,7 +288,7 @@ def _fix_enum_field(x, choices, **kwargs):
|
|
225
288
|
ValueError: If the value cannot be converted into a valid enum value.
|
226
289
|
"""
|
227
290
|
try:
|
228
|
-
x =
|
291
|
+
x = to_str(x)
|
229
292
|
return StringMatch.choose_most_similar(x, choices, **kwargs)
|
230
293
|
except Exception as e:
|
231
294
|
raise ValueError(f"Failed to convert {x} into one of the choices") from e
|
@@ -236,4 +299,6 @@ validation_funcs = {
|
|
236
299
|
"bool": check_bool_field,
|
237
300
|
"str": check_str_field,
|
238
301
|
"enum": check_enum_field,
|
302
|
+
"action": check_action_field,
|
303
|
+
"dict": check_dict_field,
|
239
304
|
}
|