lionagi 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- lionagi/__init__.py +60 -5
- lionagi/core/__init__.py +0 -25
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/base_agent.py +27 -13
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/{generic/data_logger.py → collections/_logger.py} +69 -55
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/{execute/branch_executor.py → engine/branch_engine.py} +134 -97
- lionagi/core/{execute/instruction_map_executor.py → engine/instruction_map_engine.py} +80 -55
- lionagi/{experimental/directive/evaluator → core/engine}/script_engine.py +17 -1
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/{execute/structure_executor.py → executor/graph_executor.py} +62 -66
- lionagi/core/{execute → executor}/neo4j_executor.py +70 -67
- lionagi/core/generic/__init__.py +3 -33
- lionagi/core/generic/edge.py +29 -79
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +156 -221
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +12 -0
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +139 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +229 -903
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/work.py +76 -0
- lionagi/core/work/work_function.py +101 -0
- lionagi/core/work/work_queue.py +103 -0
- lionagi/core/work/worker.py +258 -0
- lionagi/core/work/worklog.py +120 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/base_parser.py +69 -2
- lionagi/experimental/directive/{template_ → template}/base_template.py +17 -1
- lionagi/{libs/ln_tokenizer.py → experimental/directive/tokenizer.py} +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/ast_evaluator.py +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/base_evaluator.py +16 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/chunker/chunk.py +161 -24
- lionagi/integrations/config/oai_configs.py +34 -3
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/load.py +122 -21
- lionagi/integrations/loader/load_util.py +6 -77
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +6 -5
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -3
- lionagi/integrations/storage/neo4j.py +52 -60
- lionagi/integrations/storage/storage_util.py +44 -46
- lionagi/integrations/storage/structure_excel.py +43 -26
- lionagi/integrations/storage/to_excel.py +11 -4
- lionagi/libs/__init__.py +22 -1
- lionagi/libs/ln_api.py +75 -20
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +82 -23
- lionagi/libs/ln_queue.py +16 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/libs/ln_validate.py +16 -0
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +95 -24
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +50 -3
- lionagi/lions/coder/util.py +30 -25
- lionagi/tests/libs/test_func_call.py +23 -21
- lionagi/tests/libs/test_nested.py +36 -21
- lionagi/tests/libs/test_parse.py +1 -1
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -294
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- lionagi-0.2.1.dist-info/LICENSE +202 -0
- lionagi-0.2.1.dist-info/METADATA +272 -0
- lionagi-0.2.1.dist-info/RECORD +240 -0
- lionagi/core/branch/base.py +0 -653
- lionagi/core/branch/branch.py +0 -474
- lionagi/core/branch/flow_mixin.py +0 -96
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -19
- lionagi/core/direct/cot.py +0 -123
- lionagi/core/direct/plan.py +0 -164
- lionagi/core/direct/predict.py +0 -166
- lionagi/core/direct/react.py +0 -171
- lionagi/core/direct/score.py +0 -279
- lionagi/core/direct/select.py +0 -170
- lionagi/core/direct/sentiment.py +0 -1
- lionagi/core/direct/utils.py +0 -110
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/execute/base_executor.py +0 -47
- lionagi/core/flow/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -240
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -253
- lionagi/core/flow/monoflow/followup.py +0 -215
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -251
- lionagi/core/form/action_form.py +0 -26
- lionagi/core/form/field_validator.py +0 -287
- lionagi/core/form/form.py +0 -302
- lionagi/core/form/mixin.py +0 -214
- lionagi/core/form/scored_form.py +0 -13
- lionagi/core/generic/action.py +0 -26
- lionagi/core/generic/component.py +0 -532
- lionagi/core/generic/condition.py +0 -46
- lionagi/core/generic/mail.py +0 -90
- lionagi/core/generic/mailbox.py +0 -36
- lionagi/core/generic/relation.py +0 -70
- lionagi/core/generic/signal.py +0 -22
- lionagi/core/generic/structure.py +0 -362
- lionagi/core/generic/transfer.py +0 -20
- lionagi/core/generic/work.py +0 -40
- lionagi/core/graph/graph.py +0 -126
- lionagi/core/graph/tree.py +0 -190
- lionagi/core/mail/schema.py +0 -63
- lionagi/core/messages/schema.py +0 -325
- lionagi/core/tool/__init__.py +0 -5
- lionagi/core/tool/tool.py +0 -28
- lionagi/core/tool/tool_manager.py +0 -283
- lionagi/experimental/report/form.py +0 -64
- lionagi/experimental/report/report.py +0 -138
- lionagi/experimental/report/util.py +0 -47
- lionagi/experimental/tool/function_calling.py +0 -43
- lionagi/experimental/tool/manual.py +0 -66
- lionagi/experimental/tool/schema.py +0 -59
- lionagi/experimental/tool/tool_manager.py +0 -138
- lionagi/experimental/tool/util.py +0 -16
- lionagi/experimental/validator/rule.py +0 -139
- lionagi/experimental/validator/validator.py +0 -56
- lionagi/experimental/work/__init__.py +0 -10
- lionagi/experimental/work/async_queue.py +0 -54
- lionagi/experimental/work/schema.py +0 -73
- lionagi/experimental/work/work_function.py +0 -67
- lionagi/experimental/work/worker.py +0 -56
- lionagi/experimental/work2/form.py +0 -371
- lionagi/experimental/work2/report.py +0 -289
- lionagi/experimental/work2/schema.py +0 -30
- lionagi/experimental/work2/tests.py +0 -72
- lionagi/experimental/work2/work_function.py +0 -89
- lionagi/experimental/work2/worker.py +0 -12
- lionagi/integrations/bridge/llamaindex_/get_index.py +0 -294
- lionagi/tests/test_core/generic/test_component.py +0 -89
- lionagi/tests/test_core/test_base_branch.py +0 -426
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -313
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.1.2.dist-info/LICENSE +0 -9
- lionagi-0.1.2.dist-info/METADATA +0 -174
- lionagi-0.1.2.dist-info/RECORD +0 -206
- /lionagi/core/{branch → _setting}/__init__.py +0 -0
- /lionagi/core/{execute → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow → agent/learn}/__init__.py +0 -0
- /lionagi/core/{form → agent/plan}/__init__.py +0 -0
- /lionagi/core/{branch/executable_branch.py → agent/plan/plan.py} +0 -0
- /lionagi/core/{graph → director}/__init__.py +0 -0
- /lionagi/core/{messages → engine}/__init__.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/engine}/sandbox_.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/executor}/__init__.py +0 -0
- /lionagi/{experimental/directive/template_ → core/rule}/__init__.py +0 -0
- /lionagi/{experimental/report → core/unit/template}/__init__.py +0 -0
- /lionagi/{experimental/tool → core/validator}/__init__.py +0 -0
- /lionagi/{experimental/validator → core/work}/__init__.py +0 -0
- /lionagi/experimental/{work2 → compressor}/__init__.py +0 -0
- /lionagi/{core/flow/mono_chat_mixin.py → experimental/directive/template/__init__.py} +0 -0
- /lionagi/experimental/directive/{schema.py → template/schema.py} +0 -0
- /lionagi/experimental/{work2/util.py → evaluator/__init__.py} +0 -0
- /lionagi/experimental/{work2/work.py → knowledge/__init__.py} +0 -0
- /lionagi/{tests/libs/test_async.py → experimental/knowledge/graph.py} +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,8 @@
|
|
1
1
|
from typing import Union, Callable
|
2
2
|
|
3
3
|
from lionagi.libs import func_call
|
4
|
+
from lionagi.libs.ln_convert import to_list
|
5
|
+
from lionagi.core.collections import pile
|
4
6
|
from lionagi.core.generic import Node
|
5
7
|
from ..bridge.langchain_.langchain_bridge import LangchainBridge
|
6
8
|
from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge
|
@@ -10,7 +12,20 @@ from ..loader.load_util import ChunkerType, file_to_chunks, _datanode_parser
|
|
10
12
|
|
11
13
|
|
12
14
|
def datanodes_convert(documents, chunker_type):
|
15
|
+
"""
|
16
|
+
Converts documents to the specified chunker type.
|
13
17
|
|
18
|
+
Args:
|
19
|
+
documents (list): List of documents to be converted.
|
20
|
+
chunker_type (ChunkerType): The type of chunker to convert the documents to.
|
21
|
+
|
22
|
+
Returns:
|
23
|
+
list: The converted documents.
|
24
|
+
|
25
|
+
Example usage:
|
26
|
+
>>> documents = [Node(...), Node(...)]
|
27
|
+
>>> converted_docs = datanodes_convert(documents, ChunkerType.LLAMAINDEX)
|
28
|
+
"""
|
14
29
|
for i in range(len(documents)):
|
15
30
|
if type(documents[i]) == Node:
|
16
31
|
if chunker_type == ChunkerType.LLAMAINDEX:
|
@@ -21,25 +36,71 @@ def datanodes_convert(documents, chunker_type):
|
|
21
36
|
|
22
37
|
|
23
38
|
def text_chunker(documents, args, kwargs):
|
39
|
+
"""
|
40
|
+
Chunks text documents into smaller pieces.
|
41
|
+
|
42
|
+
Args:
|
43
|
+
documents (list): List of documents to be chunked.
|
44
|
+
args (tuple): Positional arguments for the chunking function.
|
45
|
+
kwargs (dict): Keyword arguments for the chunking function.
|
46
|
+
|
47
|
+
Returns:
|
48
|
+
pile: A pile of chunked Node instances.
|
49
|
+
|
50
|
+
Example usage:
|
51
|
+
>>> documents = [Node(...), Node(...)]
|
52
|
+
>>> chunked_docs = text_chunker(documents, args, kwargs)
|
53
|
+
"""
|
24
54
|
|
25
55
|
def chunk_node(node):
|
26
56
|
chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
|
27
|
-
func_call.lcall(chunks, lambda chunk: chunk.pop("
|
57
|
+
func_call.lcall(chunks, lambda chunk: chunk.pop("ln_id"))
|
28
58
|
return [Node.from_obj({**chunk}) for chunk in chunks]
|
29
59
|
|
30
|
-
|
60
|
+
a = to_list([chunk_node(doc) for doc in documents], flatten=True, dropna=True)
|
61
|
+
return pile(a)
|
31
62
|
|
32
63
|
|
33
64
|
def chunk(
|
34
|
-
|
35
|
-
|
65
|
+
docs,
|
66
|
+
field: str = "content",
|
67
|
+
chunk_size: int = 1500,
|
68
|
+
overlap: float = 0.1,
|
69
|
+
threshold: int = 200,
|
70
|
+
chunker="text_chunker",
|
36
71
|
chunker_type=ChunkerType.PLAIN,
|
37
72
|
chunker_args=None,
|
38
73
|
chunker_kwargs=None,
|
39
74
|
chunking_kwargs=None,
|
40
75
|
documents_convert_func=None,
|
41
|
-
|
76
|
+
to_lion: bool | Callable = True,
|
42
77
|
):
|
78
|
+
"""
|
79
|
+
Chunks documents using the specified chunker.
|
80
|
+
|
81
|
+
Args:
|
82
|
+
docs (list): List of documents to be chunked.
|
83
|
+
field (str, optional): The field to chunk. Defaults to "content".
|
84
|
+
chunk_size (int, optional): The size of each chunk. Defaults to 1500.
|
85
|
+
overlap (float, optional): The overlap between chunks. Defaults to 0.1.
|
86
|
+
threshold (int, optional): The threshold for chunking. Defaults to 200.
|
87
|
+
chunker (str, optional): The chunker function or its name. Defaults to "text_chunker".
|
88
|
+
chunker_type (ChunkerType, optional): The type of chunker to use. Defaults to ChunkerType.PLAIN.
|
89
|
+
chunker_args (list, optional): Positional arguments for the chunker function. Defaults to None.
|
90
|
+
chunker_kwargs (dict, optional): Keyword arguments for the chunker function. Defaults to None.
|
91
|
+
chunking_kwargs (dict, optional): Additional keyword arguments for chunking. Defaults to None.
|
92
|
+
documents_convert_func (Callable, optional): Function to convert documents. Defaults to None.
|
93
|
+
to_lion (bool | Callable, optional): Whether to convert the data to Node instances or a custom parser. Defaults to True.
|
94
|
+
|
95
|
+
Returns:
|
96
|
+
pile: A pile of chunked Node instances.
|
97
|
+
|
98
|
+
Raises:
|
99
|
+
ValueError: If the chunker_type is not supported.
|
100
|
+
|
101
|
+
Example usage:
|
102
|
+
>>> chunked_docs = chunk(docs, field='text', chunk_size=1000, overlap=0.2)
|
103
|
+
"""
|
43
104
|
|
44
105
|
if chunker_args is None:
|
45
106
|
chunker_args = []
|
@@ -49,38 +110,42 @@ def chunk(
|
|
49
110
|
chunking_kwargs = {}
|
50
111
|
|
51
112
|
if chunker_type == ChunkerType.PLAIN:
|
113
|
+
chunker_kwargs["field"] = field
|
114
|
+
chunker_kwargs["chunk_size"] = chunk_size
|
115
|
+
chunker_kwargs["overlap"] = overlap
|
116
|
+
chunker_kwargs["threshold"] = threshold
|
52
117
|
return chunk_funcs[ChunkerType.PLAIN](
|
53
|
-
|
118
|
+
docs, chunker, chunker_args, chunker_kwargs
|
54
119
|
)
|
55
120
|
|
56
121
|
elif chunker_type == ChunkerType.LANGCHAIN:
|
57
122
|
return chunk_funcs[ChunkerType.LANGCHAIN](
|
58
|
-
|
123
|
+
docs,
|
59
124
|
documents_convert_func,
|
60
125
|
chunker,
|
61
126
|
chunker_args,
|
62
127
|
chunker_kwargs,
|
63
|
-
|
128
|
+
to_lion,
|
64
129
|
)
|
65
130
|
|
66
131
|
elif chunker_type == ChunkerType.LLAMAINDEX:
|
67
132
|
return chunk_funcs[ChunkerType.LLAMAINDEX](
|
68
|
-
|
133
|
+
docs,
|
69
134
|
documents_convert_func,
|
70
135
|
chunker,
|
71
136
|
chunker_args,
|
72
137
|
chunker_kwargs,
|
73
|
-
|
138
|
+
to_lion,
|
74
139
|
)
|
75
140
|
|
76
141
|
elif chunker_type == ChunkerType.SELFDEFINED:
|
77
142
|
return chunk_funcs[ChunkerType.SELFDEFINED](
|
78
|
-
|
143
|
+
docs,
|
79
144
|
chunker,
|
80
145
|
chunker_args,
|
81
146
|
chunker_kwargs,
|
82
147
|
chunking_kwargs,
|
83
|
-
|
148
|
+
to_lion,
|
84
149
|
)
|
85
150
|
|
86
151
|
else:
|
@@ -95,8 +160,28 @@ def _self_defined_chunker(
|
|
95
160
|
chunker_args,
|
96
161
|
chunker_kwargs,
|
97
162
|
chunking_kwargs,
|
98
|
-
|
163
|
+
to_lion: bool | Callable,
|
99
164
|
):
|
165
|
+
"""
|
166
|
+
Chunks documents using a self-defined chunker.
|
167
|
+
|
168
|
+
Args:
|
169
|
+
documents (list): List of documents to be chunked.
|
170
|
+
chunker (str | Callable): The chunker function or its name.
|
171
|
+
chunker_args (list): Positional arguments for the chunker function.
|
172
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
173
|
+
chunking_kwargs (dict): Additional keyword arguments for chunking.
|
174
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
175
|
+
|
176
|
+
Returns:
|
177
|
+
pile: A pile of chunked Node instances or custom parsed nodes.
|
178
|
+
|
179
|
+
Raises:
|
180
|
+
ValueError: If the self-defined chunker is not valid.
|
181
|
+
|
182
|
+
Example usage:
|
183
|
+
>>> chunked_docs = _self_defined_chunker(docs, custom_chunker, ['arg1'], {'key': 'value'}, {}, custom_parser)
|
184
|
+
"""
|
100
185
|
try:
|
101
186
|
splitter = chunker(*chunker_args, **chunker_kwargs)
|
102
187
|
nodes = splitter.split(documents, **chunking_kwargs)
|
@@ -105,10 +190,10 @@ def _self_defined_chunker(
|
|
105
190
|
f"Self defined chunker {chunker} is not valid. Error: {e}"
|
106
191
|
) from e
|
107
192
|
|
108
|
-
if isinstance(
|
193
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
109
194
|
raise ValueError("Please define a valid parser to Node.")
|
110
|
-
elif isinstance(
|
111
|
-
nodes = _datanode_parser(nodes,
|
195
|
+
elif isinstance(to_lion, Callable):
|
196
|
+
nodes = _datanode_parser(nodes, to_lion)
|
112
197
|
return nodes
|
113
198
|
|
114
199
|
|
@@ -118,18 +203,35 @@ def _llama_index_chunker(
|
|
118
203
|
chunker,
|
119
204
|
chunker_args,
|
120
205
|
chunker_kwargs,
|
121
|
-
|
206
|
+
to_lion: bool | Callable,
|
122
207
|
):
|
208
|
+
"""
|
209
|
+
Chunks documents using a LlamaIndex chunker.
|
210
|
+
|
211
|
+
Args:
|
212
|
+
documents (list): List of documents to be chunked.
|
213
|
+
documents_convert_func (Callable): Function to convert documents.
|
214
|
+
chunker (str | Callable): The chunker function or its name.
|
215
|
+
chunker_args (list): Positional arguments for the chunker function.
|
216
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
217
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
218
|
+
|
219
|
+
Returns:
|
220
|
+
pile: A pile of chunked Node instances or custom parsed nodes.
|
221
|
+
|
222
|
+
Example usage:
|
223
|
+
>>> chunked_docs = _llama_index_chunker(docs, convert_func, llama_chunker, ['arg1'], {'key': 'value'}, True)
|
224
|
+
"""
|
123
225
|
if documents_convert_func:
|
124
226
|
documents = documents_convert_func(documents, "llama_index")
|
125
227
|
nodes = LlamaIndexBridge.llama_index_parse_node(
|
126
228
|
documents, chunker, chunker_args, chunker_kwargs
|
127
229
|
)
|
128
230
|
|
129
|
-
if isinstance(
|
231
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
130
232
|
nodes = [Node.from_llama_index(i) for i in nodes]
|
131
|
-
elif isinstance(
|
132
|
-
nodes = _datanode_parser(nodes,
|
233
|
+
elif isinstance(to_lion, Callable):
|
234
|
+
nodes = _datanode_parser(nodes, to_lion)
|
133
235
|
return nodes
|
134
236
|
|
135
237
|
|
@@ -139,24 +241,59 @@ def _langchain_chunker(
|
|
139
241
|
chunker,
|
140
242
|
chunker_args,
|
141
243
|
chunker_kwargs,
|
142
|
-
|
244
|
+
to_lion: bool | Callable,
|
143
245
|
):
|
246
|
+
"""
|
247
|
+
Chunks documents using a Langchain chunker.
|
248
|
+
|
249
|
+
Args:
|
250
|
+
documents (list): List of documents to be chunked.
|
251
|
+
documents_convert_func (Callable): Function to convert documents.
|
252
|
+
chunker (str | Callable): The chunker function or its name.
|
253
|
+
chunker_args (list): Positional arguments for the chunker function.
|
254
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
255
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
256
|
+
|
257
|
+
Returns:
|
258
|
+
pile: A pile of chunked Node instances or custom parsed nodes.
|
259
|
+
|
260
|
+
Example usage:
|
261
|
+
>>> chunked_docs = _langchain_chunker(docs, convert_func, langchain_chunker, ['arg1'], {'key': 'value'}, True)
|
262
|
+
"""
|
144
263
|
if documents_convert_func:
|
145
264
|
documents = documents_convert_func(documents, "langchain")
|
146
265
|
nodes = LangchainBridge.langchain_text_splitter(
|
147
266
|
documents, chunker, chunker_args, chunker_kwargs
|
148
267
|
)
|
149
|
-
if isinstance(
|
268
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
150
269
|
if isinstance(documents, str):
|
151
270
|
nodes = [Node(content=i) for i in nodes]
|
152
271
|
else:
|
153
272
|
nodes = [Node.from_langchain(i) for i in nodes]
|
154
|
-
elif isinstance(
|
155
|
-
nodes = _datanode_parser(nodes,
|
273
|
+
elif isinstance(to_lion, Callable):
|
274
|
+
nodes = _datanode_parser(nodes, to_lion)
|
156
275
|
return nodes
|
157
276
|
|
158
277
|
|
159
278
|
def _plain_chunker(documents, chunker, chunker_args, chunker_kwargs):
|
279
|
+
"""
|
280
|
+
Chunks documents using a plain chunker.
|
281
|
+
|
282
|
+
Args:
|
283
|
+
documents (list): List of documents to be chunked.
|
284
|
+
chunker (str | Callable): The chunker function or its name.
|
285
|
+
chunker_args (list): Positional arguments for the chunker function.
|
286
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
287
|
+
|
288
|
+
Returns:
|
289
|
+
pile: A pile of chunked Node instances.
|
290
|
+
|
291
|
+
Raises:
|
292
|
+
ValueError: If the chunker is not supported.
|
293
|
+
|
294
|
+
Example usage:
|
295
|
+
>>> chunked_docs = _plain_chunker(docs, 'text_chunker', ['arg1'], {'key': 'value'})
|
296
|
+
"""
|
160
297
|
try:
|
161
298
|
if chunker == "text_chunker":
|
162
299
|
chunker = text_chunker
|
@@ -1,8 +1,10 @@
|
|
1
1
|
# Default configs for the OpenAI API
|
2
2
|
|
3
|
+
API_key_schema = ("OPENAI_API_KEY",)
|
4
|
+
|
3
5
|
# ChatCompletion
|
4
6
|
oai_chat_llmconfig = {
|
5
|
-
"model": "gpt-
|
7
|
+
"model": "gpt-4o",
|
6
8
|
"frequency_penalty": 0,
|
7
9
|
"max_tokens": None,
|
8
10
|
"n": 1,
|
@@ -11,11 +13,13 @@ oai_chat_llmconfig = {
|
|
11
13
|
"seed": None,
|
12
14
|
"stop": None,
|
13
15
|
"stream": False,
|
14
|
-
"temperature": 0.
|
16
|
+
"temperature": 0.1,
|
15
17
|
"top_p": 1,
|
16
18
|
"tools": None,
|
17
19
|
"tool_choice": "none",
|
18
20
|
"user": None,
|
21
|
+
"logprobs": False,
|
22
|
+
"top_logprobs": None,
|
19
23
|
}
|
20
24
|
|
21
25
|
oai_chat_schema = {
|
@@ -36,9 +40,16 @@ oai_chat_schema = {
|
|
36
40
|
"tool_choice",
|
37
41
|
"user",
|
38
42
|
"max_tokens",
|
43
|
+
"logprobs",
|
44
|
+
"top_logprobs",
|
39
45
|
],
|
40
46
|
"input_": "messages",
|
41
47
|
"config": oai_chat_llmconfig,
|
48
|
+
"token_encoding_name": "cl100k_base",
|
49
|
+
"token_limit": 128_000,
|
50
|
+
"interval_tokens": 1_000_000,
|
51
|
+
"interval_requests": 1_000,
|
52
|
+
"interval": 60,
|
42
53
|
}
|
43
54
|
|
44
55
|
# Finetune
|
@@ -111,8 +122,26 @@ oai_audio_translations_schema = {
|
|
111
122
|
"config": oai_audio_translations_llmconfig,
|
112
123
|
}
|
113
124
|
|
114
|
-
#
|
125
|
+
# embeddings
|
115
126
|
|
127
|
+
oai_embeddings_llmconfig = {
|
128
|
+
"model": "text-embedding-ada-002",
|
129
|
+
"encoding_format": "float",
|
130
|
+
"user": None,
|
131
|
+
"dimensions": None,
|
132
|
+
}
|
133
|
+
|
134
|
+
oai_embeddings_schema = {
|
135
|
+
"required": ["model", "encoding_format"],
|
136
|
+
"optional": ["user", "dimensions"],
|
137
|
+
"input_": "input",
|
138
|
+
"config": oai_embeddings_llmconfig,
|
139
|
+
"token_encoding_name": "cl100k_base",
|
140
|
+
"token_limit": 8192,
|
141
|
+
"interval_tokens": 1_000_000,
|
142
|
+
"interval_requests": 1_000,
|
143
|
+
"interval": 60,
|
144
|
+
}
|
116
145
|
|
117
146
|
oai_schema = {
|
118
147
|
"chat/completions": oai_chat_schema,
|
@@ -120,4 +149,6 @@ oai_schema = {
|
|
120
149
|
"audio_speech": oai_audio_speech_schema,
|
121
150
|
"audio_transcriptions": oai_audio_transcriptions_schema,
|
122
151
|
"audio_translations": oai_audio_translations_schema,
|
152
|
+
"API_key_schema": API_key_schema,
|
153
|
+
"embeddings": oai_embeddings_schema,
|
123
154
|
}
|
@@ -1,5 +1,7 @@
|
|
1
|
+
API_key_schema = ("OPENROUTER_API_KEY",)
|
2
|
+
|
1
3
|
openrouter_chat_llmconfig = {
|
2
|
-
"model": "gpt-
|
4
|
+
"model": "gpt-4o",
|
3
5
|
"frequency_penalty": 0,
|
4
6
|
"max_tokens": None,
|
5
7
|
"num": 1,
|
@@ -8,11 +10,13 @@ openrouter_chat_llmconfig = {
|
|
8
10
|
"seed": None,
|
9
11
|
"stop": None,
|
10
12
|
"stream": False,
|
11
|
-
"temperature": 0.
|
13
|
+
"temperature": 0.1,
|
12
14
|
"top_p": 1,
|
13
15
|
"tools": None,
|
14
16
|
"tool_choice": "none",
|
15
17
|
"user": None,
|
18
|
+
"logprobs": False,
|
19
|
+
"top_logprobs": None,
|
16
20
|
}
|
17
21
|
|
18
22
|
openrouter_chat_schema = {
|
@@ -33,9 +37,16 @@ openrouter_chat_schema = {
|
|
33
37
|
"tool_choice",
|
34
38
|
"user",
|
35
39
|
"max_tokens",
|
40
|
+
"logprobs",
|
41
|
+
"top_logprobs",
|
36
42
|
],
|
37
43
|
"input_": "messages",
|
38
44
|
"config": openrouter_chat_llmconfig,
|
45
|
+
"token_encoding_name": "cl100k_base",
|
46
|
+
"token_limit": 128_000,
|
47
|
+
"interval_tokens": 10_000,
|
48
|
+
"interval_requests": 100,
|
49
|
+
"interval": 60,
|
39
50
|
}
|
40
51
|
|
41
52
|
openrouter_finetune_llmconfig = {
|
@@ -59,4 +70,5 @@ openrouter_finetune_schema = {
|
|
59
70
|
openrouter_schema = {
|
60
71
|
"chat/completions": openrouter_chat_schema,
|
61
72
|
"finetune": openrouter_finetune_schema,
|
73
|
+
"API_key_schema": API_key_schema,
|
62
74
|
}
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from typing import Callable
|
2
2
|
|
3
3
|
from lionagi.core.generic import Node
|
4
|
+
from lionagi.core.collections import pile
|
4
5
|
from ..bridge.langchain_.langchain_bridge import LangchainBridge
|
5
6
|
from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge
|
6
7
|
|
@@ -27,18 +28,43 @@ def text_reader(args, kwargs):
|
|
27
28
|
|
28
29
|
|
29
30
|
def load(
|
30
|
-
reader: str | Callable = "
|
31
|
+
reader: str | Callable = "text_reader",
|
31
32
|
input_dir=None,
|
32
33
|
input_files=None,
|
33
34
|
recursive: bool = False,
|
34
35
|
required_exts: list[str] = None,
|
35
|
-
reader_type=ReaderType.
|
36
|
+
reader_type=ReaderType.PLAIN,
|
36
37
|
reader_args=None,
|
37
38
|
reader_kwargs=None,
|
38
39
|
load_args=None,
|
39
40
|
load_kwargs=None,
|
40
|
-
|
41
|
+
to_lion: bool | Callable = True,
|
41
42
|
):
|
43
|
+
"""
|
44
|
+
Loads data using the specified reader and converts it to Node instances.
|
45
|
+
|
46
|
+
Args:
|
47
|
+
reader (str | Callable): The reader function or its name. Defaults to "text_reader".
|
48
|
+
input_dir (str, optional): The directory to read files from. Defaults to None.
|
49
|
+
input_files (list[str], optional): Specific files to read. Defaults to None.
|
50
|
+
recursive (bool, optional): Whether to read files recursively. Defaults to False.
|
51
|
+
required_exts (list[str], optional): List of required file extensions. Defaults to None.
|
52
|
+
reader_type (ReaderType, optional): The type of reader to use. Defaults to ReaderType.PLAIN.
|
53
|
+
reader_args (list, optional): Positional arguments for the reader function. Defaults to None.
|
54
|
+
reader_kwargs (dict, optional): Keyword arguments for the reader function. Defaults to None.
|
55
|
+
load_args (list, optional): Positional arguments for loading. Defaults to None.
|
56
|
+
load_kwargs (dict, optional): Keyword arguments for loading. Defaults to None.
|
57
|
+
to_lion (bool | Callable, optional): Whether to convert the data to Node instances or a custom parser. Defaults to True.
|
58
|
+
|
59
|
+
Returns:
|
60
|
+
pile: A pile of Node instances.
|
61
|
+
|
62
|
+
Raises:
|
63
|
+
ValueError: If the reader_type is not supported.
|
64
|
+
|
65
|
+
Example usage:
|
66
|
+
>>> nodes = load(input_dir='path/to/text/files', required_exts=['txt'])
|
67
|
+
"""
|
42
68
|
|
43
69
|
if reader_args is None:
|
44
70
|
reader_args = []
|
@@ -50,11 +76,15 @@ def load(
|
|
50
76
|
load_kwargs = {}
|
51
77
|
|
52
78
|
if reader_type == ReaderType.PLAIN:
|
79
|
+
reader_kwargs["dir_"] = input_dir
|
80
|
+
reader_kwargs["ext"] = required_exts
|
81
|
+
reader_kwargs["recursive"] = recursive
|
82
|
+
|
53
83
|
return read_funcs[ReaderType.PLAIN](reader, reader_args, reader_kwargs)
|
54
84
|
|
55
85
|
if reader_type == ReaderType.LANGCHAIN:
|
56
86
|
return read_funcs[ReaderType.LANGCHAIN](
|
57
|
-
reader, reader_args, reader_kwargs,
|
87
|
+
reader, reader_args, reader_kwargs, to_lion
|
58
88
|
)
|
59
89
|
|
60
90
|
elif reader_type == ReaderType.LLAMAINDEX:
|
@@ -68,12 +98,12 @@ def load(
|
|
68
98
|
reader_kwargs["required_exts"] = required_exts
|
69
99
|
|
70
100
|
return read_funcs[ReaderType.LLAMAINDEX](
|
71
|
-
reader, reader_args, reader_kwargs, load_args, load_kwargs,
|
101
|
+
reader, reader_args, reader_kwargs, load_args, load_kwargs, to_lion
|
72
102
|
)
|
73
103
|
|
74
104
|
elif reader_type == ReaderType.SELFDEFINED:
|
75
105
|
return read_funcs[ReaderType.SELFDEFINED](
|
76
|
-
reader, reader_args, reader_kwargs, load_args, load_kwargs,
|
106
|
+
reader, reader_args, reader_kwargs, load_args, load_kwargs, to_lion
|
77
107
|
)
|
78
108
|
|
79
109
|
else:
|
@@ -83,23 +113,56 @@ def load(
|
|
83
113
|
|
84
114
|
|
85
115
|
def _plain_reader(reader, reader_args, reader_kwargs):
|
116
|
+
"""
|
117
|
+
Reads data using a plain reader.
|
118
|
+
|
119
|
+
Args:
|
120
|
+
reader (str | Callable): The reader function or its name.
|
121
|
+
reader_args (list): Positional arguments for the reader function.
|
122
|
+
reader_kwargs (dict): Keyword arguments for the reader function.
|
123
|
+
|
124
|
+
Returns:
|
125
|
+
pile: A pile of Node instances.
|
126
|
+
|
127
|
+
Raises:
|
128
|
+
ValueError: If the reader is not supported or raises any exception while reading.
|
129
|
+
|
130
|
+
Example usage:
|
131
|
+
>>> nodes = _plain_reader('text_reader', ['path/to/files'], {'ext': 'txt'})
|
132
|
+
"""
|
86
133
|
try:
|
87
134
|
if reader == "text_reader":
|
88
135
|
reader = text_reader
|
89
|
-
|
136
|
+
nodes = reader(reader_args, reader_kwargs)
|
137
|
+
return pile(nodes)
|
90
138
|
except Exception as e:
|
91
139
|
raise ValueError(
|
92
140
|
f"Reader {reader} is currently not supported. Error: {e}"
|
93
141
|
) from e
|
94
142
|
|
95
143
|
|
96
|
-
def _langchain_reader(reader, reader_args, reader_kwargs,
|
144
|
+
def _langchain_reader(reader, reader_args, reader_kwargs, to_lion: bool | Callable):
|
145
|
+
"""
|
146
|
+
Reads data using a Langchain reader.
|
147
|
+
|
148
|
+
Args:
|
149
|
+
reader (str | Callable): The reader function or its name.
|
150
|
+
reader_args (list): Positional arguments for the reader function.
|
151
|
+
reader_kwargs (dict): Keyword arguments for the reader function.
|
152
|
+
to_lion (bool | Callable): If True, convert each loaded item to a Node via Node.from_langchain; if a callable, use it as a custom parser instead.
|
153
|
+
|
154
|
+
Returns:
|
155
|
+
pile: A pile of Node instances or custom parsed nodes.
|
156
|
+
|
157
|
+
Example usage:
|
158
|
+
>>> nodes = _langchain_reader('langchain_reader', ['arg1'], {'key': 'value'}, True)
|
159
|
+
"""
|
97
160
|
nodes = LangchainBridge.langchain_loader(reader, reader_args, reader_kwargs)
|
98
|
-
if isinstance(
|
99
|
-
|
161
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
162
|
+
return pile([Node.from_langchain(i) for i in nodes])
|
100
163
|
|
101
|
-
elif isinstance(
|
102
|
-
nodes = _datanode_parser(nodes,
|
164
|
+
elif isinstance(to_lion, Callable):
|
165
|
+
nodes = _datanode_parser(nodes, to_lion)
|
103
166
|
return nodes
|
104
167
|
|
105
168
|
|
@@ -109,15 +172,33 @@ def _llama_index_reader(
|
|
109
172
|
reader_kwargs,
|
110
173
|
load_args,
|
111
174
|
load_kwargs,
|
112
|
-
|
175
|
+
to_lion: bool | Callable,
|
113
176
|
):
|
177
|
+
"""
|
178
|
+
Reads data using a LlamaIndex reader.
|
179
|
+
|
180
|
+
Args:
|
181
|
+
reader (str | Callable): The reader function or its name.
|
182
|
+
reader_args (list): Positional arguments for the reader function.
|
183
|
+
reader_kwargs (dict): Keyword arguments for the reader function.
|
184
|
+
load_args (list): Positional arguments for loading.
|
185
|
+
load_kwargs (dict): Keyword arguments for loading.
|
186
|
+
to_lion (bool | Callable): If True, convert each loaded item to a Node via Node.from_llama_index; if a callable, use it as a custom parser instead.
|
187
|
+
|
188
|
+
Returns:
|
189
|
+
pile: A pile of Node instances or custom parsed nodes.
|
190
|
+
|
191
|
+
Example usage:
|
192
|
+
>>> nodes = _llama_index_reader('llama_reader', ['arg1'], {'key': 'value'}, [], {}, True)
|
193
|
+
"""
|
114
194
|
nodes = LlamaIndexBridge.llama_index_read_data(
|
115
195
|
reader, reader_args, reader_kwargs, load_args, load_kwargs
|
116
196
|
)
|
117
|
-
if isinstance(
|
118
|
-
|
119
|
-
|
120
|
-
|
197
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
198
|
+
return pile([Node.from_llama_index(i) for i in nodes])
|
199
|
+
|
200
|
+
elif isinstance(to_lion, Callable):
|
201
|
+
nodes = _datanode_parser(nodes, to_lion)
|
121
202
|
return nodes
|
122
203
|
|
123
204
|
|
@@ -127,8 +208,28 @@ def _self_defined_reader(
|
|
127
208
|
reader_kwargs,
|
128
209
|
load_args,
|
129
210
|
load_kwargs,
|
130
|
-
|
211
|
+
to_lion: bool | Callable,
|
131
212
|
):
|
213
|
+
"""
|
214
|
+
Reads data using a self-defined reader.
|
215
|
+
|
216
|
+
Args:
|
217
|
+
reader (str | Callable): The reader function or its name.
|
218
|
+
reader_args (list): Positional arguments for the reader function.
|
219
|
+
reader_kwargs (dict): Keyword arguments for the reader function.
|
220
|
+
load_args (list): Positional arguments for loading.
|
221
|
+
load_kwargs (dict): Keyword arguments for loading.
|
222
|
+
to_lion (bool | Callable): A callable used as a custom parser for the loaded data. Passing True raises ValueError; a parser must be supplied to convert to Node.
|
223
|
+
|
224
|
+
Returns:
|
225
|
+
pile: A pile of Node instances or custom parsed nodes.
|
226
|
+
|
227
|
+
Raises:
|
228
|
+
ValueError: If the self-defined reader is not valid, or if to_lion is True instead of a parser callable.
|
229
|
+
|
230
|
+
Example usage:
|
231
|
+
>>> nodes = _self_defined_reader(custom_reader, ['arg1'], {'key': 'value'}, [], {}, custom_parser)
|
232
|
+
"""
|
132
233
|
try:
|
133
234
|
loader = reader(*reader_args, **reader_kwargs)
|
134
235
|
nodes = loader.load(*load_args, **load_kwargs)
|
@@ -137,10 +238,10 @@ def _self_defined_reader(
|
|
137
238
|
f"Self defined reader {reader} is not valid. Error: {e}"
|
138
239
|
) from e
|
139
240
|
|
140
|
-
if isinstance(
|
241
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
141
242
|
raise ValueError("Please define a valid parser to Node.")
|
142
|
-
elif isinstance(
|
143
|
-
nodes = _datanode_parser(nodes,
|
243
|
+
elif isinstance(to_lion, Callable):
|
244
|
+
nodes = _datanode_parser(nodes, to_lion)
|
144
245
|
return nodes
|
145
246
|
|
146
247
|
|