lionagi 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +60 -5
- lionagi/core/__init__.py +0 -25
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/base_agent.py +27 -13
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/{generic/data_logger.py → collections/_logger.py} +69 -55
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/{execute/branch_executor.py → engine/branch_engine.py} +134 -97
- lionagi/core/{execute/instruction_map_executor.py → engine/instruction_map_engine.py} +80 -55
- lionagi/{experimental/directive/evaluator → core/engine}/script_engine.py +17 -1
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/{execute/structure_executor.py → executor/graph_executor.py} +62 -66
- lionagi/core/{execute → executor}/neo4j_executor.py +70 -67
- lionagi/core/generic/__init__.py +3 -33
- lionagi/core/generic/edge.py +29 -79
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +156 -221
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +12 -0
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +139 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +229 -903
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/work.py +76 -0
- lionagi/core/work/work_function.py +101 -0
- lionagi/core/work/work_queue.py +103 -0
- lionagi/core/work/worker.py +258 -0
- lionagi/core/work/worklog.py +120 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/base_parser.py +69 -2
- lionagi/experimental/directive/{template_ → template}/base_template.py +17 -1
- lionagi/{libs/ln_tokenizer.py → experimental/directive/tokenizer.py} +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/ast_evaluator.py +16 -0
- lionagi/experimental/{directive/evaluator → evaluator}/base_evaluator.py +16 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/chunker/chunk.py +161 -24
- lionagi/integrations/config/oai_configs.py +34 -3
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/load.py +122 -21
- lionagi/integrations/loader/load_util.py +6 -77
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +6 -5
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -3
- lionagi/integrations/storage/neo4j.py +52 -60
- lionagi/integrations/storage/storage_util.py +44 -46
- lionagi/integrations/storage/structure_excel.py +43 -26
- lionagi/integrations/storage/to_excel.py +11 -4
- lionagi/libs/__init__.py +22 -1
- lionagi/libs/ln_api.py +75 -20
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +82 -23
- lionagi/libs/ln_queue.py +16 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/libs/ln_validate.py +16 -0
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +95 -24
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +50 -3
- lionagi/lions/coder/util.py +30 -25
- lionagi/tests/libs/test_func_call.py +23 -21
- lionagi/tests/libs/test_nested.py +36 -21
- lionagi/tests/libs/test_parse.py +1 -1
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -294
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- lionagi-0.2.1.dist-info/LICENSE +202 -0
- lionagi-0.2.1.dist-info/METADATA +272 -0
- lionagi-0.2.1.dist-info/RECORD +240 -0
- lionagi/core/branch/base.py +0 -653
- lionagi/core/branch/branch.py +0 -474
- lionagi/core/branch/flow_mixin.py +0 -96
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -19
- lionagi/core/direct/cot.py +0 -123
- lionagi/core/direct/plan.py +0 -164
- lionagi/core/direct/predict.py +0 -166
- lionagi/core/direct/react.py +0 -171
- lionagi/core/direct/score.py +0 -279
- lionagi/core/direct/select.py +0 -170
- lionagi/core/direct/sentiment.py +0 -1
- lionagi/core/direct/utils.py +0 -110
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/execute/base_executor.py +0 -47
- lionagi/core/flow/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -240
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -253
- lionagi/core/flow/monoflow/followup.py +0 -215
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -251
- lionagi/core/form/action_form.py +0 -26
- lionagi/core/form/field_validator.py +0 -287
- lionagi/core/form/form.py +0 -302
- lionagi/core/form/mixin.py +0 -214
- lionagi/core/form/scored_form.py +0 -13
- lionagi/core/generic/action.py +0 -26
- lionagi/core/generic/component.py +0 -532
- lionagi/core/generic/condition.py +0 -46
- lionagi/core/generic/mail.py +0 -90
- lionagi/core/generic/mailbox.py +0 -36
- lionagi/core/generic/relation.py +0 -70
- lionagi/core/generic/signal.py +0 -22
- lionagi/core/generic/structure.py +0 -362
- lionagi/core/generic/transfer.py +0 -20
- lionagi/core/generic/work.py +0 -40
- lionagi/core/graph/graph.py +0 -126
- lionagi/core/graph/tree.py +0 -190
- lionagi/core/mail/schema.py +0 -63
- lionagi/core/messages/schema.py +0 -325
- lionagi/core/tool/__init__.py +0 -5
- lionagi/core/tool/tool.py +0 -28
- lionagi/core/tool/tool_manager.py +0 -283
- lionagi/experimental/report/form.py +0 -64
- lionagi/experimental/report/report.py +0 -138
- lionagi/experimental/report/util.py +0 -47
- lionagi/experimental/tool/function_calling.py +0 -43
- lionagi/experimental/tool/manual.py +0 -66
- lionagi/experimental/tool/schema.py +0 -59
- lionagi/experimental/tool/tool_manager.py +0 -138
- lionagi/experimental/tool/util.py +0 -16
- lionagi/experimental/validator/rule.py +0 -139
- lionagi/experimental/validator/validator.py +0 -56
- lionagi/experimental/work/__init__.py +0 -10
- lionagi/experimental/work/async_queue.py +0 -54
- lionagi/experimental/work/schema.py +0 -73
- lionagi/experimental/work/work_function.py +0 -67
- lionagi/experimental/work/worker.py +0 -56
- lionagi/experimental/work2/form.py +0 -371
- lionagi/experimental/work2/report.py +0 -289
- lionagi/experimental/work2/schema.py +0 -30
- lionagi/experimental/work2/tests.py +0 -72
- lionagi/experimental/work2/work_function.py +0 -89
- lionagi/experimental/work2/worker.py +0 -12
- lionagi/integrations/bridge/llamaindex_/get_index.py +0 -294
- lionagi/tests/test_core/generic/test_component.py +0 -89
- lionagi/tests/test_core/test_base_branch.py +0 -426
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -313
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.1.2.dist-info/LICENSE +0 -9
- lionagi-0.1.2.dist-info/METADATA +0 -174
- lionagi-0.1.2.dist-info/RECORD +0 -206
- /lionagi/core/{branch → _setting}/__init__.py +0 -0
- /lionagi/core/{execute → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow → agent/learn}/__init__.py +0 -0
- /lionagi/core/{form → agent/plan}/__init__.py +0 -0
- /lionagi/core/{branch/executable_branch.py → agent/plan/plan.py} +0 -0
- /lionagi/core/{graph → director}/__init__.py +0 -0
- /lionagi/core/{messages → engine}/__init__.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/engine}/sandbox_.py +0 -0
- /lionagi/{experimental/directive/evaluator → core/executor}/__init__.py +0 -0
- /lionagi/{experimental/directive/template_ → core/rule}/__init__.py +0 -0
- /lionagi/{experimental/report → core/unit/template}/__init__.py +0 -0
- /lionagi/{experimental/tool → core/validator}/__init__.py +0 -0
- /lionagi/{experimental/validator → core/work}/__init__.py +0 -0
- /lionagi/experimental/{work2 → compressor}/__init__.py +0 -0
- /lionagi/{core/flow/mono_chat_mixin.py → experimental/directive/template/__init__.py} +0 -0
- /lionagi/experimental/directive/{schema.py → template/schema.py} +0 -0
- /lionagi/experimental/{work2/util.py → evaluator/__init__.py} +0 -0
- /lionagi/experimental/{work2/work.py → knowledge/__init__.py} +0 -0
- /lionagi/{tests/libs/test_async.py → experimental/knowledge/graph.py} +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
- {lionagi-0.1.2.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,8 @@
|
|
1
1
|
from typing import Union, Callable
|
2
2
|
|
3
3
|
from lionagi.libs import func_call
|
4
|
+
from lionagi.libs.ln_convert import to_list
|
5
|
+
from lionagi.core.collections import pile
|
4
6
|
from lionagi.core.generic import Node
|
5
7
|
from ..bridge.langchain_.langchain_bridge import LangchainBridge
|
6
8
|
from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge
|
@@ -10,7 +12,20 @@ from ..loader.load_util import ChunkerType, file_to_chunks, _datanode_parser
|
|
10
12
|
|
11
13
|
|
12
14
|
def datanodes_convert(documents, chunker_type):
|
15
|
+
"""
|
16
|
+
Converts documents to the specified chunker type.
|
13
17
|
|
18
|
+
Args:
|
19
|
+
documents (list): List of documents to be converted.
|
20
|
+
chunker_type (ChunkerType): The type of chunker to convert the documents to.
|
21
|
+
|
22
|
+
Returns:
|
23
|
+
list: The converted documents.
|
24
|
+
|
25
|
+
Example usage:
|
26
|
+
>>> documents = [Node(...), Node(...)]
|
27
|
+
>>> converted_docs = datanodes_convert(documents, ChunkerType.LLAMAINDEX)
|
28
|
+
"""
|
14
29
|
for i in range(len(documents)):
|
15
30
|
if type(documents[i]) == Node:
|
16
31
|
if chunker_type == ChunkerType.LLAMAINDEX:
|
@@ -21,25 +36,71 @@ def datanodes_convert(documents, chunker_type):
|
|
21
36
|
|
22
37
|
|
23
38
|
def text_chunker(documents, args, kwargs):
|
39
|
+
"""
|
40
|
+
Chunks text documents into smaller pieces.
|
41
|
+
|
42
|
+
Args:
|
43
|
+
documents (list): List of documents to be chunked.
|
44
|
+
args (tuple): Positional arguments for the chunking function.
|
45
|
+
kwargs (dict): Keyword arguments for the chunking function.
|
46
|
+
|
47
|
+
Returns:
|
48
|
+
pile: A pile of chunked Node instances.
|
49
|
+
|
50
|
+
Example usage:
|
51
|
+
>>> documents = [Node(...), Node(...)]
|
52
|
+
>>> chunked_docs = text_chunker(documents, args, kwargs)
|
53
|
+
"""
|
24
54
|
|
25
55
|
def chunk_node(node):
|
26
56
|
chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
|
27
|
-
func_call.lcall(chunks, lambda chunk: chunk.pop("
|
57
|
+
func_call.lcall(chunks, lambda chunk: chunk.pop("ln_id"))
|
28
58
|
return [Node.from_obj({**chunk}) for chunk in chunks]
|
29
59
|
|
30
|
-
|
60
|
+
a = to_list([chunk_node(doc) for doc in documents], flatten=True, dropna=True)
|
61
|
+
return pile(a)
|
31
62
|
|
32
63
|
|
33
64
|
def chunk(
|
34
|
-
|
35
|
-
|
65
|
+
docs,
|
66
|
+
field: str = "content",
|
67
|
+
chunk_size: int = 1500,
|
68
|
+
overlap: float = 0.1,
|
69
|
+
threshold: int = 200,
|
70
|
+
chunker="text_chunker",
|
36
71
|
chunker_type=ChunkerType.PLAIN,
|
37
72
|
chunker_args=None,
|
38
73
|
chunker_kwargs=None,
|
39
74
|
chunking_kwargs=None,
|
40
75
|
documents_convert_func=None,
|
41
|
-
|
76
|
+
to_lion: bool | Callable = True,
|
42
77
|
):
|
78
|
+
"""
|
79
|
+
Chunks documents using the specified chunker.
|
80
|
+
|
81
|
+
Args:
|
82
|
+
docs (list): List of documents to be chunked.
|
83
|
+
field (str, optional): The field to chunk. Defaults to "content".
|
84
|
+
chunk_size (int, optional): The size of each chunk. Defaults to 1500.
|
85
|
+
overlap (float, optional): The overlap between chunks. Defaults to 0.1.
|
86
|
+
threshold (int, optional): The threshold for chunking. Defaults to 200.
|
87
|
+
chunker (str, optional): The chunker function or its name. Defaults to "text_chunker".
|
88
|
+
chunker_type (ChunkerType, optional): The type of chunker to use. Defaults to ChunkerType.PLAIN.
|
89
|
+
chunker_args (list, optional): Positional arguments for the chunker function. Defaults to None.
|
90
|
+
chunker_kwargs (dict, optional): Keyword arguments for the chunker function. Defaults to None.
|
91
|
+
chunking_kwargs (dict, optional): Additional keyword arguments for chunking. Defaults to None.
|
92
|
+
documents_convert_func (Callable, optional): Function to convert documents. Defaults to None.
|
93
|
+
to_lion (bool | Callable, optional): Whether to convert the data to Node instances or a custom parser. Defaults to True.
|
94
|
+
|
95
|
+
Returns:
|
96
|
+
pile: A pile of chunked Node instances.
|
97
|
+
|
98
|
+
Raises:
|
99
|
+
ValueError: If the chunker_type is not supported.
|
100
|
+
|
101
|
+
Example usage:
|
102
|
+
>>> chunked_docs = chunk(docs, field='text', chunk_size=1000, overlap=0.2)
|
103
|
+
"""
|
43
104
|
|
44
105
|
if chunker_args is None:
|
45
106
|
chunker_args = []
|
@@ -49,38 +110,42 @@ def chunk(
|
|
49
110
|
chunking_kwargs = {}
|
50
111
|
|
51
112
|
if chunker_type == ChunkerType.PLAIN:
|
113
|
+
chunker_kwargs["field"] = field
|
114
|
+
chunker_kwargs["chunk_size"] = chunk_size
|
115
|
+
chunker_kwargs["overlap"] = overlap
|
116
|
+
chunker_kwargs["threshold"] = threshold
|
52
117
|
return chunk_funcs[ChunkerType.PLAIN](
|
53
|
-
|
118
|
+
docs, chunker, chunker_args, chunker_kwargs
|
54
119
|
)
|
55
120
|
|
56
121
|
elif chunker_type == ChunkerType.LANGCHAIN:
|
57
122
|
return chunk_funcs[ChunkerType.LANGCHAIN](
|
58
|
-
|
123
|
+
docs,
|
59
124
|
documents_convert_func,
|
60
125
|
chunker,
|
61
126
|
chunker_args,
|
62
127
|
chunker_kwargs,
|
63
|
-
|
128
|
+
to_lion,
|
64
129
|
)
|
65
130
|
|
66
131
|
elif chunker_type == ChunkerType.LLAMAINDEX:
|
67
132
|
return chunk_funcs[ChunkerType.LLAMAINDEX](
|
68
|
-
|
133
|
+
docs,
|
69
134
|
documents_convert_func,
|
70
135
|
chunker,
|
71
136
|
chunker_args,
|
72
137
|
chunker_kwargs,
|
73
|
-
|
138
|
+
to_lion,
|
74
139
|
)
|
75
140
|
|
76
141
|
elif chunker_type == ChunkerType.SELFDEFINED:
|
77
142
|
return chunk_funcs[ChunkerType.SELFDEFINED](
|
78
|
-
|
143
|
+
docs,
|
79
144
|
chunker,
|
80
145
|
chunker_args,
|
81
146
|
chunker_kwargs,
|
82
147
|
chunking_kwargs,
|
83
|
-
|
148
|
+
to_lion,
|
84
149
|
)
|
85
150
|
|
86
151
|
else:
|
@@ -95,8 +160,28 @@ def _self_defined_chunker(
|
|
95
160
|
chunker_args,
|
96
161
|
chunker_kwargs,
|
97
162
|
chunking_kwargs,
|
98
|
-
|
163
|
+
to_lion: bool | Callable,
|
99
164
|
):
|
165
|
+
"""
|
166
|
+
Chunks documents using a self-defined chunker.
|
167
|
+
|
168
|
+
Args:
|
169
|
+
documents (list): List of documents to be chunked.
|
170
|
+
chunker (str | Callable): The chunker function or its name.
|
171
|
+
chunker_args (list): Positional arguments for the chunker function.
|
172
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
173
|
+
chunking_kwargs (dict): Additional keyword arguments for chunking.
|
174
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
175
|
+
|
176
|
+
Returns:
|
177
|
+
pile: A pile of chunked Node instances or custom parsed nodes.
|
178
|
+
|
179
|
+
Raises:
|
180
|
+
ValueError: If the self-defined chunker is not valid.
|
181
|
+
|
182
|
+
Example usage:
|
183
|
+
>>> chunked_docs = _self_defined_chunker(docs, custom_chunker, ['arg1'], {'key': 'value'}, {}, custom_parser)
|
184
|
+
"""
|
100
185
|
try:
|
101
186
|
splitter = chunker(*chunker_args, **chunker_kwargs)
|
102
187
|
nodes = splitter.split(documents, **chunking_kwargs)
|
@@ -105,10 +190,10 @@ def _self_defined_chunker(
|
|
105
190
|
f"Self defined chunker {chunker} is not valid. Error: {e}"
|
106
191
|
) from e
|
107
192
|
|
108
|
-
if isinstance(
|
193
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
109
194
|
raise ValueError("Please define a valid parser to Node.")
|
110
|
-
elif isinstance(
|
111
|
-
nodes = _datanode_parser(nodes,
|
195
|
+
elif isinstance(to_lion, Callable):
|
196
|
+
nodes = _datanode_parser(nodes, to_lion)
|
112
197
|
return nodes
|
113
198
|
|
114
199
|
|
@@ -118,18 +203,35 @@ def _llama_index_chunker(
|
|
118
203
|
chunker,
|
119
204
|
chunker_args,
|
120
205
|
chunker_kwargs,
|
121
|
-
|
206
|
+
to_lion: bool | Callable,
|
122
207
|
):
|
208
|
+
"""
|
209
|
+
Chunks documents using a LlamaIndex chunker.
|
210
|
+
|
211
|
+
Args:
|
212
|
+
documents (list): List of documents to be chunked.
|
213
|
+
documents_convert_func (Callable): Function to convert documents.
|
214
|
+
chunker (str | Callable): The chunker function or its name.
|
215
|
+
chunker_args (list): Positional arguments for the chunker function.
|
216
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
217
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
218
|
+
|
219
|
+
Returns:
|
220
|
+
pile: A pile of chunked Node instances or custom parsed nodes.
|
221
|
+
|
222
|
+
Example usage:
|
223
|
+
>>> chunked_docs = _llama_index_chunker(docs, convert_func, llama_chunker, ['arg1'], {'key': 'value'}, True)
|
224
|
+
"""
|
123
225
|
if documents_convert_func:
|
124
226
|
documents = documents_convert_func(documents, "llama_index")
|
125
227
|
nodes = LlamaIndexBridge.llama_index_parse_node(
|
126
228
|
documents, chunker, chunker_args, chunker_kwargs
|
127
229
|
)
|
128
230
|
|
129
|
-
if isinstance(
|
231
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
130
232
|
nodes = [Node.from_llama_index(i) for i in nodes]
|
131
|
-
elif isinstance(
|
132
|
-
nodes = _datanode_parser(nodes,
|
233
|
+
elif isinstance(to_lion, Callable):
|
234
|
+
nodes = _datanode_parser(nodes, to_lion)
|
133
235
|
return nodes
|
134
236
|
|
135
237
|
|
@@ -139,24 +241,59 @@ def _langchain_chunker(
|
|
139
241
|
chunker,
|
140
242
|
chunker_args,
|
141
243
|
chunker_kwargs,
|
142
|
-
|
244
|
+
to_lion: bool | Callable,
|
143
245
|
):
|
246
|
+
"""
|
247
|
+
Chunks documents using a Langchain chunker.
|
248
|
+
|
249
|
+
Args:
|
250
|
+
documents (list): List of documents to be chunked.
|
251
|
+
documents_convert_func (Callable): Function to convert documents.
|
252
|
+
chunker (str | Callable): The chunker function or its name.
|
253
|
+
chunker_args (list): Positional arguments for the chunker function.
|
254
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
255
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
256
|
+
|
257
|
+
Returns:
|
258
|
+
pile: A pile of chunked Node instances or custom parsed nodes.
|
259
|
+
|
260
|
+
Example usage:
|
261
|
+
>>> chunked_docs = _langchain_chunker(docs, convert_func, langchain_chunker, ['arg1'], {'key': 'value'}, True)
|
262
|
+
"""
|
144
263
|
if documents_convert_func:
|
145
264
|
documents = documents_convert_func(documents, "langchain")
|
146
265
|
nodes = LangchainBridge.langchain_text_splitter(
|
147
266
|
documents, chunker, chunker_args, chunker_kwargs
|
148
267
|
)
|
149
|
-
if isinstance(
|
268
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
150
269
|
if isinstance(documents, str):
|
151
270
|
nodes = [Node(content=i) for i in nodes]
|
152
271
|
else:
|
153
272
|
nodes = [Node.from_langchain(i) for i in nodes]
|
154
|
-
elif isinstance(
|
155
|
-
nodes = _datanode_parser(nodes,
|
273
|
+
elif isinstance(to_lion, Callable):
|
274
|
+
nodes = _datanode_parser(nodes, to_lion)
|
156
275
|
return nodes
|
157
276
|
|
158
277
|
|
159
278
|
def _plain_chunker(documents, chunker, chunker_args, chunker_kwargs):
|
279
|
+
"""
|
280
|
+
Chunks documents using a plain chunker.
|
281
|
+
|
282
|
+
Args:
|
283
|
+
documents (list): List of documents to be chunked.
|
284
|
+
chunker (str | Callable): The chunker function or its name.
|
285
|
+
chunker_args (list): Positional arguments for the chunker function.
|
286
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
287
|
+
|
288
|
+
Returns:
|
289
|
+
pile: A pile of chunked Node instances.
|
290
|
+
|
291
|
+
Raises:
|
292
|
+
ValueError: If the chunker is not supported.
|
293
|
+
|
294
|
+
Example usage:
|
295
|
+
>>> chunked_docs = _plain_chunker(docs, 'text_chunker', ['arg1'], {'key': 'value'})
|
296
|
+
"""
|
160
297
|
try:
|
161
298
|
if chunker == "text_chunker":
|
162
299
|
chunker = text_chunker
|
@@ -1,8 +1,10 @@
|
|
1
1
|
# Default configs for the OpenAI API
|
2
2
|
|
3
|
+
API_key_schema = ("OPENAI_API_KEY",)
|
4
|
+
|
3
5
|
# ChatCompletion
|
4
6
|
oai_chat_llmconfig = {
|
5
|
-
"model": "gpt-
|
7
|
+
"model": "gpt-4o",
|
6
8
|
"frequency_penalty": 0,
|
7
9
|
"max_tokens": None,
|
8
10
|
"n": 1,
|
@@ -11,11 +13,13 @@ oai_chat_llmconfig = {
|
|
11
13
|
"seed": None,
|
12
14
|
"stop": None,
|
13
15
|
"stream": False,
|
14
|
-
"temperature": 0.
|
16
|
+
"temperature": 0.1,
|
15
17
|
"top_p": 1,
|
16
18
|
"tools": None,
|
17
19
|
"tool_choice": "none",
|
18
20
|
"user": None,
|
21
|
+
"logprobs": False,
|
22
|
+
"top_logprobs": None,
|
19
23
|
}
|
20
24
|
|
21
25
|
oai_chat_schema = {
|
@@ -36,9 +40,16 @@ oai_chat_schema = {
|
|
36
40
|
"tool_choice",
|
37
41
|
"user",
|
38
42
|
"max_tokens",
|
43
|
+
"logprobs",
|
44
|
+
"top_logprobs",
|
39
45
|
],
|
40
46
|
"input_": "messages",
|
41
47
|
"config": oai_chat_llmconfig,
|
48
|
+
"token_encoding_name": "cl100k_base",
|
49
|
+
"token_limit": 128_000,
|
50
|
+
"interval_tokens": 1_000_000,
|
51
|
+
"interval_requests": 1_000,
|
52
|
+
"interval": 60,
|
42
53
|
}
|
43
54
|
|
44
55
|
# Finetune
|
@@ -111,8 +122,26 @@ oai_audio_translations_schema = {
|
|
111
122
|
"config": oai_audio_translations_llmconfig,
|
112
123
|
}
|
113
124
|
|
114
|
-
#
|
125
|
+
# embeddings
|
115
126
|
|
127
|
+
oai_embeddings_llmconfig = {
|
128
|
+
"model": "text-embedding-ada-002",
|
129
|
+
"encoding_format": "float",
|
130
|
+
"user": None,
|
131
|
+
"dimensions": None,
|
132
|
+
}
|
133
|
+
|
134
|
+
oai_embeddings_schema = {
|
135
|
+
"required": ["model", "encoding_format"],
|
136
|
+
"optional": ["user", "dimensions"],
|
137
|
+
"input_": "input",
|
138
|
+
"config": oai_embeddings_llmconfig,
|
139
|
+
"token_encoding_name": "cl100k_base",
|
140
|
+
"token_limit": 8192,
|
141
|
+
"interval_tokens": 1_000_000,
|
142
|
+
"interval_requests": 1_000,
|
143
|
+
"interval": 60,
|
144
|
+
}
|
116
145
|
|
117
146
|
oai_schema = {
|
118
147
|
"chat/completions": oai_chat_schema,
|
@@ -120,4 +149,6 @@ oai_schema = {
|
|
120
149
|
"audio_speech": oai_audio_speech_schema,
|
121
150
|
"audio_transcriptions": oai_audio_transcriptions_schema,
|
122
151
|
"audio_translations": oai_audio_translations_schema,
|
152
|
+
"API_key_schema": API_key_schema,
|
153
|
+
"embeddings": oai_embeddings_schema,
|
123
154
|
}
|
@@ -1,5 +1,7 @@
|
|
1
|
+
API_key_schema = ("OPENROUTER_API_KEY",)
|
2
|
+
|
1
3
|
openrouter_chat_llmconfig = {
|
2
|
-
"model": "gpt-
|
4
|
+
"model": "gpt-4o",
|
3
5
|
"frequency_penalty": 0,
|
4
6
|
"max_tokens": None,
|
5
7
|
"num": 1,
|
@@ -8,11 +10,13 @@ openrouter_chat_llmconfig = {
|
|
8
10
|
"seed": None,
|
9
11
|
"stop": None,
|
10
12
|
"stream": False,
|
11
|
-
"temperature": 0.
|
13
|
+
"temperature": 0.1,
|
12
14
|
"top_p": 1,
|
13
15
|
"tools": None,
|
14
16
|
"tool_choice": "none",
|
15
17
|
"user": None,
|
18
|
+
"logprobs": False,
|
19
|
+
"top_logprobs": None,
|
16
20
|
}
|
17
21
|
|
18
22
|
openrouter_chat_schema = {
|
@@ -33,9 +37,16 @@ openrouter_chat_schema = {
|
|
33
37
|
"tool_choice",
|
34
38
|
"user",
|
35
39
|
"max_tokens",
|
40
|
+
"logprobs",
|
41
|
+
"top_logprobs",
|
36
42
|
],
|
37
43
|
"input_": "messages",
|
38
44
|
"config": openrouter_chat_llmconfig,
|
45
|
+
"token_encoding_name": "cl100k_base",
|
46
|
+
"token_limit": 128_000,
|
47
|
+
"interval_tokens": 10_000,
|
48
|
+
"interval_requests": 100,
|
49
|
+
"interval": 60,
|
39
50
|
}
|
40
51
|
|
41
52
|
openrouter_finetune_llmconfig = {
|
@@ -59,4 +70,5 @@ openrouter_finetune_schema = {
|
|
59
70
|
openrouter_schema = {
|
60
71
|
"chat/completions": openrouter_chat_schema,
|
61
72
|
"finetune": openrouter_finetune_schema,
|
73
|
+
"API_key_schema": API_key_schema,
|
62
74
|
}
|
@@ -1,6 +1,7 @@
|
|
1
1
|
from typing import Callable
|
2
2
|
|
3
3
|
from lionagi.core.generic import Node
|
4
|
+
from lionagi.core.collections import pile
|
4
5
|
from ..bridge.langchain_.langchain_bridge import LangchainBridge
|
5
6
|
from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge
|
6
7
|
|
@@ -27,18 +28,43 @@ def text_reader(args, kwargs):
|
|
27
28
|
|
28
29
|
|
29
30
|
def load(
|
30
|
-
reader: str | Callable = "
|
31
|
+
reader: str | Callable = "text_reader",
|
31
32
|
input_dir=None,
|
32
33
|
input_files=None,
|
33
34
|
recursive: bool = False,
|
34
35
|
required_exts: list[str] = None,
|
35
|
-
reader_type=ReaderType.
|
36
|
+
reader_type=ReaderType.PLAIN,
|
36
37
|
reader_args=None,
|
37
38
|
reader_kwargs=None,
|
38
39
|
load_args=None,
|
39
40
|
load_kwargs=None,
|
40
|
-
|
41
|
+
to_lion: bool | Callable = True,
|
41
42
|
):
|
43
|
+
"""
|
44
|
+
Loads data using the specified reader and converts it to Node instances.
|
45
|
+
|
46
|
+
Args:
|
47
|
+
reader (str | Callable): The reader function or its name. Defaults to "text_reader".
|
48
|
+
input_dir (str, optional): The directory to read files from. Defaults to None.
|
49
|
+
input_files (list[str], optional): Specific files to read. Defaults to None.
|
50
|
+
recursive (bool, optional): Whether to read files recursively. Defaults to False.
|
51
|
+
required_exts (list[str], optional): List of required file extensions. Defaults to None.
|
52
|
+
reader_type (ReaderType, optional): The type of reader to use. Defaults to ReaderType.PLAIN.
|
53
|
+
reader_args (list, optional): Positional arguments for the reader function. Defaults to None.
|
54
|
+
reader_kwargs (dict, optional): Keyword arguments for the reader function. Defaults to None.
|
55
|
+
load_args (list, optional): Positional arguments for loading. Defaults to None.
|
56
|
+
load_kwargs (dict, optional): Keyword arguments for loading. Defaults to None.
|
57
|
+
to_lion (bool | Callable, optional): Whether to convert the data to Node instances or a custom parser. Defaults to True.
|
58
|
+
|
59
|
+
Returns:
|
60
|
+
pile: A pile of Node instances.
|
61
|
+
|
62
|
+
Raises:
|
63
|
+
ValueError: If the reader_type is not supported.
|
64
|
+
|
65
|
+
Example usage:
|
66
|
+
>>> nodes = load(input_dir='path/to/text/files', required_exts=['txt'])
|
67
|
+
"""
|
42
68
|
|
43
69
|
if reader_args is None:
|
44
70
|
reader_args = []
|
@@ -50,11 +76,15 @@ def load(
|
|
50
76
|
load_kwargs = {}
|
51
77
|
|
52
78
|
if reader_type == ReaderType.PLAIN:
|
79
|
+
reader_kwargs["dir_"] = input_dir
|
80
|
+
reader_kwargs["ext"] = required_exts
|
81
|
+
reader_kwargs["recursive"] = recursive
|
82
|
+
|
53
83
|
return read_funcs[ReaderType.PLAIN](reader, reader_args, reader_kwargs)
|
54
84
|
|
55
85
|
if reader_type == ReaderType.LANGCHAIN:
|
56
86
|
return read_funcs[ReaderType.LANGCHAIN](
|
57
|
-
reader, reader_args, reader_kwargs,
|
87
|
+
reader, reader_args, reader_kwargs, to_lion
|
58
88
|
)
|
59
89
|
|
60
90
|
elif reader_type == ReaderType.LLAMAINDEX:
|
@@ -68,12 +98,12 @@ def load(
|
|
68
98
|
reader_kwargs["required_exts"] = required_exts
|
69
99
|
|
70
100
|
return read_funcs[ReaderType.LLAMAINDEX](
|
71
|
-
reader, reader_args, reader_kwargs, load_args, load_kwargs,
|
101
|
+
reader, reader_args, reader_kwargs, load_args, load_kwargs, to_lion
|
72
102
|
)
|
73
103
|
|
74
104
|
elif reader_type == ReaderType.SELFDEFINED:
|
75
105
|
return read_funcs[ReaderType.SELFDEFINED](
|
76
|
-
reader, reader_args, reader_kwargs, load_args, load_kwargs,
|
106
|
+
reader, reader_args, reader_kwargs, load_args, load_kwargs, to_lion
|
77
107
|
)
|
78
108
|
|
79
109
|
else:
|
@@ -83,23 +113,56 @@ def load(
|
|
83
113
|
|
84
114
|
|
85
115
|
def _plain_reader(reader, reader_args, reader_kwargs):
|
116
|
+
"""
|
117
|
+
Reads data using a plain reader.
|
118
|
+
|
119
|
+
Args:
|
120
|
+
reader (str | Callable): The reader function or its name.
|
121
|
+
reader_args (list): Positional arguments for the reader function.
|
122
|
+
reader_kwargs (dict): Keyword arguments for the reader function.
|
123
|
+
|
124
|
+
Returns:
|
125
|
+
pile: A pile of Node instances.
|
126
|
+
|
127
|
+
Raises:
|
128
|
+
ValueError: If the reader is not supported.
|
129
|
+
|
130
|
+
Example usage:
|
131
|
+
>>> nodes = _plain_reader('text_reader', ['path/to/files'], {'ext': 'txt'})
|
132
|
+
"""
|
86
133
|
try:
|
87
134
|
if reader == "text_reader":
|
88
135
|
reader = text_reader
|
89
|
-
|
136
|
+
nodes = reader(reader_args, reader_kwargs)
|
137
|
+
return pile(nodes)
|
90
138
|
except Exception as e:
|
91
139
|
raise ValueError(
|
92
140
|
f"Reader {reader} is currently not supported. Error: {e}"
|
93
141
|
) from e
|
94
142
|
|
95
143
|
|
96
|
-
def _langchain_reader(reader, reader_args, reader_kwargs,
|
144
|
+
def _langchain_reader(reader, reader_args, reader_kwargs, to_lion: bool | Callable):
|
145
|
+
"""
|
146
|
+
Reads data using a Langchain reader.
|
147
|
+
|
148
|
+
Args:
|
149
|
+
reader (str | Callable): The reader function or its name.
|
150
|
+
reader_args (list): Positional arguments for the reader function.
|
151
|
+
reader_kwargs (dict): Keyword arguments for the reader function.
|
152
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
153
|
+
|
154
|
+
Returns:
|
155
|
+
pile: A pile of Node instances or custom parsed nodes.
|
156
|
+
|
157
|
+
Example usage:
|
158
|
+
>>> nodes = _langchain_reader('langchain_reader', ['arg1'], {'key': 'value'}, True)
|
159
|
+
"""
|
97
160
|
nodes = LangchainBridge.langchain_loader(reader, reader_args, reader_kwargs)
|
98
|
-
if isinstance(
|
99
|
-
|
161
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
162
|
+
return pile([Node.from_langchain(i) for i in nodes])
|
100
163
|
|
101
|
-
elif isinstance(
|
102
|
-
nodes = _datanode_parser(nodes,
|
164
|
+
elif isinstance(to_lion, Callable):
|
165
|
+
nodes = _datanode_parser(nodes, to_lion)
|
103
166
|
return nodes
|
104
167
|
|
105
168
|
|
@@ -109,15 +172,33 @@ def _llama_index_reader(
|
|
109
172
|
reader_kwargs,
|
110
173
|
load_args,
|
111
174
|
load_kwargs,
|
112
|
-
|
175
|
+
to_lion: bool | Callable,
|
113
176
|
):
|
177
|
+
"""
|
178
|
+
Reads data using a LlamaIndex reader.
|
179
|
+
|
180
|
+
Args:
|
181
|
+
reader (str | Callable): The reader function or its name.
|
182
|
+
reader_args (list): Positional arguments for the reader function.
|
183
|
+
reader_kwargs (dict): Keyword arguments for the reader function.
|
184
|
+
load_args (list): Positional arguments for loading.
|
185
|
+
load_kwargs (dict): Keyword arguments for loading.
|
186
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
187
|
+
|
188
|
+
Returns:
|
189
|
+
pile: A pile of Node instances or custom parsed nodes.
|
190
|
+
|
191
|
+
Example usage:
|
192
|
+
>>> nodes = _llama_index_reader('llama_reader', ['arg1'], {'key': 'value'}, [], {}, True)
|
193
|
+
"""
|
114
194
|
nodes = LlamaIndexBridge.llama_index_read_data(
|
115
195
|
reader, reader_args, reader_kwargs, load_args, load_kwargs
|
116
196
|
)
|
117
|
-
if isinstance(
|
118
|
-
|
119
|
-
|
120
|
-
|
197
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
198
|
+
return pile([Node.from_llama_index(i) for i in nodes])
|
199
|
+
|
200
|
+
elif isinstance(to_lion, Callable):
|
201
|
+
nodes = _datanode_parser(nodes, to_lion)
|
121
202
|
return nodes
|
122
203
|
|
123
204
|
|
@@ -127,8 +208,28 @@ def _self_defined_reader(
|
|
127
208
|
reader_kwargs,
|
128
209
|
load_args,
|
129
210
|
load_kwargs,
|
130
|
-
|
211
|
+
to_lion: bool | Callable,
|
131
212
|
):
|
213
|
+
"""
|
214
|
+
Reads data using a self-defined reader.
|
215
|
+
|
216
|
+
Args:
|
217
|
+
reader (str | Callable): The reader function or its name.
|
218
|
+
reader_args (list): Positional arguments for the reader function.
|
219
|
+
reader_kwargs (dict): Keyword arguments for the reader function.
|
220
|
+
load_args (list): Positional arguments for loading.
|
221
|
+
load_kwargs (dict): Keyword arguments for loading.
|
222
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
223
|
+
|
224
|
+
Returns:
|
225
|
+
pile: A pile of Node instances or custom parsed nodes.
|
226
|
+
|
227
|
+
Raises:
|
228
|
+
ValueError: If the self-defined reader is not valid.
|
229
|
+
|
230
|
+
Example usage:
|
231
|
+
>>> nodes = _self_defined_reader(custom_reader, ['arg1'], {'key': 'value'}, [], {}, custom_parser)
|
232
|
+
"""
|
132
233
|
try:
|
133
234
|
loader = reader(*reader_args, **reader_kwargs)
|
134
235
|
nodes = loader.load(*load_args, **load_kwargs)
|
@@ -137,10 +238,10 @@ def _self_defined_reader(
|
|
137
238
|
f"Self defined reader {reader} is not valid. Error: {e}"
|
138
239
|
) from e
|
139
240
|
|
140
|
-
if isinstance(
|
241
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
141
242
|
raise ValueError("Please define a valid parser to Node.")
|
142
|
-
elif isinstance(
|
143
|
-
nodes = _datanode_parser(nodes,
|
243
|
+
elif isinstance(to_lion, Callable):
|
244
|
+
nodes = _datanode_parser(nodes, to_lion)
|
144
245
|
return nodes
|
145
246
|
|
146
247
|
|