lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +61 -3
- lionagi/core/__init__.py +0 -14
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/__init__.py +0 -3
- lionagi/core/agent/base_agent.py +45 -36
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/collections/_logger.py +319 -0
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/engine/branch_engine.py +333 -0
- lionagi/core/engine/instruction_map_engine.py +204 -0
- lionagi/core/engine/sandbox_.py +14 -0
- lionagi/core/engine/script_engine.py +99 -0
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/executor/graph_executor.py +330 -0
- lionagi/core/executor/neo4j_executor.py +384 -0
- lionagi/core/generic/__init__.py +7 -0
- lionagi/core/generic/edge.py +112 -0
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +220 -0
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +7 -3
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +142 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/__init__.py +0 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/__init__.py +0 -3
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +230 -902
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/__init__.py +0 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/__init__.py +0 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/__init__.py +0 -0
- lionagi/core/work/work.py +76 -0
- lionagi/core/work/work_function.py +101 -0
- lionagi/core/work/work_queue.py +103 -0
- lionagi/core/work/worker.py +258 -0
- lionagi/core/work/worklog.py +120 -0
- lionagi/experimental/__init__.py +0 -0
- lionagi/experimental/compressor/__init__.py +0 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/__init__.py +0 -0
- lionagi/experimental/directive/parser/base_parser.py +282 -0
- lionagi/experimental/directive/template/__init__.py +0 -0
- lionagi/experimental/directive/template/base_template.py +79 -0
- lionagi/experimental/directive/template/schema.py +36 -0
- lionagi/experimental/directive/tokenizer.py +73 -0
- lionagi/experimental/evaluator/__init__.py +0 -0
- lionagi/experimental/evaluator/ast_evaluator.py +131 -0
- lionagi/experimental/evaluator/base_evaluator.py +218 -0
- lionagi/experimental/knowledge/__init__.py +0 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/knowledge/graph.py +0 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/autogen_/__init__.py +0 -0
- lionagi/integrations/bridge/autogen_/autogen_.py +124 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/bridge/llamaindex_/llama_pack.py +227 -0
- lionagi/integrations/bridge/llamaindex_/node_parser.py +6 -9
- lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +1 -0
- lionagi/integrations/bridge/transformers_/__init__.py +0 -0
- lionagi/integrations/bridge/transformers_/install_.py +36 -0
- lionagi/integrations/chunker/__init__.py +0 -0
- lionagi/integrations/chunker/chunk.py +312 -0
- lionagi/integrations/config/oai_configs.py +38 -7
- lionagi/integrations/config/ollama_configs.py +1 -1
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/__init__.py +0 -0
- lionagi/integrations/loader/load.py +253 -0
- lionagi/integrations/loader/load_util.py +195 -0
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +7 -6
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -0
- lionagi/integrations/storage/neo4j.py +665 -0
- lionagi/integrations/storage/storage_util.py +287 -0
- lionagi/integrations/storage/structure_excel.py +285 -0
- lionagi/integrations/storage/to_csv.py +63 -0
- lionagi/integrations/storage/to_excel.py +83 -0
- lionagi/libs/__init__.py +26 -1
- lionagi/libs/ln_api.py +78 -23
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_knowledge_graph.py +405 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +110 -14
- lionagi/libs/ln_queue.py +117 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/{core/prompt/field_validator.py → libs/ln_validate.py} +79 -14
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +107 -2
- lionagi/lions/__init__.py +0 -0
- lionagi/lions/coder/__init__.py +0 -0
- lionagi/lions/coder/add_feature.py +20 -0
- lionagi/lions/coder/base_prompts.py +22 -0
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +168 -0
- lionagi/lions/coder/util.py +96 -0
- lionagi/lions/researcher/__init__.py +0 -0
- lionagi/lions/researcher/data_source/__init__.py +0 -0
- lionagi/lions/researcher/data_source/finhub_.py +191 -0
- lionagi/lions/researcher/data_source/google_.py +199 -0
- lionagi/lions/researcher/data_source/wiki_.py +96 -0
- lionagi/lions/researcher/data_source/yfinance_.py +21 -0
- lionagi/tests/integrations/__init__.py +0 -0
- lionagi/tests/libs/__init__.py +0 -0
- lionagi/tests/libs/test_field_validators.py +353 -0
- lionagi/tests/{test_libs → libs}/test_func_call.py +23 -21
- lionagi/tests/{test_libs → libs}/test_nested.py +36 -21
- lionagi/tests/{test_libs → libs}/test_parse.py +1 -1
- lionagi/tests/libs/test_queue.py +67 -0
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/__init__.py +0 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -292
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/LICENSE +12 -11
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/METADATA +19 -118
- lionagi-0.2.1.dist-info/RECORD +240 -0
- lionagi/core/branch/__init__.py +0 -4
- lionagi/core/branch/base_branch.py +0 -654
- lionagi/core/branch/branch.py +0 -471
- lionagi/core/branch/branch_flow_mixin.py +0 -96
- lionagi/core/branch/executable_branch.py +0 -347
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -6
- lionagi/core/direct/predict.py +0 -161
- lionagi/core/direct/score.py +0 -278
- lionagi/core/direct/select.py +0 -169
- lionagi/core/direct/utils.py +0 -87
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/flow/base/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -238
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -263
- lionagi/core/flow/monoflow/followup.py +0 -214
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -248
- lionagi/core/mail/schema.py +0 -56
- lionagi/core/messages/__init__.py +0 -3
- lionagi/core/messages/schema.py +0 -533
- lionagi/core/prompt/prompt_template.py +0 -316
- lionagi/core/schema/__init__.py +0 -22
- lionagi/core/schema/action_node.py +0 -29
- lionagi/core/schema/base_mixin.py +0 -296
- lionagi/core/schema/base_node.py +0 -199
- lionagi/core/schema/condition.py +0 -24
- lionagi/core/schema/data_logger.py +0 -354
- lionagi/core/schema/data_node.py +0 -93
- lionagi/core/schema/prompt_template.py +0 -67
- lionagi/core/schema/structure.py +0 -910
- lionagi/core/tool/__init__.py +0 -3
- lionagi/core/tool/tool_manager.py +0 -280
- lionagi/integrations/bridge/pydantic_/base_model.py +0 -7
- lionagi/tests/test_core/test_base_branch.py +0 -427
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -312
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.0.312.dist-info/RECORD +0 -111
- /lionagi/core/{branch/base → _setting}/__init__.py +0 -0
- /lionagi/core/{flow → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow/base → agent/learn}/__init__.py +0 -0
- /lionagi/core/{prompt → agent/plan}/__init__.py +0 -0
- /lionagi/core/{tool/manual.py → agent/plan/plan.py} +0 -0
- /lionagi/{tests/test_integrations → core/director}/__init__.py +0 -0
- /lionagi/{tests/test_libs → core/engine}/__init__.py +0 -0
- /lionagi/{tests/test_libs/test_async.py → core/executor/__init__.py} +0 -0
- /lionagi/tests/{test_libs → libs}/test_api.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_convert.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_sys_util.py +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
lionagi/integrations/bridge/langchain_/documents.py

@@ -28,6 +28,10 @@ def to_langchain_document(datanode: T, **kwargs: Any) -> Any:
     SysUtil.change_dict_key(dnode, old_key="content", new_key="page_content")
     SysUtil.change_dict_key(dnode, old_key="lc_id", new_key="id_")
     dnode = {**dnode, **kwargs}
+    dnode = {k: v for k, v in dnode.items() if v is not None}
+    if "page_content" not in dnode:
+        dnode["page_content"] = ""
+
     return LangchainDocument(**dnode)
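The four added lines make the conversion defensive: Langchain's `Document` requires `page_content`, and explicit `None` values for typed optional fields can fail validation. A minimal sketch of the sanitizing step in isolation (the `dnode` contents here are hypothetical; in the real function they come from the lionagi datanode):

# Hypothetical payload after the key renames above.
dnode = {"id_": "abc", "metadata": {"source": "web"}, "page_content": None}

# Drop None values so optional Langchain fields are simply omitted ...
dnode = {k: v for k, v in dnode.items() if v is not None}
# ... then guarantee the required page_content field exists.
if "page_content" not in dnode:
    dnode["page_content"] = ""

assert dnode == {"id_": "abc", "metadata": {"source": "web"}, "page_content": ""}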
lionagi/integrations/bridge/llamaindex_/index.py

@@ -0,0 +1,30 @@
+class LlamaIndex:
+
+    @classmethod
+    def index(
+        cls,
+        nodes,
+        llm_obj=None,
+        llm_class=None,
+        llm_kwargs=None,
+        index_type=None,
+        **kwargs,
+    ):
+        from llama_index.core import Settings
+        from llama_index.llms.openai import OpenAI
+
+        if not llm_obj:
+            llm_class = llm_class or OpenAI
+            llm_kwargs = llm_kwargs or {}
+            if "model" not in llm_kwargs:
+                llm_kwargs["model"] = "gpt-4o"
+            llm_obj = llm_class(**llm_kwargs)
+
+        Settings.llm = llm_obj
+
+        if not index_type:
+            from llama_index.core import VectorStoreIndex
+
+            index_type = VectorStoreIndex
+
+        return index_type(nodes, **kwargs)
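A hedged usage sketch of the new helper, assuming llama-index and its OpenAI LLM package are installed and OPENAI_API_KEY is set; with only `nodes` given, it makes OpenAI `gpt-4o` the global `Settings.llm` and builds a `VectorStoreIndex`:

from llama_index.core import Document

from lionagi.integrations.bridge.llamaindex_.index import LlamaIndex

# Document subclasses TextNode, so it can be passed where nodes are expected.
docs = [Document(text="lionagi is an AI agent framework.")]  # hypothetical input

index = LlamaIndex.index(docs)  # defaults: OpenAI(model="gpt-4o"), VectorStoreIndex
answer = index.as_query_engine().query("What is lionagi?")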
lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py

@@ -100,3 +100,9 @@ class LlamaIndexBridge:
         from .reader import get_llama_index_reader
 
         return get_llama_index_reader(*args, **kwargs)
+
+    @staticmethod
+    def index(nodes, **kwargs):
+        from .index import LlamaIndex
+
+        return LlamaIndex.index(nodes, **kwargs)
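Through the bridge, the same entry point is reachable without importing the module directly (same assumptions as the sketch above):

from lionagi.integrations.bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge

index = LlamaIndexBridge.index(docs)  # lazily imports and delegates to LlamaIndex.index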
lionagi/integrations/bridge/llamaindex_/llama_pack.py

@@ -0,0 +1,227 @@
+class LlamaPack:
+
+    @staticmethod
+    def download(pack_name, pack_path):
+        try:
+            from llama_index.llama_pack import download_llama_pack
+
+            return download_llama_pack(pack_name, pack_path)
+        except Exception as e:
+            raise ImportError(f"Error in downloading llama pack: {e}")
+
+    @staticmethod
+    def build(pack_name, pack_path, args=[], **kwargs):
+        pack = LlamaPack.download(pack_name, pack_path)
+        return pack(*args, **kwargs)
+
+    @staticmethod
+    def stock_market_pack(pack_path="./stock_market_data_pack", args=[], **kwargs):
+        name_ = "StockMarketDataQueryEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def embedded_table_pack(
+        pack_path="./embedded_tables_unstructured_pack", args=[], **kwargs
+    ):
+        name_ = "RecursiveRetrieverSmallToBigPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def rag_evaluator_pack(pack_path="./rag_evaluator_pack", args=[], **kwargs):
+        name_ = "RagEvaluatorPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def ollma_pack(pack_path="./ollama_pack", args=[], **kwargs):
+        name_ = "OllamaQueryEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def llm_compiler_agent_pack(
+        pack_path="./llm_compiler_agent_pack", args=[], **kwargs
+    ):
+        name_ = "LLMCompilerAgentPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def resume_screener_pack(pack_path="./resume_screener_pack", args=[], **kwargs):
+        name_ = "ResumeScreenerPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def ragatouille_retriever_pack(pack_path="./ragatouille_pack", args=[], **kwargs):
+        name_ = "RAGatouilleRetrieverPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def chain_of_table_pack(pack_path="./chain_of_table_pack", args=[], **kwargs):
+        name_ = "ChainOfTablePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def hybrid_fusion_retriever_pack(
+        pack_path="./hybrid_fusion_pack", args=[], **kwargs
+    ):
+        name_ = "HybridFusionRetrieverPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def neo4j_query_engine_pack(pack_path="./neo4j_pack", args=[], **kwargs):
+        name_ = "Neo4jQueryEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def llava_completion_pack(pack_path="./llava_pack", args=[], **kwargs):
+        name_ = "LlavaCompletionPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def sentence_window_retriever_pack(
+        pack_path="./sentence_window_retriever_pack", args=[], **kwargs
+    ):
+        name_ = "SentenceWindowRetrieverPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def dense_x_retrieval_pack(pack_path="./dense_pack", args=[], **kwargs):
+        name_ = "DenseXRetrievalPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def zephyr_query_engine_pack(pack_path="./zephyr_pack", args=[], **kwargs):
+        name_ = "ZephyrQueryEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def query_rewriting_retriever_pack(
+        pack_path="./query_rewriting_pack", args=[], **kwargs
+    ):
+        name_ = "QueryRewritingRetrieverPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def fuzzy_citation_engine_pack(
+        pack_path="./fuzzy_citation_pack", args=[], **kwargs
+    ):
+        name_ = "FuzzyCitationEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def multidoc_auto_retriever_pack(
+        pack_path="./multidoc_autoretrieval_pack", args=[], **kwargs
+    ):
+        name_ = "MultiDocAutoRetrieverPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def auto_merging_retriever_pack(
+        pack_path="./auto_merging_retriever_pack", args=[], **kwargs
+    ):
+        name_ = "AutoMergingRetrieverPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def voyage_query_engine_pack(pack_path="./voyage_pack", args=[], **kwargs):
+        name_ = "VoyageQueryEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def mix_self_consistency_pack(
+        pack_path="./mix_self_consistency_pack", args=[], **kwargs
+    ):
+        name_ = "MixSelfConsistencyPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def rag_fusion_pipeline_pack(
+        pack_path="./rag_fusion_pipeline_pack", args=[], **kwargs
+    ):
+        name_ = "RAGFusionPipelinePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def multi_document_agents_pack(
+        pack_path="./multi_doc_agents_pack", args=[], **kwargs
+    ):
+        name_ = "MultiDocumentAgentsPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def llama_guard_moderator_pack(pack_path="./llamaguard_pack", args=[], **kwargs):
+        name_ = "LlamaGuardModeratorPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def evaluator_benchmarker_pack(
+        pack_path="./eval_benchmark_pack", args=[], **kwargs
+    ):
+        name_ = "EvaluatorBenchmarkerPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def amazon_product_extraction_pack(
+        pack_path="./amazon_product_extraction_pack", args=[], **kwargs
+    ):
+        name_ = "AmazonProductExtractionPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def llama_dataset_metadata_pack(
+        pack_path="./llama_dataset_metadata_pack", args=[], **kwargs
+    ):
+        name_ = "LlamaDatasetMetadataPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def multi_tenancy_rag_pack(pack_path="./multitenancy_rag_pack", args=[], **kwargs):
+        name_ = "MultiTenancyRAGPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def gmail_openai_agent_pack(pack_path="./gmail_pack", args=[], **kwargs):
+        name_ = "GmailOpenAIAgentPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def snowflake_query_engine_pack(pack_path="./snowflake_pack", args=[], **kwargs):
+        name_ = "SnowflakeQueryEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def agent_search_retriever_pack(pack_path="./agent_search_pack", args=[], **kwargs):
+        name_ = "AgentSearchRetrieverPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def vectara_rag_pack(pack_path="./vectara_rag_pack", args=[], **kwargs):
+        name_ = "VectaraRagPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def chroma_autoretrieval_pack(pack_path="./chroma_pack", args=[], **kwargs):
+        name_ = "ChromaAutoretrievalPack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def arize_phoenix_query_engine_pack(pack_path="./arize_pack", args=[], **kwargs):
+        name_ = "ArizePhoenixQueryEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def redis_ingestion_pipeline_pack(
+        pack_path="./redis_ingestion_pack", args=[], **kwargs
+    ):
+        name_ = "RedisIngestionPipelinePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def nebula_graph_query_engine_pack(
+        pack_path="./nebulagraph_pack", args=[], **kwargs
+    ):
+        name_ = "NebulaGraphQueryEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
+
+    @staticmethod
+    def weaviate_retry_engine_pack(pack_path="./weaviate_pack", args=[], **kwargs):
+        name_ = "WeaviateRetryEnginePack"
+        return LlamaPack.build(name_, pack_path, args, **kwargs)
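Every named wrapper above reduces to `download` plus `build`, so one pattern covers the whole class. A hedged sketch, assuming the llama-pack machinery is importable at `llama_index.llama_pack` (the import the code itself guards with an `ImportError`) and that network access is available:

# Generic path: fetch the pack class into a local directory, then instantiate it.
pack_cls = LlamaPack.download("StockMarketDataQueryEnginePack", "./stock_market_data_pack")

# The named wrappers are one-liners over the same path; entries in `args` and
# any **kwargs are forwarded to the pack's constructor.
pack = LlamaPack.stock_market_pack()

Note that `ollma_pack` is spelled that way in the released file, and the default `pack_path` values are local download targets, not registry names.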
lionagi/integrations/bridge/llamaindex_/node_parser.py

@@ -29,19 +29,18 @@ def get_llama_index_node_parser(node_parser: Any):
     import llama_index.core.node_parser
 
     if not isinstance(node_parser, str) and not issubclass(node_parser, NodeParser):
-        raise TypeError(
+        raise TypeError("node_parser must be a string or NodeParser.")
 
     if isinstance(node_parser, str):
         if node_parser == "CodeSplitter":
             SysUtil.check_import("tree_sitter_languages")
 
         try:
-
-            return parser
+            return getattr(llama_index.core.node_parser, node_parser)
         except Exception as e:
             raise AttributeError(
                 f"llama_index_core has no such attribute:" f" {node_parser}, Error: {e}"
-            )
+            ) from e
 
     elif isinstance(node_parser, NodeParser):
         return node_parser
@@ -75,10 +74,8 @@ def llama_index_parse_node(
         parser = get_llama_index_node_parser(node_parser)
         try:
             parser = parser(*parser_args, **parser_kwargs)
-        except:
+        except Exception:
             parser = parser.from_defaults(*parser_args, **parser_kwargs)
-
-            return nodes
-
+        return parser.get_nodes_from_documents(documents)
     except Exception as e:
-        raise ValueError(f"Failed to parse. Error: {e}")
+        raise ValueError(f"Failed to parse. Error: {e}") from e
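Besides the exception-chaining cleanup, the second hunk fixes a genuine bug: the old code returned an undefined `nodes` instead of the parsed result, while 0.2.1 actually parses the documents. A hedged usage sketch, assuming llama-index is installed and that the parser args are passed positionally as in the call sites elsewhere in this diff:

from llama_index.core import Document

from lionagi.integrations.bridge.llamaindex_.node_parser import llama_index_parse_node

documents = [Document(text="A long body of text to split into nodes.")]  # hypothetical

# "SentenceSplitter" is resolved via getattr on llama_index.core.node_parser,
# instantiated (falling back to .from_defaults), then applied to the documents.
nodes = llama_index_parse_node(documents, "SentenceSplitter", [], {})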
lionagi/integrations/bridge/pydantic_/pydantic_bridge.py

@@ -0,0 +1 @@
+from pydantic import BaseModel, Field, ValidationError, AliasChoices, field_serializer
lionagi/integrations/bridge/transformers_/__init__.py

File without changes
lionagi/integrations/bridge/transformers_/install_.py

@@ -0,0 +1,36 @@
+import subprocess
+from lionagi.libs import SysUtil
+
+
+def get_pytorch_install_command():
+    cpu_arch = SysUtil.get_cpu_architecture()
+
+    if cpu_arch == "apple_silicon":
+        return "pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu"
+    else:
+        # Default CPU installation
+        return "pip install torch torchvision torchaudio"
+
+
+def install_pytorch():
+    command = get_pytorch_install_command()
+    try:
+        subprocess.run(command.split(), check=True)
+        print("PyTorch installed successfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to install PyTorch: {e}")
+
+
+def install_transformers():
+    if not SysUtil.is_package_installed("torch"):
+        in_ = input(
+            "PyTorch is required for transformers. Would you like to install it now? (y/n): "
+        )
+        if in_ == "y":
+            install_pytorch()
+    if not SysUtil.is_package_installed("transformers"):
+        in_ = input(
+            "transformers is required. Would you like to install it now? (y/n): "
+        )
+        if in_ == "y":
+            SysUtil.install_import(package_name="transformers", import_name="pipeline")
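A sketch of how the installer is meant to be driven (module path per the file list above); the `input()` prompts make it interactive, so it suits a notebook or REPL rather than CI:

from lionagi.integrations.bridge.transformers_.install_ import install_transformers

# Asks before installing torch (with an Apple-Silicon-aware pip command),
# then asks before installing transformers via SysUtil.install_import.
install_transformers()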
lionagi/integrations/chunker/__init__.py

File without changes
lionagi/integrations/chunker/chunk.py

@@ -0,0 +1,312 @@
+from typing import Union, Callable
+
+from lionagi.libs import func_call
+from lionagi.libs.ln_convert import to_list
+from lionagi.core.collections import pile
+from lionagi.core.generic import Node
+from ..bridge.langchain_.langchain_bridge import LangchainBridge
+from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge
+
+
+from ..loader.load_util import ChunkerType, file_to_chunks, _datanode_parser
+
+
+def datanodes_convert(documents, chunker_type):
+    """
+    Converts documents to the specified chunker type.
+
+    Args:
+        documents (list): List of documents to be converted.
+        chunker_type (ChunkerType): The type of chunker to convert the documents to.
+
+    Returns:
+        list: The converted documents.
+
+    Example usage:
+        >>> documents = [Node(...), Node(...)]
+        >>> converted_docs = datanodes_convert(documents, ChunkerType.LLAMAINDEX)
+    """
+    for i in range(len(documents)):
+        if type(documents[i]) == Node:
+            if chunker_type == ChunkerType.LLAMAINDEX:
+                documents[i] = documents[i].to_llama_index()
+            elif chunker_type == ChunkerType.LANGCHAIN:
+                documents[i] = documents[i].to_langchain()
+    return documents
+
+
+def text_chunker(documents, args, kwargs):
+    """
+    Chunks text documents into smaller pieces.
+
+    Args:
+        documents (list): List of documents to be chunked.
+        args (tuple): Positional arguments for the chunking function.
+        kwargs (dict): Keyword arguments for the chunking function.
+
+    Returns:
+        pile: A pile of chunked Node instances.
+
+    Example usage:
+        >>> documents = [Node(...), Node(...)]
+        >>> chunked_docs = text_chunker(documents, args, kwargs)
+    """
+
+    def chunk_node(node):
+        chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
+        func_call.lcall(chunks, lambda chunk: chunk.pop("ln_id"))
+        return [Node.from_obj({**chunk}) for chunk in chunks]
+
+    a = to_list([chunk_node(doc) for doc in documents], flatten=True, dropna=True)
+    return pile(a)
+
+
+def chunk(
+    docs,
+    field: str = "content",
+    chunk_size: int = 1500,
+    overlap: float = 0.1,
+    threshold: int = 200,
+    chunker="text_chunker",
+    chunker_type=ChunkerType.PLAIN,
+    chunker_args=None,
+    chunker_kwargs=None,
+    chunking_kwargs=None,
+    documents_convert_func=None,
+    to_lion: bool | Callable = True,
+):
+    """
+    Chunks documents using the specified chunker.
+
+    Args:
+        docs (list): List of documents to be chunked.
+        field (str, optional): The field to chunk. Defaults to "content".
+        chunk_size (int, optional): The size of each chunk. Defaults to 1500.
+        overlap (float, optional): The overlap between chunks. Defaults to 0.1.
+        threshold (int, optional): The threshold for chunking. Defaults to 200.
+        chunker (str, optional): The chunker function or its name. Defaults to "text_chunker".
+        chunker_type (ChunkerType, optional): The type of chunker to use. Defaults to ChunkerType.PLAIN.
+        chunker_args (list, optional): Positional arguments for the chunker function. Defaults to None.
+        chunker_kwargs (dict, optional): Keyword arguments for the chunker function. Defaults to None.
+        chunking_kwargs (dict, optional): Additional keyword arguments for chunking. Defaults to None.
+        documents_convert_func (Callable, optional): Function to convert documents. Defaults to None.
+        to_lion (bool | Callable, optional): Whether to convert the data to Node instances or a custom parser. Defaults to True.
+
+    Returns:
+        pile: A pile of chunked Node instances.
+
+    Raises:
+        ValueError: If the chunker_type is not supported.
+
+    Example usage:
+        >>> chunked_docs = chunk(docs, field='text', chunk_size=1000, overlap=0.2)
+    """
+
+    if chunker_args is None:
+        chunker_args = []
+    if chunker_kwargs is None:
+        chunker_kwargs = {}
+    if chunking_kwargs is None:
+        chunking_kwargs = {}
+
+    if chunker_type == ChunkerType.PLAIN:
+        chunker_kwargs["field"] = field
+        chunker_kwargs["chunk_size"] = chunk_size
+        chunker_kwargs["overlap"] = overlap
+        chunker_kwargs["threshold"] = threshold
+        return chunk_funcs[ChunkerType.PLAIN](
+            docs, chunker, chunker_args, chunker_kwargs
+        )
+
+    elif chunker_type == ChunkerType.LANGCHAIN:
+        return chunk_funcs[ChunkerType.LANGCHAIN](
+            docs,
+            documents_convert_func,
+            chunker,
+            chunker_args,
+            chunker_kwargs,
+            to_lion,
+        )
+
+    elif chunker_type == ChunkerType.LLAMAINDEX:
+        return chunk_funcs[ChunkerType.LLAMAINDEX](
+            docs,
+            documents_convert_func,
+            chunker,
+            chunker_args,
+            chunker_kwargs,
+            to_lion,
+        )
+
+    elif chunker_type == ChunkerType.SELFDEFINED:
+        return chunk_funcs[ChunkerType.SELFDEFINED](
+            docs,
+            chunker,
+            chunker_args,
+            chunker_kwargs,
+            chunking_kwargs,
+            to_lion,
+        )
+
+    else:
+        raise ValueError(
+            f"{chunker_type} is not supported. Please choose from {list(ChunkerType)}"
+        )
+
+
+def _self_defined_chunker(
+    documents,
+    chunker,
+    chunker_args,
+    chunker_kwargs,
+    chunking_kwargs,
+    to_lion: bool | Callable,
+):
+    """
+    Chunks documents using a self-defined chunker.
+
+    Args:
+        documents (list): List of documents to be chunked.
+        chunker (str | Callable): The chunker function or its name.
+        chunker_args (list): Positional arguments for the chunker function.
+        chunker_kwargs (dict): Keyword arguments for the chunker function.
+        chunking_kwargs (dict): Additional keyword arguments for chunking.
+        to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
+
+    Returns:
+        pile: A pile of chunked Node instances or custom parsed nodes.
+
+    Raises:
+        ValueError: If the self-defined chunker is not valid.
+
+    Example usage:
+        >>> chunked_docs = _self_defined_chunker(docs, custom_chunker, ['arg1'], {'key': 'value'}, {}, custom_parser)
+    """
+    try:
+        splitter = chunker(*chunker_args, **chunker_kwargs)
+        nodes = splitter.split(documents, **chunking_kwargs)
+    except Exception as e:
+        raise ValueError(
+            f"Self defined chunker {chunker} is not valid. Error: {e}"
+        ) from e
+
+    if isinstance(to_lion, bool) and to_lion is True:
+        raise ValueError("Please define a valid parser to Node.")
+    elif isinstance(to_lion, Callable):
+        nodes = _datanode_parser(nodes, to_lion)
+    return nodes
+
+
+def _llama_index_chunker(
+    documents,
+    documents_convert_func,
+    chunker,
+    chunker_args,
+    chunker_kwargs,
+    to_lion: bool | Callable,
+):
+    """
+    Chunks documents using a LlamaIndex chunker.
+
+    Args:
+        documents (list): List of documents to be chunked.
+        documents_convert_func (Callable): Function to convert documents.
+        chunker (str | Callable): The chunker function or its name.
+        chunker_args (list): Positional arguments for the chunker function.
+        chunker_kwargs (dict): Keyword arguments for the chunker function.
+        to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
+
+    Returns:
+        pile: A pile of chunked Node instances or custom parsed nodes.
+
+    Example usage:
+        >>> chunked_docs = _llama_index_chunker(docs, convert_func, llama_chunker, ['arg1'], {'key': 'value'}, True)
+    """
+    if documents_convert_func:
+        documents = documents_convert_func(documents, "llama_index")
+    nodes = LlamaIndexBridge.llama_index_parse_node(
+        documents, chunker, chunker_args, chunker_kwargs
+    )
+
+    if isinstance(to_lion, bool) and to_lion is True:
+        nodes = [Node.from_llama_index(i) for i in nodes]
+    elif isinstance(to_lion, Callable):
+        nodes = _datanode_parser(nodes, to_lion)
+    return nodes
+
+
+def _langchain_chunker(
+    documents,
+    documents_convert_func,
+    chunker,
+    chunker_args,
+    chunker_kwargs,
+    to_lion: bool | Callable,
+):
+    """
+    Chunks documents using a Langchain chunker.
+
+    Args:
+        documents (list): List of documents to be chunked.
+        documents_convert_func (Callable): Function to convert documents.
+        chunker (str | Callable): The chunker function or its name.
+        chunker_args (list): Positional arguments for the chunker function.
+        chunker_kwargs (dict): Keyword arguments for the chunker function.
+        to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
+
+    Returns:
+        pile: A pile of chunked Node instances or custom parsed nodes.
+
+    Example usage:
+        >>> chunked_docs = _langchain_chunker(docs, convert_func, langchain_chunker, ['arg1'], {'key': 'value'}, True)
+    """
+    if documents_convert_func:
+        documents = documents_convert_func(documents, "langchain")
+    nodes = LangchainBridge.langchain_text_splitter(
+        documents, chunker, chunker_args, chunker_kwargs
+    )
+    if isinstance(to_lion, bool) and to_lion is True:
+        if isinstance(documents, str):
+            nodes = [Node(content=i) for i in nodes]
+        else:
+            nodes = [Node.from_langchain(i) for i in nodes]
+    elif isinstance(to_lion, Callable):
+        nodes = _datanode_parser(nodes, to_lion)
+    return nodes
+
+
+def _plain_chunker(documents, chunker, chunker_args, chunker_kwargs):
+    """
+    Chunks documents using a plain chunker.
+
+    Args:
+        documents (list): List of documents to be chunked.
+        chunker (str | Callable): The chunker function or its name.
+        chunker_args (list): Positional arguments for the chunker function.
+        chunker_kwargs (dict): Keyword arguments for the chunker function.
+
+    Returns:
+        pile: A pile of chunked Node instances.
+
+    Raises:
+        ValueError: If the chunker is not supported.
+
+    Example usage:
+        >>> chunked_docs = _plain_chunker(docs, 'text_chunker', ['arg1'], {'key': 'value'})
+    """
+    try:
+        if chunker == "text_chunker":
+            chunker = text_chunker
+        return chunker(documents, chunker_args, chunker_kwargs)
+    except Exception as e:
+        raise ValueError(
+            f"Reader {chunker} is currently not supported. Error: {e}"
+        ) from e
+
+
+chunk_funcs = {
+    ChunkerType.PLAIN: _plain_chunker,
+    ChunkerType.LANGCHAIN: _langchain_chunker,
+    ChunkerType.LLAMAINDEX: _llama_index_chunker,
+    ChunkerType.SELFDEFINED: _self_defined_chunker,
+}
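A hedged end-to-end sketch of the new `chunk` entry point under its plain-text defaults (module path per the file list; the `docs` value is hypothetical):

from lionagi.core.generic import Node
from lionagi.integrations.chunker.chunk import chunk

docs = [Node(content="A long document body. " * 400)]  # hypothetical input

# ChunkerType.PLAIN routes through _plain_chunker -> text_chunker -> file_to_chunks,
# splitting the "content" field into ~1500-character chunks with 10% overlap.
chunks = chunk(docs)
print(len(chunks))  # a pile of chunked Node instances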