lionagi 0.0.312__py3-none-any.whl → 0.2.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- lionagi/__init__.py +61 -3
- lionagi/core/__init__.py +0 -14
- lionagi/core/_setting/_setting.py +59 -0
- lionagi/core/action/__init__.py +14 -0
- lionagi/core/action/function_calling.py +136 -0
- lionagi/core/action/manual.py +1 -0
- lionagi/core/action/node.py +109 -0
- lionagi/core/action/tool.py +114 -0
- lionagi/core/action/tool_manager.py +356 -0
- lionagi/core/agent/__init__.py +0 -3
- lionagi/core/agent/base_agent.py +45 -36
- lionagi/core/agent/eval/evaluator.py +1 -0
- lionagi/core/agent/eval/vote.py +40 -0
- lionagi/core/agent/learn/learner.py +59 -0
- lionagi/core/agent/plan/unit_template.py +1 -0
- lionagi/core/collections/__init__.py +17 -0
- lionagi/core/collections/_logger.py +319 -0
- lionagi/core/collections/abc/__init__.py +53 -0
- lionagi/core/collections/abc/component.py +615 -0
- lionagi/core/collections/abc/concepts.py +297 -0
- lionagi/core/collections/abc/exceptions.py +150 -0
- lionagi/core/collections/abc/util.py +45 -0
- lionagi/core/collections/exchange.py +161 -0
- lionagi/core/collections/flow.py +426 -0
- lionagi/core/collections/model.py +419 -0
- lionagi/core/collections/pile.py +913 -0
- lionagi/core/collections/progression.py +236 -0
- lionagi/core/collections/util.py +64 -0
- lionagi/core/director/direct.py +314 -0
- lionagi/core/director/director.py +2 -0
- lionagi/core/engine/branch_engine.py +333 -0
- lionagi/core/engine/instruction_map_engine.py +204 -0
- lionagi/core/engine/sandbox_.py +14 -0
- lionagi/core/engine/script_engine.py +99 -0
- lionagi/core/executor/base_executor.py +90 -0
- lionagi/core/executor/graph_executor.py +330 -0
- lionagi/core/executor/neo4j_executor.py +384 -0
- lionagi/core/generic/__init__.py +7 -0
- lionagi/core/generic/edge.py +112 -0
- lionagi/core/generic/edge_condition.py +16 -0
- lionagi/core/generic/graph.py +236 -0
- lionagi/core/generic/hyperedge.py +1 -0
- lionagi/core/generic/node.py +220 -0
- lionagi/core/generic/tree.py +48 -0
- lionagi/core/generic/tree_node.py +79 -0
- lionagi/core/mail/__init__.py +7 -3
- lionagi/core/mail/mail.py +25 -0
- lionagi/core/mail/mail_manager.py +142 -58
- lionagi/core/mail/package.py +45 -0
- lionagi/core/mail/start_mail.py +36 -0
- lionagi/core/message/__init__.py +19 -0
- lionagi/core/message/action_request.py +133 -0
- lionagi/core/message/action_response.py +135 -0
- lionagi/core/message/assistant_response.py +95 -0
- lionagi/core/message/instruction.py +234 -0
- lionagi/core/message/message.py +101 -0
- lionagi/core/message/system.py +86 -0
- lionagi/core/message/util.py +283 -0
- lionagi/core/report/__init__.py +4 -0
- lionagi/core/report/base.py +217 -0
- lionagi/core/report/form.py +231 -0
- lionagi/core/report/report.py +166 -0
- lionagi/core/report/util.py +28 -0
- lionagi/core/rule/__init__.py +0 -0
- lionagi/core/rule/_default.py +16 -0
- lionagi/core/rule/action.py +99 -0
- lionagi/core/rule/base.py +238 -0
- lionagi/core/rule/boolean.py +56 -0
- lionagi/core/rule/choice.py +47 -0
- lionagi/core/rule/mapping.py +96 -0
- lionagi/core/rule/number.py +71 -0
- lionagi/core/rule/rulebook.py +109 -0
- lionagi/core/rule/string.py +52 -0
- lionagi/core/rule/util.py +35 -0
- lionagi/core/session/__init__.py +0 -3
- lionagi/core/session/branch.py +431 -0
- lionagi/core/session/directive_mixin.py +287 -0
- lionagi/core/session/session.py +230 -902
- lionagi/core/structure/__init__.py +1 -0
- lionagi/core/structure/chain.py +1 -0
- lionagi/core/structure/forest.py +1 -0
- lionagi/core/structure/graph.py +1 -0
- lionagi/core/structure/tree.py +1 -0
- lionagi/core/unit/__init__.py +5 -0
- lionagi/core/unit/parallel_unit.py +245 -0
- lionagi/core/unit/template/__init__.py +0 -0
- lionagi/core/unit/template/action.py +81 -0
- lionagi/core/unit/template/base.py +51 -0
- lionagi/core/unit/template/plan.py +84 -0
- lionagi/core/unit/template/predict.py +109 -0
- lionagi/core/unit/template/score.py +124 -0
- lionagi/core/unit/template/select.py +104 -0
- lionagi/core/unit/unit.py +362 -0
- lionagi/core/unit/unit_form.py +305 -0
- lionagi/core/unit/unit_mixin.py +1168 -0
- lionagi/core/unit/util.py +71 -0
- lionagi/core/validator/__init__.py +0 -0
- lionagi/core/validator/validator.py +364 -0
- lionagi/core/work/__init__.py +0 -0
- lionagi/core/work/work.py +76 -0
- lionagi/core/work/work_function.py +101 -0
- lionagi/core/work/work_queue.py +103 -0
- lionagi/core/work/worker.py +258 -0
- lionagi/core/work/worklog.py +120 -0
- lionagi/experimental/__init__.py +0 -0
- lionagi/experimental/compressor/__init__.py +0 -0
- lionagi/experimental/compressor/base.py +46 -0
- lionagi/experimental/compressor/llm_compressor.py +247 -0
- lionagi/experimental/compressor/llm_summarizer.py +61 -0
- lionagi/experimental/compressor/util.py +70 -0
- lionagi/experimental/directive/__init__.py +19 -0
- lionagi/experimental/directive/parser/__init__.py +0 -0
- lionagi/experimental/directive/parser/base_parser.py +282 -0
- lionagi/experimental/directive/template/__init__.py +0 -0
- lionagi/experimental/directive/template/base_template.py +79 -0
- lionagi/experimental/directive/template/schema.py +36 -0
- lionagi/experimental/directive/tokenizer.py +73 -0
- lionagi/experimental/evaluator/__init__.py +0 -0
- lionagi/experimental/evaluator/ast_evaluator.py +131 -0
- lionagi/experimental/evaluator/base_evaluator.py +218 -0
- lionagi/experimental/knowledge/__init__.py +0 -0
- lionagi/experimental/knowledge/base.py +10 -0
- lionagi/experimental/knowledge/graph.py +0 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +1 -0
- lionagi/integrations/bridge/autogen_/__init__.py +0 -0
- lionagi/integrations/bridge/autogen_/autogen_.py +124 -0
- lionagi/integrations/bridge/langchain_/documents.py +4 -0
- lionagi/integrations/bridge/llamaindex_/index.py +30 -0
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +6 -0
- lionagi/integrations/bridge/llamaindex_/llama_pack.py +227 -0
- lionagi/integrations/bridge/llamaindex_/node_parser.py +6 -9
- lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +1 -0
- lionagi/integrations/bridge/transformers_/__init__.py +0 -0
- lionagi/integrations/bridge/transformers_/install_.py +36 -0
- lionagi/integrations/chunker/__init__.py +0 -0
- lionagi/integrations/chunker/chunk.py +312 -0
- lionagi/integrations/config/oai_configs.py +38 -7
- lionagi/integrations/config/ollama_configs.py +1 -1
- lionagi/integrations/config/openrouter_configs.py +14 -2
- lionagi/integrations/loader/__init__.py +0 -0
- lionagi/integrations/loader/load.py +253 -0
- lionagi/integrations/loader/load_util.py +195 -0
- lionagi/integrations/provider/_mapping.py +46 -0
- lionagi/integrations/provider/litellm.py +2 -1
- lionagi/integrations/provider/mlx_service.py +16 -9
- lionagi/integrations/provider/oai.py +91 -4
- lionagi/integrations/provider/ollama.py +7 -6
- lionagi/integrations/provider/openrouter.py +115 -8
- lionagi/integrations/provider/services.py +2 -2
- lionagi/integrations/provider/transformers.py +18 -22
- lionagi/integrations/storage/__init__.py +3 -0
- lionagi/integrations/storage/neo4j.py +665 -0
- lionagi/integrations/storage/storage_util.py +287 -0
- lionagi/integrations/storage/structure_excel.py +285 -0
- lionagi/integrations/storage/to_csv.py +63 -0
- lionagi/integrations/storage/to_excel.py +83 -0
- lionagi/libs/__init__.py +26 -1
- lionagi/libs/ln_api.py +78 -23
- lionagi/libs/ln_context.py +37 -0
- lionagi/libs/ln_convert.py +21 -9
- lionagi/libs/ln_func_call.py +69 -28
- lionagi/libs/ln_image.py +107 -0
- lionagi/libs/ln_knowledge_graph.py +405 -0
- lionagi/libs/ln_nested.py +26 -11
- lionagi/libs/ln_parse.py +110 -14
- lionagi/libs/ln_queue.py +117 -0
- lionagi/libs/ln_tokenize.py +164 -0
- lionagi/{core/prompt/field_validator.py → libs/ln_validate.py} +79 -14
- lionagi/libs/special_tokens.py +172 -0
- lionagi/libs/sys_util.py +107 -2
- lionagi/lions/__init__.py +0 -0
- lionagi/lions/coder/__init__.py +0 -0
- lionagi/lions/coder/add_feature.py +20 -0
- lionagi/lions/coder/base_prompts.py +22 -0
- lionagi/lions/coder/code_form.py +13 -0
- lionagi/lions/coder/coder.py +168 -0
- lionagi/lions/coder/util.py +96 -0
- lionagi/lions/researcher/__init__.py +0 -0
- lionagi/lions/researcher/data_source/__init__.py +0 -0
- lionagi/lions/researcher/data_source/finhub_.py +191 -0
- lionagi/lions/researcher/data_source/google_.py +199 -0
- lionagi/lions/researcher/data_source/wiki_.py +96 -0
- lionagi/lions/researcher/data_source/yfinance_.py +21 -0
- lionagi/tests/integrations/__init__.py +0 -0
- lionagi/tests/libs/__init__.py +0 -0
- lionagi/tests/libs/test_field_validators.py +353 -0
- lionagi/tests/{test_libs → libs}/test_func_call.py +23 -21
- lionagi/tests/{test_libs → libs}/test_nested.py +36 -21
- lionagi/tests/{test_libs → libs}/test_parse.py +1 -1
- lionagi/tests/libs/test_queue.py +67 -0
- lionagi/tests/test_core/collections/__init__.py +0 -0
- lionagi/tests/test_core/collections/test_component.py +206 -0
- lionagi/tests/test_core/collections/test_exchange.py +138 -0
- lionagi/tests/test_core/collections/test_flow.py +145 -0
- lionagi/tests/test_core/collections/test_pile.py +171 -0
- lionagi/tests/test_core/collections/test_progression.py +129 -0
- lionagi/tests/test_core/generic/__init__.py +0 -0
- lionagi/tests/test_core/generic/test_edge.py +67 -0
- lionagi/tests/test_core/generic/test_graph.py +96 -0
- lionagi/tests/test_core/generic/test_node.py +106 -0
- lionagi/tests/test_core/generic/test_tree_node.py +73 -0
- lionagi/tests/test_core/test_branch.py +115 -292
- lionagi/tests/test_core/test_form.py +46 -0
- lionagi/tests/test_core/test_report.py +105 -0
- lionagi/tests/test_core/test_validator.py +111 -0
- lionagi/version.py +1 -1
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/LICENSE +12 -11
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/METADATA +19 -118
- lionagi-0.2.1.dist-info/RECORD +240 -0
- lionagi/core/branch/__init__.py +0 -4
- lionagi/core/branch/base_branch.py +0 -654
- lionagi/core/branch/branch.py +0 -471
- lionagi/core/branch/branch_flow_mixin.py +0 -96
- lionagi/core/branch/executable_branch.py +0 -347
- lionagi/core/branch/util.py +0 -323
- lionagi/core/direct/__init__.py +0 -6
- lionagi/core/direct/predict.py +0 -161
- lionagi/core/direct/score.py +0 -278
- lionagi/core/direct/select.py +0 -169
- lionagi/core/direct/utils.py +0 -87
- lionagi/core/direct/vote.py +0 -64
- lionagi/core/flow/base/baseflow.py +0 -23
- lionagi/core/flow/monoflow/ReAct.py +0 -238
- lionagi/core/flow/monoflow/__init__.py +0 -9
- lionagi/core/flow/monoflow/chat.py +0 -95
- lionagi/core/flow/monoflow/chat_mixin.py +0 -263
- lionagi/core/flow/monoflow/followup.py +0 -214
- lionagi/core/flow/polyflow/__init__.py +0 -1
- lionagi/core/flow/polyflow/chat.py +0 -248
- lionagi/core/mail/schema.py +0 -56
- lionagi/core/messages/__init__.py +0 -3
- lionagi/core/messages/schema.py +0 -533
- lionagi/core/prompt/prompt_template.py +0 -316
- lionagi/core/schema/__init__.py +0 -22
- lionagi/core/schema/action_node.py +0 -29
- lionagi/core/schema/base_mixin.py +0 -296
- lionagi/core/schema/base_node.py +0 -199
- lionagi/core/schema/condition.py +0 -24
- lionagi/core/schema/data_logger.py +0 -354
- lionagi/core/schema/data_node.py +0 -93
- lionagi/core/schema/prompt_template.py +0 -67
- lionagi/core/schema/structure.py +0 -910
- lionagi/core/tool/__init__.py +0 -3
- lionagi/core/tool/tool_manager.py +0 -280
- lionagi/integrations/bridge/pydantic_/base_model.py +0 -7
- lionagi/tests/test_core/test_base_branch.py +0 -427
- lionagi/tests/test_core/test_chat_flow.py +0 -63
- lionagi/tests/test_core/test_mail_manager.py +0 -75
- lionagi/tests/test_core/test_prompts.py +0 -51
- lionagi/tests/test_core/test_session.py +0 -254
- lionagi/tests/test_core/test_session_base_util.py +0 -312
- lionagi/tests/test_core/test_tool_manager.py +0 -95
- lionagi-0.0.312.dist-info/RECORD +0 -111
- /lionagi/core/{branch/base → _setting}/__init__.py +0 -0
- /lionagi/core/{flow → agent/eval}/__init__.py +0 -0
- /lionagi/core/{flow/base → agent/learn}/__init__.py +0 -0
- /lionagi/core/{prompt → agent/plan}/__init__.py +0 -0
- /lionagi/core/{tool/manual.py → agent/plan/plan.py} +0 -0
- /lionagi/{tests/test_integrations → core/director}/__init__.py +0 -0
- /lionagi/{tests/test_libs → core/engine}/__init__.py +0 -0
- /lionagi/{tests/test_libs/test_async.py → core/executor/__init__.py} +0 -0
- /lionagi/tests/{test_libs → libs}/test_api.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_convert.py +0 -0
- /lionagi/tests/{test_libs → libs}/test_sys_util.py +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/WHEEL +0 -0
- {lionagi-0.0.312.dist-info → lionagi-0.2.1.dist-info}/top_level.txt +0 -0
@@ -28,6 +28,10 @@ def to_langchain_document(datanode: T, **kwargs: Any) -> Any:
|
|
28
28
|
SysUtil.change_dict_key(dnode, old_key="content", new_key="page_content")
|
29
29
|
SysUtil.change_dict_key(dnode, old_key="lc_id", new_key="id_")
|
30
30
|
dnode = {**dnode, **kwargs}
|
31
|
+
dnode = {k: v for k, v in dnode.items() if v is not None}
|
32
|
+
if "page_content" not in dnode:
|
33
|
+
dnode["page_content"] = ""
|
34
|
+
|
31
35
|
return LangchainDocument(**dnode)
|
32
36
|
|
33
37
|
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class LlamaIndex:
|
2
|
+
|
3
|
+
@classmethod
|
4
|
+
def index(
|
5
|
+
cls,
|
6
|
+
nodes,
|
7
|
+
llm_obj=None,
|
8
|
+
llm_class=None,
|
9
|
+
llm_kwargs=None,
|
10
|
+
index_type=None,
|
11
|
+
**kwargs,
|
12
|
+
):
|
13
|
+
from llama_index.core import Settings
|
14
|
+
from llama_index.llms.openai import OpenAI
|
15
|
+
|
16
|
+
if not llm_obj:
|
17
|
+
llm_class = llm_class or OpenAI
|
18
|
+
llm_kwargs = llm_kwargs or {}
|
19
|
+
if "model" not in llm_kwargs:
|
20
|
+
llm_kwargs["model"] = "gpt-4o"
|
21
|
+
llm_obj = llm_class(**llm_kwargs)
|
22
|
+
|
23
|
+
Settings.llm = llm_obj
|
24
|
+
|
25
|
+
if not index_type:
|
26
|
+
from llama_index.core import VectorStoreIndex
|
27
|
+
|
28
|
+
index_type = VectorStoreIndex
|
29
|
+
|
30
|
+
return index_type(nodes, **kwargs)
|
@@ -100,3 +100,9 @@ class LlamaIndexBridge:
|
|
100
100
|
from .reader import get_llama_index_reader
|
101
101
|
|
102
102
|
return get_llama_index_reader(*args, **kwargs)
|
103
|
+
|
104
|
+
@staticmethod
|
105
|
+
def index(nodes, **kwargs):
|
106
|
+
from .index import LlamaIndex
|
107
|
+
|
108
|
+
return LlamaIndex.index(nodes, **kwargs)
|
@@ -0,0 +1,227 @@
|
|
1
|
+
class LlamaPack:
|
2
|
+
|
3
|
+
@staticmethod
|
4
|
+
def download(pack_name, pack_path):
|
5
|
+
try:
|
6
|
+
from llama_index.llama_pack import download_llama_pack
|
7
|
+
|
8
|
+
return download_llama_pack(pack_name, pack_path)
|
9
|
+
except Exception as e:
|
10
|
+
raise ImportError(f"Error in downloading llama pack: {e}")
|
11
|
+
|
12
|
+
@staticmethod
|
13
|
+
def build(pack_name, pack_path, args=[], **kwargs):
|
14
|
+
pack = LlamaPack.download(pack_name, pack_path)
|
15
|
+
return pack(*args, **kwargs)
|
16
|
+
|
17
|
+
@staticmethod
|
18
|
+
def stock_market_pack(pack_path="./stock_market_data_pack", args=[], **kwargs):
|
19
|
+
name_ = "StockMarketDataQueryEnginePack"
|
20
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
21
|
+
|
22
|
+
@staticmethod
|
23
|
+
def embedded_table_pack(
|
24
|
+
pack_path="./embedded_tables_unstructured_pack", args=[], **kwargs
|
25
|
+
):
|
26
|
+
name_ = "RecursiveRetrieverSmallToBigPack"
|
27
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
28
|
+
|
29
|
+
@staticmethod
|
30
|
+
def rag_evaluator_pack(pack_path="./rag_evaluator_pack", args=[], **kwargs):
|
31
|
+
name_ = "RagEvaluatorPack"
|
32
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
33
|
+
|
34
|
+
@staticmethod
|
35
|
+
def ollma_pack(pack_path="./ollama_pack", args=[], **kwargs):
|
36
|
+
name_ = "OllamaQueryEnginePack"
|
37
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
38
|
+
|
39
|
+
@staticmethod
|
40
|
+
def llm_compiler_agent_pack(
|
41
|
+
pack_path="./llm_compiler_agent_pack", args=[], **kwargs
|
42
|
+
):
|
43
|
+
name_ = "LLMCompilerAgentPack"
|
44
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
45
|
+
|
46
|
+
@staticmethod
|
47
|
+
def resume_screener_pack(pack_path="./resume_screener_pack", args=[], **kwargs):
|
48
|
+
name_ = "ResumeScreenerPack"
|
49
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
50
|
+
|
51
|
+
@staticmethod
|
52
|
+
def ragatouille_retriever_pack(pack_path="./ragatouille_pack", args=[], **kwargs):
|
53
|
+
name_ = "RAGatouilleRetrieverPack"
|
54
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
55
|
+
|
56
|
+
@staticmethod
|
57
|
+
def chain_of_table_pack(pack_path="./chain_of_table_pack", args=[], **kwargs):
|
58
|
+
name_ = "ChainOfTablePack"
|
59
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
60
|
+
|
61
|
+
@staticmethod
|
62
|
+
def hybrid_fusion_retriever_pack(
|
63
|
+
pack_path="./hybrid_fusion_pack", args=[], **kwargs
|
64
|
+
):
|
65
|
+
name_ = "HybridFusionRetrieverPack"
|
66
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
67
|
+
|
68
|
+
@staticmethod
|
69
|
+
def neo4j_query_engine_pack(pack_path="./neo4j_pack", args=[], **kwargs):
|
70
|
+
name_ = "Neo4jQueryEnginePack"
|
71
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
72
|
+
|
73
|
+
@staticmethod
|
74
|
+
def llava_completion_pack(pack_path="./llava_pack", args=[], **kwargs):
|
75
|
+
name_ = "LlavaCompletionPack"
|
76
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
77
|
+
|
78
|
+
@staticmethod
|
79
|
+
def sentence_window_retriever_pack(
|
80
|
+
pack_path="./sentence_window_retriever_pack", args=[], **kwargs
|
81
|
+
):
|
82
|
+
name_ = "SentenceWindowRetrieverPack"
|
83
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
84
|
+
|
85
|
+
@staticmethod
|
86
|
+
def dense_x_retrieval_pack(pack_path="./dense_pack", args=[], **kwargs):
|
87
|
+
name_ = "DenseXRetrievalPack"
|
88
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
89
|
+
|
90
|
+
@staticmethod
|
91
|
+
def zephyr_query_engine_pack(pack_path="./zephyr_pack", args=[], **kwargs):
|
92
|
+
name_ = "ZephyrQueryEnginePack"
|
93
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
94
|
+
|
95
|
+
@staticmethod
|
96
|
+
def query_rewriting_retriever_pack(
|
97
|
+
pack_path="./query_rewriting_pack", args=[], **kwargs
|
98
|
+
):
|
99
|
+
name_ = "QueryRewritingRetrieverPack"
|
100
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
101
|
+
|
102
|
+
@staticmethod
|
103
|
+
def fuzzy_citation_engine_pack(
|
104
|
+
pack_path="./fuzzy_citation_pack", args=[], **kwargs
|
105
|
+
):
|
106
|
+
name_ = "FuzzyCitationEnginePack"
|
107
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
108
|
+
|
109
|
+
@staticmethod
|
110
|
+
def multidoc_auto_retriever_pack(
|
111
|
+
pack_path="./multidoc_autoretrieval_pack", args=[], **kwargs
|
112
|
+
):
|
113
|
+
name_ = "MultiDocAutoRetrieverPack"
|
114
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
115
|
+
|
116
|
+
@staticmethod
|
117
|
+
def auto_merging_retriever_pack(
|
118
|
+
pack_path="./auto_merging_retriever_pack", args=[], **kwargs
|
119
|
+
):
|
120
|
+
name_ = "AutoMergingRetrieverPack"
|
121
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
122
|
+
|
123
|
+
@staticmethod
|
124
|
+
def voyage_query_engine_pack(pack_path="./voyage_pack", args=[], **kwargs):
|
125
|
+
name_ = "VoyageQueryEnginePack"
|
126
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
127
|
+
|
128
|
+
@staticmethod
|
129
|
+
def mix_self_consistency_pack(
|
130
|
+
pack_path="./mix_self_consistency_pack", args=[], **kwargs
|
131
|
+
):
|
132
|
+
name_ = "MixSelfConsistencyPack"
|
133
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
134
|
+
|
135
|
+
@staticmethod
|
136
|
+
def rag_fusion_pipeline_pack(
|
137
|
+
pack_path="./rag_fusion_pipeline_pack", args=[], **kwargs
|
138
|
+
):
|
139
|
+
name_ = "RAGFusionPipelinePack"
|
140
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
141
|
+
|
142
|
+
@staticmethod
|
143
|
+
def multi_document_agents_pack(
|
144
|
+
pack_path="./multi_doc_agents_pack", args=[], **kwargs
|
145
|
+
):
|
146
|
+
name_ = "MultiDocumentAgentsPack"
|
147
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
148
|
+
|
149
|
+
@staticmethod
|
150
|
+
def llama_guard_moderator_pack(pack_path="./llamaguard_pack", args=[], **kwargs):
|
151
|
+
name_ = "LlamaGuardModeratorPack"
|
152
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
153
|
+
|
154
|
+
@staticmethod
|
155
|
+
def evaluator_benchmarker_pack(
|
156
|
+
pack_path="./eval_benchmark_pack", args=[], **kwargs
|
157
|
+
):
|
158
|
+
name_ = "EvaluatorBenchmarkerPack"
|
159
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
160
|
+
|
161
|
+
@staticmethod
|
162
|
+
def amazon_product_extraction_pack(
|
163
|
+
pack_path="./amazon_product_extraction_pack", args=[], **kwargs
|
164
|
+
):
|
165
|
+
name_ = "AmazonProductExtractionPack"
|
166
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
167
|
+
|
168
|
+
@staticmethod
|
169
|
+
def llama_dataset_metadata_pack(
|
170
|
+
pack_path="./llama_dataset_metadata_pack", args=[], **kwargs
|
171
|
+
):
|
172
|
+
name_ = "LlamaDatasetMetadataPack"
|
173
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
174
|
+
|
175
|
+
@staticmethod
|
176
|
+
def multi_tenancy_rag_pack(pack_path="./multitenancy_rag_pack", args=[], **kwargs):
|
177
|
+
name_ = "MultiTenancyRAGPack"
|
178
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
179
|
+
|
180
|
+
@staticmethod
|
181
|
+
def gmail_openai_agent_pack(pack_path="./gmail_pack", args=[], **kwargs):
|
182
|
+
name_ = "GmailOpenAIAgentPack"
|
183
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
184
|
+
|
185
|
+
@staticmethod
|
186
|
+
def snowflake_query_engine_pack(pack_path="./snowflake_pack", args=[], **kwargs):
|
187
|
+
name_ = "SnowflakeQueryEnginePack"
|
188
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
189
|
+
|
190
|
+
@staticmethod
|
191
|
+
def agent_search_retriever_pack(pack_path="./agent_search_pack", args=[], **kwargs):
|
192
|
+
name_ = "AgentSearchRetrieverPack"
|
193
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
194
|
+
|
195
|
+
@staticmethod
|
196
|
+
def vectara_rag_pack(pack_path="./vectara_rag_pack", args=[], **kwargs):
|
197
|
+
name_ = "VectaraRagPack"
|
198
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
199
|
+
|
200
|
+
@staticmethod
|
201
|
+
def chroma_autoretrieval_pack(pack_path="./chroma_pack", args=[], **kwargs):
|
202
|
+
name_ = "ChromaAutoretrievalPack"
|
203
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
204
|
+
|
205
|
+
@staticmethod
|
206
|
+
def arize_phoenix_query_engine_pack(pack_path="./arize_pack", args=[], **kwargs):
|
207
|
+
name_ = "ArizePhoenixQueryEnginePack"
|
208
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
209
|
+
|
210
|
+
@staticmethod
|
211
|
+
def redis_ingestion_pipeline_pack(
|
212
|
+
pack_path="./redis_ingestion_pack", args=[], **kwargs
|
213
|
+
):
|
214
|
+
name_ = "RedisIngestionPipelinePack"
|
215
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
216
|
+
|
217
|
+
@staticmethod
|
218
|
+
def nebula_graph_query_engine_pack(
|
219
|
+
pack_path="./nebulagraph_pack", args=[], **kwargs
|
220
|
+
):
|
221
|
+
name_ = "NebulaGraphQueryEnginePack"
|
222
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
223
|
+
|
224
|
+
@staticmethod
|
225
|
+
def weaviate_retry_engine_pack(pack_path="./weaviate_pack", args=[], **kwargs):
|
226
|
+
name_ = "WeaviateRetryEnginePack"
|
227
|
+
return LlamaPack.build(name_, pack_path, args, **kwargs)
|
@@ -29,19 +29,18 @@ def get_llama_index_node_parser(node_parser: Any):
|
|
29
29
|
import llama_index.core.node_parser
|
30
30
|
|
31
31
|
if not isinstance(node_parser, str) and not issubclass(node_parser, NodeParser):
|
32
|
-
raise TypeError(
|
32
|
+
raise TypeError("node_parser must be a string or NodeParser.")
|
33
33
|
|
34
34
|
if isinstance(node_parser, str):
|
35
35
|
if node_parser == "CodeSplitter":
|
36
36
|
SysUtil.check_import("tree_sitter_languages")
|
37
37
|
|
38
38
|
try:
|
39
|
-
|
40
|
-
return parser
|
39
|
+
return getattr(llama_index.core.node_parser, node_parser)
|
41
40
|
except Exception as e:
|
42
41
|
raise AttributeError(
|
43
42
|
f"llama_index_core has no such attribute:" f" {node_parser}, Error: {e}"
|
44
|
-
)
|
43
|
+
) from e
|
45
44
|
|
46
45
|
elif isinstance(node_parser, NodeParser):
|
47
46
|
return node_parser
|
@@ -75,10 +74,8 @@ def llama_index_parse_node(
|
|
75
74
|
parser = get_llama_index_node_parser(node_parser)
|
76
75
|
try:
|
77
76
|
parser = parser(*parser_args, **parser_kwargs)
|
78
|
-
except:
|
77
|
+
except Exception:
|
79
78
|
parser = parser.from_defaults(*parser_args, **parser_kwargs)
|
80
|
-
|
81
|
-
return nodes
|
82
|
-
|
79
|
+
return parser.get_nodes_from_documents(documents)
|
83
80
|
except Exception as e:
|
84
|
-
raise ValueError(f"Failed to parse. Error: {e}")
|
81
|
+
raise ValueError(f"Failed to parse. Error: {e}") from e
|
@@ -0,0 +1 @@
|
|
1
|
+
from pydantic import BaseModel, Field, ValidationError, AliasChoices, field_serializer
|
File without changes
|
@@ -0,0 +1,36 @@
|
|
1
|
+
import subprocess
|
2
|
+
from lionagi.libs import SysUtil
|
3
|
+
|
4
|
+
|
5
|
+
def get_pytorch_install_command():
|
6
|
+
cpu_arch = SysUtil.get_cpu_architecture()
|
7
|
+
|
8
|
+
if cpu_arch == "apple_silicon":
|
9
|
+
return "pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu"
|
10
|
+
else:
|
11
|
+
# Default CPU installation
|
12
|
+
return "pip install torch torchvision torchaudio"
|
13
|
+
|
14
|
+
|
15
|
+
def install_pytorch():
|
16
|
+
command = get_pytorch_install_command()
|
17
|
+
try:
|
18
|
+
subprocess.run(command.split(), check=True)
|
19
|
+
print("PyTorch installed successfully.")
|
20
|
+
except subprocess.CalledProcessError as e:
|
21
|
+
print(f"Failed to install PyTorch: {e}")
|
22
|
+
|
23
|
+
|
24
|
+
def install_transformers():
|
25
|
+
if not SysUtil.is_package_installed("torch"):
|
26
|
+
in_ = input(
|
27
|
+
"PyTorch is required for transformers. Would you like to install it now? (y/n): "
|
28
|
+
)
|
29
|
+
if in_ == "y":
|
30
|
+
install_pytorch()
|
31
|
+
if not SysUtil.is_package_installed("transformers"):
|
32
|
+
in_ = input(
|
33
|
+
"transformers is required. Would you like to install it now? (y/n): "
|
34
|
+
)
|
35
|
+
if in_ == "y":
|
36
|
+
SysUtil.install_import(package_name="transformers", import_name="pipeline")
|
File without changes
|
@@ -0,0 +1,312 @@
|
|
1
|
+
from typing import Union, Callable
|
2
|
+
|
3
|
+
from lionagi.libs import func_call
|
4
|
+
from lionagi.libs.ln_convert import to_list
|
5
|
+
from lionagi.core.collections import pile
|
6
|
+
from lionagi.core.generic import Node
|
7
|
+
from ..bridge.langchain_.langchain_bridge import LangchainBridge
|
8
|
+
from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge
|
9
|
+
|
10
|
+
|
11
|
+
from ..loader.load_util import ChunkerType, file_to_chunks, _datanode_parser
|
12
|
+
|
13
|
+
|
14
|
+
def datanodes_convert(documents, chunker_type):
|
15
|
+
"""
|
16
|
+
Converts documents to the specified chunker type.
|
17
|
+
|
18
|
+
Args:
|
19
|
+
documents (list): List of documents to be converted.
|
20
|
+
chunker_type (ChunkerType): The type of chunker to convert the documents to.
|
21
|
+
|
22
|
+
Returns:
|
23
|
+
list: The converted documents.
|
24
|
+
|
25
|
+
Example usage:
|
26
|
+
>>> documents = [Node(...), Node(...)]
|
27
|
+
>>> converted_docs = datanodes_convert(documents, ChunkerType.LLAMAINDEX)
|
28
|
+
"""
|
29
|
+
for i in range(len(documents)):
|
30
|
+
if type(documents[i]) == Node:
|
31
|
+
if chunker_type == ChunkerType.LLAMAINDEX:
|
32
|
+
documents[i] = documents[i].to_llama_index()
|
33
|
+
elif chunker_type == ChunkerType.LANGCHAIN:
|
34
|
+
documents[i] = documents[i].to_langchain()
|
35
|
+
return documents
|
36
|
+
|
37
|
+
|
38
|
+
def text_chunker(documents, args, kwargs):
|
39
|
+
"""
|
40
|
+
Chunks text documents into smaller pieces.
|
41
|
+
|
42
|
+
Args:
|
43
|
+
documents (list): List of documents to be chunked.
|
44
|
+
args (tuple): Positional arguments for the chunking function.
|
45
|
+
kwargs (dict): Keyword arguments for the chunking function.
|
46
|
+
|
47
|
+
Returns:
|
48
|
+
pile: A pile of chunked Node instances.
|
49
|
+
|
50
|
+
Example usage:
|
51
|
+
>>> documents = [Node(...), Node(...)]
|
52
|
+
>>> chunked_docs = text_chunker(documents, args, kwargs)
|
53
|
+
"""
|
54
|
+
|
55
|
+
def chunk_node(node):
|
56
|
+
chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
|
57
|
+
func_call.lcall(chunks, lambda chunk: chunk.pop("ln_id"))
|
58
|
+
return [Node.from_obj({**chunk}) for chunk in chunks]
|
59
|
+
|
60
|
+
a = to_list([chunk_node(doc) for doc in documents], flatten=True, dropna=True)
|
61
|
+
return pile(a)
|
62
|
+
|
63
|
+
|
64
|
+
def chunk(
|
65
|
+
docs,
|
66
|
+
field: str = "content",
|
67
|
+
chunk_size: int = 1500,
|
68
|
+
overlap: float = 0.1,
|
69
|
+
threshold: int = 200,
|
70
|
+
chunker="text_chunker",
|
71
|
+
chunker_type=ChunkerType.PLAIN,
|
72
|
+
chunker_args=None,
|
73
|
+
chunker_kwargs=None,
|
74
|
+
chunking_kwargs=None,
|
75
|
+
documents_convert_func=None,
|
76
|
+
to_lion: bool | Callable = True,
|
77
|
+
):
|
78
|
+
"""
|
79
|
+
Chunks documents using the specified chunker.
|
80
|
+
|
81
|
+
Args:
|
82
|
+
docs (list): List of documents to be chunked.
|
83
|
+
field (str, optional): The field to chunk. Defaults to "content".
|
84
|
+
chunk_size (int, optional): The size of each chunk. Defaults to 1500.
|
85
|
+
overlap (float, optional): The overlap between chunks. Defaults to 0.1.
|
86
|
+
threshold (int, optional): The threshold for chunking. Defaults to 200.
|
87
|
+
chunker (str, optional): The chunker function or its name. Defaults to "text_chunker".
|
88
|
+
chunker_type (ChunkerType, optional): The type of chunker to use. Defaults to ChunkerType.PLAIN.
|
89
|
+
chunker_args (list, optional): Positional arguments for the chunker function. Defaults to None.
|
90
|
+
chunker_kwargs (dict, optional): Keyword arguments for the chunker function. Defaults to None.
|
91
|
+
chunking_kwargs (dict, optional): Additional keyword arguments for chunking. Defaults to None.
|
92
|
+
documents_convert_func (Callable, optional): Function to convert documents. Defaults to None.
|
93
|
+
to_lion (bool | Callable, optional): Whether to convert the data to Node instances or a custom parser. Defaults to True.
|
94
|
+
|
95
|
+
Returns:
|
96
|
+
pile: A pile of chunked Node instances.
|
97
|
+
|
98
|
+
Raises:
|
99
|
+
ValueError: If the chunker_type is not supported.
|
100
|
+
|
101
|
+
Example usage:
|
102
|
+
>>> chunked_docs = chunk(docs, field='text', chunk_size=1000, overlap=0.2)
|
103
|
+
"""
|
104
|
+
|
105
|
+
if chunker_args is None:
|
106
|
+
chunker_args = []
|
107
|
+
if chunker_kwargs is None:
|
108
|
+
chunker_kwargs = {}
|
109
|
+
if chunking_kwargs is None:
|
110
|
+
chunking_kwargs = {}
|
111
|
+
|
112
|
+
if chunker_type == ChunkerType.PLAIN:
|
113
|
+
chunker_kwargs["field"] = field
|
114
|
+
chunker_kwargs["chunk_size"] = chunk_size
|
115
|
+
chunker_kwargs["overlap"] = overlap
|
116
|
+
chunker_kwargs["threshold"] = threshold
|
117
|
+
return chunk_funcs[ChunkerType.PLAIN](
|
118
|
+
docs, chunker, chunker_args, chunker_kwargs
|
119
|
+
)
|
120
|
+
|
121
|
+
elif chunker_type == ChunkerType.LANGCHAIN:
|
122
|
+
return chunk_funcs[ChunkerType.LANGCHAIN](
|
123
|
+
docs,
|
124
|
+
documents_convert_func,
|
125
|
+
chunker,
|
126
|
+
chunker_args,
|
127
|
+
chunker_kwargs,
|
128
|
+
to_lion,
|
129
|
+
)
|
130
|
+
|
131
|
+
elif chunker_type == ChunkerType.LLAMAINDEX:
|
132
|
+
return chunk_funcs[ChunkerType.LLAMAINDEX](
|
133
|
+
docs,
|
134
|
+
documents_convert_func,
|
135
|
+
chunker,
|
136
|
+
chunker_args,
|
137
|
+
chunker_kwargs,
|
138
|
+
to_lion,
|
139
|
+
)
|
140
|
+
|
141
|
+
elif chunker_type == ChunkerType.SELFDEFINED:
|
142
|
+
return chunk_funcs[ChunkerType.SELFDEFINED](
|
143
|
+
docs,
|
144
|
+
chunker,
|
145
|
+
chunker_args,
|
146
|
+
chunker_kwargs,
|
147
|
+
chunking_kwargs,
|
148
|
+
to_lion,
|
149
|
+
)
|
150
|
+
|
151
|
+
else:
|
152
|
+
raise ValueError(
|
153
|
+
f"{chunker_type} is not supported. Please choose from {list(ChunkerType)}"
|
154
|
+
)
|
155
|
+
|
156
|
+
|
157
|
+
def _self_defined_chunker(
|
158
|
+
documents,
|
159
|
+
chunker,
|
160
|
+
chunker_args,
|
161
|
+
chunker_kwargs,
|
162
|
+
chunking_kwargs,
|
163
|
+
to_lion: bool | Callable,
|
164
|
+
):
|
165
|
+
"""
|
166
|
+
Chunks documents using a self-defined chunker.
|
167
|
+
|
168
|
+
Args:
|
169
|
+
documents (list): List of documents to be chunked.
|
170
|
+
chunker (str | Callable): The chunker function or its name.
|
171
|
+
chunker_args (list): Positional arguments for the chunker function.
|
172
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
173
|
+
chunking_kwargs (dict): Additional keyword arguments for chunking.
|
174
|
+
to_lion (bool | Callable): Whether to convert the data to Node instances or a custom parser.
|
175
|
+
|
176
|
+
Returns:
|
177
|
+
pile: A pile of chunked Node instances or custom parsed nodes.
|
178
|
+
|
179
|
+
Raises:
|
180
|
+
ValueError: If the self-defined chunker is not valid.
|
181
|
+
|
182
|
+
Example usage:
|
183
|
+
>>> chunked_docs = _self_defined_chunker(docs, custom_chunker, ['arg1'], {'key': 'value'}, {}, custom_parser)
|
184
|
+
"""
|
185
|
+
try:
|
186
|
+
splitter = chunker(*chunker_args, **chunker_kwargs)
|
187
|
+
nodes = splitter.split(documents, **chunking_kwargs)
|
188
|
+
except Exception as e:
|
189
|
+
raise ValueError(
|
190
|
+
f"Self defined chunker {chunker} is not valid. Error: {e}"
|
191
|
+
) from e
|
192
|
+
|
193
|
+
if isinstance(to_lion, bool) and to_lion is True:
|
194
|
+
raise ValueError("Please define a valid parser to Node.")
|
195
|
+
elif isinstance(to_lion, Callable):
|
196
|
+
nodes = _datanode_parser(nodes, to_lion)
|
197
|
+
return nodes
|
198
|
+
|
199
|
+
|
200
|
+
def _llama_index_chunker(
    documents,
    documents_convert_func,
    chunker,
    chunker_args,
    chunker_kwargs,
    to_lion: bool | Callable,
):
    """
    Splits documents into chunks via a LlamaIndex node parser.

    Args:
        documents (list): List of documents to be chunked.
        documents_convert_func (Callable): Optional converter applied to the
            documents before parsing (target format: "llama_index").
        chunker (str | Callable): The chunker function or its name.
        chunker_args (list): Positional arguments for the chunker function.
        chunker_kwargs (dict): Keyword arguments for the chunker function.
        to_lion (bool | Callable): ``True`` converts results to Node
            instances; a callable acts as a custom parser.

    Returns:
        pile: A pile of chunked Node instances or custom parsed nodes.

    Example usage:
        >>> chunked_docs = _llama_index_chunker(docs, convert_func, llama_chunker, ['arg1'], {'key': 'value'}, True)
    """
    source = (
        documents_convert_func(documents, "llama_index")
        if documents_convert_func
        else documents
    )
    chunks = LlamaIndexBridge.llama_index_parse_node(
        source, chunker, chunker_args, chunker_kwargs
    )

    if to_lion is True:
        return [Node.from_llama_index(chunk) for chunk in chunks]
    if isinstance(to_lion, Callable):
        return _datanode_parser(chunks, to_lion)
    return chunks
|
236
|
+
|
237
|
+
|
238
|
+
def _langchain_chunker(
    documents,
    documents_convert_func,
    chunker,
    chunker_args,
    chunker_kwargs,
    to_lion: bool | Callable,
):
    """
    Splits documents into chunks via a Langchain text splitter.

    Args:
        documents (list): List of documents to be chunked.
        documents_convert_func (Callable): Optional converter applied to the
            documents before splitting (target format: "langchain").
        chunker (str | Callable): The chunker function or its name.
        chunker_args (list): Positional arguments for the chunker function.
        chunker_kwargs (dict): Keyword arguments for the chunker function.
        to_lion (bool | Callable): ``True`` converts results to Node
            instances; a callable acts as a custom parser.

    Returns:
        pile: A pile of chunked Node instances or custom parsed nodes.

    Example usage:
        >>> chunked_docs = _langchain_chunker(docs, convert_func, langchain_chunker, ['arg1'], {'key': 'value'}, True)
    """
    source = documents
    if documents_convert_func:
        source = documents_convert_func(source, "langchain")
    chunks = LangchainBridge.langchain_text_splitter(
        source, chunker, chunker_args, chunker_kwargs
    )

    if to_lion is True:
        # NOTE: the str check deliberately inspects the (possibly converted)
        # input, mirroring the two conversion paths for splitter output.
        if isinstance(source, str):
            return [Node(content=chunk) for chunk in chunks]
        return [Node.from_langchain(chunk) for chunk in chunks]
    if isinstance(to_lion, Callable):
        return _datanode_parser(chunks, to_lion)
    return chunks
|
276
|
+
|
277
|
+
|
278
|
+
def _plain_chunker(documents, chunker, chunker_args, chunker_kwargs):
|
279
|
+
"""
|
280
|
+
Chunks documents using a plain chunker.
|
281
|
+
|
282
|
+
Args:
|
283
|
+
documents (list): List of documents to be chunked.
|
284
|
+
chunker (str | Callable): The chunker function or its name.
|
285
|
+
chunker_args (list): Positional arguments for the chunker function.
|
286
|
+
chunker_kwargs (dict): Keyword arguments for the chunker function.
|
287
|
+
|
288
|
+
Returns:
|
289
|
+
pile: A pile of chunked Node instances.
|
290
|
+
|
291
|
+
Raises:
|
292
|
+
ValueError: If the chunker is not supported.
|
293
|
+
|
294
|
+
Example usage:
|
295
|
+
>>> chunked_docs = _plain_chunker(docs, 'text_chunker', ['arg1'], {'key': 'value'})
|
296
|
+
"""
|
297
|
+
try:
|
298
|
+
if chunker == "text_chunker":
|
299
|
+
chunker = text_chunker
|
300
|
+
return chunker(documents, chunker_args, chunker_kwargs)
|
301
|
+
except Exception as e:
|
302
|
+
raise ValueError(
|
303
|
+
f"Reader {chunker} is currently not supported. Error: {e}"
|
304
|
+
) from e
|
305
|
+
|
306
|
+
|
307
|
+
# Dispatch table mapping each ChunkerType to its backend implementation;
# ``chunk`` routes through this dict to select the chunking strategy.
chunk_funcs = {
    ChunkerType.PLAIN: _plain_chunker,
    ChunkerType.LANGCHAIN: _langchain_chunker,
    ChunkerType.LLAMAINDEX: _llama_index_chunker,
    ChunkerType.SELFDEFINED: _self_defined_chunker,
}
|