lionagi 0.0.111__py3-none-any.whl → 0.0.113__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. lionagi/__init__.py +7 -2
  2. lionagi/bridge/__init__.py +7 -0
  3. lionagi/bridge/langchain.py +131 -0
  4. lionagi/bridge/llama_index.py +157 -0
  5. lionagi/configs/__init__.py +7 -0
  6. lionagi/configs/oai_configs.py +49 -0
  7. lionagi/configs/openrouter_config.py +49 -0
  8. lionagi/core/__init__.py +15 -0
  9. lionagi/{session/conversation.py → core/conversations.py} +10 -17
  10. lionagi/core/flows.py +1 -0
  11. lionagi/core/instruction_sets.py +1 -0
  12. lionagi/{session/message.py → core/messages.py} +5 -5
  13. lionagi/core/sessions.py +262 -0
  14. lionagi/datastore/__init__.py +1 -0
  15. lionagi/datastore/chroma.py +1 -0
  16. lionagi/datastore/deeplake.py +1 -0
  17. lionagi/datastore/elasticsearch.py +1 -0
  18. lionagi/datastore/lantern.py +1 -0
  19. lionagi/datastore/pinecone.py +1 -0
  20. lionagi/datastore/postgres.py +1 -0
  21. lionagi/datastore/qdrant.py +1 -0
  22. lionagi/loader/__init__.py +12 -0
  23. lionagi/loader/chunker.py +157 -0
  24. lionagi/loader/reader.py +124 -0
  25. lionagi/objs/__init__.py +7 -0
  26. lionagi/objs/messenger.py +163 -0
  27. lionagi/objs/tool_registry.py +247 -0
  28. lionagi/schema/__init__.py +11 -0
  29. lionagi/schema/base_condition.py +1 -0
  30. lionagi/schema/base_schema.py +239 -0
  31. lionagi/schema/base_tool.py +9 -0
  32. lionagi/schema/data_logger.py +94 -0
  33. lionagi/services/__init__.py +14 -0
  34. lionagi/services/anthropic.py +1 -0
  35. lionagi/services/anyscale.py +0 -0
  36. lionagi/services/azure.py +1 -0
  37. lionagi/{api/oai_service.py → services/base_api_service.py} +74 -148
  38. lionagi/services/bedrock.py +0 -0
  39. lionagi/services/chatcompletion.py +48 -0
  40. lionagi/services/everlyai.py +0 -0
  41. lionagi/services/gemini.py +0 -0
  42. lionagi/services/gpt4all.py +0 -0
  43. lionagi/services/huggingface.py +0 -0
  44. lionagi/services/litellm.py +1 -0
  45. lionagi/services/localai.py +0 -0
  46. lionagi/services/mistralai.py +0 -0
  47. lionagi/services/oai.py +34 -0
  48. lionagi/services/ollama.py +1 -0
  49. lionagi/services/openllm.py +0 -0
  50. lionagi/services/openrouter.py +32 -0
  51. lionagi/services/perplexity.py +0 -0
  52. lionagi/services/predibase.py +0 -0
  53. lionagi/services/rungpt.py +0 -0
  54. lionagi/services/service_objs.py +282 -0
  55. lionagi/services/vllm.py +0 -0
  56. lionagi/services/xinference.py +0 -0
  57. lionagi/structure/__init__.py +7 -0
  58. lionagi/structure/relationship.py +128 -0
  59. lionagi/structure/structure.py +160 -0
  60. lionagi/tests/__init__.py +0 -0
  61. lionagi/tests/test_flatten_util.py +426 -0
  62. lionagi/tools/__init__.py +0 -0
  63. lionagi/tools/coder.py +1 -0
  64. lionagi/tools/planner.py +1 -0
  65. lionagi/tools/prompter.py +1 -0
  66. lionagi/tools/sandbox.py +1 -0
  67. lionagi/tools/scorer.py +1 -0
  68. lionagi/tools/summarizer.py +1 -0
  69. lionagi/tools/validator.py +1 -0
  70. lionagi/utils/__init__.py +46 -8
  71. lionagi/utils/api_util.py +63 -416
  72. lionagi/utils/call_util.py +347 -0
  73. lionagi/utils/flat_util.py +540 -0
  74. lionagi/utils/io_util.py +102 -0
  75. lionagi/utils/load_utils.py +190 -0
  76. lionagi/utils/sys_util.py +85 -660
  77. lionagi/utils/tool_util.py +82 -199
  78. lionagi/utils/type_util.py +81 -0
  79. lionagi/version.py +1 -1
  80. {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/METADATA +44 -15
  81. lionagi-0.0.113.dist-info/RECORD +84 -0
  82. lionagi/api/__init__.py +0 -8
  83. lionagi/api/oai_config.py +0 -16
  84. lionagi/session/__init__.py +0 -7
  85. lionagi/session/session.py +0 -380
  86. lionagi/utils/doc_util.py +0 -331
  87. lionagi/utils/log_util.py +0 -86
  88. lionagi-0.0.111.dist-info/RECORD +0 -20
  89. {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/LICENSE +0 -0
  90. {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/WHEEL +0 -0
  91. {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,262 @@
1
+ import json
2
+ from typing import Any
3
+ from dotenv import load_dotenv
4
+
5
+ from ..schema import DataLogger
6
+ from ..utils import lcall, alcall
7
+ from ..services import OpenAIService, ChatCompletion
8
+ from ..core.conversations import Conversation
9
+ from ..objs.tool_registry import ToolManager
10
+ from ..configs.oai_configs import oai_schema
11
+
12
+ load_dotenv()
13
+ OAIService = OpenAIService()
14
+
15
+
16
class Session:
    """
    Manage a single conversation with a chat-completion service.

    A session owns the conversation history, the language model configuration,
    a data logger for request/response pairs, and a tool manager used to invoke
    tools requested in the model's responses.

    Attributes:
        conversation (Conversation): An object to manage the conversation flow and history.

        system (str): The system value used when a conversation is initiated.

        llmconfig (dict): Configuration for the language model (per-session copy).

        logger_ (DataLogger): An object for logging conversation data.

        service (OpenAIService): A service object for interacting with OpenAI APIs.

        tool_manager (ToolManager): An object to manage the registration and invocation of tools.
    """

    def __init__(
        self, system, dir=None, llmconfig=oai_schema['chat']['config'],
        service=OAIService
    ):
        """
        Initializes the Session object.

        Args:
            system (str): The system value with which the session is initiated.

            dir (str, optional): The directory for saving logs. Defaults to None.

            llmconfig (dict): Configuration for the language model. Defaults to chat config schema.

            service (OpenAIService): The service object for API interactions. Defaults to the
                module-level OpenAIService instance.
        """
        self.conversation = Conversation()
        self.system = system
        # Shallow-copy the mapping: register_tools() writes to this dict, and the
        # default argument is a module-level config shared by every Session.
        self.llmconfig = dict(llmconfig)
        self.logger_ = DataLogger(dir=dir)
        self.service = service
        self.tool_manager = ToolManager()

    def set_dir(self, dir):
        """
        Sets the directory where data logs should be saved.

        Args:
            dir (str): The path to the directory for saving logs.
        """
        self.logger_.dir = dir

    def set_system(self, system):
        """
        Changes the system associated with the conversation.

        Only the conversation is updated; ``self.system`` (used by ``initiate``
        as its fallback) is left untouched.

        Args:
            system (str): The new system value for the conversation.
        """
        self.conversation.change_system(system)

    def set_llmconfig(self, llmconfig):
        """
        Updates the language model configuration.

        Args:
            llmconfig (dict): The new configuration for the language model.
        """
        self.llmconfig = llmconfig

    def set_service(self, service):
        """
        Sets the service object used for API interactions.

        Args:
            service (OpenAIService): The new service object.
        """
        self.service = service

    async def _output(self, invoke=True, out=True):
        """
        Processes the latest response, optionally invoking tools, and returns its content.

        When ``invoke`` is true the latest response content is parsed as JSON and
        each entry under ``'function_list'`` (or ``'tool_uses'``) is turned into a
        function call, invoked, and recorded in the conversation. Any failure in
        that step (for example the content not being JSON) is ignored on purpose:
        a plain-text answer simply means there is nothing to invoke.

        Args:
            invoke (bool): Indicates whether to invoke tools based on the latest response. Defaults to True.

            out (bool): Determines whether to return the latest response content. Defaults to True.

        Returns:
            The content of the latest response if out is True. Otherwise, returns None.
        """
        if invoke:
            try:
                tool_uses = json.loads(self.conversation.responses[-1]['content'])
                key = 'function_list' if 'function_list' in tool_uses else 'tool_uses'
                func_calls = lcall(tool_uses[key], self.tool_manager._get_function_call)

                outs = await alcall(func_calls, self.tool_manager.invoke)
                # The loop variable must not be called `out`: that would overwrite
                # the `out` parameter checked below.
                for result, call in zip(outs, func_calls):
                    response = {"function": call[0], "arguments": call[1], "output": result}
                    self.conversation.add_messages(response=response)
            except Exception:
                # Best effort: no tool call could be extracted or invoked.
                # `Exception` (not a bare except) lets task cancellation and
                # KeyboardInterrupt propagate.
                pass
        if out:
            return self.conversation.responses[-1]['content']

    def _is_invoked(self):
        """
        Checks if the last message in the conversation indicates a function call result.

        Returns:
            bool: True if the last message's content is a JSON object containing the
            key "function call result", False otherwise (including unparsable content).
        """
        msg = self.conversation.messages[-1]
        try:
            content = json.loads(msg['content'])
        except Exception:
            return False
        return isinstance(content, dict) and "function call result" in content

    def register_tools(self, tools, update=False, new=False, prefix=None, postfix=None):
        """
        Registers a list of tools to the tool manager and updates the language model configuration.

        Args:
            tools: A single tool or a list of tools to be registered.
            update (bool): If True, update existing tools. Defaults to False.
            new (bool): If True, add as new tools. Defaults to False.
            prefix: A prefix added to all tool names. Defaults to None.
            postfix: A postfix added to all tool names. Defaults to None.
        """
        if not isinstance(tools, list):
            tools = [tools]
        self.tool_manager.register_tools(tools=tools, update=update, new=new, prefix=prefix, postfix=postfix)
        tools_schema = lcall(tools, lambda tool: tool.to_dict()['schema_'])
        existing = self.llmconfig.get('tools')
        if existing is None:
            self.llmconfig['tools'] = tools_schema
        else:
            # Build a new list rather than extending in place, so a list object
            # shared with another config is never modified.
            self.llmconfig['tools'] = list(existing) + tools_schema

    async def initiate(self, instruction, system=None, context=None,
                       name=None, invoke=True, out=True, **kwargs) -> Any:
        """
        Initiates a conversation with an instruction and possibly additional context.

        Args:
            instruction (str): The initial instruction for the conversation.
            system (str, optional): The system value to be used. If None, defaults to current system.
            context (str, optional): Additional context for the conversation. Defaults to None.
            name (str, optional): The name associated with the conversation. Defaults to None.
            invoke (bool): Indicates whether to invoke tools. Defaults to True.
            out (bool): Determines whether to return the latest response content. Defaults to True.
            **kwargs: Additional keyword arguments for language model configuration.

        Returns:
            The output of the conversation if out is True, otherwise None.
        """
        config = {**self.llmconfig, **kwargs}
        system = system or self.system
        self.conversation.initiate_conversation(system=system, instruction=instruction, context=context, name=name)
        await self.call_chatcompletion(**config)

        return await self._output(invoke, out)

    async def followup(self, instruction, system=None, context=None,
                       out=True, name=None, invoke=True, **kwargs) -> Any:
        """
        Continues the conversation with a follow-up instruction.

        Args:
            instruction (str): The follow-up instruction for the conversation.
            system (str, optional): If given, the conversation's system is changed to it first.
            context (str, optional): Additional context for the conversation. Defaults to None.
            out (bool): Determines whether to return the latest response content. Defaults to True.
            name (str, optional): The name associated with the conversation. Defaults to None.
            invoke (bool): Indicates whether to invoke tools. Defaults to True.
            **kwargs: Additional keyword arguments for language model configuration.

        Returns:
            The output of the conversation if out is True, otherwise None.
        """
        if system:
            self.conversation.change_system(system)
        self.conversation.add_messages(instruction=instruction, context=context, name=name)
        config = {**self.llmconfig, **kwargs}
        await self.call_chatcompletion(**config)

        return await self._output(invoke, out)

    async def auto_followup(self, instruct, num=3, **kwargs):
        """
        Automatically generates follow-up messages based on whether the last response invoked a tool.

        Follow-ups are sent with ``tool_choice="auto"`` while the budget lasts and
        the previous round ended in a function call result. If the budget reaches
        zero, one final follow-up is sent without forcing ``tool_choice``.

        Args:
            instruct (str): The instruction to pass for follow-up.
            num (int): The number of follow-ups to attempt. Defaults to 3.
            **kwargs: Additional keyword arguments for the follow-up process.
        """
        keep_going = True
        while num > 0 and keep_going:
            await self.followup(instruct, tool_choice="auto", **kwargs)
            num -= 1
            keep_going = self._is_invoked()
        if num == 0:
            await self.followup(instruct, **kwargs)

    def messages_to_csv(self, dir=None, filename="messages.csv", **kwargs):
        """
        Exports the conversation messages to a CSV file.

        Args:
            dir (str, optional): The directory where the CSV should be saved. Defaults to the logger's directory.
            filename (str): The name of the CSV file. Defaults to "messages.csv".
            **kwargs: Additional keyword arguments passed to the CSV writing function.

        Raises:
            ValueError: If no directory is specified.
        """
        dir = dir or self.logger_.dir
        if dir is None:
            raise ValueError("No directory specified.")
        self.conversation.msg.to_csv(dir=dir, filename=filename, **kwargs)

    def log_to_csv(self, dir=None, filename="llmlog.csv", **kwargs):
        """
        Exports the logged request/response data to a CSV file.

        Args:
            dir (str, optional): The directory where the CSV should be saved. Defaults to the logger's directory.
            filename (str): The name of the CSV file. Defaults to "llmlog.csv".
            **kwargs: Additional keyword arguments passed to the logger's CSV writer.

        Raises:
            ValueError: If no directory is specified.
        """
        dir = dir or self.logger_.dir
        if dir is None:
            raise ValueError("No directory specified.")
        self.logger_.to_csv(dir=dir, filename=filename, **kwargs)

    async def call_chatcompletion(self, schema=oai_schema['chat'], **kwargs):
        """
        Sends the current conversation to the service and records the outcome.

        A payload is built from the conversation messages, the schema and the
        session configuration, then served. If the completion contains
        "choices", the exchange is logged, the first choice is appended to the
        conversation and its responses, and the success counter is incremented;
        otherwise the failure counter is incremented.

        Args:
            schema (dict): The request schema. Defaults to the chat schema.
            **kwargs: Additional keyword arguments forwarded to payload creation.
        """
        payload = ChatCompletion.create_payload(
            messages=self.conversation.messages,
            schema=schema,
            llmconfig=self.llmconfig,
            **kwargs,
        )
        completion = await self.service.serve(payload=payload)
        if "choices" in completion:
            self.logger_({"input": payload, "output": completion})
            self.conversation.add_messages(response=completion['choices'][0])
            self.conversation.responses.append(self.conversation.messages[-1])
            self.conversation.response_counts += 1
            self.service.status_tracker.num_tasks_succeeded += 1
        else:
            self.service.status_tracker.num_tasks_failed += 1
@@ -0,0 +1 @@
1
+ # TODO
@@ -0,0 +1 @@
1
+ # TODO
@@ -0,0 +1 @@
1
+ # TODO
@@ -0,0 +1 @@
1
+ # TODO
@@ -0,0 +1 @@
1
+ # TODO
@@ -0,0 +1 @@
1
+ # TODO
@@ -0,0 +1 @@
1
+ # TODO
@@ -0,0 +1 @@
1
+ # TODO
@@ -0,0 +1,12 @@
1
+ from .reader import load, ReaderType, text_reader
2
+ from .chunker import chunk, datanodes_convert, ChunkerType, text_chunker
3
+
4
+ __all__ = [
5
+ 'load',
6
+ 'chunk',
7
+ 'datanodes_convert',
8
+ 'text_reader',
9
+ 'text_chunker',
10
+ 'ReaderType',
11
+ 'ChunkerType'
12
+ ]
@@ -0,0 +1,157 @@
1
+ from enum import Enum
2
+ from typing import Union, Callable
3
+
4
+ from ..bridge.langchain import langchain_text_splitter, from_langchain
5
+ from ..bridge.llama_index import llama_index_node_parser, from_llama_index
6
+ from ..schema.base_schema import DataNode
7
+ from ..utils import lcall, file_to_chunks
8
+
9
+ # define an enum to represent different types of chunkers
10
class ChunkerType(str, Enum):
    """Kinds of chunker that can be selected when chunking documents."""

    # default
    PLAIN = 'plain'
    # using langchain functions
    LANGCHAIN = 'langchain'
    # using llamaindex functions
    LLAMAINDEX = 'llama_index'
    # create custom functions
    SELFDEFINED = 'self_defined'
15
+
16
+ # Function to convert documents to a specific format based on the chunker type
17
def datanodes_convert(documents, chunker_type):
    """
    Convert DataNode entries of a list, in place, to the format a chunker expects.

    Entries whose type is exactly DataNode are replaced by their llama_index or
    langchain counterpart depending on the chunker type; every other entry, and
    every entry for any other chunker type, is left as it is.

    Args:
        documents (List[DataNode]): The documents to convert. The list is modified in place.

        chunker_type (ChunkerType): Selects the target format.

    Returns:
        List[DataNode]: The same list object, after conversion.
    """
    for position, document in enumerate(documents):
        if type(document) != DataNode:
            continue
        if chunker_type == ChunkerType.LLAMAINDEX:
            documents[position] = document.to_llama_index()
        elif chunker_type == ChunkerType.LANGCHAIN:
            documents[position] = document.to_langchain()
    return documents
36
+
37
+ # Function to chunk text documents
38
def text_chunker(documents, args, kwargs):
    """
    Split each document into chunks and wrap every chunk in a new DataNode.

    Args:
        documents (List[DataNode]): The documents to be chunked.
        args (List[Any]): Positional arguments forwarded to file_to_chunks.
        kwargs (dict): Keyword arguments forwarded to file_to_chunks.

    Returns:
        List[DataNode]: The chunk nodes of all documents, in document order.
    """
    def _split_one(document):
        pieces = file_to_chunks(document.to_dict(), *args, **kwargs)
        # Drop the 'node_id' entry from every chunk before building the new nodes.
        lcall(pieces, lambda piece: piece.pop('node_id'))
        return lcall(pieces, lambda fields: DataNode(**fields))

    chunked = []
    for document in documents:
        chunked += _split_one(document)
    return chunked
60
+
61
+
62
+ def _datanode_parser(nodes, parser):
63
+ """
64
+ Parses raw data into DataNode instances using the provided parser function.
65
+
66
+ Args:
67
+ nodes (List[Any]): A list of raw data to be parsed.
68
+ parser (Callable): A function that parses raw data into DataNode instances.
69
+
70
+ Returns:
71
+ List[DataNode]: A list of parsed DataNode instances.
72
+
73
+ Raises:
74
+ ValueError: If the parser function fails.
75
+ """
76
+ try:
77
+ nodes = parser(nodes)
78
+ except Exception as e:
79
+ raise ValueError(f'DataNode parser {parser} failed. Error:{e}')
80
+ return nodes
81
+
82
+
83
def chunk(documents,
          chunker,
          chunker_type=ChunkerType.PLAIN,
          chunker_args=None,
          chunker_kwargs=None,
          chunking_kwargs=None,
          documents_convert_func=None,
          to_datanode: Union[bool, Callable] = True):
    """
    Chunks documents using the specified chunker and chunker type.

    Args:
        documents (List[Any]): A list of documents to be chunked.
        chunker (Callable): The chunking function to be used. For the PLAIN type the
            string 'text_chunker' selects the built-in text chunker.
        chunker_type (ChunkerType): The type of the chunker. Defaults to ChunkerType.PLAIN.
        chunker_args (List[Any]): Positional arguments for the chunker function. Defaults to an empty list.
        chunker_kwargs (dict): Keyword arguments for the chunker function. Defaults to an empty dict.
        chunking_kwargs (dict): Additional keyword arguments for the chunking process. Defaults to an empty dict.
        documents_convert_func (Callable): A function to convert documents to a specific format;
            called as ``documents_convert_func(documents, <type name>)`` for the langchain and
            llama_index types. Defaults to None.
        to_datanode (Union[bool, Callable]): True converts the result into DataNode instances,
            a callable is used as the converter, and False returns the raw result.
            Defaults to True.

    Returns:
        List[DataNode]: A list of chunked DataNode instances after applying the chunker.

    Raises:
        ValueError: If the chunker fails, an unsupported chunker type is provided, or
            a self-defined chunker is used with ``to_datanode=True``.
    """
    # Fresh containers per call; avoids sharing mutable default arguments.
    chunker_args = [] if chunker_args is None else chunker_args
    chunker_kwargs = {} if chunker_kwargs is None else chunker_kwargs
    chunking_kwargs = {} if chunking_kwargs is None else chunking_kwargs

    if chunker_type == ChunkerType.PLAIN:
        try:
            if chunker == 'text_chunker':
                chunker = text_chunker
            return chunker(documents, chunker_args, chunker_kwargs)
        except Exception as e:
            raise ValueError(f'Chunker {chunker} is currently not supported. Error: {e}') from e

    elif chunker_type == ChunkerType.LANGCHAIN:
        if documents_convert_func:
            documents = documents_convert_func(documents, 'langchain')
        nodes = langchain_text_splitter(documents, chunker, chunker_args, chunker_kwargs)
        if to_datanode is True:
            if isinstance(documents, str):
                # A plain string input yields text pieces: wrap each as node content.
                nodes = lcall(nodes, lambda x: DataNode(content=x))
            else:
                nodes = lcall(nodes, from_langchain)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    elif chunker_type == ChunkerType.LLAMAINDEX:
        if documents_convert_func:
            documents = documents_convert_func(documents, 'llama_index')
        nodes = llama_index_node_parser(documents, chunker, chunker_args, chunker_kwargs, chunking_kwargs)
        if to_datanode is True:
            nodes = lcall(nodes, from_llama_index)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    elif chunker_type == ChunkerType.SELFDEFINED:
        try:
            splitter = chunker(*chunker_args, **chunker_kwargs)
            nodes = splitter.split(documents, **chunking_kwargs)
        except Exception as e:
            raise ValueError(f'Self defined chunker {chunker} is not valid. Error: {e}') from e

        if to_datanode is True:
            # There is no built-in converter for custom chunkers.
            raise ValueError('Please define a valid parser to DataNode.')
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    else:
        raise ValueError(f'{chunker_type} is not supported. Please choose from {list(ChunkerType)}')
157
+
@@ -0,0 +1,124 @@
1
+ from enum import Enum
2
+ from typing import Union, Callable
3
+
4
+ from lionagi.bridge.langchain import langchain_loader, from_langchain
5
+ from lionagi.bridge.llama_index import llama_index_reader, from_llama_index
6
+ from lionagi.utils.call_util import lcall
7
+ from lionagi.utils.load_utils import dir_to_nodes
8
+
9
+
10
class ReaderType(str, Enum):
    """Kinds of reader that can be selected when loading documents."""

    # NOTE(review): this value is upper-case while the other members are lower-case.
    PLAIN = 'PLAIN'
    LANGCHAIN = 'langchain'
    LLAMAINDEX = 'llama_index'
    SELFDEFINED = 'self_defined'
15
+
16
+
17
+ def _datanode_parser(nodes, parser):
18
+ """
19
+ Parses a list of nodes using the given parser function.
20
+
21
+ Args:
22
+ nodes (List[Any]): The list of nodes to be parsed.
23
+
24
+ parser (Callable): The parser function to transform nodes into DataNode instances.
25
+
26
+ Returns:
27
+ List[Any]: A list of parsed nodes.
28
+
29
+ Raises:
30
+ ValueError: If the parser function fails.
31
+ """
32
+ try:
33
+ nodes = parser(nodes)
34
+ except Exception as e:
35
+ raise ValueError(f'DataNode parser {parser} failed. Error:{e}')
36
+ return nodes
37
+
38
+
39
def text_reader(args, kwargs):
    """
    Forward the given arguments to dir_to_nodes and return its result.

    Args:
        args (List[Any]): Positional arguments for the dir_to_nodes function.

        kwargs (dict): Keyword arguments for the dir_to_nodes function.

    Returns:
        List[Any]: The nodes produced by dir_to_nodes.
    """
    positional, keyword = args, kwargs
    nodes = dir_to_nodes(*positional, **keyword)
    return nodes
52
+
53
+
54
def load(reader: Union[str, Callable],
         reader_type=ReaderType.PLAIN,
         reader_args=None,
         reader_kwargs=None,
         load_args=None,
         load_kwargs=None,
         to_datanode: Union[bool, Callable] = True):
    """
    Loads documents using the specified reader and reader type.

    Args:
        reader (Union[str, Callable]): The reader function or its name as a string. For the
            PLAIN type the string 'text_reader' selects the built-in text reader.

        reader_type (ReaderType): The type of the reader. Defaults to ReaderType.PLAIN.

        reader_args (List[Any]): Positional arguments for the reader function. Defaults to an empty list.

        reader_kwargs (dict): Keyword arguments for the reader function. Defaults to an empty dict.

        load_args (List[Any]): Positional arguments for the loader function. Defaults to an empty list.

        load_kwargs (dict): Keyword arguments for the loader function. Defaults to an empty dict.

        to_datanode (Union[bool, Callable]): True converts the result into DataNode instances,
            a callable is used as the converter, and False returns the raw result.
            Defaults to True.

    Returns:
        List[Any]: A list of loaded and potentially parsed documents.

    Raises:
        ValueError: If the reader fails, an unsupported reader type is provided, or
            a self-defined reader is used with ``to_datanode=True``.
    """
    # Fresh containers per call; avoids sharing mutable default arguments.
    reader_args = [] if reader_args is None else reader_args
    reader_kwargs = {} if reader_kwargs is None else reader_kwargs
    load_args = [] if load_args is None else load_args
    load_kwargs = {} if load_kwargs is None else load_kwargs

    if reader_type == ReaderType.PLAIN:
        try:
            if reader == 'text_reader':
                reader = text_reader
            return reader(reader_args, reader_kwargs)
        except Exception as e:
            raise ValueError(f'Reader {reader} is currently not supported. Error: {e}') from e

    elif reader_type == ReaderType.LANGCHAIN:
        nodes = langchain_loader(reader, reader_args, reader_kwargs)
        if to_datanode is True:
            nodes = lcall(nodes, from_langchain)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    elif reader_type == ReaderType.LLAMAINDEX:
        nodes = llama_index_reader(reader, reader_args, reader_kwargs, load_args, load_kwargs)
        if to_datanode is True:
            nodes = lcall(nodes, from_llama_index)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    elif reader_type == ReaderType.SELFDEFINED:
        try:
            loader = reader(*reader_args, **reader_kwargs)
            nodes = loader.load(*load_args, **load_kwargs)
        except Exception as e:
            raise ValueError(f'Self defined reader {reader} is not valid. Error: {e}') from e

        if to_datanode is True:
            # There is no built-in converter for custom readers.
            raise ValueError('Please define a valid parser to DataNode.')
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    else:
        raise ValueError(f'{reader_type} is not supported. Please choose from {list(ReaderType)}')
@@ -0,0 +1,7 @@
1
+ # # from .messenger import Messenger
2
+ # from .tool_registry import ToolRegistry
3
+
4
+ # __all__ = [
5
+ # 'Messenger',
6
+ # 'ToolRegistry'
7
+ # ]