lionagi 0.0.111__py3-none-any.whl → 0.0.113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. lionagi/__init__.py +7 -2
  2. lionagi/bridge/__init__.py +7 -0
  3. lionagi/bridge/langchain.py +131 -0
  4. lionagi/bridge/llama_index.py +157 -0
  5. lionagi/configs/__init__.py +7 -0
  6. lionagi/configs/oai_configs.py +49 -0
  7. lionagi/configs/openrouter_config.py +49 -0
  8. lionagi/core/__init__.py +15 -0
  9. lionagi/{session/conversation.py → core/conversations.py} +10 -17
  10. lionagi/core/flows.py +1 -0
  11. lionagi/core/instruction_sets.py +1 -0
  12. lionagi/{session/message.py → core/messages.py} +5 -5
  13. lionagi/core/sessions.py +262 -0
  14. lionagi/datastore/__init__.py +1 -0
  15. lionagi/datastore/chroma.py +1 -0
  16. lionagi/datastore/deeplake.py +1 -0
  17. lionagi/datastore/elasticsearch.py +1 -0
  18. lionagi/datastore/lantern.py +1 -0
  19. lionagi/datastore/pinecone.py +1 -0
  20. lionagi/datastore/postgres.py +1 -0
  21. lionagi/datastore/qdrant.py +1 -0
  22. lionagi/loader/__init__.py +12 -0
  23. lionagi/loader/chunker.py +157 -0
  24. lionagi/loader/reader.py +124 -0
  25. lionagi/objs/__init__.py +7 -0
  26. lionagi/objs/messenger.py +163 -0
  27. lionagi/objs/tool_registry.py +247 -0
  28. lionagi/schema/__init__.py +11 -0
  29. lionagi/schema/base_condition.py +1 -0
  30. lionagi/schema/base_schema.py +239 -0
  31. lionagi/schema/base_tool.py +9 -0
  32. lionagi/schema/data_logger.py +94 -0
  33. lionagi/services/__init__.py +14 -0
  34. lionagi/services/anthropic.py +1 -0
  35. lionagi/services/anyscale.py +0 -0
  36. lionagi/services/azure.py +1 -0
  37. lionagi/{api/oai_service.py → services/base_api_service.py} +74 -148
  38. lionagi/services/bedrock.py +0 -0
  39. lionagi/services/chatcompletion.py +48 -0
  40. lionagi/services/everlyai.py +0 -0
  41. lionagi/services/gemini.py +0 -0
  42. lionagi/services/gpt4all.py +0 -0
  43. lionagi/services/huggingface.py +0 -0
  44. lionagi/services/litellm.py +1 -0
  45. lionagi/services/localai.py +0 -0
  46. lionagi/services/mistralai.py +0 -0
  47. lionagi/services/oai.py +34 -0
  48. lionagi/services/ollama.py +1 -0
  49. lionagi/services/openllm.py +0 -0
  50. lionagi/services/openrouter.py +32 -0
  51. lionagi/services/perplexity.py +0 -0
  52. lionagi/services/predibase.py +0 -0
  53. lionagi/services/rungpt.py +0 -0
  54. lionagi/services/service_objs.py +282 -0
  55. lionagi/services/vllm.py +0 -0
  56. lionagi/services/xinference.py +0 -0
  57. lionagi/structure/__init__.py +7 -0
  58. lionagi/structure/relationship.py +128 -0
  59. lionagi/structure/structure.py +160 -0
  60. lionagi/tests/__init__.py +0 -0
  61. lionagi/tests/test_flatten_util.py +426 -0
  62. lionagi/tools/__init__.py +0 -0
  63. lionagi/tools/coder.py +1 -0
  64. lionagi/tools/planner.py +1 -0
  65. lionagi/tools/prompter.py +1 -0
  66. lionagi/tools/sandbox.py +1 -0
  67. lionagi/tools/scorer.py +1 -0
  68. lionagi/tools/summarizer.py +1 -0
  69. lionagi/tools/validator.py +1 -0
  70. lionagi/utils/__init__.py +46 -8
  71. lionagi/utils/api_util.py +63 -416
  72. lionagi/utils/call_util.py +347 -0
  73. lionagi/utils/flat_util.py +540 -0
  74. lionagi/utils/io_util.py +102 -0
  75. lionagi/utils/load_utils.py +190 -0
  76. lionagi/utils/sys_util.py +85 -660
  77. lionagi/utils/tool_util.py +82 -199
  78. lionagi/utils/type_util.py +81 -0
  79. lionagi/version.py +1 -1
  80. {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/METADATA +44 -15
  81. lionagi-0.0.113.dist-info/RECORD +84 -0
  82. lionagi/api/__init__.py +0 -8
  83. lionagi/api/oai_config.py +0 -16
  84. lionagi/session/__init__.py +0 -7
  85. lionagi/session/session.py +0 -380
  86. lionagi/utils/doc_util.py +0 -331
  87. lionagi/utils/log_util.py +0 -86
  88. lionagi-0.0.111.dist-info/RECORD +0 -20
  89. {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/LICENSE +0 -0
  90. {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/WHEEL +0 -0
  91. {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/top_level.txt +0 -0
lionagi/core/sessions.py
@@ -0,0 +1,262 @@
+ import json
+ from typing import Any
+ from dotenv import load_dotenv
+
+ from ..schema import DataLogger
+ from ..utils import lcall, alcall
+ from ..services import OpenAIService, ChatCompletion
+ from ..core.conversations import Conversation
+ from ..objs.tool_registry import ToolManager
+ from ..configs.oai_configs import oai_schema
+
+ load_dotenv()
+ OAIService = OpenAIService()
+
+
+ class Session:
+     """
+     The Session class is responsible for managing a conversation session with a given system,
+     handling the logging of data, and invoking tools as part of the conversation.
+
+     Attributes:
+         conversation (Conversation): An object to manage the conversation flow and history.
+
+         system (str): The name of the system with which the conversation is happening.
+
+         llmconfig (dict): Configuration for the language model.
+
+         logger_ (DataLogger): An object for logging conversation data.
+
+         service (OpenAIService): A service object for interacting with OpenAI APIs.
+
+         tool_manager (ToolManager): An object to manage the registration and invocation of tools.
+     """
+
+     def __init__(
+         self, system, dir=None, llmconfig=oai_schema['chat']['config'],
+         service=OAIService
+     ):
+         """
+         Initializes the Session object.
+
+         Args:
+             system (str): The name of the system with which the session is initiated.
+
+             dir (str, optional): The directory for saving logs. Defaults to None.
+
+             llmconfig (dict): Configuration for the language model. Defaults to chat config schema.
+
+             service (OpenAIService): The service object for API interactions. Defaults to an instance of OpenAIService.
+         """
+
+         self.conversation = Conversation()
+         self.system = system
+         self.llmconfig = llmconfig
+         self.logger_ = DataLogger(dir=dir)
+         self.service = service
+         self.tool_manager = ToolManager()
+
+     def set_dir(self, dir):
+         """
+         Sets the directory where data logs should be saved.
+
+         Args:
+             dir (str): The path to the directory for saving logs.
+         """
+         self.logger_.dir = dir
+
+     def set_system(self, system):
+         """
+         Changes the system associated with the conversation.
+
+         Args:
+             system (str): The name of the new system for the conversation.
+         """
+         self.conversation.change_system(system)
+
+     def set_llmconfig(self, llmconfig):
+         """
+         Updates the language model configuration.
+
+         Args:
+             llmconfig (dict): The new configuration for the language model.
+         """
+         self.llmconfig = llmconfig
+
+     def set_service(self, service):
+         """
+         Sets the service object used for API interactions.
+
+         Args:
+             service (OpenAIService): The new service object.
+         """
+         self.service = service
+
+     async def _output(self, invoke=True, out=True):
+         """
+         Processes the output from the conversation, possibly invoking tools and returning the latest response.
+
+         Args:
+             invoke (bool): Indicates whether to invoke tools based on the latest response. Defaults to True.
+
+             out (bool): Determines whether to return the latest response content. Defaults to True.
+
+         Returns:
+             The content of the latest response if out is True. Otherwise, returns None.
+         """
+         if invoke:
+             try:
+                 # func, args = self.tool_manager._get_function_call(self.conversation.responses[-1]['content'])
+                 # outs = await self.tool_manager.invoke(func, args)
+                 # self.conversation.add_messages(response=outs)
+
+                 tool_uses = json.loads(self.conversation.responses[-1]['content'])
+                 if 'function_list' in tool_uses.keys():
+                     func_calls = lcall(tool_uses['function_list'], self.tool_manager._get_function_call)
+                 else:
+                     func_calls = lcall(tool_uses['tool_uses'], self.tool_manager._get_function_call)
+
+                 outs = await alcall(func_calls, self.tool_manager.invoke)
+                 for out, f in zip(outs, func_calls):
+                     response = {"function": f[0], "arguments": f[1], "output": out}
+                     self.conversation.add_messages(response=response)
+
+             except:
+                 pass
+         if out:
+             return self.conversation.responses[-1]['content']
+
+     def _is_invoked(self):
+         """
+         Checks if the last message in the conversation indicates a function call result.
+
+         Returns:
+             bool: True if the last message is a function call result, False otherwise.
+         """
+         msg = self.conversation.messages[-1]
+         try:
+             if "function call result" in json.loads(msg['content']).keys():
+                 return True
+         except:
+             return False
+
+     def register_tools(self, tools, update=False, new=False, prefix=None, postfix=None):
+         """
+         Registers a list of tools to the tool manager and updates the language model configuration.
+
+         Args:
+             tools: A single tool or a list of tools to be registered.
+             update (bool): If True, update existing tools. Defaults to False.
+             new (bool): If True, add as new tools. Defaults to False.
+             prefix: A prefix added to all tool names. Defaults to None.
+             postfix: A postfix added to all tool names. Defaults to None.
+         """
+         if not isinstance(tools, list):
+             tools = [tools]
+         self.tool_manager.register_tools(tools=tools, update=update, new=new, prefix=prefix, postfix=postfix)
+         tools_schema = lcall(tools, lambda tool: tool.to_dict()['schema_'])
+         if self.llmconfig['tools'] is None:
+             self.llmconfig['tools'] = tools_schema
+         else:
+             self.llmconfig['tools'] += tools_schema
+
+     async def initiate(self, instruction, system=None, context=None,
+                        name=None, invoke=True, out=True, **kwargs) -> Any:
+         """
+         Initiates a conversation with an instruction and possibly additional context.
+
+         Args:
+             instruction (str): The initial instruction for the conversation.
+             system (str, optional): The name of the system to be used. If None, defaults to current system.
+             context (str, optional): Additional context for the conversation. Defaults to None.
+             name (str, optional): The name associated with the conversation. Defaults to None.
+             invoke (bool): Indicates whether to invoke tools. Defaults to True.
+             out (bool): Determines whether to return the latest response content. Defaults to True.
+             **kwargs: Additional keyword arguments for language model configuration.
+
+         Returns:
+             The output of the conversation if out is True, otherwise None.
+         """
+         config = {**self.llmconfig, **kwargs}
+         system = system or self.system
+         self.conversation.initiate_conversation(system=system, instruction=instruction, context=context, name=name)
+         await self.call_chatcompletion(**config)
+
+         return await self._output(invoke, out)
+
+     async def followup(self, instruction, system=None, context=None,
+                        out=True, name=None, invoke=True, **kwargs) -> Any:
+         """
+         Continues the conversation with a follow-up instruction.
+
+         Args:
+             instruction (str): The follow-up instruction for the conversation.
+             system (str, optional): The name of the system to be used. If None, defaults to current system.
+             context (str, optional): Additional context for the conversation. Defaults to None.
+             out (bool): Determines whether to return the latest response content. Defaults to True.
+             name (str, optional): The name associated with the conversation. Defaults to None.
+             invoke (bool): Indicates whether to invoke tools. Defaults to True.
+             **kwargs: Additional keyword arguments for language model configuration.
+
+         Returns:
+             The output of the conversation if out is True, otherwise None.
+         """
+         if system:
+             self.conversation.change_system(system)
+         self.conversation.add_messages(instruction=instruction, context=context, name=name)
+         config = {**self.llmconfig, **kwargs}
+         await self.call_chatcompletion(**config)
+
+         return await self._output(invoke, out)
+
+     async def auto_followup(self, instruct, num=3, **kwargs):
+         """
+         Automatically generates follow-up messages based on whether the last response invoked a tool.
+
+         Args:
+             instruct (str): The instruction to pass for follow-up.
+             num (int): The number of follow-ups to attempt. Defaults to 3.
+             **kwargs: Additional keyword arguments for the follow-up process.
+         """
+         cont_ = True
+         while num > 0 and cont_ is True:
+             await self.followup(instruct, tool_choice="auto", **kwargs)
+             num -= 1
+             cont_ = True if self._is_invoked() else False
+         if num == 0:
+             await self.followup(instruct, **kwargs)
+
+     def messages_to_csv(self, dir=None, filename="messages.csv", **kwargs):
+         """
+         Exports the conversation messages to a CSV file.
+
+         Args:
+             dir (str, optional): The directory where the CSV should be saved. Defaults to the logger's directory.
+             filename (str): The name of the CSV file. Defaults to "messages.csv".
+             **kwargs: Additional keyword arguments passed to the CSV writing function.
+
+         Raises:
+             ValueError: If no directory is specified.
+         """
+         dir = dir or self.logger_.dir
+         if dir is None:
+             raise ValueError("No directory specified.")
+         self.conversation.msg.to_csv(dir=dir, filename=filename, **kwargs)
+
+     def log_to_csv(self, dir=None, filename="llmlog.csv", **kwargs):
+         dir = dir or self.logger_.dir
+         if dir is None:
+             raise ValueError("No directory specified.")
+         self.logger_.to_csv(dir=dir, filename=filename, **kwargs)
+
+     async def call_chatcompletion(self, schema=oai_schema['chat'], **kwargs):
+         payload = ChatCompletion.create_payload(messages=self.conversation.messages, schema=schema, llmconfig=self.llmconfig, **kwargs)
+         completion = await self.service.serve(payload=payload)
+         if "choices" in completion:
+             self.logger_({"input": payload, "output": completion})
+             self.conversation.add_messages(response=completion['choices'][0])
+             self.conversation.responses.append(self.conversation.messages[-1])
+             self.conversation.response_counts += 1
+             self.service.status_tracker.num_tasks_succeeded += 1
+         else:
+             self.service.status_tracker.num_tasks_failed += 1
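For orientation, a minimal usage sketch of the new Session API (not part of the diff; it assumes OPENAI_API_KEY is available in the environment, since the module calls load_dotenv() and instantiates OpenAIService at import time):

    import asyncio
    from lionagi.core.sessions import Session

    async def main():
        session = Session(system="You are a helpful assistant.")
        # initiate() builds the system + instruction messages, calls the
        # chat completion service, and returns the latest response content
        print(await session.initiate("Summarize what a Python wheel is."))
        # followup() appends another instruction to the same conversation
        print(await session.followup("Now compare it to an sdist."))

    asyncio.run(main())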
@@ -0,0 +1 @@
+ # TODO
@@ -0,0 +1 @@
+ # TODO
@@ -0,0 +1 @@
+ # TODO
@@ -0,0 +1 @@
+ # TODO
@@ -0,0 +1 @@
+ # TODO
@@ -0,0 +1 @@
+ # TODO
@@ -0,0 +1 @@
+ # TODO
@@ -0,0 +1 @@
+ # TODO
lionagi/loader/__init__.py
@@ -0,0 +1,12 @@
+ from .reader import load, ReaderType, text_reader
+ from .chunker import chunk, datanodes_convert, ChunkerType, text_chunker
+
+ __all__ = [
+     'load',
+     'chunk',
+     'datanodes_convert',
+     'text_reader',
+     'text_chunker',
+     'ReaderType',
+     'ChunkerType'
+ ]
lionagi/loader/chunker.py
@@ -0,0 +1,157 @@
+ from enum import Enum
+ from typing import Union, Callable
+
+ from ..bridge.langchain import langchain_text_splitter, from_langchain
+ from ..bridge.llama_index import llama_index_node_parser, from_llama_index
+ from ..schema.base_schema import DataNode
+ from ..utils import lcall, file_to_chunks
+
+ # define an enum to represent different types of chunkers
+ class ChunkerType(str, Enum):
+     PLAIN = 'plain'                 # default
+     LANGCHAIN = 'langchain'         # using langchain functions
+     LLAMAINDEX = 'llama_index'      # using llamaindex functions
+     SELFDEFINED = 'self_defined'    # create custom functions
+
+ # Function to convert documents to a specific format based on the chunker type
+ def datanodes_convert(documents, chunker_type):
+     """
+     Converts lionagi DataNode documents to a specific format based on the chunker type.
+
+     Args:
+         documents (List[DataNode]): A list of DataNode instances to be converted.
+
+         chunker_type (ChunkerType): The chunker type to determine the conversion format.
+
+     Returns:
+         List[DataNode]: The list of converted DataNode instances.
+     """
+     for i in range(len(documents)):
+         if type(documents[i]) == DataNode:
+             if chunker_type == ChunkerType.LLAMAINDEX:
+                 documents[i] = documents[i].to_llama_index()
+             elif chunker_type == ChunkerType.LANGCHAIN:
+                 documents[i] = documents[i].to_langchain()
+     return documents
+
+ # Function to chunk text documents
+ def text_chunker(documents, args, kwargs):
+     """
+     Chunks text documents into smaller pieces.
+
+     Args:
+         documents (List[DataNode]): A list of DataNode instances to be chunked.
+         args (List[Any]): Positional arguments to be passed to the chunking function.
+         kwargs (dict): Keyword arguments to be passed to the chunking function.
+
+     Returns:
+         List[DataNode]: A list of chunked DataNode instances.
+     """
+     def chunk_node(node):
+         chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
+         lcall(chunks, lambda chunk: chunk.pop('node_id'))
+         chunk_nodes = lcall(chunks, lambda x: DataNode(**x))
+         return chunk_nodes
+
+     nodes = []
+     for doc in documents:
+         nodes += chunk_node(doc)
+     return nodes
+
+
+ def _datanode_parser(nodes, parser):
+     """
+     Parses raw data into DataNode instances using the provided parser function.
+
+     Args:
+         nodes (List[Any]): A list of raw data to be parsed.
+         parser (Callable): A function that parses raw data into DataNode instances.
+
+     Returns:
+         List[DataNode]: A list of parsed DataNode instances.
+
+     Raises:
+         ValueError: If the parser function fails.
+     """
+     try:
+         nodes = parser(nodes)
+     except Exception as e:
+         raise ValueError(f'DataNode parser {parser} failed. Error: {e}')
+     return nodes
+
+
+ def chunk(documents,
+           chunker,
+           chunker_type=ChunkerType.PLAIN,
+           chunker_args=[],
+           chunker_kwargs={},
+           chunking_kwargs={},
+           documents_convert_func=None,
+           to_datanode: Union[bool, Callable] = True):
+     """
+     Chunks documents using the specified chunker and chunker type.
+
+     Args:
+         documents (List[Any]): A list of documents to be chunked.
+         chunker (Callable): The chunking function to be used.
+         chunker_type (ChunkerType): The type of the chunker. Defaults to ChunkerType.PLAIN.
+         chunker_args (List[Any]): Positional arguments for the chunker function. Defaults to an empty list.
+         chunker_kwargs (dict): Keyword arguments for the chunker function. Defaults to an empty dict.
+         chunking_kwargs (dict): Additional keyword arguments for the chunking process. Defaults to an empty dict.
+         documents_convert_func (Callable): A function to convert documents to a specific format. Defaults to None.
+         to_datanode (Union[bool, Callable]): Determines whether to convert the result into DataNode instances, or
+             a callable to convert the result. Defaults to True.
+
+     Returns:
+         List[DataNode]: A list of chunked DataNode instances after applying the chunker.
+
+     Raises:
+         ValueError: If the chunker fails or an unsupported chunker type is provided.
+     """
+     if chunker_type == ChunkerType.PLAIN:
+         try:
+             if chunker == 'text_chunker':
+                 chunker = text_chunker
+             nodes = chunker(documents, chunker_args, chunker_kwargs)
+             return nodes
+         except Exception as e:
+             raise ValueError(f'Chunker {chunker} is currently not supported. Error: {e}')
+     if chunker_type == ChunkerType.LANGCHAIN:
+         if documents_convert_func:
+             documents = documents_convert_func(documents, 'langchain')
+         nodes = langchain_text_splitter(documents, chunker, chunker_args, chunker_kwargs)
+         if isinstance(to_datanode, bool) and to_datanode is True:
+             if isinstance(documents, str):
+                 nodes = lcall(nodes, lambda x: DataNode(content=x))
+             else:
+                 nodes = lcall(nodes, from_langchain)
+         elif isinstance(to_datanode, Callable):
+             nodes = _datanode_parser(nodes, to_datanode)
+         return nodes
+
+     elif chunker_type == ChunkerType.LLAMAINDEX:
+         if documents_convert_func:
+             documents = documents_convert_func(documents, 'llama_index')
+         nodes = llama_index_node_parser(documents, chunker, chunker_args, chunker_kwargs, chunking_kwargs)
+         if isinstance(to_datanode, bool) and to_datanode is True:
+             nodes = lcall(nodes, from_llama_index)
+         elif isinstance(to_datanode, Callable):
+             nodes = _datanode_parser(nodes, to_datanode)
+         return nodes
+
+     elif chunker_type == ChunkerType.SELFDEFINED:
+         try:
+             splitter = chunker(*chunker_args, **chunker_kwargs)
+             nodes = splitter.split(documents, **chunking_kwargs)
+         except Exception as e:
+             raise ValueError(f'Self defined chunker {chunker} is not valid. Error: {e}')
+
+         if isinstance(to_datanode, bool) and to_datanode is True:
+             raise ValueError(f'Please define a valid parser to DataNode.')
+         elif isinstance(to_datanode, Callable):
+             nodes = _datanode_parser(nodes, to_datanode)
+         return nodes
+
+     else:
+         raise ValueError(f'{chunker_type} is not supported. Please choose from {list(ChunkerType)}')
+
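A sketch of the self_defined path above, which only requires an object exposing split() and, because to_datanode=True raises for this chunker type, a callable parser. FixedWidthSplitter is a hypothetical class for illustration, not part of the package:

    from lionagi.loader import chunk, ChunkerType
    from lionagi.schema.base_schema import DataNode

    class FixedWidthSplitter:
        # hypothetical chunker: chunk() will call FixedWidthSplitter(*chunker_args)
        # and then .split(documents, **chunking_kwargs)
        def __init__(self, width):
            self.width = width

        def split(self, documents):
            return [doc[i:i + self.width]
                    for doc in documents
                    for i in range(0, len(doc), self.width)]

    pieces = chunk(
        ["a long document body to cut into fixed-width pieces"],
        FixedWidthSplitter,
        chunker_type=ChunkerType.SELFDEFINED,
        chunker_args=[16],
        to_datanode=lambda chunks: [DataNode(content=c) for c in chunks],
    )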
lionagi/loader/reader.py
@@ -0,0 +1,124 @@
+ from enum import Enum
+ from typing import Union, Callable
+
+ from lionagi.bridge.langchain import langchain_loader, from_langchain
+ from lionagi.bridge.llama_index import llama_index_reader, from_llama_index
+ from lionagi.utils.call_util import lcall
+ from lionagi.utils.load_utils import dir_to_nodes
+
+
+ class ReaderType(str, Enum):
+     PLAIN = 'PLAIN'
+     LANGCHAIN = 'langchain'
+     LLAMAINDEX = 'llama_index'
+     SELFDEFINED = 'self_defined'
+
+
+ def _datanode_parser(nodes, parser):
+     """
+     Parses a list of nodes using the given parser function.
+
+     Args:
+         nodes (List[Any]): The list of nodes to be parsed.
+
+         parser (Callable): The parser function to transform nodes into DataNode instances.
+
+     Returns:
+         List[Any]: A list of parsed nodes.
+
+     Raises:
+         ValueError: If the parser function fails.
+     """
+     try:
+         nodes = parser(nodes)
+     except Exception as e:
+         raise ValueError(f'DataNode parser {parser} failed. Error: {e}')
+     return nodes
+
+
+ def text_reader(args, kwargs):
+     """
+     Reads text files from a directory and converts them to DataNode instances.
+
+     Args:
+         args (List[Any]): Positional arguments for the dir_to_nodes function.
+
+         kwargs (dict): Keyword arguments for the dir_to_nodes function.
+
+     Returns:
+         List[Any]: A list of DataNode instances.
+     """
+     return dir_to_nodes(*args, **kwargs)
+
+
+ def load(reader: Union[str, Callable],
+          reader_type=ReaderType.PLAIN,
+          reader_args=[],
+          reader_kwargs={},
+          load_args=[],
+          load_kwargs={},
+          to_datanode: Union[bool, Callable] = True):
+     """
+     Loads documents using the specified reader and reader type.
+
+     Args:
+         reader (Union[str, Callable]): The reader function or its name as a string.
+
+         reader_type (ReaderType): The type of the reader. Defaults to ReaderType.PLAIN.
+
+         reader_args (List[Any]): Positional arguments for the reader function. Defaults to an empty list.
+
+         reader_kwargs (dict): Keyword arguments for the reader function. Defaults to an empty dict.
+
+         load_args (List[Any]): Positional arguments for the loader function. Defaults to an empty list.
+
+         load_kwargs (dict): Keyword arguments for the loader function. Defaults to an empty dict.
+
+         to_datanode (Union[bool, Callable]): Determines whether to convert the result into DataNode instances, or
+             a callable to convert the result. Defaults to True.
+
+     Returns:
+         List[Any]: A list of loaded and potentially parsed documents.
+
+     Raises:
+         ValueError: If the reader fails or an unsupported reader type is provided.
+     """
+     if reader_type == ReaderType.PLAIN:
+         try:
+             if reader == 'text_reader':
+                 reader = text_reader
+             nodes = reader(reader_args, reader_kwargs)
+             return nodes
+         except Exception as e:
+             raise ValueError(f'Reader {reader} is currently not supported. Error: {e}')
+     if reader_type == ReaderType.LANGCHAIN:
+         nodes = langchain_loader(reader, reader_args, reader_kwargs)
+         if isinstance(to_datanode, bool) and to_datanode is True:
+             nodes = lcall(nodes, from_langchain)
+         elif isinstance(to_datanode, Callable):
+             nodes = _datanode_parser(nodes, to_datanode)
+         return nodes
+
+     elif reader_type == ReaderType.LLAMAINDEX:
+         nodes = llama_index_reader(reader, reader_args, reader_kwargs, load_args, load_kwargs)
+         if isinstance(to_datanode, bool) and to_datanode is True:
+             nodes = lcall(nodes, from_llama_index)
+         elif isinstance(to_datanode, Callable):
+             nodes = _datanode_parser(nodes, to_datanode)
+         return nodes
+
+     elif reader_type == ReaderType.SELFDEFINED:
+         try:
+             loader = reader(*reader_args, **reader_kwargs)
+             nodes = loader.load(*load_args, **load_kwargs)
+         except Exception as e:
+             raise ValueError(f'Self defined reader {reader} is not valid. Error: {e}')
+
+         if isinstance(to_datanode, bool) and to_datanode is True:
+             raise ValueError(f'Please define a valid parser to DataNode.')
+         elif isinstance(to_datanode, Callable):
+             nodes = _datanode_parser(nodes, to_datanode)
+         return nodes
+
+     else:
+         raise ValueError(f'{reader_type} is not supported. Please choose from {list(ReaderType)}')
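The self_defined branch mirrors the chunker: load() instantiates reader(*reader_args, **reader_kwargs), calls loader.load(*load_args, **load_kwargs), and requires a to_datanode callable since to_datanode=True raises for this reader type. A sketch with a hypothetical in-memory reader (ListReader is illustrative only):

    from lionagi.loader import load, ReaderType
    from lionagi.schema.base_schema import DataNode

    class ListReader:
        # hypothetical reader: holds raw strings and returns them from load()
        def __init__(self, texts):
            self.texts = texts

        def load(self):
            return self.texts

    nodes = load(
        ListReader,
        reader_type=ReaderType.SELFDEFINED,
        reader_args=[["alpha", "beta"]],
        to_datanode=lambda texts: [DataNode(content=t) for t in texts],
    )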
lionagi/objs/__init__.py
@@ -0,0 +1,7 @@
+ # # from .messenger import Messenger
+ # from .tool_registry import ToolRegistry
+
+ # __all__ = [
+ #     'Messenger',
+ #     'ToolRegistry'
+ # ]