lionagi 0.0.112__py3-none-any.whl → 0.0.113__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. lionagi/__init__.py +3 -3
  2. lionagi/bridge/__init__.py +7 -0
  3. lionagi/bridge/langchain.py +131 -0
  4. lionagi/bridge/llama_index.py +157 -0
  5. lionagi/configs/__init__.py +7 -0
  6. lionagi/configs/oai_configs.py +49 -0
  7. lionagi/configs/openrouter_config.py +49 -0
  8. lionagi/core/__init__.py +8 -2
  9. lionagi/core/instruction_sets.py +1 -3
  10. lionagi/core/messages.py +2 -2
  11. lionagi/core/sessions.py +174 -27
  12. lionagi/datastore/__init__.py +1 -0
  13. lionagi/loader/__init__.py +9 -4
  14. lionagi/loader/chunker.py +157 -0
  15. lionagi/loader/reader.py +124 -0
  16. lionagi/objs/__init__.py +7 -0
  17. lionagi/objs/messenger.py +163 -0
  18. lionagi/objs/tool_registry.py +247 -0
  19. lionagi/schema/__init__.py +11 -0
  20. lionagi/schema/base_schema.py +239 -0
  21. lionagi/schema/base_tool.py +9 -0
  22. lionagi/schema/data_logger.py +94 -0
  23. lionagi/services/__init__.py +14 -0
  24. lionagi/{service_/oai.py → services/base_api_service.py} +49 -82
  25. lionagi/{endpoint/base_endpoint.py → services/chatcompletion.py} +19 -22
  26. lionagi/services/oai.py +34 -0
  27. lionagi/services/openrouter.py +32 -0
  28. lionagi/{service_/service_utils.py → services/service_objs.py} +0 -1
  29. lionagi/structure/__init__.py +7 -0
  30. lionagi/structure/relationship.py +128 -0
  31. lionagi/structure/structure.py +160 -0
  32. lionagi/tests/test_flatten_util.py +426 -0
  33. lionagi/tools/__init__.py +0 -5
  34. lionagi/tools/coder.py +1 -0
  35. lionagi/tools/scorer.py +1 -0
  36. lionagi/tools/validator.py +1 -0
  37. lionagi/utils/__init__.py +46 -20
  38. lionagi/utils/api_util.py +86 -0
  39. lionagi/utils/call_util.py +347 -0
  40. lionagi/utils/flat_util.py +540 -0
  41. lionagi/utils/io_util.py +102 -0
  42. lionagi/utils/load_utils.py +190 -0
  43. lionagi/utils/sys_util.py +191 -0
  44. lionagi/utils/tool_util.py +92 -0
  45. lionagi/utils/type_util.py +81 -0
  46. lionagi/version.py +1 -1
  47. {lionagi-0.0.112.dist-info → lionagi-0.0.113.dist-info}/METADATA +37 -13
  48. lionagi-0.0.113.dist-info/RECORD +84 -0
  49. lionagi/endpoint/chat_completion.py +0 -20
  50. lionagi/endpoint/endpoint_utils.py +0 -0
  51. lionagi/llm_configs.py +0 -21
  52. lionagi/loader/load_utils.py +0 -161
  53. lionagi/schema.py +0 -275
  54. lionagi/service_/__init__.py +0 -6
  55. lionagi/service_/base_service.py +0 -48
  56. lionagi/service_/openrouter.py +0 -1
  57. lionagi/services.py +0 -1
  58. lionagi/tools/tool_utils.py +0 -75
  59. lionagi/utils/sys_utils.py +0 -799
  60. lionagi-0.0.112.dist-info/RECORD +0 -67
  61. /lionagi/{core/responses.py → datastore/chroma.py} +0 -0
  62. /lionagi/{endpoint/assistants.py → datastore/deeplake.py} +0 -0
  63. /lionagi/{endpoint/audio.py → datastore/elasticsearch.py} +0 -0
  64. /lionagi/{endpoint/embeddings.py → datastore/lantern.py} +0 -0
  65. /lionagi/{endpoint/files.py → datastore/pinecone.py} +0 -0
  66. /lionagi/{endpoint/fine_tuning.py → datastore/postgres.py} +0 -0
  67. /lionagi/{endpoint/images.py → datastore/qdrant.py} +0 -0
  68. /lionagi/{endpoint/messages.py → schema/base_condition.py} +0 -0
  69. /lionagi/{service_ → services}/anthropic.py +0 -0
  70. /lionagi/{service_ → services}/anyscale.py +0 -0
  71. /lionagi/{service_ → services}/azure.py +0 -0
  72. /lionagi/{service_ → services}/bedrock.py +0 -0
  73. /lionagi/{service_ → services}/everlyai.py +0 -0
  74. /lionagi/{service_ → services}/gemini.py +0 -0
  75. /lionagi/{service_ → services}/gpt4all.py +0 -0
  76. /lionagi/{service_ → services}/huggingface.py +0 -0
  77. /lionagi/{service_ → services}/litellm.py +0 -0
  78. /lionagi/{service_ → services}/localai.py +0 -0
  79. /lionagi/{service_ → services}/mistralai.py +0 -0
  80. /lionagi/{service_ → services}/ollama.py +0 -0
  81. /lionagi/{service_ → services}/openllm.py +0 -0
  82. /lionagi/{service_ → services}/perplexity.py +0 -0
  83. /lionagi/{service_ → services}/predibase.py +0 -0
  84. /lionagi/{service_ → services}/rungpt.py +0 -0
  85. /lionagi/{service_ → services}/vllm.py +0 -0
  86. /lionagi/{service_ → services}/xinference.py +0 -0
  87. /lionagi/{endpoint → tests}/__init__.py +0 -0
  88. /lionagi/{endpoint/models.py → tools/planner.py} +0 -0
  89. /lionagi/{endpoint/moderations.py → tools/prompter.py} +0 -0
  90. /lionagi/{endpoint/runs.py → tools/sandbox.py} +0 -0
  91. /lionagi/{endpoint/threads.py → tools/summarizer.py} +0 -0
  92. {lionagi-0.0.112.dist-info → lionagi-0.0.113.dist-info}/LICENSE +0 -0
  93. {lionagi-0.0.112.dist-info → lionagi-0.0.113.dist-info}/WHEEL +0 -0
  94. {lionagi-0.0.112.dist-info → lionagi-0.0.113.dist-info}/top_level.txt +0 -0
lionagi/core/sessions.py CHANGED
@@ -1,57 +1,138 @@
 import json
 from typing import Any
 from dotenv import load_dotenv
-load_dotenv()
 
-from .conversations import Conversation
-from ..utils.sys_utils import to_list
 from ..schema import DataLogger
-from ..service_.service_utils import StatusTracker
-from ..tools.tool_utils import ToolManager
-from ..service_.oai import OpenAIService
-from ..endpoint.chat_completion import ChatCompletion
-
-from ..llm_configs import oai_llmconfig, oai_schema
+from ..utils import lcall, alcall
+from ..services import OpenAIService, ChatCompletion
+from ..core.conversations import Conversation
+from ..objs.tool_registry import ToolManager
+from ..configs.oai_configs import oai_schema
 
-status_tracker = StatusTracker()
+load_dotenv()
 OAIService = OpenAIService()
 
 
 class Session:
+    """
+    The Session class is responsible for managing a conversation session with a given system,
+    handling the logging of data, and invoking tools as part of the conversation.
 
-    def __init__(self, system, dir=None, llmconfig=oai_llmconfig, service=OAIService):
+    Attributes:
+        conversation (Conversation): An object to manage the conversation flow and history.
+
+        system (str): The name of the system with which the conversation is happening.
+
+        llmconfig (dict): Configuration for the language model.
+
+        _logger (DataLogger): An object for logging conversation data.
+
+        service (OpenAIService): A service object for interacting with OpenAI APIs.
+
+        tool_manager (ToolManager): An object to manage the registration and invocation of tools.
+    """
+
+    def __init__(
+        self, system, dir=None, llmconfig=oai_schema['chat']['config'],
+        service=OAIService
+    ):
+        """
+        Initializes the Session object.
+
+        Args:
+            system (str): The name of the system with which the session is initiated.
+
+            dir (str, optional): The directory for saving logs. Defaults to None.
+
+            llmconfig (dict): Configuration for the language model. Defaults to chat config schema.
+
+            service (OpenAIService): The service object for API interactions. Defaults to an instance of OpenAIService.
+        """
 
         self.conversation = Conversation()
         self.system = system
         self.llmconfig = llmconfig
-        self._logger = DataLogger(dir=dir)
+        self.logger_ = DataLogger(dir=dir)
         self.service = service
-        self._toolmanager = ToolManager()
+        self.tool_manager = ToolManager()
 
     def set_dir(self, dir):
-        self._logger.dir = dir
+        """
+        Sets the directory where data logs should be saved.
+
+        Args:
+            dir (str): The path to the directory for saving logs.
+        """
+        self.logger_.dir = dir
 
     def set_system(self, system):
+        """
+        Changes the system associated with the conversation.
+
+        Args:
+            system (str): The name of the new system for the conversation.
+        """
         self.conversation.change_system(system)
 
     def set_llmconfig(self, llmconfig):
+        """
+        Updates the language model configuration.
+
+        Args:
+            llmconfig (dict): The new configuration for the language model.
+        """
         self.llmconfig = llmconfig
 
     def set_service(self, service):
+        """
+        Sets the service object used for API interactions.
+
+        Args:
+            service (OpenAIService): The new service object.
+        """
         self.service = service
 
     async def _output(self, invoke=True, out=True):
+        """
+        Processes the output from the conversation, possibly invoking tools and returning the latest response.
+
+        Args:
+            invoke (bool): Indicates whether to invoke tools based on the latest response. Defaults to True.
+
+            out (bool): Determines whether to return the latest response content. Defaults to True.
+
+        Returns:
+            The content of the latest response if out is True. Otherwise, returns None.
+        """
         if invoke:
             try:
-                func, args = self._toolmanager._get_function_call(self.conversation.responses[-1]['content'])
-                outs = await self._toolmanager.invoke(func, args)
-                self.conversation.add_messages(response=outs)
+                # func, args = self.tool_manager._get_function_call(self.conversation.responses[-1]['content'])
+                # outs = await self.tool_manager.invoke(func, args)
+                # self.conversation.add_messages(response=outs)
+
+                tool_uses = json.loads(self.conversation.responses[-1]['content'])
+                if 'function_list' in tool_uses.keys():
+                    func_calls = lcall(tool_uses['function_list'], self.tool_manager._get_function_call)
+                else:
+                    func_calls = lcall(tool_uses['tool_uses'], self.tool_manager._get_function_call)
+
+                outs = await alcall(func_calls, self.tool_manager.invoke)
+                for out, f in zip(outs, func_calls):
+                    response = {"function": f[0], "arguments": f[1], "output": out}
+                    self.conversation.add_messages(response=response)
+
         except:
             pass
         if out:
             return self.conversation.responses[-1]['content']
 
     def _is_invoked(self):
+        """
+        Checks if the last message in the conversation indicates a function call result.
+
+        Returns:
+            bool: True if the last message is a function call result, False otherwise.
+        """
         msg = self.conversation.messages[-1]
         try:
             if "function call result" in json.loads(msg['content']).keys():
@@ -60,28 +141,83 @@ class Session:
         return False
 
     def register_tools(self, tools, update=False, new=False, prefix=None, postfix=None):
+        """
+        Registers a list of tools to the tool manager and updates the language model configuration.
+
+        Args:
+            tools: A single tool or a list of tools to be registered.
+            update (bool): If True, update existing tools. Defaults to False.
+            new (bool): If True, add as new tools. Defaults to False.
+            prefix: A prefix added to all tool names. Defaults to None.
+            postfix: A postfix added to all tool names. Defaults to None.
+        """
         if not isinstance(tools, list):
             tools=[tools]
-        self._toolmanager.register_tools(tools=tools, update=update, new=new, prefix=prefix, postfix=postfix)
+        self.tool_manager.register_tools(tools=tools, update=update, new=new, prefix=prefix, postfix=postfix)
+        tools_schema = lcall(tools, lambda tool: tool.to_dict()['schema_'])
+        if self.llmconfig['tools'] is None:
+            self.llmconfig['tools'] = tools_schema
+        else:
+            self.llmconfig['tools'] += tools_schema
 
-    async def initiate(self, instruction, system=None, context=None, name=None, invoke=True, out=True, **kwargs) -> Any:
+    async def initiate(self, instruction, system=None, context=None,
+                       name=None, invoke=True, out=True, **kwargs) -> Any:
+        """
+        Initiates a conversation with an instruction and possibly additional context.
+
+        Args:
+            instruction (str): The initial instruction for the conversation.
+            system (str, optional): The name of the system to be used. If None, defaults to current system.
+            context (str, optional): Additional context for the conversation. Defaults to None.
+            name (str, optional): The name associated with the conversation. Defaults to None.
+            invoke (bool): Indicates whether to invoke tools. Defaults to True.
+            out (bool): Determines whether to return the latest response content. Defaults to True.
+            **kwargs: Additional keyword arguments for language model configuration.
+
+        Returns:
+            The output of the conversation if out is True, otherwise None.
+        """
         config = {**self.llmconfig, **kwargs}
         system = system or self.system
         self.conversation.initiate_conversation(system=system, instruction=instruction, context=context, name=name)
-        await self._call_chatcompletion(**config)
+        await self.call_chatcompletion(**config)
 
         return await self._output(invoke, out)
 
-    async def followup(self, instruction, system=None, context=None, out=True, name=None, invoke=True, **kwargs) -> Any:
+    async def followup(self, instruction, system=None, context=None,
+                       out=True, name=None, invoke=True, **kwargs) -> Any:
+        """
+        Continues the conversation with a follow-up instruction.
+
+        Args:
+            instruction (str): The follow-up instruction for the conversation.
+            system (str, optional): The name of the system to be used. If None, defaults to current system.
+            context (str, optional): Additional context for the conversation. Defaults to None.
+            out (bool): Determines whether to return the latest response content. Defaults to True.
+            name (str, optional): The name associated with the conversation. Defaults to None.
+            invoke (bool): Indicates whether to invoke tools. Defaults to True.
+            **kwargs: Additional keyword arguments for language model configuration.
+
+        Returns:
+            The output of the conversation if out is True, otherwise None.
+        """
         if system:
             self.conversation.change_system(system)
         self.conversation.add_messages(instruction=instruction, context=context, name=name)
         config = {**self.llmconfig, **kwargs}
-        await self._call_chatcompletion(**config)
+        await self.call_chatcompletion(**config)
 
         return await self._output(invoke, out)
 
     async def auto_followup(self, instruct, num=3, **kwargs):
+        """
+        Automatically generates follow-up messages based on whether the last response invoked a tool.
+
+        Args:
+            instruct (str): The instruction to pass for follow-up.
+            num (int): The number of follow-ups to attempt. Defaults to 3.
+            **kwargs: Additional keyword arguments for the follow-up process.
+        """
         cont_ = True
         while num > 0 and cont_ is True:
             await self.followup(instruct, tool_choice="auto", **kwargs)
@@ -91,22 +227,33 @@ class Session:
             await self.followup(instruct, **kwargs)
 
     def messages_to_csv(self, dir=None, filename="messages.csv", **kwargs):
-        dir = dir or self._logger.dir
+        """
+        Exports the conversation messages to a CSV file.
+
+        Args:
+            dir (str, optional): The directory where the CSV should be saved. Defaults to the logger's directory.
+            filename (str): The name of the CSV file. Defaults to "messages.csv".
+            **kwargs: Additional keyword arguments passed to the CSV writing function.
+
+        Raises:
+            ValueError: If no directory is specified.
+        """
+        dir = dir or self.logger_.dir
         if dir is None:
             raise ValueError("No directory specified.")
         self.conversation.msg.to_csv(dir=dir, filename=filename, **kwargs)
 
     def log_to_csv(self, dir=None, filename="llmlog.csv", **kwargs):
-        dir = dir or self._logger.dir
+        dir = dir or self.logger_.dir
         if dir is None:
             raise ValueError("No directory specified.")
-        self._logger.to_csv(dir=dir, filename=filename, **kwargs)
+        self.logger_.to_csv(dir=dir, filename=filename, **kwargs)
 
-    async def _call_chatcompletion(self, schema=oai_schema, **kwargs):
+    async def call_chatcompletion(self, schema=oai_schema['chat'], **kwargs):
         payload = ChatCompletion.create_payload(messages=self.conversation.messages, schema=schema, llmconfig=self.llmconfig,**kwargs)
         completion = await self.service.serve(payload=payload)
         if "choices" in completion:
-            self._logger({"input":payload, "output": completion})
+            self.logger_({"input":payload, "output": completion})
            self.conversation.add_messages(response=completion['choices'][0])
            self.conversation.responses.append(self.conversation.messages[-1])
            self.conversation.response_counts += 1
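
The reworked `Session` makes `call_chatcompletion` public, renames the logger attribute to `logger_`, and dispatches parsed tool calls through `lcall`/`alcall`. A minimal usage sketch of the new surface follows; the system prompt, instructions, and the `my_tool` object are illustrative, and an `OPENAI_API_KEY` in the environment is assumed:

```python
import asyncio
from lionagi.core.sessions import Session

async def main():
    # Session now defaults llmconfig to oai_schema['chat']['config']
    session = Session(system="You are a helpful assistant.", dir="logs/")

    # register_tools also merges each tool's schema_ into llmconfig['tools'];
    # `my_tool` stands in for a lionagi tool object exposing to_dict()['schema_']
    # session.register_tools(my_tool)

    # initiate() builds the payload via ChatCompletion.create_payload,
    # serves it through the service, and returns the latest response content
    answer = await session.initiate("Summarize this release in one line.")

    # followup() appends to the same conversation; auto_followup() keeps
    # following up (tool_choice="auto") while the last reply invoked a tool
    detail = await session.followup("Now list the renamed modules.")

    # logs accumulate on session.logger_ and can be exported to CSV
    session.log_to_csv(filename="llmlog.csv")
    return answer, detail

asyncio.run(main())
```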
lionagi/datastore/__init__.py CHANGED
@@ -0,0 +1 @@
+# TODO
lionagi/loader/__init__.py CHANGED
@@ -1,7 +1,12 @@
-from .load_utils import dir_to_files, dir_to_path, file_to_chunks
+from .reader import load, ReaderType, text_reader
+from .chunker import chunk, datanodes_convert, ChunkerType, text_chunker
 
 __all__ = [
-    "dir_to_files",
-    "dir_to_path",
-    "file_to_chunks"
+    'load',
+    'chunk',
+    'datanodes_convert',
+    'text_reader',
+    'text_chunker',
+    'ReaderType',
+    'ChunkerType'
 ]
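
With the rewritten `__init__`, the reader and chunker entry points are importable straight from the package namespace:

```python
# New re-exports from the loader package in 0.0.113
from lionagi.loader import load, chunk, ReaderType, ChunkerType, text_reader, text_chunker
```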
lionagi/loader/chunker.py CHANGED
@@ -0,0 +1,157 @@
+from enum import Enum
+from typing import Union, Callable
+
+from ..bridge.langchain import langchain_text_splitter, from_langchain
+from ..bridge.llama_index import llama_index_node_parser, from_llama_index
+from ..schema.base_schema import DataNode
+from ..utils import lcall, file_to_chunks
+
+# define an enum to represent different types of chunkers
+class ChunkerType(str, Enum):
+    PLAIN = 'plain' # default
+    LANGCHAIN = 'langchain' # using langchain functions
+    LLAMAINDEX = 'llama_index' # using llamaindex functions
+    SELFDEFINED = 'self_defined' # create custom functions
+
+# Function to convert documents to a specific format based on the chunker type
+def datanodes_convert(documents, chunker_type):
+    """
+    Converts a lionagi DataNode documents to a specific format based on the chunker type.
+
+    Args:
+        documents (List[DataNode]): A list of DataNode instances to be converted.
+
+        chunker_type (ChunkerType): The chunker type to determine the conversion format.
+
+    Returns:
+        List[DataNode]: The list of converted DataNode instances.
+    """
+    for i in range(len(documents)):
+        if type(documents[i]) == DataNode:
+            if chunker_type == ChunkerType.LLAMAINDEX:
+                documents[i] = documents[i].to_llama_index()
+            elif chunker_type == ChunkerType.LANGCHAIN:
+                documents[i] = documents[i].to_langchain()
+    return documents
+
+# Function to chunk text documents
+def text_chunker(documents, args, kwargs):
+    """
+    Chunks text documents into smaller pieces.
+
+    Args:
+        documents (List[DataNode]): A list of DataNode instances to be chunked.
+        args (List[Any]): Positional arguments to be passed to the chunking function.
+        kwargs (dict): Keyword arguments to be passed to the chunking function.
+
+    Returns:
+        List[DataNode]: A list of chunked DataNode instances.
+    """
+    def chunk_node(node):
+        chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
+        lcall(chunks, lambda chunk: chunk.pop('node_id'))
+        chunk_nodes = lcall(chunks, lambda x: DataNode(**x))
+        return chunk_nodes
+
+    nodes = []
+    for doc in documents:
+        nodes += chunk_node(doc)
+    return nodes
+
+
+def _datanode_parser(nodes, parser):
+    """
+    Parses raw data into DataNode instances using the provided parser function.
+
+    Args:
+        nodes (List[Any]): A list of raw data to be parsed.
+        parser (Callable): A function that parses raw data into DataNode instances.
+
+    Returns:
+        List[DataNode]: A list of parsed DataNode instances.
+
+    Raises:
+        ValueError: If the parser function fails.
+    """
+    try:
+        nodes = parser(nodes)
+    except Exception as e:
+        raise ValueError(f'DataNode parser {parser} failed. Error:{e}')
+    return nodes
+
+
+def chunk(documents,
+          chunker,
+          chunker_type=ChunkerType.PLAIN,
+          chunker_args=[],
+          chunker_kwargs={},
+          chunking_kwargs={},
+          documents_convert_func=None,
+          to_datanode: Union[bool, Callable] = True):
+    """
+    Chunks documents using the specified chunker and chunker type.
+
+    Args:
+        documents (List[Any]): A list of documents to be chunked.
+        chunker (Callable): The chunking function to be used.
+        chunker_type (ChunkerType): The type of the chunker. Defaults to ChunkerType.PLAIN.
+        chunker_args (List[Any]): Positional arguments for the chunker function. Defaults to an empty list.
+        chunker_kwargs (dict): Keyword arguments for the chunker function. Defaults to an empty dict.
+        chunking_kwargs (dict): Additional keyword arguments for the chunking process. Defaults to an empty dict.
+        documents_convert_func (Callable): A function to convert documents to a specific format. Defaults to None.
+        to_datanode (Union[bool, Callable]): Determines whether to convert the result into DataNode instances, or
+            a callable to convert the result. Defaults to True.

+    Returns:
+        List[DataNode]: A list of chunked DataNode instances after applying the chunker.
+
+    Raises:
+        ValueError: If the chunker fails or an unsupported chunker type is provided.
+    """
+    if chunker_type == ChunkerType.PLAIN:
+        try:
+            if chunker == 'text_chunker':
+                chunker = text_chunker
+            nodes = chunker(documents, chunker_args, chunker_kwargs)
+            return nodes
+        except Exception as e:
+            raise ValueError(f'Reader {chunker} is currently not supported. Error: {e}')
+    if chunker_type == ChunkerType.LANGCHAIN:
+        if documents_convert_func:
+            documents = documents_convert_func(documents, 'langchain')
+        nodes = langchain_text_splitter(documents, chunker, chunker_args, chunker_kwargs)
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            if isinstance(documents, str):
+                nodes = lcall(nodes, lambda x: DataNode(content=x))
+            else:
+                nodes = lcall(nodes, from_langchain)
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    elif chunker_type == ChunkerType.LLAMAINDEX:
+        if documents_convert_func:
+            documents = documents_convert_func(documents, 'llama_index')
+        nodes = llama_index_node_parser(documents, chunker, chunker_args, chunker_kwargs, chunking_kwargs)
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            nodes = lcall(nodes, from_llama_index)
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    elif chunker_type == ChunkerType.SELFDEFINED:
+        try:
+            splitter = chunker(*chunker_args, **chunker_kwargs)
+            nodes = splitter.split(documents, **chunking_kwargs)
+        except Exception as e:
+            raise ValueError(f'Self defined chunker {chunker} is not valid. Error: {e}')
+
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            raise ValueError(f'Please define a valid parser to DataNode.')
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    else:
+        raise ValueError(f'{chunker_type} is not supported. Please choose from {list(ChunkerType)}')
+
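
A short sketch of the `ChunkerType.PLAIN` path, where the string `'text_chunker'` resolves to the bundled `text_chunker` and each node is split via `file_to_chunks`. The document content is illustrative, and the `chunker_kwargs` key is an assumption about `file_to_chunks`'s signature rather than something confirmed by this diff:

```python
from lionagi.loader.chunker import chunk, ChunkerType
from lionagi.schema.base_schema import DataNode

docs = [DataNode(content="A long document body that needs splitting ...")]

# PLAIN dispatch: chunk() calls text_chunker(documents, chunker_args, chunker_kwargs),
# which feeds each node's dict through file_to_chunks(*args, **kwargs)
nodes = chunk(
    documents=docs,
    chunker='text_chunker',
    chunker_type=ChunkerType.PLAIN,
    chunker_kwargs={'chunk_size': 512},  # assumed file_to_chunks keyword
)
print(len(nodes), "chunks")
```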
lionagi/loader/reader.py CHANGED
@@ -0,0 +1,124 @@
+from enum import Enum
+from typing import Union, Callable
+
+from lionagi.bridge.langchain import langchain_loader, from_langchain
+from lionagi.bridge.llama_index import llama_index_reader, from_llama_index
+from lionagi.utils.call_util import lcall
+from lionagi.utils.load_utils import dir_to_nodes
+
+
+class ReaderType(str, Enum):
+    PLAIN = 'PLAIN'
+    LANGCHAIN = 'langchain'
+    LLAMAINDEX = 'llama_index'
+    SELFDEFINED = 'self_defined'
+
+
+def _datanode_parser(nodes, parser):
+    """
+    Parses a list of nodes using the given parser function.
+
+    Args:
+        nodes (List[Any]): The list of nodes to be parsed.
+
+        parser (Callable): The parser function to transform nodes into DataNode instances.
+
+    Returns:
+        List[Any]: A list of parsed nodes.
+
+    Raises:
+        ValueError: If the parser function fails.
+    """
+    try:
+        nodes = parser(nodes)
+    except Exception as e:
+        raise ValueError(f'DataNode parser {parser} failed. Error:{e}')
+    return nodes
+
+
+def text_reader(args, kwargs):
+    """
+    Reads text files from a directory and converts them to DataNode instances.
+
+    Args:
+        args (List[Any]): Positional arguments for the dir_to_nodes function.
+
+        kwargs (dict): Keyword arguments for the dir_to_nodes function.
+
+    Returns:
+        List[Any]: A list of DataNode instances.
+    """
+    return dir_to_nodes(*args, **kwargs)
+
+
+def load(reader: Union[str, Callable],
+         reader_type=ReaderType.PLAIN,
+         reader_args=[],
+         reader_kwargs={},
+         load_args=[],
+         load_kwargs={},
+         to_datanode: Union[bool, Callable] = True):
+    """
+    Loads documents using the specified reader and reader type.
+
+    Args:
+        reader (Union[str, Callable]): The reader function or its name as a string.
+
+        reader_type (ReaderType): The type of the reader. Defaults to ReaderType.PLAIN.
+
+        reader_args (List[Any]): Positional arguments for the reader function. Defaults to an empty list.
+
+        reader_kwargs (dict): Keyword arguments for the reader function. Defaults to an empty dict.
+
+        load_args (List[Any]): Positional arguments for the loader function. Defaults to an empty list.
+
+        load_kwargs (dict): Keyword arguments for the loader function. Defaults to an empty dict.
+
+        to_datanode (Union[bool, Callable]): Determines whether to convert the result into DataNode instances, or
+            a callable to convert the result. Defaults to True.
+
+    Returns:
+        List[Any]: A list of loaded and potentially parsed documents.
+
+    Raises:
+        ValueError: If the reader fails or an unsupported reader type is provided.
+    """
+    if reader_type == ReaderType.PLAIN:
+        try:
+            if reader == 'text_reader':
+                reader = text_reader
+            nodes = reader(reader_args, reader_kwargs)
+            return nodes
+        except Exception as e:
+            raise ValueError(f'Reader {reader} is currently not supported. Error: {e}')
+    if reader_type == ReaderType.LANGCHAIN:
+        nodes = langchain_loader(reader, reader_args, reader_kwargs)
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            nodes = lcall(nodes, from_langchain)
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    elif reader_type == ReaderType.LLAMAINDEX:
+        nodes = llama_index_reader(reader, reader_args, reader_kwargs, load_args, load_kwargs)
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            nodes = lcall(nodes, from_llama_index)
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    elif reader_type == ReaderType.SELFDEFINED:
+        try:
+            loader = reader(*reader_args, **reader_kwargs)
+            nodes = loader.load(*load_args, **load_kwargs)
+        except Exception as e:
+            raise ValueError(f'Self defined reader {reader} is not valid. Error: {e}')
+
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            raise ValueError(f'Please define a valid parser to DataNode.')
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    else:
+        raise ValueError(f'{reader_type} is not supported. Please choose from {list(ReaderType)}')
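
`load` mirrors the dispatch shape of `chunk`. A sketch of the plain path follows; the directory argument and keyword are assumptions about `dir_to_nodes`, not confirmed by this diff, and the langchain line is shown commented out for comparison:

```python
from lionagi.loader.reader import load, ReaderType

# PLAIN dispatch: the string 'text_reader' resolves to text_reader, which
# forwards reader_args / reader_kwargs to dir_to_nodes(*args, **kwargs)
nodes = load(
    reader='text_reader',
    reader_type=ReaderType.PLAIN,
    reader_args=['data/'],          # assumed: directory argument of dir_to_nodes
    reader_kwargs={'ext': '.txt'},  # assumed keyword of dir_to_nodes
)

# LANGCHAIN dispatch: the reader is handed to langchain_loader, and with the
# default to_datanode=True the results come back converted via from_langchain
# docs = load(reader='TextLoader', reader_type=ReaderType.LANGCHAIN,
#             reader_args=['data/notes.txt'])
```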
lionagi/objs/__init__.py CHANGED
@@ -0,0 +1,7 @@
+# # from .messenger import Messenger
+# from .tool_registry import ToolRegistry
+
+# __all__ = [
+#     'Messenger',
+#     'ToolRegistry'
+# ]