lionagi 0.0.112__py3-none-any.whl → 0.0.113__py3-none-any.whl

Files changed (94)
  1. lionagi/__init__.py +3 -3
  2. lionagi/bridge/__init__.py +7 -0
  3. lionagi/bridge/langchain.py +131 -0
  4. lionagi/bridge/llama_index.py +157 -0
  5. lionagi/configs/__init__.py +7 -0
  6. lionagi/configs/oai_configs.py +49 -0
  7. lionagi/configs/openrouter_config.py +49 -0
  8. lionagi/core/__init__.py +8 -2
  9. lionagi/core/instruction_sets.py +1 -3
  10. lionagi/core/messages.py +2 -2
  11. lionagi/core/sessions.py +174 -27
  12. lionagi/datastore/__init__.py +1 -0
  13. lionagi/loader/__init__.py +9 -4
  14. lionagi/loader/chunker.py +157 -0
  15. lionagi/loader/reader.py +124 -0
  16. lionagi/objs/__init__.py +7 -0
  17. lionagi/objs/messenger.py +163 -0
  18. lionagi/objs/tool_registry.py +247 -0
  19. lionagi/schema/__init__.py +11 -0
  20. lionagi/schema/base_schema.py +239 -0
  21. lionagi/schema/base_tool.py +9 -0
  22. lionagi/schema/data_logger.py +94 -0
  23. lionagi/services/__init__.py +14 -0
  24. lionagi/{service_/oai.py → services/base_api_service.py} +49 -82
  25. lionagi/{endpoint/base_endpoint.py → services/chatcompletion.py} +19 -22
  26. lionagi/services/oai.py +34 -0
  27. lionagi/services/openrouter.py +32 -0
  28. lionagi/{service_/service_utils.py → services/service_objs.py} +0 -1
  29. lionagi/structure/__init__.py +7 -0
  30. lionagi/structure/relationship.py +128 -0
  31. lionagi/structure/structure.py +160 -0
  32. lionagi/tests/test_flatten_util.py +426 -0
  33. lionagi/tools/__init__.py +0 -5
  34. lionagi/tools/coder.py +1 -0
  35. lionagi/tools/scorer.py +1 -0
  36. lionagi/tools/validator.py +1 -0
  37. lionagi/utils/__init__.py +46 -20
  38. lionagi/utils/api_util.py +86 -0
  39. lionagi/utils/call_util.py +347 -0
  40. lionagi/utils/flat_util.py +540 -0
  41. lionagi/utils/io_util.py +102 -0
  42. lionagi/utils/load_utils.py +190 -0
  43. lionagi/utils/sys_util.py +191 -0
  44. lionagi/utils/tool_util.py +92 -0
  45. lionagi/utils/type_util.py +81 -0
  46. lionagi/version.py +1 -1
  47. {lionagi-0.0.112.dist-info → lionagi-0.0.113.dist-info}/METADATA +37 -13
  48. lionagi-0.0.113.dist-info/RECORD +84 -0
  49. lionagi/endpoint/chat_completion.py +0 -20
  50. lionagi/endpoint/endpoint_utils.py +0 -0
  51. lionagi/llm_configs.py +0 -21
  52. lionagi/loader/load_utils.py +0 -161
  53. lionagi/schema.py +0 -275
  54. lionagi/service_/__init__.py +0 -6
  55. lionagi/service_/base_service.py +0 -48
  56. lionagi/service_/openrouter.py +0 -1
  57. lionagi/services.py +0 -1
  58. lionagi/tools/tool_utils.py +0 -75
  59. lionagi/utils/sys_utils.py +0 -799
  60. lionagi-0.0.112.dist-info/RECORD +0 -67
  61. /lionagi/{core/responses.py → datastore/chroma.py} +0 -0
  62. /lionagi/{endpoint/assistants.py → datastore/deeplake.py} +0 -0
  63. /lionagi/{endpoint/audio.py → datastore/elasticsearch.py} +0 -0
  64. /lionagi/{endpoint/embeddings.py → datastore/lantern.py} +0 -0
  65. /lionagi/{endpoint/files.py → datastore/pinecone.py} +0 -0
  66. /lionagi/{endpoint/fine_tuning.py → datastore/postgres.py} +0 -0
  67. /lionagi/{endpoint/images.py → datastore/qdrant.py} +0 -0
  68. /lionagi/{endpoint/messages.py → schema/base_condition.py} +0 -0
  69. /lionagi/{service_ → services}/anthropic.py +0 -0
  70. /lionagi/{service_ → services}/anyscale.py +0 -0
  71. /lionagi/{service_ → services}/azure.py +0 -0
  72. /lionagi/{service_ → services}/bedrock.py +0 -0
  73. /lionagi/{service_ → services}/everlyai.py +0 -0
  74. /lionagi/{service_ → services}/gemini.py +0 -0
  75. /lionagi/{service_ → services}/gpt4all.py +0 -0
  76. /lionagi/{service_ → services}/huggingface.py +0 -0
  77. /lionagi/{service_ → services}/litellm.py +0 -0
  78. /lionagi/{service_ → services}/localai.py +0 -0
  79. /lionagi/{service_ → services}/mistralai.py +0 -0
  80. /lionagi/{service_ → services}/ollama.py +0 -0
  81. /lionagi/{service_ → services}/openllm.py +0 -0
  82. /lionagi/{service_ → services}/perplexity.py +0 -0
  83. /lionagi/{service_ → services}/predibase.py +0 -0
  84. /lionagi/{service_ → services}/rungpt.py +0 -0
  85. /lionagi/{service_ → services}/vllm.py +0 -0
  86. /lionagi/{service_ → services}/xinference.py +0 -0
  87. /lionagi/{endpoint → tests}/__init__.py +0 -0
  88. /lionagi/{endpoint/models.py → tools/planner.py} +0 -0
  89. /lionagi/{endpoint/moderations.py → tools/prompter.py} +0 -0
  90. /lionagi/{endpoint/runs.py → tools/sandbox.py} +0 -0
  91. /lionagi/{endpoint/threads.py → tools/summarizer.py} +0 -0
  92. {lionagi-0.0.112.dist-info → lionagi-0.0.113.dist-info}/LICENSE +0 -0
  93. {lionagi-0.0.112.dist-info → lionagi-0.0.113.dist-info}/WHEEL +0 -0
  94. {lionagi-0.0.112.dist-info → lionagi-0.0.113.dist-info}/top_level.txt +0 -0
lionagi/core/sessions.py CHANGED
@@ -1,57 +1,138 @@
 import json
 from typing import Any
 from dotenv import load_dotenv
-load_dotenv()
 
-from .conversations import Conversation
-from ..utils.sys_utils import to_list
 from ..schema import DataLogger
-from ..service_.service_utils import StatusTracker
-from ..tools.tool_utils import ToolManager
-from ..service_.oai import OpenAIService
-from ..endpoint.chat_completion import ChatCompletion
-
-from ..llm_configs import oai_llmconfig, oai_schema
+from ..utils import lcall, alcall
+from ..services import OpenAIService, ChatCompletion
+from ..core.conversations import Conversation
+from ..objs.tool_registry import ToolManager
+from ..configs.oai_configs import oai_schema
 
-status_tracker = StatusTracker()
+load_dotenv()
 OAIService = OpenAIService()
 
 
 class Session:
+    """
+    The Session class is responsible for managing a conversation session with a given system,
+    handling the logging of data, and invoking tools as part of the conversation.
 
-    def __init__(self, system, dir=None, llmconfig=oai_llmconfig, service=OAIService):
+    Attributes:
+        conversation (Conversation): An object to manage the conversation flow and history.
+
+        system (str): The name of the system with which the conversation is happening.
+
+        llmconfig (dict): Configuration for the language model.
+
+        _logger (DataLogger): An object for logging conversation data.
+
+        service (OpenAIService): A service object for interacting with OpenAI APIs.
+
+        tool_manager (ToolManager): An object to manage the registration and invocation of tools.
+    """
+
+    def __init__(
+        self, system, dir=None, llmconfig=oai_schema['chat']['config'],
+        service=OAIService
+    ):
+        """
+        Initializes the Session object.
+
+        Args:
+            system (str): The name of the system with which the session is initiated.
+
+            dir (str, optional): The directory for saving logs. Defaults to None.
+
+            llmconfig (dict): Configuration for the language model. Defaults to chat config schema.
+
+            service (OpenAIService): The service object for API interactions. Defaults to an instance of OpenAIService.
+        """
 
         self.conversation = Conversation()
         self.system = system
         self.llmconfig = llmconfig
-        self._logger = DataLogger(dir=dir)
+        self.logger_ = DataLogger(dir=dir)
         self.service = service
-        self._toolmanager = ToolManager()
+        self.tool_manager = ToolManager()
 
     def set_dir(self, dir):
-        self._logger.dir = dir
+        """
+        Sets the directory where data logs should be saved.
+
+        Args:
+            dir (str): The path to the directory for saving logs.
+        """
+        self.logger_.dir = dir
 
     def set_system(self, system):
+        """
+        Changes the system associated with the conversation.
+
+        Args:
+            system (str): The name of the new system for the conversation.
+        """
         self.conversation.change_system(system)
 
     def set_llmconfig(self, llmconfig):
+        """
+        Updates the language model configuration.
+
+        Args:
+            llmconfig (dict): The new configuration for the language model.
+        """
         self.llmconfig = llmconfig
 
     def set_service(self, service):
+        """
+        Sets the service object used for API interactions.
+
+        Args:
+            service (OpenAIService): The new service object.
+        """
         self.service = service
 
     async def _output(self, invoke=True, out=True):
+        """
+        Processes the output from the conversation, possibly invoking tools and returning the latest response.
+
+        Args:
+            invoke (bool): Indicates whether to invoke tools based on the latest response. Defaults to True.
+
+            out (bool): Determines whether to return the latest response content. Defaults to True.
+
+        Returns:
+            The content of the latest response if out is True. Otherwise, returns None.
+        """
         if invoke:
             try:
-                func, args = self._toolmanager._get_function_call(self.conversation.responses[-1]['content'])
-                outs = await self._toolmanager.invoke(func, args)
-                self.conversation.add_messages(response=outs)
+                # func, args = self.tool_manager._get_function_call(self.conversation.responses[-1]['content'])
+                # outs = await self.tool_manager.invoke(func, args)
+                # self.conversation.add_messages(response=outs)
+
+                tool_uses = json.loads(self.conversation.responses[-1]['content'])
+                if 'function_list' in tool_uses.keys():
+                    func_calls = lcall(tool_uses['function_list'], self.tool_manager._get_function_call)
+                else:
+                    func_calls = lcall(tool_uses['tool_uses'], self.tool_manager._get_function_call)
+
+                outs = await alcall(func_calls, self.tool_manager.invoke)
+                for out, f in zip(outs, func_calls):
+                    response = {"function": f[0], "arguments": f[1], "output": out}
+                    self.conversation.add_messages(response=response)
+
             except:
                 pass
         if out:
             return self.conversation.responses[-1]['content']
 
     def _is_invoked(self):
+        """
+        Checks if the last message in the conversation indicates a function call result.
+
+        Returns:
+            bool: True if the last message is a function call result, False otherwise.
+        """
         msg = self.conversation.messages[-1]
         try:
             if "function call result" in json.loads(msg['content']).keys():
@@ -60,28 +141,83 @@ class Session:
             return False
 
     def register_tools(self, tools, update=False, new=False, prefix=None, postfix=None):
+        """
+        Registers a list of tools to the tool manager and updates the language model configuration.
+
+        Args:
+            tools: A single tool or a list of tools to be registered.
+            update (bool): If True, update existing tools. Defaults to False.
+            new (bool): If True, add as new tools. Defaults to False.
+            prefix: A prefix added to all tool names. Defaults to None.
+            postfix: A postfix added to all tool names. Defaults to None.
+        """
         if not isinstance(tools, list):
             tools=[tools]
-        self._toolmanager.register_tools(tools=tools, update=update, new=new, prefix=prefix, postfix=postfix)
+        self.tool_manager.register_tools(tools=tools, update=update, new=new, prefix=prefix, postfix=postfix)
+        tools_schema = lcall(tools, lambda tool: tool.to_dict()['schema_'])
+        if self.llmconfig['tools'] is None:
+            self.llmconfig['tools'] = tools_schema
+        else:
+            self.llmconfig['tools'] += tools_schema
 
-    async def initiate(self, instruction, system=None, context=None, name=None, invoke=True, out=True, **kwargs) -> Any:
+    async def initiate(self, instruction, system=None, context=None,
+                       name=None, invoke=True, out=True, **kwargs) -> Any:
+        """
+        Initiates a conversation with an instruction and possibly additional context.
+
+        Args:
+            instruction (str): The initial instruction for the conversation.
+            system (str, optional): The name of the system to be used. If None, defaults to current system.
+            context (str, optional): Additional context for the conversation. Defaults to None.
+            name (str, optional): The name associated with the conversation. Defaults to None.
+            invoke (bool): Indicates whether to invoke tools. Defaults to True.
+            out (bool): Determines whether to return the latest response content. Defaults to True.
+            **kwargs: Additional keyword arguments for language model configuration.
+
+        Returns:
+            The output of the conversation if out is True, otherwise None.
+        """
         config = {**self.llmconfig, **kwargs}
         system = system or self.system
         self.conversation.initiate_conversation(system=system, instruction=instruction, context=context, name=name)
-        await self._call_chatcompletion(**config)
+        await self.call_chatcompletion(**config)
 
         return await self._output(invoke, out)
 
-    async def followup(self, instruction, system=None, context=None, out=True, name=None, invoke=True, **kwargs) -> Any:
+    async def followup(self, instruction, system=None, context=None,
+                       out=True, name=None, invoke=True, **kwargs) -> Any:
+        """
+        Continues the conversation with a follow-up instruction.
+
+        Args:
+            instruction (str): The follow-up instruction for the conversation.
+            system (str, optional): The name of the system to be used. If None, defaults to current system.
+            context (str, optional): Additional context for the conversation. Defaults to None.
+            out (bool): Determines whether to return the latest response content. Defaults to True.
+            name (str, optional): The name associated with the conversation. Defaults to None.
+            invoke (bool): Indicates whether to invoke tools. Defaults to True.
+            **kwargs: Additional keyword arguments for language model configuration.
+
+        Returns:
+            The output of the conversation if out is True, otherwise None.
+        """
         if system:
             self.conversation.change_system(system)
         self.conversation.add_messages(instruction=instruction, context=context, name=name)
         config = {**self.llmconfig, **kwargs}
-        await self._call_chatcompletion(**config)
+        await self.call_chatcompletion(**config)
 
         return await self._output(invoke, out)
 
     async def auto_followup(self, instruct, num=3, **kwargs):
+        """
+        Automatically generates follow-up messages based on whether the last response invoked a tool.
+
+        Args:
+            instruct (str): The instruction to pass for follow-up.
+            num (int): The number of follow-ups to attempt. Defaults to 3.
+            **kwargs: Additional keyword arguments for the follow-up process.
+        """
         cont_ = True
         while num > 0 and cont_ is True:
             await self.followup(instruct, tool_choice="auto", **kwargs)
@@ -91,22 +227,33 @@ class Session:
             await self.followup(instruct, **kwargs)
 
     def messages_to_csv(self, dir=None, filename="messages.csv", **kwargs):
-        dir = dir or self._logger.dir
+        """
+        Exports the conversation messages to a CSV file.
+
+        Args:
+            dir (str, optional): The directory where the CSV should be saved. Defaults to the logger's directory.
+            filename (str): The name of the CSV file. Defaults to "messages.csv".
+            **kwargs: Additional keyword arguments passed to the CSV writing function.
+
+        Raises:
+            ValueError: If no directory is specified.
+        """
+        dir = dir or self.logger_.dir
         if dir is None:
             raise ValueError("No directory specified.")
         self.conversation.msg.to_csv(dir=dir, filename=filename, **kwargs)
 
     def log_to_csv(self, dir=None, filename="llmlog.csv", **kwargs):
-        dir = dir or self._logger.dir
+        dir = dir or self.logger_.dir
         if dir is None:
             raise ValueError("No directory specified.")
-        self._logger.to_csv(dir=dir, filename=filename, **kwargs)
+        self.logger_.to_csv(dir=dir, filename=filename, **kwargs)
 
-    async def _call_chatcompletion(self, schema=oai_schema, **kwargs):
+    async def call_chatcompletion(self, schema=oai_schema['chat'], **kwargs):
         payload = ChatCompletion.create_payload(messages=self.conversation.messages, schema=schema, llmconfig=self.llmconfig,**kwargs)
         completion = await self.service.serve(payload=payload)
         if "choices" in completion:
-            self._logger({"input":payload, "output": completion})
+            self.logger_({"input":payload, "output": completion})
             self.conversation.add_messages(response=completion['choices'][0])
             self.conversation.responses.append(self.conversation.messages[-1])
             self.conversation.response_counts += 1
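
The refactor above renames the private _call_chatcompletion/_logger/_toolmanager members to the public call_chatcompletion/logger_/tool_manager and makes register_tools feed tool schemas into the llmconfig. A minimal usage sketch of the new surface, assuming an OpenAI key is supplied via .env (the module calls load_dotenv()); the prompts and log directory are illustrative placeholders:

import asyncio
from lionagi.core.sessions import Session

async def main():
    session = Session(system="You are a helpful assistant.", dir="logs/")
    # First turn: seeds the conversation and calls the chat-completion service.
    print(await session.initiate("Explain what a Python wheel is."))
    # Follow-up turns reuse the accumulated message history.
    print(await session.followup("Now compare it with a source distribution."))
    # Export the exchanged messages; defaults to the dir given at construction.
    session.messages_to_csv()

asyncio.run(main())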
lionagi/datastore/__init__.py CHANGED
@@ -0,0 +1 @@
+# TODO
lionagi/loader/__init__.py CHANGED
@@ -1,7 +1,12 @@
-from .load_utils import dir_to_files, dir_to_path, file_to_chunks
+from .reader import load, ReaderType, text_reader
+from .chunker import chunk, datanodes_convert, ChunkerType, text_chunker
 
 __all__ = [
-    "dir_to_files",
-    "dir_to_path",
-    "file_to_chunks"
+    'load',
+    'chunk',
+    'datanodes_convert',
+    'text_reader',
+    'text_chunker',
+    'ReaderType',
+    'ChunkerType'
 ]
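
For downstream code, the practical effect is a new public import surface: the old dir_to_* helpers (removed along with lionagi/loader/load_utils.py) give way to load and chunk:

# 0.0.112
# from lionagi.loader import dir_to_files, dir_to_path, file_to_chunks

# 0.0.113
from lionagi.loader import load, chunk, ReaderType, ChunkerType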
lionagi/loader/chunker.py CHANGED
@@ -0,0 +1,157 @@
+from enum import Enum
+from typing import Union, Callable
+
+from ..bridge.langchain import langchain_text_splitter, from_langchain
+from ..bridge.llama_index import llama_index_node_parser, from_llama_index
+from ..schema.base_schema import DataNode
+from ..utils import lcall, file_to_chunks
+
+# define an enum to represent different types of chunkers
+class ChunkerType(str, Enum):
+    PLAIN = 'plain'                # default
+    LANGCHAIN = 'langchain'        # using langchain functions
+    LLAMAINDEX = 'llama_index'     # using llamaindex functions
+    SELFDEFINED = 'self_defined'   # create custom functions
+
+# Function to convert documents to a specific format based on the chunker type
+def datanodes_convert(documents, chunker_type):
+    """
+    Converts a lionagi DataNode documents to a specific format based on the chunker type.
+
+    Args:
+        documents (List[DataNode]): A list of DataNode instances to be converted.
+
+        chunker_type (ChunkerType): The chunker type to determine the conversion format.
+
+    Returns:
+        List[DataNode]: The list of converted DataNode instances.
+    """
+    for i in range(len(documents)):
+        if type(documents[i]) == DataNode:
+            if chunker_type == ChunkerType.LLAMAINDEX:
+                documents[i] = documents[i].to_llama_index()
+            elif chunker_type == ChunkerType.LANGCHAIN:
+                documents[i] = documents[i].to_langchain()
+    return documents
+
+# Function to chunk text documents
+def text_chunker(documents, args, kwargs):
+    """
+    Chunks text documents into smaller pieces.
+
+    Args:
+        documents (List[DataNode]): A list of DataNode instances to be chunked.
+        args (List[Any]): Positional arguments to be passed to the chunking function.
+        kwargs (dict): Keyword arguments to be passed to the chunking function.
+
+    Returns:
+        List[DataNode]: A list of chunked DataNode instances.
+    """
+    def chunk_node(node):
+        chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
+        lcall(chunks, lambda chunk: chunk.pop('node_id'))
+        chunk_nodes = lcall(chunks, lambda x: DataNode(**x))
+        return chunk_nodes
+
+    nodes = []
+    for doc in documents:
+        nodes += chunk_node(doc)
+    return nodes
+
+
+def _datanode_parser(nodes, parser):
+    """
+    Parses raw data into DataNode instances using the provided parser function.
+
+    Args:
+        nodes (List[Any]): A list of raw data to be parsed.
+        parser (Callable): A function that parses raw data into DataNode instances.
+
+    Returns:
+        List[DataNode]: A list of parsed DataNode instances.
+
+    Raises:
+        ValueError: If the parser function fails.
+    """
+    try:
+        nodes = parser(nodes)
+    except Exception as e:
+        raise ValueError(f'DataNode parser {parser} failed. Error:{e}')
+    return nodes
+
+
+def chunk(documents,
+          chunker,
+          chunker_type=ChunkerType.PLAIN,
+          chunker_args=[],
+          chunker_kwargs={},
+          chunking_kwargs={},
+          documents_convert_func=None,
+          to_datanode: Union[bool, Callable] = True):
+    """
+    Chunks documents using the specified chunker and chunker type.
+
+    Args:
+        documents (List[Any]): A list of documents to be chunked.
+        chunker (Callable): The chunking function to be used.
+        chunker_type (ChunkerType): The type of the chunker. Defaults to ChunkerType.PLAIN.
+        chunker_args (List[Any]): Positional arguments for the chunker function. Defaults to an empty list.
+        chunker_kwargs (dict): Keyword arguments for the chunker function. Defaults to an empty dict.
+        chunking_kwargs (dict): Additional keyword arguments for the chunking process. Defaults to an empty dict.
+        documents_convert_func (Callable): A function to convert documents to a specific format. Defaults to None.
+        to_datanode (Union[bool, Callable]): Determines whether to convert the result into DataNode instances, or
+            a callable to convert the result. Defaults to True.
+
+    Returns:
+        List[DataNode]: A list of chunked DataNode instances after applying the chunker.
+
+    Raises:
+        ValueError: If the chunker fails or an unsupported chunker type is provided.
+    """
+    if chunker_type == ChunkerType.PLAIN:
+        try:
+            if chunker == 'text_chunker':
+                chunker = text_chunker
+            nodes = chunker(documents, chunker_args, chunker_kwargs)
+            return nodes
+        except Exception as e:
+            raise ValueError(f'Reader {chunker} is currently not supported. Error: {e}')
+    if chunker_type == ChunkerType.LANGCHAIN:
+        if documents_convert_func:
+            documents = documents_convert_func(documents, 'langchain')
+        nodes = langchain_text_splitter(documents, chunker, chunker_args, chunker_kwargs)
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            if isinstance(documents, str):
+                nodes = lcall(nodes, lambda x: DataNode(content=x))
+            else:
+                nodes = lcall(nodes, from_langchain)
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    elif chunker_type == ChunkerType.LLAMAINDEX:
+        if documents_convert_func:
+            documents = documents_convert_func(documents, 'llama_index')
+        nodes = llama_index_node_parser(documents, chunker, chunker_args, chunker_kwargs, chunking_kwargs)
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            nodes = lcall(nodes, from_llama_index)
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    elif chunker_type == ChunkerType.SELFDEFINED:
+        try:
+            splitter = chunker(*chunker_args, **chunker_kwargs)
+            nodes = splitter.split(documents, **chunking_kwargs)
+        except Exception as e:
+            raise ValueError(f'Self defined chunker {chunker} is not valid. Error: {e}')
+
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            raise ValueError(f'Please define a valid parser to DataNode.')
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    else:
+        raise ValueError(f'{chunker_type} is not supported. Please choose from {list(ChunkerType)}')
+
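
To illustrate the PLAIN branch above: passing the string 'text_chunker' resolves to text_chunker, which pushes each node's dict through file_to_chunks and rewraps the pieces as DataNode instances. A hedged sketch; the chunk_size and overlap keyword names are assumptions about file_to_chunks (now in lionagi/utils/load_utils.py), not confirmed by this diff:

from lionagi.loader import chunk, ChunkerType
from lionagi.schema.base_schema import DataNode

docs = [DataNode(content="some long text " * 200)]
nodes = chunk(
    docs,
    chunker='text_chunker',          # resolved to text_chunker internally
    chunker_type=ChunkerType.PLAIN,
    chunker_kwargs={'chunk_size': 512, 'overlap': 0.1},  # assumed kwargs
)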
lionagi/loader/reader.py CHANGED
@@ -0,0 +1,124 @@
+from enum import Enum
+from typing import Union, Callable
+
+from lionagi.bridge.langchain import langchain_loader, from_langchain
+from lionagi.bridge.llama_index import llama_index_reader, from_llama_index
+from lionagi.utils.call_util import lcall
+from lionagi.utils.load_utils import dir_to_nodes
+
+
+class ReaderType(str, Enum):
+    PLAIN = 'PLAIN'
+    LANGCHAIN = 'langchain'
+    LLAMAINDEX = 'llama_index'
+    SELFDEFINED = 'self_defined'
+
+
+def _datanode_parser(nodes, parser):
+    """
+    Parses a list of nodes using the given parser function.
+
+    Args:
+        nodes (List[Any]): The list of nodes to be parsed.
+
+        parser (Callable): The parser function to transform nodes into DataNode instances.
+
+    Returns:
+        List[Any]: A list of parsed nodes.
+
+    Raises:
+        ValueError: If the parser function fails.
+    """
+    try:
+        nodes = parser(nodes)
+    except Exception as e:
+        raise ValueError(f'DataNode parser {parser} failed. Error:{e}')
+    return nodes
+
+
+def text_reader(args, kwargs):
+    """
+    Reads text files from a directory and converts them to DataNode instances.
+
+    Args:
+        args (List[Any]): Positional arguments for the dir_to_nodes function.
+
+        kwargs (dict): Keyword arguments for the dir_to_nodes function.
+
+    Returns:
+        List[Any]: A list of DataNode instances.
+    """
+    return dir_to_nodes(*args, **kwargs)
+
+
+def load(reader: Union[str, Callable],
+         reader_type=ReaderType.PLAIN,
+         reader_args=[],
+         reader_kwargs={},
+         load_args=[],
+         load_kwargs={},
+         to_datanode: Union[bool, Callable] = True):
+    """
+    Loads documents using the specified reader and reader type.
+
+    Args:
+        reader (Union[str, Callable]): The reader function or its name as a string.
+
+        reader_type (ReaderType): The type of the reader. Defaults to ReaderType.PLAIN.
+
+        reader_args (List[Any]): Positional arguments for the reader function. Defaults to an empty list.
+
+        reader_kwargs (dict): Keyword arguments for the reader function. Defaults to an empty dict.
+
+        load_args (List[Any]): Positional arguments for the loader function. Defaults to an empty list.
+
+        load_kwargs (dict): Keyword arguments for the loader function. Defaults to an empty dict.
+
+        to_datanode (Union[bool, Callable]): Determines whether to convert the result into DataNode instances, or
+            a callable to convert the result. Defaults to True.
+
+    Returns:
+        List[Any]: A list of loaded and potentially parsed documents.
+
+    Raises:
+        ValueError: If the reader fails or an unsupported reader type is provided.
+    """
+    if reader_type == ReaderType.PLAIN:
+        try:
+            if reader == 'text_reader':
+                reader = text_reader
+            nodes = reader(reader_args, reader_kwargs)
+            return nodes
+        except Exception as e:
+            raise ValueError(f'Reader {reader} is currently not supported. Error: {e}')
+    if reader_type == ReaderType.LANGCHAIN:
+        nodes = langchain_loader(reader, reader_args, reader_kwargs)
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            nodes = lcall(nodes, from_langchain)
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    elif reader_type == ReaderType.LLAMAINDEX:
+        nodes = llama_index_reader(reader, reader_args, reader_kwargs, load_args, load_kwargs)
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            nodes = lcall(nodes, from_llama_index)
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    elif reader_type == ReaderType.SELFDEFINED:
+        try:
+            loader = reader(*reader_args, **reader_kwargs)
+            nodes = loader.load(*load_args, **load_kwargs)
+        except Exception as e:
+            raise ValueError(f'Self defined reader {reader} is not valid. Error: {e}')
+
+        if isinstance(to_datanode, bool) and to_datanode is True:
+            raise ValueError(f'Please define a valid parser to DataNode.')
+        elif isinstance(to_datanode, Callable):
+            nodes = _datanode_parser(nodes, to_datanode)
+        return nodes
+
+    else:
+        raise ValueError(f'{reader_type} is not supported. Please choose from {list(ReaderType)}')
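
Tying the branches together, a sketch of the PLAIN path, where the string 'text_reader' resolves to text_reader and ultimately dir_to_nodes; the directory and extension arguments shown here are assumptions about dir_to_nodes' signature, not confirmed by this diff:

from lionagi.loader import load, ReaderType

nodes = load(
    reader='text_reader',            # resolved to text_reader internally
    reader_type=ReaderType.PLAIN,
    reader_args=['data/', '.txt'],   # forwarded as dir_to_nodes(*reader_args)
)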
lionagi/objs/__init__.py CHANGED
@@ -0,0 +1,7 @@
+# # from .messenger import Messenger
+# from .tool_registry import ToolRegistry
+
+# __all__ = [
+#     'Messenger',
+#     'ToolRegistry'
+# ]