lionagi 0.0.111__py3-none-any.whl → 0.0.113__py3-none-any.whl
- lionagi/__init__.py +7 -2
- lionagi/bridge/__init__.py +7 -0
- lionagi/bridge/langchain.py +131 -0
- lionagi/bridge/llama_index.py +157 -0
- lionagi/configs/__init__.py +7 -0
- lionagi/configs/oai_configs.py +49 -0
- lionagi/configs/openrouter_config.py +49 -0
- lionagi/core/__init__.py +15 -0
- lionagi/{session/conversation.py → core/conversations.py} +10 -17
- lionagi/core/flows.py +1 -0
- lionagi/core/instruction_sets.py +1 -0
- lionagi/{session/message.py → core/messages.py} +5 -5
- lionagi/core/sessions.py +262 -0
- lionagi/datastore/__init__.py +1 -0
- lionagi/datastore/chroma.py +1 -0
- lionagi/datastore/deeplake.py +1 -0
- lionagi/datastore/elasticsearch.py +1 -0
- lionagi/datastore/lantern.py +1 -0
- lionagi/datastore/pinecone.py +1 -0
- lionagi/datastore/postgres.py +1 -0
- lionagi/datastore/qdrant.py +1 -0
- lionagi/loader/__init__.py +12 -0
- lionagi/loader/chunker.py +157 -0
- lionagi/loader/reader.py +124 -0
- lionagi/objs/__init__.py +7 -0
- lionagi/objs/messenger.py +163 -0
- lionagi/objs/tool_registry.py +247 -0
- lionagi/schema/__init__.py +11 -0
- lionagi/schema/base_condition.py +1 -0
- lionagi/schema/base_schema.py +239 -0
- lionagi/schema/base_tool.py +9 -0
- lionagi/schema/data_logger.py +94 -0
- lionagi/services/__init__.py +14 -0
- lionagi/services/anthropic.py +1 -0
- lionagi/services/anyscale.py +0 -0
- lionagi/services/azure.py +1 -0
- lionagi/{api/oai_service.py → services/base_api_service.py} +74 -148
- lionagi/services/bedrock.py +0 -0
- lionagi/services/chatcompletion.py +48 -0
- lionagi/services/everlyai.py +0 -0
- lionagi/services/gemini.py +0 -0
- lionagi/services/gpt4all.py +0 -0
- lionagi/services/huggingface.py +0 -0
- lionagi/services/litellm.py +1 -0
- lionagi/services/localai.py +0 -0
- lionagi/services/mistralai.py +0 -0
- lionagi/services/oai.py +34 -0
- lionagi/services/ollama.py +1 -0
- lionagi/services/openllm.py +0 -0
- lionagi/services/openrouter.py +32 -0
- lionagi/services/perplexity.py +0 -0
- lionagi/services/predibase.py +0 -0
- lionagi/services/rungpt.py +0 -0
- lionagi/services/service_objs.py +282 -0
- lionagi/services/vllm.py +0 -0
- lionagi/services/xinference.py +0 -0
- lionagi/structure/__init__.py +7 -0
- lionagi/structure/relationship.py +128 -0
- lionagi/structure/structure.py +160 -0
- lionagi/tests/__init__.py +0 -0
- lionagi/tests/test_flatten_util.py +426 -0
- lionagi/tools/__init__.py +0 -0
- lionagi/tools/coder.py +1 -0
- lionagi/tools/planner.py +1 -0
- lionagi/tools/prompter.py +1 -0
- lionagi/tools/sandbox.py +1 -0
- lionagi/tools/scorer.py +1 -0
- lionagi/tools/summarizer.py +1 -0
- lionagi/tools/validator.py +1 -0
- lionagi/utils/__init__.py +46 -8
- lionagi/utils/api_util.py +63 -416
- lionagi/utils/call_util.py +347 -0
- lionagi/utils/flat_util.py +540 -0
- lionagi/utils/io_util.py +102 -0
- lionagi/utils/load_utils.py +190 -0
- lionagi/utils/sys_util.py +85 -660
- lionagi/utils/tool_util.py +82 -199
- lionagi/utils/type_util.py +81 -0
- lionagi/version.py +1 -1
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/METADATA +44 -15
- lionagi-0.0.113.dist-info/RECORD +84 -0
- lionagi/api/__init__.py +0 -8
- lionagi/api/oai_config.py +0 -16
- lionagi/session/__init__.py +0 -7
- lionagi/session/session.py +0 -380
- lionagi/utils/doc_util.py +0 -331
- lionagi/utils/log_util.py +0 -86
- lionagi-0.0.111.dist-info/RECORD +0 -20
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/LICENSE +0 -0
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/WHEEL +0 -0
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/top_level.txt +0 -0
lionagi/core/sessions.py
ADDED
@@ -0,0 +1,262 @@
import json
from typing import Any
from dotenv import load_dotenv

from ..schema import DataLogger
from ..utils import lcall, alcall
from ..services import OpenAIService, ChatCompletion
from ..core.conversations import Conversation
from ..objs.tool_registry import ToolManager
from ..configs.oai_configs import oai_schema

load_dotenv()
OAIService = OpenAIService()


class Session:
    """
    The Session class is responsible for managing a conversation session with a given system,
    handling the logging of data, and invoking tools as part of the conversation.

    Attributes:
        conversation (Conversation): An object to manage the conversation flow and history.
        system (str): The name of the system with which the conversation is happening.
        llmconfig (dict): Configuration for the language model.
        logger_ (DataLogger): An object for logging conversation data.
        service (OpenAIService): A service object for interacting with OpenAI APIs.
        tool_manager (ToolManager): An object to manage the registration and invocation of tools.
    """

    def __init__(
        self, system, dir=None, llmconfig=oai_schema['chat']['config'],
        service=OAIService
    ):
        """
        Initializes the Session object.

        Args:
            system (str): The name of the system with which the session is initiated.
            dir (str, optional): The directory for saving logs. Defaults to None.
            llmconfig (dict): Configuration for the language model. Defaults to the chat config schema.
            service (OpenAIService): The service object for API interactions. Defaults to an instance of OpenAIService.
        """
        self.conversation = Conversation()
        self.system = system
        self.llmconfig = llmconfig
        self.logger_ = DataLogger(dir=dir)
        self.service = service
        self.tool_manager = ToolManager()

    def set_dir(self, dir):
        """
        Sets the directory where data logs should be saved.

        Args:
            dir (str): The path to the directory for saving logs.
        """
        self.logger_.dir = dir

    def set_system(self, system):
        """
        Changes the system associated with the conversation.

        Args:
            system (str): The name of the new system for the conversation.
        """
        self.conversation.change_system(system)

    def set_llmconfig(self, llmconfig):
        """
        Updates the language model configuration.

        Args:
            llmconfig (dict): The new configuration for the language model.
        """
        self.llmconfig = llmconfig

    def set_service(self, service):
        """
        Sets the service object used for API interactions.

        Args:
            service (OpenAIService): The new service object.
        """
        self.service = service

    async def _output(self, invoke=True, out=True):
        """
        Processes the output from the conversation, possibly invoking tools and returning the latest response.

        Args:
            invoke (bool): Indicates whether to invoke tools based on the latest response. Defaults to True.
            out (bool): Determines whether to return the latest response content. Defaults to True.

        Returns:
            The content of the latest response if out is True; otherwise None.
        """
        if invoke:
            try:
                # func, args = self.tool_manager._get_function_call(self.conversation.responses[-1]['content'])
                # outs = await self.tool_manager.invoke(func, args)
                # self.conversation.add_messages(response=outs)

                tool_uses = json.loads(self.conversation.responses[-1]['content'])
                if 'function_list' in tool_uses.keys():
                    func_calls = lcall(tool_uses['function_list'], self.tool_manager._get_function_call)
                else:
                    func_calls = lcall(tool_uses['tool_uses'], self.tool_manager._get_function_call)

                outs = await alcall(func_calls, self.tool_manager.invoke)
                for out_, f in zip(outs, func_calls):  # 'out_' so the 'out' flag below is not shadowed
                    response = {"function": f[0], "arguments": f[1], "output": out_}
                    self.conversation.add_messages(response=response)
            except:
                pass
        if out:
            return self.conversation.responses[-1]['content']

    def _is_invoked(self):
        """
        Checks if the last message in the conversation indicates a function call result.

        Returns:
            bool: True if the last message is a function call result, False otherwise.
        """
        msg = self.conversation.messages[-1]
        try:
            if "function call result" in json.loads(msg['content']).keys():
                return True
        except:
            return False
        return False

    def register_tools(self, tools, update=False, new=False, prefix=None, postfix=None):
        """
        Registers a list of tools with the tool manager and updates the language model configuration.

        Args:
            tools: A single tool or a list of tools to be registered.
            update (bool): If True, update existing tools. Defaults to False.
            new (bool): If True, add as new tools. Defaults to False.
            prefix: A prefix added to all tool names. Defaults to None.
            postfix: A postfix added to all tool names. Defaults to None.
        """
        if not isinstance(tools, list):
            tools = [tools]
        self.tool_manager.register_tools(tools=tools, update=update, new=new, prefix=prefix, postfix=postfix)
        tools_schema = lcall(tools, lambda tool: tool.to_dict()['schema_'])
        if self.llmconfig['tools'] is None:
            self.llmconfig['tools'] = tools_schema
        else:
            self.llmconfig['tools'] += tools_schema

    async def initiate(self, instruction, system=None, context=None,
                       name=None, invoke=True, out=True, **kwargs) -> Any:
        """
        Initiates a conversation with an instruction and possibly additional context.

        Args:
            instruction (str): The initial instruction for the conversation.
            system (str, optional): The name of the system to be used. If None, defaults to the current system.
            context (str, optional): Additional context for the conversation. Defaults to None.
            name (str, optional): The name associated with the conversation. Defaults to None.
            invoke (bool): Indicates whether to invoke tools. Defaults to True.
            out (bool): Determines whether to return the latest response content. Defaults to True.
            **kwargs: Additional keyword arguments for the language model configuration.

        Returns:
            The output of the conversation if out is True, otherwise None.
        """
        config = {**self.llmconfig, **kwargs}
        system = system or self.system
        self.conversation.initiate_conversation(system=system, instruction=instruction, context=context, name=name)
        await self.call_chatcompletion(**config)

        return await self._output(invoke, out)

    async def followup(self, instruction, system=None, context=None,
                       out=True, name=None, invoke=True, **kwargs) -> Any:
        """
        Continues the conversation with a follow-up instruction.

        Args:
            instruction (str): The follow-up instruction for the conversation.
            system (str, optional): The name of the system to be used. If None, defaults to the current system.
            context (str, optional): Additional context for the conversation. Defaults to None.
            out (bool): Determines whether to return the latest response content. Defaults to True.
            name (str, optional): The name associated with the conversation. Defaults to None.
            invoke (bool): Indicates whether to invoke tools. Defaults to True.
            **kwargs: Additional keyword arguments for the language model configuration.

        Returns:
            The output of the conversation if out is True, otherwise None.
        """
        if system:
            self.conversation.change_system(system)
        self.conversation.add_messages(instruction=instruction, context=context, name=name)
        config = {**self.llmconfig, **kwargs}
        await self.call_chatcompletion(**config)

        return await self._output(invoke, out)

    async def auto_followup(self, instruct, num=3, **kwargs):
        """
        Automatically issues follow-up messages for as long as the last response invoked a tool.

        Args:
            instruct (str): The instruction to pass to each follow-up.
            num (int): The maximum number of follow-ups to attempt. Defaults to 3.
            **kwargs: Additional keyword arguments for the follow-up process.
        """
        cont_ = True
        while num > 0 and cont_ is True:
            await self.followup(instruct, tool_choice="auto", **kwargs)
            num -= 1
            cont_ = True if self._is_invoked() else False
        if num == 0:
            await self.followup(instruct, **kwargs)

    def messages_to_csv(self, dir=None, filename="messages.csv", **kwargs):
        """
        Exports the conversation messages to a CSV file.

        Args:
            dir (str, optional): The directory where the CSV should be saved. Defaults to the logger's directory.
            filename (str): The name of the CSV file. Defaults to "messages.csv".
            **kwargs: Additional keyword arguments passed to the CSV writing function.

        Raises:
            ValueError: If no directory is specified.
        """
        dir = dir or self.logger_.dir
        if dir is None:
            raise ValueError("No directory specified.")
        self.conversation.msg.to_csv(dir=dir, filename=filename, **kwargs)

    def log_to_csv(self, dir=None, filename="llmlog.csv", **kwargs):
        dir = dir or self.logger_.dir
        if dir is None:
            raise ValueError("No directory specified.")
        self.logger_.to_csv(dir=dir, filename=filename, **kwargs)

    async def call_chatcompletion(self, schema=oai_schema['chat'], **kwargs):
        payload = ChatCompletion.create_payload(messages=self.conversation.messages, schema=schema, llmconfig=self.llmconfig, **kwargs)
        completion = await self.service.serve(payload=payload)
        if "choices" in completion:
            self.logger_({"input": payload, "output": completion})
            self.conversation.add_messages(response=completion['choices'][0])
            self.conversation.responses.append(self.conversation.messages[-1])
            self.conversation.response_counts += 1
            self.service.status_tracker.num_tasks_succeeded += 1
        else:
            self.service.status_tracker.num_tasks_failed += 1
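For orientation, a minimal usage sketch of the new Session API as it appears in this diff. The system prompt, instructions, and log directory are illustrative, and it assumes an OPENAI_API_KEY is available in the environment for the module-level OAIService default:

import asyncio
from lionagi.core.sessions import Session

async def main():
    # the system prompt seeds the conversation; dir enables the CSV exports below
    session = Session("you are a helpful assistant", dir="logs/")

    # start the conversation and print the first assistant response
    print(await session.initiate("Briefly explain what a unified diff is."))

    # continue the same conversation; extra kwargs are merged into llmconfig
    print(await session.followup("Now give a one-sentence summary.", temperature=0.2))

    # persist messages and raw API logs to dir
    session.messages_to_csv()
    session.log_to_csv()

asyncio.run(main())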
Eight single-line placeholder files follow, matching the +1/-0 entries between core/sessions.py and loader/__init__.py in the file list above; each was added with the same hunk:

@@ -0,0 +1 @@
# TODO
lionagi/loader/__init__.py
ADDED
@@ -0,0 +1,12 @@
from .reader import load, ReaderType, text_reader
from .chunker import chunk, datanodes_convert, ChunkerType, text_chunker

__all__ = [
    'load',
    'chunk',
    'datanodes_convert',
    'text_reader',
    'text_chunker',
    'ReaderType',
    'ChunkerType'
]
lionagi/loader/chunker.py
ADDED
@@ -0,0 +1,157 @@
from enum import Enum
from typing import Union, Callable

from ..bridge.langchain import langchain_text_splitter, from_langchain
from ..bridge.llama_index import llama_index_node_parser, from_llama_index
from ..schema.base_schema import DataNode
from ..utils import lcall, file_to_chunks

# define an enum to represent different types of chunkers
class ChunkerType(str, Enum):
    PLAIN = 'plain'                 # default
    LANGCHAIN = 'langchain'         # using langchain functions
    LLAMAINDEX = 'llama_index'      # using llamaindex functions
    SELFDEFINED = 'self_defined'    # create custom functions

# Function to convert documents to a specific format based on the chunker type
def datanodes_convert(documents, chunker_type):
    """
    Converts a list of lionagi DataNode documents to the format expected by the given chunker type.

    Args:
        documents (List[DataNode]): A list of DataNode instances to be converted.
        chunker_type (ChunkerType): The chunker type that determines the conversion format.

    Returns:
        List[DataNode]: The list of converted DataNode instances.
    """
    for i in range(len(documents)):
        if type(documents[i]) == DataNode:
            if chunker_type == ChunkerType.LLAMAINDEX:
                documents[i] = documents[i].to_llama_index()
            elif chunker_type == ChunkerType.LANGCHAIN:
                documents[i] = documents[i].to_langchain()
    return documents

# Function to chunk text documents
def text_chunker(documents, args, kwargs):
    """
    Chunks text documents into smaller pieces.

    Args:
        documents (List[DataNode]): A list of DataNode instances to be chunked.
        args (List[Any]): Positional arguments to be passed to the chunking function.
        kwargs (dict): Keyword arguments to be passed to the chunking function.

    Returns:
        List[DataNode]: A list of chunked DataNode instances.
    """
    def chunk_node(node):
        chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
        lcall(chunks, lambda chunk: chunk.pop('node_id'))
        chunk_nodes = lcall(chunks, lambda x: DataNode(**x))
        return chunk_nodes

    nodes = []
    for doc in documents:
        nodes += chunk_node(doc)
    return nodes


def _datanode_parser(nodes, parser):
    """
    Parses raw data into DataNode instances using the provided parser function.

    Args:
        nodes (List[Any]): A list of raw data to be parsed.
        parser (Callable): A function that parses raw data into DataNode instances.

    Returns:
        List[DataNode]: A list of parsed DataNode instances.

    Raises:
        ValueError: If the parser function fails.
    """
    try:
        nodes = parser(nodes)
    except Exception as e:
        raise ValueError(f'DataNode parser {parser} failed. Error: {e}')
    return nodes


def chunk(documents,
          chunker,
          chunker_type=ChunkerType.PLAIN,
          chunker_args=[],
          chunker_kwargs={},
          chunking_kwargs={},
          documents_convert_func=None,
          to_datanode: Union[bool, Callable] = True):
    """
    Chunks documents using the specified chunker and chunker type.

    Args:
        documents (List[Any]): A list of documents to be chunked.
        chunker (Callable): The chunking function to be used.
        chunker_type (ChunkerType): The type of the chunker. Defaults to ChunkerType.PLAIN.
        chunker_args (List[Any]): Positional arguments for the chunker function. Defaults to an empty list.
        chunker_kwargs (dict): Keyword arguments for the chunker function. Defaults to an empty dict.
        chunking_kwargs (dict): Additional keyword arguments for the chunking process. Defaults to an empty dict.
        documents_convert_func (Callable): A function to convert documents to a specific format. Defaults to None.
        to_datanode (Union[bool, Callable]): Determines whether to convert the result into DataNode instances, or
            a callable to convert the result. Defaults to True.

    Returns:
        List[DataNode]: A list of chunked DataNode instances after applying the chunker.

    Raises:
        ValueError: If the chunker fails or an unsupported chunker type is provided.
    """
    if chunker_type == ChunkerType.PLAIN:
        try:
            if chunker == 'text_chunker':
                chunker = text_chunker
            nodes = chunker(documents, chunker_args, chunker_kwargs)
            return nodes
        except Exception as e:
            raise ValueError(f'Chunker {chunker} is currently not supported. Error: {e}')
    if chunker_type == ChunkerType.LANGCHAIN:
        if documents_convert_func:
            documents = documents_convert_func(documents, 'langchain')
        nodes = langchain_text_splitter(documents, chunker, chunker_args, chunker_kwargs)
        if isinstance(to_datanode, bool) and to_datanode is True:
            if isinstance(documents, str):
                nodes = lcall(nodes, lambda x: DataNode(content=x))
            else:
                nodes = lcall(nodes, from_langchain)
        elif isinstance(to_datanode, Callable):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    elif chunker_type == ChunkerType.LLAMAINDEX:
        if documents_convert_func:
            documents = documents_convert_func(documents, 'llama_index')
        nodes = llama_index_node_parser(documents, chunker, chunker_args, chunker_kwargs, chunking_kwargs)
        if isinstance(to_datanode, bool) and to_datanode is True:
            nodes = lcall(nodes, from_llama_index)
        elif isinstance(to_datanode, Callable):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    elif chunker_type == ChunkerType.SELFDEFINED:
        try:
            splitter = chunker(*chunker_args, **chunker_kwargs)
            nodes = splitter.split(documents, **chunking_kwargs)
        except Exception as e:
            raise ValueError(f'Self defined chunker {chunker} is not valid. Error: {e}')

        if isinstance(to_datanode, bool) and to_datanode is True:
            raise ValueError('Please define a valid parser to DataNode.')
        elif isinstance(to_datanode, Callable):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    else:
        raise ValueError(f'{chunker_type} is not supported. Please choose from {list(ChunkerType)}')
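A usage sketch for the plain chunking path above: text_chunker forwards chunker_kwargs to file_to_chunks from lionagi.utils, so the keyword names below (chunk_size, overlap) are assumptions about that helper's signature rather than documented parameters:

from lionagi.loader import chunk
from lionagi.schema.base_schema import DataNode

doc = DataNode(content="lorem ipsum " * 500)

# 'text_chunker' resolves to the plain chunker defined above;
# chunker_kwargs flow through to file_to_chunks (keyword names assumed)
nodes = chunk([doc], chunker='text_chunker',
              chunker_kwargs={'chunk_size': 512, 'overlap': 0.1})
print(len(nodes), nodes[0].content[:40])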
lionagi/loader/reader.py
ADDED
@@ -0,0 +1,124 @@
from enum import Enum
from typing import Union, Callable

from lionagi.bridge.langchain import langchain_loader, from_langchain
from lionagi.bridge.llama_index import llama_index_reader, from_llama_index
from lionagi.utils.call_util import lcall
from lionagi.utils.load_utils import dir_to_nodes


class ReaderType(str, Enum):
    PLAIN = 'PLAIN'
    LANGCHAIN = 'langchain'
    LLAMAINDEX = 'llama_index'
    SELFDEFINED = 'self_defined'


def _datanode_parser(nodes, parser):
    """
    Parses a list of nodes using the given parser function.

    Args:
        nodes (List[Any]): The list of nodes to be parsed.
        parser (Callable): The parser function to transform nodes into DataNode instances.

    Returns:
        List[Any]: A list of parsed nodes.

    Raises:
        ValueError: If the parser function fails.
    """
    try:
        nodes = parser(nodes)
    except Exception as e:
        raise ValueError(f'DataNode parser {parser} failed. Error: {e}')
    return nodes


def text_reader(args, kwargs):
    """
    Reads text files from a directory and converts them to DataNode instances.

    Args:
        args (List[Any]): Positional arguments for the dir_to_nodes function.
        kwargs (dict): Keyword arguments for the dir_to_nodes function.

    Returns:
        List[Any]: A list of DataNode instances.
    """
    return dir_to_nodes(*args, **kwargs)


def load(reader: Union[str, Callable],
         reader_type=ReaderType.PLAIN,
         reader_args=[],
         reader_kwargs={},
         load_args=[],
         load_kwargs={},
         to_datanode: Union[bool, Callable] = True):
    """
    Loads documents using the specified reader and reader type.

    Args:
        reader (Union[str, Callable]): The reader function or its name as a string.
        reader_type (ReaderType): The type of the reader. Defaults to ReaderType.PLAIN.
        reader_args (List[Any]): Positional arguments for the reader function. Defaults to an empty list.
        reader_kwargs (dict): Keyword arguments for the reader function. Defaults to an empty dict.
        load_args (List[Any]): Positional arguments for the loader function. Defaults to an empty list.
        load_kwargs (dict): Keyword arguments for the loader function. Defaults to an empty dict.
        to_datanode (Union[bool, Callable]): Determines whether to convert the result into DataNode instances, or
            a callable to convert the result. Defaults to True.

    Returns:
        List[Any]: A list of loaded and potentially parsed documents.

    Raises:
        ValueError: If the reader fails or an unsupported reader type is provided.
    """
    if reader_type == ReaderType.PLAIN:
        try:
            if reader == 'text_reader':
                reader = text_reader
            nodes = reader(reader_args, reader_kwargs)
            return nodes
        except Exception as e:
            raise ValueError(f'Reader {reader} is currently not supported. Error: {e}')
    if reader_type == ReaderType.LANGCHAIN:
        nodes = langchain_loader(reader, reader_args, reader_kwargs)
        if isinstance(to_datanode, bool) and to_datanode is True:
            nodes = lcall(nodes, from_langchain)
        elif isinstance(to_datanode, Callable):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    elif reader_type == ReaderType.LLAMAINDEX:
        nodes = llama_index_reader(reader, reader_args, reader_kwargs, load_args, load_kwargs)
        if isinstance(to_datanode, bool) and to_datanode is True:
            nodes = lcall(nodes, from_llama_index)
        elif isinstance(to_datanode, Callable):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    elif reader_type == ReaderType.SELFDEFINED:
        try:
            loader = reader(*reader_args, **reader_kwargs)
            nodes = loader.load(*load_args, **load_kwargs)
        except Exception as e:
            raise ValueError(f'Self defined reader {reader} is not valid. Error: {e}')

        if isinstance(to_datanode, bool) and to_datanode is True:
            raise ValueError('Please define a valid parser to DataNode.')
        elif isinstance(to_datanode, Callable):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    else:
        raise ValueError(f'{reader_type} is not supported. Please choose from {list(ReaderType)}')
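And a matching sketch for the plain reader path: load('text_reader', ...) calls text_reader(reader_args, reader_kwargs), which unpacks both into dir_to_nodes, so the directory and the ext keyword below are illustrative assumptions about dir_to_nodes rather than documented arguments:

from lionagi.loader import load

# plain path: text_reader(args, kwargs) -> dir_to_nodes(*args, **kwargs)
nodes = load('text_reader',
             reader_args=['data/'],          # directory to scan (illustrative)
             reader_kwargs={'ext': '.txt'})  # keyword name assumed for dir_to_nodes
print(len(nodes))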