lionagi 0.0.208__py3-none-any.whl → 0.0.210__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- lionagi/__init__.py +4 -6
- lionagi/api_service/base_endpoint.py +65 -0
- lionagi/api_service/base_rate_limiter.py +121 -0
- lionagi/api_service/base_service.py +146 -0
- lionagi/api_service/chat_completion.py +6 -0
- lionagi/api_service/embeddings.py +6 -0
- lionagi/api_service/payload_package.py +47 -0
- lionagi/api_service/status_tracker.py +29 -0
- lionagi/core/__init__.py +5 -9
- lionagi/core/branch.py +1191 -0
- lionagi/core/flow.py +423 -0
- lionagi/core/{instruction_set/instruction_set.py → instruction_set.py} +3 -3
- lionagi/core/session.py +872 -0
- lionagi/schema/__init__.py +5 -8
- lionagi/schema/base_schema.py +821 -0
- lionagi/{_services → services}/base_service.py +4 -4
- lionagi/{_services → services}/oai.py +4 -4
- lionagi/structures/graph.py +1 -1
- lionagi/structures/relationship.py +1 -1
- lionagi/structures/structure.py +1 -1
- lionagi/tools/tool_manager.py +0 -163
- lionagi/tools/tool_util.py +2 -1
- lionagi/utils/__init__.py +7 -14
- lionagi/utils/api_util.py +63 -2
- lionagi/utils/core_utils.py +338 -0
- lionagi/utils/sys_util.py +3 -3
- lionagi/version.py +1 -1
- {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/METADATA +28 -29
- lionagi-0.0.210.dist-info/RECORD +56 -0
- lionagi/_services/anthropic.py +0 -79
- lionagi/_services/anyscale.py +0 -0
- lionagi/_services/azure.py +0 -1
- lionagi/_services/bedrock.py +0 -0
- lionagi/_services/everlyai.py +0 -0
- lionagi/_services/gemini.py +0 -0
- lionagi/_services/gpt4all.py +0 -0
- lionagi/_services/huggingface.py +0 -0
- lionagi/_services/litellm.py +0 -33
- lionagi/_services/localai.py +0 -0
- lionagi/_services/openllm.py +0 -0
- lionagi/_services/openrouter.py +0 -44
- lionagi/_services/perplexity.py +0 -0
- lionagi/_services/predibase.py +0 -0
- lionagi/_services/rungpt.py +0 -0
- lionagi/_services/vllm.py +0 -0
- lionagi/_services/xinference.py +0 -0
- lionagi/agents/planner.py +0 -1
- lionagi/agents/prompter.py +0 -1
- lionagi/agents/scorer.py +0 -1
- lionagi/agents/summarizer.py +0 -1
- lionagi/agents/validator.py +0 -1
- lionagi/bridge/__init__.py +0 -22
- lionagi/bridge/langchain.py +0 -195
- lionagi/bridge/llama_index.py +0 -266
- lionagi/core/branch/__init__.py +0 -0
- lionagi/core/branch/branch.py +0 -841
- lionagi/core/branch/cluster.py +0 -1
- lionagi/core/branch/conversation.py +0 -787
- lionagi/core/core_util.py +0 -0
- lionagi/core/flow/__init__.py +0 -0
- lionagi/core/flow/flow.py +0 -19
- lionagi/core/flow/flow_util.py +0 -62
- lionagi/core/instruction_set/__init__.py +0 -0
- lionagi/core/messages/__init__.py +0 -0
- lionagi/core/sessions/__init__.py +0 -0
- lionagi/core/sessions/session.py +0 -504
- lionagi/datastores/__init__.py +0 -1
- lionagi/datastores/chroma.py +0 -1
- lionagi/datastores/deeplake.py +0 -1
- lionagi/datastores/elasticsearch.py +0 -1
- lionagi/datastores/lantern.py +0 -1
- lionagi/datastores/pinecone.py +0 -1
- lionagi/datastores/postgres.py +0 -1
- lionagi/datastores/qdrant.py +0 -1
- lionagi/loaders/__init__.py +0 -18
- lionagi/loaders/chunker.py +0 -166
- lionagi/loaders/load_util.py +0 -240
- lionagi/loaders/reader.py +0 -122
- lionagi/models/__init__.py +0 -0
- lionagi/models/base_model.py +0 -0
- lionagi/models/imodel.py +0 -53
- lionagi/schema/async_queue.py +0 -158
- lionagi/schema/base_condition.py +0 -1
- lionagi/schema/base_node.py +0 -422
- lionagi/schema/base_tool.py +0 -44
- lionagi/schema/data_logger.py +0 -126
- lionagi/schema/data_node.py +0 -88
- lionagi/schema/status_tracker.py +0 -37
- lionagi/tests/test_utils/test_encrypt_util.py +0 -323
- lionagi/utils/encrypt_util.py +0 -283
- lionagi/utils/url_util.py +0 -55
- lionagi-0.0.208.dist-info/RECORD +0 -106
- lionagi/{agents → api_service}/__init__.py +0 -0
- lionagi/core/{branch/branch_manager.py → branch_manager.py} +0 -0
- lionagi/core/{messages/messages.py → messages.py} +3 -3
- /lionagi/{_services → services}/__init__.py +0 -0
- /lionagi/{_services → services}/mistralai.py +0 -0
- /lionagi/{_services → services}/mlx_service.py +0 -0
- /lionagi/{_services → services}/ollama.py +0 -0
- /lionagi/{_services → services}/services.py +0 -0
- /lionagi/{_services → services}/transformers.py +0 -0
- {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/LICENSE +0 -0
- {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/WHEEL +0 -0
- {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/top_level.txt +0 -0
lionagi/loaders/chunker.py
DELETED
@@ -1,166 +0,0 @@
|
|
1
|
-
from typing import Union, Callable
|
2
|
-
|
3
|
-
from ..utils import lcall
|
4
|
-
from ..schema import DataNode
|
5
|
-
from ..bridge import langchain_text_splitter, from_langchain, llama_index_node_parser, from_llama_index
|
6
|
-
from .load_util import ChunkerType, file_to_chunks
|
7
|
-
|
8
|
-
|
9
|
-
def datanodes_convert(documents, chunker_type):
    """
    Convert the DataNode entries of a list, in place, for a third-party chunker.

    Args:
        documents: A list whose DataNode entries should be converted. Entries of
            any other type are left as they are.
        chunker_type: Selects the target format. ChunkerType.LLAMAINDEX uses
            ``to_llama_index()`` and ChunkerType.LANGCHAIN uses ``to_langchain()``;
            any other value leaves every entry unchanged.

    Returns:
        The same list object that was passed in, after in-place replacement.

    Example usage:
        >>> documents = [DataNode(content="Example content")]
        >>> converted = datanodes_convert(documents, ChunkerType.LANGCHAIN)
    """
    for index, document in enumerate(documents):
        # Exact type match: instances of DataNode subclasses are not converted.
        if type(document) != DataNode:
            continue
        if chunker_type == ChunkerType.LLAMAINDEX:
            documents[index] = document.to_llama_index()
        elif chunker_type == ChunkerType.LANGCHAIN:
            documents[index] = document.to_langchain()
    return documents
|
31
|
-
|
32
|
-
def text_chunker(documents, args, kwargs):
    """
    Split each document into chunks and wrap every chunk in a new DataNode.

    Args:
        documents: An iterable of nodes; each must provide ``to_dict()``.
        args: Positional arguments forwarded to ``file_to_chunks`` after the
            node's dict.
        kwargs: Keyword arguments forwarded to ``file_to_chunks``.

    Returns:
        A flat list with the chunk DataNodes of all documents, in document order.

    Example usage:
        >>> documents = [DataNode(content="Example content")]
        >>> args = []
        >>> kwargs = {"chunk_size": 100}
        >>> chunked_docs = text_chunker(documents, args, kwargs)
    """
    def _split_document(document):
        pieces = file_to_chunks(document.to_dict(), *args, **kwargs)
        # Remove the 'node_id' entry from every chunk dict before building the
        # new nodes from the remaining fields.
        lcall(pieces, lambda piece: piece.pop('node_id'))
        return lcall(pieces, lambda fields: DataNode(**fields))

    chunked = []
    for document in documents:
        chunked.extend(_split_document(document))
    return chunked
|
60
|
-
|
61
|
-
|
62
|
-
def _datanode_parser(nodes, parser):
|
63
|
-
"""
|
64
|
-
Parses raw data into DataNode instances using the provided parser function.
|
65
|
-
|
66
|
-
Args:
|
67
|
-
nodes: A list of raw data to be parsed.
|
68
|
-
parser: A function that parses raw data into DataNode instances.
|
69
|
-
|
70
|
-
Returns:
|
71
|
-
A list of parsed DataNode instances.
|
72
|
-
|
73
|
-
Raises:
|
74
|
-
ValueError: If the parser function fails.
|
75
|
-
|
76
|
-
Example usage:
|
77
|
-
>>> raw_data = [{"content": "Example content"}]
|
78
|
-
>>> parser = lambda x: [DataNode(**d) for d in x]
|
79
|
-
>>> parsed_nodes = _datanode_parser(raw_data, parser)
|
80
|
-
"""
|
81
|
-
try:
|
82
|
-
nodes = parser(nodes)
|
83
|
-
except Exception as e:
|
84
|
-
raise ValueError(f'DataNode parser {parser} failed. Error:{e}')
|
85
|
-
return nodes
|
86
|
-
|
87
|
-
|
88
|
-
def chunk(documents,
          chunker,
          chunker_type=ChunkerType.PLAIN,
          chunker_args=None,
          chunker_kwargs=None,
          chunking_kwargs=None,
          documents_convert_func=None,
          to_datanode: Union[bool, Callable] = True):
    """
    Chunks documents using the specified chunker and chunker type.

    Args:
        documents: The documents to be chunked.
        chunker: The chunker to use. For ChunkerType.PLAIN this is a callable
            invoked as ``chunker(documents, chunker_args, chunker_kwargs)``, or
            the string 'text_chunker' to select the built-in ``text_chunker``.
            For ChunkerType.SELFDEFINED it is called as
            ``chunker(*chunker_args, **chunker_kwargs)`` and the result must
            provide ``split(documents, **chunking_kwargs)``.
        chunker_type: The type of the chunker. Defaults to ChunkerType.PLAIN.
        chunker_args: Positional arguments for the chunker. Defaults to None,
            which is treated as an empty list.
        chunker_kwargs: Keyword arguments for the chunker. Defaults to None,
            which is treated as an empty dict.
        chunking_kwargs: Additional keyword arguments for the chunking step
            (used by the LLAMAINDEX and SELFDEFINED types). Defaults to None,
            which is treated as an empty dict.
        documents_convert_func: Optional callable invoked as
            ``documents_convert_func(documents, 'langchain' | 'llama_index')``
            before chunking with the LANGCHAIN / LLAMAINDEX types.
        to_datanode: ``True`` to convert the result with the built-in converter
            for the chunker type, a callable to convert the result with that
            callable, or any other value to return the raw result. Ignored for
            ChunkerType.PLAIN. Defaults to True.

    Returns:
        The chunked nodes, converted according to ``to_datanode``.

    Raises:
        ValueError: If the chunker fails, if ``to_datanode`` is True for a
            SELFDEFINED chunker (no built-in converter exists for it), or if an
            unsupported chunker type is provided.

    Example usage:
        >>> documents = ["Long text document...", "Another long text..."]
        >>> chunked_docs = chunk(documents, text_chunker, ChunkerType.PLAIN, chunker_args=[], chunker_kwargs={"chunk_size": 100})
    """
    # Fresh containers per call: mutable defaults would be shared across calls
    # and could be mutated by a chunker that receives them.
    chunker_args = [] if chunker_args is None else chunker_args
    chunker_kwargs = {} if chunker_kwargs is None else chunker_kwargs
    chunking_kwargs = {} if chunking_kwargs is None else chunking_kwargs

    if chunker_type == ChunkerType.PLAIN:
        try:
            if chunker == 'text_chunker':
                chunker = text_chunker
            return chunker(documents, chunker_args, chunker_kwargs)
        except Exception as e:
            raise ValueError(f'Chunker {chunker} is currently not supported. Error: {e}') from e

    if chunker_type == ChunkerType.LANGCHAIN:
        if documents_convert_func:
            documents = documents_convert_func(documents, 'langchain')
        nodes = langchain_text_splitter(documents, chunker, chunker_args, chunker_kwargs)
        if to_datanode is True:
            if isinstance(documents, str):
                # A plain string input: each resulting piece becomes the content
                # of a new DataNode.
                nodes = lcall(nodes, lambda x: DataNode(content=x))
            else:
                nodes = lcall(nodes, from_langchain)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    if chunker_type == ChunkerType.LLAMAINDEX:
        if documents_convert_func:
            documents = documents_convert_func(documents, 'llama_index')
        nodes = llama_index_node_parser(documents, chunker, chunker_args, chunker_kwargs, chunking_kwargs)
        if to_datanode is True:
            nodes = lcall(nodes, from_llama_index)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    if chunker_type == ChunkerType.SELFDEFINED:
        try:
            splitter = chunker(*chunker_args, **chunker_kwargs)
            nodes = splitter.split(documents, **chunking_kwargs)
        except Exception as e:
            raise ValueError(f'Self defined chunker {chunker} is not valid. Error: {e}') from e

        if to_datanode is True:
            raise ValueError('Please define a valid parser to DataNode.')
        if callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    raise ValueError(f'{chunker_type} is not supported. Please choose from {list(ChunkerType)}')
|
166
|
-
|
lionagi/loaders/load_util.py
DELETED
@@ -1,240 +0,0 @@
|
|
1
|
-
# use utils and schema
|
2
|
-
import math
|
3
|
-
from enum import Enum
|
4
|
-
from pathlib import Path
|
5
|
-
from typing import List, Union, Dict, Any, Tuple
|
6
|
-
|
7
|
-
from ..utils import to_list, lcall
|
8
|
-
from ..schema import DataNode
|
9
|
-
|
10
|
-
class ReaderType(str, Enum):
    """String-valued names of the supported reader backends."""

    PLAIN = "plain"
    LANGCHAIN = "langchain"
    LLAMAINDEX = "llama_index"
    SELFDEFINED = "self_defined"
|
15
|
-
|
16
|
-
|
17
|
-
class ChunkerType(str, Enum):
    """String-valued names of the supported chunker backends."""

    PLAIN = "plain"                # default
    LANGCHAIN = "langchain"        # using langchain functions
    LLAMAINDEX = "llama_index"     # using llamaindex functions
    SELFDEFINED = "self_defined"   # create custom functions
|
22
|
-
|
23
|
-
|
24
|
-
def dir_to_path(
    dir: str, ext: str, recursive: bool = False,
    flatten: bool = True
) -> List[Path]:
    """
    Generates a list of file paths from a directory with the given file extension.

    Parameters:
        dir (str): The directory to search for files.

        ext (str): The file extension to filter by. It is appended verbatim to
            the glob pattern (``'*' + ext``), so include the leading dot, e.g. ".txt".

        recursive (bool): Whether to search subdirectories recursively
            (glob pattern ``'**/*' + ext``). Defaults to False.

        flatten (bool): Whether to flatten the list. Defaults to True.

    Returns:
        List[Path]: A list of Paths to the files.

    Raises:
        ValueError: If globbing or collecting the results fails; the underlying
            exception is attached as ``__cause__``.
    """
    pattern_prefix = '**/*' if recursive else '*'

    def _paths_for(extension):
        return list(Path(dir).glob(pattern_prefix + extension))

    try:
        return to_list(lcall(ext, _paths_for, flatten=True), flatten=flatten)
    except Exception as e:
        # `except Exception` (not a bare except) so KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a bad path.
        raise ValueError("Invalid directory or extension, please check the path") from e
|
55
|
-
|
56
|
-
def dir_to_nodes(
    dir: str, ext: Union[List[str], str],
    recursive: bool = False, flatten: bool = True,
    clean_text: bool = True
) -> List[DataNode]:
    """
    Builds one DataNode per matching file found in a directory.

    The matching paths are collected with ``dir_to_path``, each file is read
    with ``read_text``, and the resulting (content, metadata) pair becomes the
    ``content`` and ``metadata`` of a DataNode.

    Parameters:
        dir (str): The directory path from which to read files.
        ext: The file extension(s) to include. Can be a single string or a list of strings.
        recursive (bool, optional): Passed to ``dir_to_path``; if True, subdirectories are searched as well. Defaults to False.
        flatten (bool, optional): Passed to ``dir_to_path`` to control flattening of the path list. Defaults to True.
        clean_text (bool, optional): Passed to ``read_text`` as ``clean``. Defaults to True.

    Returns:
        list: The DataNode objects created from the files that were found.

    Example:
        nodes = dir_to_nodes("/path/to/dir", ".txt", recursive=True)
        # This would read all .txt files in /path/to/dir and its subdirectories,
        # converting them into DataNode objects.
    """
    def _as_node(file_info):
        # file_info is the (content, metadata) pair returned by read_text.
        return DataNode(content=file_info[0], metadata=file_info[1])

    paths = dir_to_path(dir, ext, recursive, flatten)
    contents_and_metadata = lcall(paths, read_text, clean=clean_text)
    return lcall(contents_and_metadata, _as_node)
|
86
|
-
|
87
|
-
def chunk_text(input: str,
               chunk_size: int,
               overlap: float,
               threshold: int) -> List[Union[str, None]]:
    """
    Chunks the input text into smaller parts, with optional overlap and threshold for final chunk.

    Parameters:
        input (str): The input text to chunk. Non-string values are converted with ``str()``.

        chunk_size (int): The size of each chunk, in characters.

        overlap (float): The amount of overlap between chunks. It is halved and
            truncated to an int to get the number of characters added on each
            side of a chunk boundary.
            NOTE(review): as written, values below 2 (including the 0.1 default
            of ``_file_to_chunks``) produce no overlap at all; confirm whether a
            fraction of ``chunk_size`` was intended.

        threshold (int): The minimum size of the final chunk. A shorter
            remainder is merged into the preceding chunk instead of becoming a
            chunk of its own.

    Returns:
        List[Union[str, None]]: A list of text chunks. Empty input yields a
        single empty chunk.

    Raises:
        ValueError: If an error occurs during chunking (for example
            ``chunk_size`` of 0); the underlying exception is attached as
            ``__cause__``.
    """
    try:
        text = input if isinstance(input, str) else str(input)

        n_chunks = math.ceil(len(text) / chunk_size)
        overlap_size = int(overlap / 2)

        # `<= 1` rather than `== 1`: empty text gives n_chunks == 0, which
        # previously matched no branch and made the function return None.
        if n_chunks <= 1:
            return [text]

        if n_chunks == 2:
            if len(text) - chunk_size > threshold:
                return [text[:chunk_size + overlap_size],
                        text[chunk_size - overlap_size:]]
            # The remainder is too short to stand alone: keep the text whole.
            return [text]

        chunks = [text[:chunk_size + overlap_size]]
        for i in range(1, n_chunks - 1):
            start_idx = chunk_size * i - overlap_size
            end_idx = chunk_size * (i + 1) + overlap_size
            chunks.append(text[start_idx:end_idx])

        last_start = chunk_size * (n_chunks - 1)
        if len(text) - last_start > threshold:
            chunks.append(text[last_start - overlap_size:])
        else:
            # The previous chunk already ends at last_start + overlap_size, so
            # append only what lies beyond that point.
            chunks[-1] += text[last_start + overlap_size:]

        return chunks

    except Exception as e:
        raise ValueError(f"An error occurred while chunking the text. {e}") from e
|
156
|
-
|
157
|
-
def read_text(filepath: str, clean: bool = True) -> Tuple[str, dict]:
    """
    Reads a text file and returns its content together with file metadata.

    Parameters:
        filepath (str): The path to the file to read.

        clean (bool): Whether to replace backslashes, newlines, tabs and single
            quotes in the content with spaces. Defaults to True.

    Returns:
        Tuple[str, dict]: The content, and a dict with the keys 'file', 'size',
        'creation_date', 'modified_date' and 'last_accessed_date'. The three
        dates are the date parts of the file's ctime, mtime and atime, as strings.

    Raises:
        FileNotFoundError: If the file cannot be found.

        PermissionError: If there are permissions issues.

        OSError: For other OS-related errors.
    """
    def _describe_file():
        import os
        from datetime import datetime

        def _as_date_string(timestamp):
            return str(datetime.fromtimestamp(timestamp).date())

        return {
            'file': str(filepath),
            'size': os.path.getsize(filepath),
            'creation_date': _as_date_string(os.path.getctime(filepath)),
            'modified_date': _as_date_string(os.path.getmtime(filepath)),
            'last_accessed_date': _as_date_string(os.path.getatime(filepath)),
        }

    with open(filepath, 'r') as handle:
        text = handle.read()

    if clean:
        # (old, new) pairs applied in order.
        # NOTE(review): the (' ', ' ') pair is a no-op as written; it may have
        # been meant to collapse double spaces. Confirm against the upstream source.
        substitutions = (
            ('\\', ' '),
            ('\n', ' '),
            ('\t', ' '),
            (' ', ' '),
            ('\'', ' '),
        )
        for target, substitute in substitutions:
            text = text.replace(target, substitute)

    return text, _describe_file()
|
201
|
-
|
202
|
-
def _file_to_chunks(input: Dict[str, Any],
                    field: str = 'content',
                    chunk_size: int = 1500,
                    overlap: float = 0.1,
                    threshold: int = 200) -> List[Dict[str, Any]]:
    """
    Splits one field of a record into chunks and returns one dict per chunk.

    Parameters:
        input (Dict[str, Any]): The record to split; must contain ``field``.

        field (str): The key whose value is chunked. Defaults to 'content'.

        chunk_size (int): Passed to ``chunk_text``. Defaults to 1500.

        overlap (float): Passed to ``chunk_text``. Defaults to 0.1.

        threshold (int): Passed to ``chunk_text``. Defaults to 200.

    Returns:
        List[Dict[str, Any]]: One dict per chunk. Each holds every entry of
        ``input`` except ``field``, plus 'chunk_overlap', 'chunk_threshold',
        'file_chunks' (total number of chunks), 'chunk_id' (1-based),
        'chunk_size' (length of the chunk) and 'chunk_<field>' (the chunk).

    Raises:
        ValueError: If anything fails while building the chunk dicts.
    """
    try:
        shared = {key: value for key, value in input.items() if key != field}
        shared["chunk_overlap"] = overlap
        shared["chunk_threshold"] = threshold

        pieces = chunk_text(input[field], chunk_size=chunk_size, overlap=overlap, threshold=threshold)

        return [
            {
                **shared,
                'file_chunks': len(pieces),
                'chunk_id': position,
                'chunk_size': len(piece),
                f'chunk_{field}': piece,
            }
            for position, piece in enumerate(pieces, start=1)
        ]

    except Exception as e:
        raise ValueError(f"An error occurred while chunking the file. {e}")
|
227
|
-
|
228
|
-
|
229
|
-
# needs doing TODO
|
230
|
-
def file_to_chunks(input,
                   chunk_func = _file_to_chunks, **kwargs):
    """
    Applies a chunking function to the input and returns the flattened result.

    Parameters:
        input: The record(s) to chunk; handed to ``lcall`` together with ``chunk_func``.

        chunk_func: The function applied through ``lcall``. Defaults to ``_file_to_chunks``.

        **kwargs: Extra keyword arguments forwarded through ``lcall``.

    Returns:
        The ``lcall`` result passed through ``to_list(..., flatten=True)``.
    """
    # Options that were commented out in the original signature and are not
    # implemented: project, output_dir, out_to_csv, filename, verbose,
    # timestamp, logger.
    per_item_chunks = lcall(input, chunk_func, **kwargs)
    return to_list(per_item_chunks, flatten=True)
|
lionagi/loaders/reader.py
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
from typing import Union, Callable
|
2
|
-
|
3
|
-
from ..utils import lcall
|
4
|
-
from ..bridge import langchain_loader, from_langchain, llama_index_reader, from_llama_index
|
5
|
-
from .load_util import dir_to_nodes, ReaderType
|
6
|
-
|
7
|
-
|
8
|
-
def _datanode_parser(nodes, parser):
|
9
|
-
"""
|
10
|
-
Parses raw data into DataNode instances using the provided parser function.
|
11
|
-
|
12
|
-
Args:
|
13
|
-
nodes: The list of raw data to be parsed.
|
14
|
-
parser: The parser function to transform nodes into DataNode instances.
|
15
|
-
|
16
|
-
Returns:
|
17
|
-
A list of parsed DataNode instances.
|
18
|
-
|
19
|
-
Raises:
|
20
|
-
ValueError: If the parser function fails.
|
21
|
-
|
22
|
-
Example usage:
|
23
|
-
>>> raw_nodes = [{'content': 'Example content'}]
|
24
|
-
>>> parser = lambda x: [DataNode(**node) for node in x]
|
25
|
-
>>> datanodes = _datanode_parser(raw_nodes, parser)
|
26
|
-
"""
|
27
|
-
try:
|
28
|
-
nodes = parser(nodes)
|
29
|
-
except Exception as e:
|
30
|
-
raise ValueError(f'DataNode parser {parser} failed. Error:{e}')
|
31
|
-
return nodes
|
32
|
-
|
33
|
-
def text_reader(args, kwargs):
    """
    Reads text files from a directory and converts them to DataNode instances.

    Args:
        args: Positional arguments for the dir_to_nodes function.
        kwargs: Keyword arguments for the dir_to_nodes function.

    Returns:
        A list of DataNode instances.

    Example usage:
        >>> args = ['path/to/text/files']
        >>> kwargs = {'ext': '.txt'}
        >>> nodes = text_reader(args, kwargs)
    """
    return dir_to_nodes(*args, **kwargs)
|
50
|
-
|
51
|
-
|
52
|
-
def load(reader: Union[str, Callable],
         reader_type=ReaderType.PLAIN,
         reader_args=None,
         reader_kwargs=None,
         load_args=None,
         load_kwargs=None,
         to_datanode: Union[bool, Callable] = True):
    """
    Loads documents using the specified reader and reader type.

    Args:
        reader: The reader to use. For ReaderType.PLAIN this is a callable
            invoked as ``reader(reader_args, reader_kwargs)``, or the string
            'text_reader' to select the built-in ``text_reader``. For
            ReaderType.SELFDEFINED it is called as
            ``reader(*reader_args, **reader_kwargs)`` and the result must
            provide ``load(*load_args, **load_kwargs)``.
        reader_type: The type of the reader. Defaults to ReaderType.PLAIN.
        reader_args: Positional arguments for the reader. Defaults to None,
            which is treated as an empty list.
        reader_kwargs: Keyword arguments for the reader. Defaults to None,
            which is treated as an empty dict.
        load_args: Positional arguments for the loading step (used by the
            LLAMAINDEX and SELFDEFINED types). Defaults to None, which is
            treated as an empty list.
        load_kwargs: Keyword arguments for the loading step. Defaults to None,
            which is treated as an empty dict.
        to_datanode: ``True`` to convert the result with the built-in converter
            for the reader type, a callable to convert the result with that
            callable, or any other value to return the raw result. Ignored for
            ReaderType.PLAIN. Defaults to True.

    Returns:
        A list of loaded and potentially parsed documents.

    Raises:
        ValueError: If the reader fails, if ``to_datanode`` is True for a
            SELFDEFINED reader (no built-in converter exists for it), or if an
            unsupported reader type is provided.

    Example usage:
        >>> reader = 'text_reader'
        >>> reader_args = ['path/to/text/files']
        >>> reader_kwargs = {'ext': '.txt'}
        >>> nodes = load(reader, reader_args=reader_args, reader_kwargs=reader_kwargs)
    """
    # Fresh containers per call: mutable defaults would be shared across calls
    # and could be mutated by a reader that receives them.
    reader_args = [] if reader_args is None else reader_args
    reader_kwargs = {} if reader_kwargs is None else reader_kwargs
    load_args = [] if load_args is None else load_args
    load_kwargs = {} if load_kwargs is None else load_kwargs

    if reader_type == ReaderType.PLAIN:
        try:
            if reader == 'text_reader':
                reader = text_reader
            return reader(reader_args, reader_kwargs)
        except Exception as e:
            raise ValueError(f'Reader {reader} is currently not supported. Error: {e}') from e

    if reader_type == ReaderType.LANGCHAIN:
        nodes = langchain_loader(reader, reader_args, reader_kwargs)
        if to_datanode is True:
            nodes = lcall(nodes, from_langchain)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    if reader_type == ReaderType.LLAMAINDEX:
        nodes = llama_index_reader(reader, reader_args, reader_kwargs, load_args, load_kwargs)
        if to_datanode is True:
            nodes = lcall(nodes, from_llama_index)
        elif callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    if reader_type == ReaderType.SELFDEFINED:
        try:
            loader = reader(*reader_args, **reader_kwargs)
            nodes = loader.load(*load_args, **load_kwargs)
        except Exception as e:
            raise ValueError(f'Self defined reader {reader} is not valid. Error: {e}') from e

        if to_datanode is True:
            raise ValueError('Please define a valid parser to DataNode.')
        if callable(to_datanode):
            nodes = _datanode_parser(nodes, to_datanode)
        return nodes

    raise ValueError(f'{reader_type} is not supported. Please choose from {list(ReaderType)}')
|
lionagi/models/__init__.py
DELETED
File without changes
|
lionagi/models/base_model.py
DELETED
File without changes
|
lionagi/models/imodel.py
DELETED
@@ -1,53 +0,0 @@
|
|
1
|
-
# from ..configs import oai_schema
|
2
|
-
|
3
|
-
# class BaseIntelligentModel:
|
4
|
-
|
5
|
-
# def __init__(
|
6
|
-
# self,
|
7
|
-
# service=None,
|
8
|
-
# default_config=oai_schema['chat']['config'],
|
9
|
-
# **kwargs
|
10
|
-
# ) -> None:
|
11
|
-
# # kwargs are the individual parameters that the model can take
|
12
|
-
# # different for different models
|
13
|
-
# self.service=service,
|
14
|
-
# self.config = {**default_config, **kwargs},
|
15
|
-
|
16
|
-
# async def __call__(
|
17
|
-
# self,
|
18
|
-
# payload,
|
19
|
-
# service=None,
|
20
|
-
# endpoint_='chat/completions',
|
21
|
-
# method='post'
|
22
|
-
# ):
|
23
|
-
# service = service or self.service
|
24
|
-
# return await service.serve(
|
25
|
-
# payload=payload, endpoint_=endpoint_, method=method
|
26
|
-
# )
|
27
|
-
|
28
|
-
# def set_service(self, service):
|
29
|
-
# self.service = service
|
30
|
-
|
31
|
-
# def set_config(self, config):
|
32
|
-
# self.config=config
|
33
|
-
|
34
|
-
# def change_model(self, model):
|
35
|
-
# self.config['model'] = model
|
36
|
-
|
37
|
-
# def change_temperature(self, temperature):
|
38
|
-
# self.config['temperature'] = temperature
|
39
|
-
|
40
|
-
# def revert_to_default_config(self):
|
41
|
-
# self.config = oai_schema['chat']['config']
|
42
|
-
|
43
|
-
# def modify_config(self, **kwargs):
|
44
|
-
# self.config = {**self.config, **kwargs}
|
45
|
-
|
46
|
-
|
47
|
-
# class IModel(BaseIntelligentModel):
|
48
|
-
|
49
|
-
# def __init__(
|
50
|
-
# self, service=None, default_model_kwargs=None, **kwargs
|
51
|
-
# ) -> None:
|
52
|
-
# super().__init__(service, default_model_kwargs, **kwargs)
|
53
|
-
|