lionagi 0.0.111__py3-none-any.whl → 0.0.113__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- lionagi/__init__.py +7 -2
- lionagi/bridge/__init__.py +7 -0
- lionagi/bridge/langchain.py +131 -0
- lionagi/bridge/llama_index.py +157 -0
- lionagi/configs/__init__.py +7 -0
- lionagi/configs/oai_configs.py +49 -0
- lionagi/configs/openrouter_config.py +49 -0
- lionagi/core/__init__.py +15 -0
- lionagi/{session/conversation.py → core/conversations.py} +10 -17
- lionagi/core/flows.py +1 -0
- lionagi/core/instruction_sets.py +1 -0
- lionagi/{session/message.py → core/messages.py} +5 -5
- lionagi/core/sessions.py +262 -0
- lionagi/datastore/__init__.py +1 -0
- lionagi/datastore/chroma.py +1 -0
- lionagi/datastore/deeplake.py +1 -0
- lionagi/datastore/elasticsearch.py +1 -0
- lionagi/datastore/lantern.py +1 -0
- lionagi/datastore/pinecone.py +1 -0
- lionagi/datastore/postgres.py +1 -0
- lionagi/datastore/qdrant.py +1 -0
- lionagi/loader/__init__.py +12 -0
- lionagi/loader/chunker.py +157 -0
- lionagi/loader/reader.py +124 -0
- lionagi/objs/__init__.py +7 -0
- lionagi/objs/messenger.py +163 -0
- lionagi/objs/tool_registry.py +247 -0
- lionagi/schema/__init__.py +11 -0
- lionagi/schema/base_condition.py +1 -0
- lionagi/schema/base_schema.py +239 -0
- lionagi/schema/base_tool.py +9 -0
- lionagi/schema/data_logger.py +94 -0
- lionagi/services/__init__.py +14 -0
- lionagi/services/anthropic.py +1 -0
- lionagi/services/anyscale.py +0 -0
- lionagi/services/azure.py +1 -0
- lionagi/{api/oai_service.py → services/base_api_service.py} +74 -148
- lionagi/services/bedrock.py +0 -0
- lionagi/services/chatcompletion.py +48 -0
- lionagi/services/everlyai.py +0 -0
- lionagi/services/gemini.py +0 -0
- lionagi/services/gpt4all.py +0 -0
- lionagi/services/huggingface.py +0 -0
- lionagi/services/litellm.py +1 -0
- lionagi/services/localai.py +0 -0
- lionagi/services/mistralai.py +0 -0
- lionagi/services/oai.py +34 -0
- lionagi/services/ollama.py +1 -0
- lionagi/services/openllm.py +0 -0
- lionagi/services/openrouter.py +32 -0
- lionagi/services/perplexity.py +0 -0
- lionagi/services/predibase.py +0 -0
- lionagi/services/rungpt.py +0 -0
- lionagi/services/service_objs.py +282 -0
- lionagi/services/vllm.py +0 -0
- lionagi/services/xinference.py +0 -0
- lionagi/structure/__init__.py +7 -0
- lionagi/structure/relationship.py +128 -0
- lionagi/structure/structure.py +160 -0
- lionagi/tests/__init__.py +0 -0
- lionagi/tests/test_flatten_util.py +426 -0
- lionagi/tools/__init__.py +0 -0
- lionagi/tools/coder.py +1 -0
- lionagi/tools/planner.py +1 -0
- lionagi/tools/prompter.py +1 -0
- lionagi/tools/sandbox.py +1 -0
- lionagi/tools/scorer.py +1 -0
- lionagi/tools/summarizer.py +1 -0
- lionagi/tools/validator.py +1 -0
- lionagi/utils/__init__.py +46 -8
- lionagi/utils/api_util.py +63 -416
- lionagi/utils/call_util.py +347 -0
- lionagi/utils/flat_util.py +540 -0
- lionagi/utils/io_util.py +102 -0
- lionagi/utils/load_utils.py +190 -0
- lionagi/utils/sys_util.py +85 -660
- lionagi/utils/tool_util.py +82 -199
- lionagi/utils/type_util.py +81 -0
- lionagi/version.py +1 -1
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/METADATA +44 -15
- lionagi-0.0.113.dist-info/RECORD +84 -0
- lionagi/api/__init__.py +0 -8
- lionagi/api/oai_config.py +0 -16
- lionagi/session/__init__.py +0 -7
- lionagi/session/session.py +0 -380
- lionagi/utils/doc_util.py +0 -331
- lionagi/utils/log_util.py +0 -86
- lionagi-0.0.111.dist-info/RECORD +0 -20
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/LICENSE +0 -0
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/WHEEL +0 -0
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,190 @@
|
|
1
|
+
import math
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import List, Union, Dict, Any, Tuple
|
4
|
+
|
5
|
+
from .type_util import to_list
|
6
|
+
from .call_util import lcall
|
7
|
+
from .io_util import to_csv
|
8
|
+
from ..schema.base_schema import DataNode
|
9
|
+
|
10
|
+
|
11
|
+
def dir_to_path(
    dir: str, ext: str, recursive: bool = False,
    flatten: bool = True) -> List[Path]:
    """
    Generates a list of file paths from a directory with the given file extension.

    Args:
        dir (str): The directory to search for files.
        ext (str): The file extension to filter by; it is appended verbatim to the
            glob pattern (e.g. '.txt'). It is routed through lcall, so presumably a
            list of extensions is accepted as well -- confirm against lcall.
        recursive (bool): Whether to search subdirectories recursively. Defaults to False.
        flatten (bool): Forwarded to to_list for the final result. Defaults to True.

    Returns:
        List[Path]: A list of Paths to the files.

    Raises:
        ValueError: If the directory or extension is invalid.
    """
    # '**/*' makes Path.glob descend into subdirectories; '*' stays at the top level.
    pattern_prefix = '**/*' if recursive else '*'

    def _dir_to_path(ext):
        return list(Path(dir).glob(pattern_prefix + ext))

    try:
        return to_list(lcall(ext, _dir_to_path, flatten=True), flatten=flatten)
    except Exception as e:
        # Catch Exception rather than everything so KeyboardInterrupt/SystemExit
        # still propagate, and keep the underlying cause attached for debugging.
        raise ValueError("Invalid directory or extension, please check the path") from e
|
38
|
+
|
39
|
+
def dir_to_nodes(dir: str, ext, recursive: bool = False, flatten: bool = True, clean_text: bool = True):
    """
    Reads every matching file in a directory and wraps each one in a DataNode.

    Args:
        dir (str): The directory to search for files.
        ext: The file extension(s) to filter by, forwarded to dir_to_path.
        recursive (bool): Whether to search subdirectories recursively. Defaults to False.
        flatten (bool): Forwarded to dir_to_path. Defaults to True.
        clean_text (bool): Forwarded to read_text as `clean`. Defaults to True.

    Returns:
        The result of mapping the files to DataNode objects through lcall
        (presumably a list with one DataNode per file -- confirm against lcall).
    """

    def _as_node(file_info):
        # read_text yields (content, metadata); index 0 is the text, index 1 the metadata.
        return DataNode(content=file_info[0], metadata=file_info[1])

    file_paths = dir_to_path(dir=dir, ext=ext, recursive=recursive, flatten=flatten)
    contents_and_metadata = lcall(file_paths, read_text, clean=clean_text)
    return lcall(contents_and_metadata, _as_node)
|
44
|
+
|
45
|
+
def chunk_text(input: str,
               chunk_size: int,
               overlap: float,
               threshold: int) -> List[Union[str, None]]:
    """
    Chunks the input text into smaller parts, with optional overlap and threshold for final chunk.

    Args:
        input (str): The input text to chunk. Non-string values are converted with str().
        chunk_size (int): The nominal size of each chunk, in characters. Must be positive.
        overlap (float): The overlap between neighbouring chunks. A value strictly
            between 0 and 1 is read as a fraction of chunk_size; any other value is
            read as an absolute number of characters. Half of the overlap is added
            on each side of every chunk boundary.
        threshold (int): The minimum size of the final chunk. A shorter tail is
            merged into the preceding chunk instead of becoming its own chunk.

    Returns:
        List[Union[str, None]]: A list of text chunks. Text that fits in a single
        chunk (including empty text) is returned as a one-element list.

    Raises:
        ValueError: If chunk_size is not positive or an error occurs during chunking.
    """
    try:
        if not isinstance(input, str):
            input = str(input)

        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")

        if 0 < overlap < 1:
            # A fractional overlap (such as 0.1) is relative to the chunk size;
            # int(overlap / 2) would silently truncate it to zero.
            overlap_size = int(chunk_size * overlap / 2)
        else:
            # Negative values are clamped so slices never wrap around the string.
            overlap_size = max(0, int(overlap / 2))

        n_chunks = math.ceil(len(input) / chunk_size)
        if n_chunks <= 1:
            # Covers empty input too (n_chunks == 0), which must not fall through.
            return [input]

        # Every chunk except the last: the nominal window widened by overlap_size
        # on both sides (the left edge of the first chunk is clamped to 0).
        chunks = [
            input[max(0, chunk_size * i - overlap_size):chunk_size * (i + 1) + overlap_size]
            for i in range(n_chunks - 1)
        ]

        tail_start = chunk_size * (n_chunks - 1)
        if len(input) - tail_start > threshold:
            chunks.append(input[max(0, tail_start - overlap_size):])
        else:
            # Tail too short to stand alone: append what the previous chunk does
            # not already contain, so no text is duplicated or lost.
            chunks[-1] += input[tail_start + overlap_size:]

        return chunks

    except Exception as e:
        raise ValueError(f"An error occurred while chunking the text. {e}") from e
|
111
|
+
|
112
|
+
def read_text(filepath: str, clean: bool = True, encoding: str = 'utf-8') -> Tuple[str, dict]:
    """
    Reads text from a file and optionally cleans it, returning the content and metadata.

    Args:
        filepath (str): The path to the file to read.
        clean (bool): Whether to clean the text by replacing certain characters. Defaults to True.
        encoding (str): The text encoding used to decode the file. Defaults to 'utf-8'
            so the result does not depend on the platform's locale.

    Returns:
        Tuple[str, dict]: A tuple containing the content and metadata of the file.
        The metadata keys are 'file', 'size', 'creation_date', 'modified_date'
        and 'last_accessed_date' (dates as ISO 'YYYY-MM-DD' strings).

    Raises:
        FileNotFoundError: If the file cannot be found.
        PermissionError: If there are permissions issues.
        OSError: For other OS-related errors.
    """
    import os
    from datetime import datetime

    def _get_metadata():
        # A single stat call supplies the size and all three timestamps.
        stats = os.stat(filepath)

        def _as_date(timestamp):
            return str(datetime.fromtimestamp(timestamp).date())

        return {'file': str(filepath),
                'size': stats.st_size,
                # NOTE: st_ctime is the creation time on Windows but the last
                # metadata change on most Unix systems.
                'creation_date': _as_date(stats.st_ctime),
                'modified_date': _as_date(stats.st_mtime),
                'last_accessed_date': _as_date(stats.st_atime)}

    # I/O errors propagate unchanged; there is nothing useful to add by re-raising.
    with open(filepath, 'r', encoding=encoding) as f:
        content = f.read()

    if clean:
        # Define characters to replace and their replacements. Order matters:
        # double spaces are collapsed after newlines/tabs have become spaces.
        # It is a single pass, so runs of three or more spaces are shortened
        # but not fully collapsed.
        replacements = {'\\': ' ', '\n': ' ', '\t': ' ', '  ': ' ', '\'': ' '}
        for old, new in replacements.items():
            content = content.replace(old, new)

    return content, _get_metadata()
|
153
|
+
|
154
|
+
def _file_to_chunks(input: Dict[str, Any],
                    field: str = 'content',
                    chunk_size: int = 1500,
                    overlap: float = 0.1,
                    threshold: int = 200) -> List[Dict[str, Any]]:
    """
    Splits one field of a file record into chunks, producing one record per chunk.

    Args:
        input (Dict[str, Any]): The file record; must contain `field`.
        field (str): The key whose value is chunked. Defaults to 'content'.
        chunk_size (int): Forwarded to chunk_text. Defaults to 1500.
        overlap (float): Forwarded to chunk_text. Defaults to 0.1.
        threshold (int): Forwarded to chunk_text. Defaults to 200.

    Returns:
        List[Dict[str, Any]]: One dict per chunk. Each carries every key of `input`
        except `field`, plus 'chunk_overlap', 'chunk_threshold', 'file_chunks'
        (total number of chunks), 'chunk_id' (1-based), 'chunk_size' (length of
        the chunk) and 'chunk_<field>' (the chunk itself).

    Raises:
        ValueError: If anything goes wrong while chunking the record.
    """
    try:
        # Everything except the chunked field is shared by all output records.
        shared = {name: val for name, val in input.items() if name != field}
        shared["chunk_overlap"] = overlap
        shared["chunk_threshold"] = threshold

        pieces = chunk_text(input[field], chunk_size=chunk_size, overlap=overlap, threshold=threshold)

        return [
            {
                **shared,
                'file_chunks': len(pieces),
                'chunk_id': position,
                'chunk_size': len(piece),
                f'chunk_{field}': piece,
            }
            for position, piece in enumerate(pieces, start=1)
        ]

    except Exception as err:
        raise ValueError(f"An error occurred while chunking the file. {err}")
|
179
|
+
|
180
|
+
def file_to_chunks(input,
                   chunk_func = _file_to_chunks, **kwargs):
    """
    Applies a chunking function to the input and returns the flattened results.

    Args:
        input: The record(s) to chunk; handed to lcall together with chunk_func
            (presumably a single record or a list of records -- confirm against lcall).
        chunk_func: The callable that chunks one record. Defaults to _file_to_chunks.
        **kwargs: Extra keyword arguments forwarded to lcall alongside chunk_func.

    Returns:
        The chunk records flattened into a single list by to_list(..., flatten=True).
    """
    per_input_chunks = lcall(input, chunk_func, **kwargs)
    return to_list(per_input_chunks, flatten=True)
|