lionagi 0.0.111__py3-none-any.whl → 0.0.113__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +7 -2
- lionagi/bridge/__init__.py +7 -0
- lionagi/bridge/langchain.py +131 -0
- lionagi/bridge/llama_index.py +157 -0
- lionagi/configs/__init__.py +7 -0
- lionagi/configs/oai_configs.py +49 -0
- lionagi/configs/openrouter_config.py +49 -0
- lionagi/core/__init__.py +15 -0
- lionagi/{session/conversation.py → core/conversations.py} +10 -17
- lionagi/core/flows.py +1 -0
- lionagi/core/instruction_sets.py +1 -0
- lionagi/{session/message.py → core/messages.py} +5 -5
- lionagi/core/sessions.py +262 -0
- lionagi/datastore/__init__.py +1 -0
- lionagi/datastore/chroma.py +1 -0
- lionagi/datastore/deeplake.py +1 -0
- lionagi/datastore/elasticsearch.py +1 -0
- lionagi/datastore/lantern.py +1 -0
- lionagi/datastore/pinecone.py +1 -0
- lionagi/datastore/postgres.py +1 -0
- lionagi/datastore/qdrant.py +1 -0
- lionagi/loader/__init__.py +12 -0
- lionagi/loader/chunker.py +157 -0
- lionagi/loader/reader.py +124 -0
- lionagi/objs/__init__.py +7 -0
- lionagi/objs/messenger.py +163 -0
- lionagi/objs/tool_registry.py +247 -0
- lionagi/schema/__init__.py +11 -0
- lionagi/schema/base_condition.py +1 -0
- lionagi/schema/base_schema.py +239 -0
- lionagi/schema/base_tool.py +9 -0
- lionagi/schema/data_logger.py +94 -0
- lionagi/services/__init__.py +14 -0
- lionagi/services/anthropic.py +1 -0
- lionagi/services/anyscale.py +0 -0
- lionagi/services/azure.py +1 -0
- lionagi/{api/oai_service.py → services/base_api_service.py} +74 -148
- lionagi/services/bedrock.py +0 -0
- lionagi/services/chatcompletion.py +48 -0
- lionagi/services/everlyai.py +0 -0
- lionagi/services/gemini.py +0 -0
- lionagi/services/gpt4all.py +0 -0
- lionagi/services/huggingface.py +0 -0
- lionagi/services/litellm.py +1 -0
- lionagi/services/localai.py +0 -0
- lionagi/services/mistralai.py +0 -0
- lionagi/services/oai.py +34 -0
- lionagi/services/ollama.py +1 -0
- lionagi/services/openllm.py +0 -0
- lionagi/services/openrouter.py +32 -0
- lionagi/services/perplexity.py +0 -0
- lionagi/services/predibase.py +0 -0
- lionagi/services/rungpt.py +0 -0
- lionagi/services/service_objs.py +282 -0
- lionagi/services/vllm.py +0 -0
- lionagi/services/xinference.py +0 -0
- lionagi/structure/__init__.py +7 -0
- lionagi/structure/relationship.py +128 -0
- lionagi/structure/structure.py +160 -0
- lionagi/tests/__init__.py +0 -0
- lionagi/tests/test_flatten_util.py +426 -0
- lionagi/tools/__init__.py +0 -0
- lionagi/tools/coder.py +1 -0
- lionagi/tools/planner.py +1 -0
- lionagi/tools/prompter.py +1 -0
- lionagi/tools/sandbox.py +1 -0
- lionagi/tools/scorer.py +1 -0
- lionagi/tools/summarizer.py +1 -0
- lionagi/tools/validator.py +1 -0
- lionagi/utils/__init__.py +46 -8
- lionagi/utils/api_util.py +63 -416
- lionagi/utils/call_util.py +347 -0
- lionagi/utils/flat_util.py +540 -0
- lionagi/utils/io_util.py +102 -0
- lionagi/utils/load_utils.py +190 -0
- lionagi/utils/sys_util.py +85 -660
- lionagi/utils/tool_util.py +82 -199
- lionagi/utils/type_util.py +81 -0
- lionagi/version.py +1 -1
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/METADATA +44 -15
- lionagi-0.0.113.dist-info/RECORD +84 -0
- lionagi/api/__init__.py +0 -8
- lionagi/api/oai_config.py +0 -16
- lionagi/session/__init__.py +0 -7
- lionagi/session/session.py +0 -380
- lionagi/utils/doc_util.py +0 -331
- lionagi/utils/log_util.py +0 -86
- lionagi-0.0.111.dist-info/RECORD +0 -20
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/LICENSE +0 -0
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/WHEEL +0 -0
- {lionagi-0.0.111.dist-info → lionagi-0.0.113.dist-info}/top_level.txt +0 -0
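Beyond the newly added modules, the listing shows a package reorganization: the `session` and `api` subpackages are removed and their contents migrate into the new `core` and `services` subpackages. A minimal sketch of the relocated module paths, inferred only from the renames above (it exercises the new import paths, not any particular class or function, since the diff does not show what each module exports):

# Hypothetical illustration of the relocated modules in 0.0.113; these bare
# imports only assert that the new module paths exist, nothing about their API.
import lionagi.core.sessions                # replaces lionagi/session/session.py (removed)
import lionagi.core.conversations           # moved from lionagi/session/conversation.py
import lionagi.core.messages                # moved from lionagi/session/message.py
import lionagi.services.base_api_service    # moved from lionagi/api/oai_service.py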
lionagi/utils/load_utils.py
@@ -0,0 +1,190 @@
+import math
+from pathlib import Path
+from typing import List, Union, Dict, Any, Tuple
+
+from .type_util import to_list
+from .call_util import lcall
+from .io_util import to_csv
+from ..schema.base_schema import DataNode
+
+
+def dir_to_path(
+    dir: str, ext: str, recursive: bool = False,
+    flatten: bool = True) -> List[Path]:
+    """
+    Generates a list of file paths from a directory with the given file extension.
+
+    Args:
+        directory (str): The directory to search for files.
+        extension (str): The file extension to filter by.
+        recursive (bool): Whether to search subdirectories recursively. Defaults to False.
+        flatten (bool): Whether to flatten the list. Defaults to True.
+
+    Returns:
+        List[Path]: A list of Paths to the files.
+
+    Raises:
+        ValueError: If the directory or extension is invalid.
+    """
+
+    def _dir_to_path(ext):
+        tem = '**/*' if recursive else '*'
+        return list(Path(dir).glob(tem + ext))
+
+    try:
+        return to_list(lcall(ext, _dir_to_path, flatten=True), flatten=flatten)
+    except:
+        raise ValueError("Invalid directory or extension, please check the path")
+
+def dir_to_nodes(dir: str, ext, recursive: bool = False, flatten: bool = True, clean_text: bool = True):
+    path_list = dir_to_path(dir, ext, recursive, flatten)
+    files_info = lcall(path_list, read_text, clean=clean_text)
+    nodes = lcall(files_info, lambda x: DataNode(content=x[0], metadata=x[1]))
+    return nodes
+
+def chunk_text(input: str,
+               chunk_size: int,
+               overlap: float,
+               threshold: int) -> List[Union[str, None]]:
+    """
+    Chunks the input text into smaller parts, with optional overlap and threshold for final chunk.
+
+    Args:
+        text (str): The input text to chunk.
+        chunk_size (int): The size of each chunk.
+        overlap (float): The amount of overlap between chunks.
+        threshold (int): The minimum size of the final chunk.
+
+    Returns:
+        List[Union[str, None]]: A list of text chunks.
+
+    Raises:
+        ValueError: If an error occurs during chunking.
+    """
+
+    def _chunk_n1():
+        return [input]
+
+    def _chunk_n2():
+        chunks = []
+        chunks.append(input[:chunk_size + overlap_size])
+
+        if len(input) - chunk_size > threshold:
+            chunks.append(input[chunk_size - overlap_size:])
+        else:
+            return _chunk_n1()
+
+        return chunks
+
+    def _chunk_n3():
+        chunks = []
+        chunks.append(input[:chunk_size + overlap_size])
+        for i in range(1, n_chunks - 1):
+            start_idx = chunk_size * i - overlap_size
+            end_idx = chunk_size * (i + 1) + overlap_size
+            chunks.append(input[start_idx:end_idx])
+
+        if len(input) - chunk_size * (n_chunks - 1) > threshold:
+            chunks.append(input[chunk_size * (n_chunks - 1) - overlap_size:])
+        else:
+            chunks[-1] += input[chunk_size * (n_chunks - 1) + overlap_size:]
+
+        return chunks
+
+    try:
+        if not isinstance(input, str): input = str(input)
+
+        n_chunks = math.ceil(len(input) / chunk_size)
+        overlap_size = int(overlap / 2)
+
+        if n_chunks == 1:
+            return _chunk_n1()
+
+        elif n_chunks == 2:
+            return _chunk_n2()
+
+        elif n_chunks > 2:
+            return _chunk_n3()
+
+    except Exception as e:
+        raise ValueError(f"An error occurred while chunking the text. {e}")
+
+def read_text(filepath: str, clean: bool = True) -> Tuple[str, dict]:
+    """
+    Reads text from a file and optionally cleans it, returning the content and metadata.
+
+    Args:
+        filepath (str): The path to the file to read.
+        clean (bool): Whether to clean the text by replacing certain characters. Defaults to True.
+
+    Returns:
+        Tuple[str, dict]: A tuple containing the content and metadata of the file.
+
+    Raises:
+        FileNotFoundError: If the file cannot be found.
+        PermissionError: If there are permissions issues.
+        OSError: For other OS-related errors.
+    """
+    def _get_metadata():
+        import os
+        from datetime import datetime
+        file = filepath
+        size = os.path.getsize(filepath)
+        creation_date = datetime.fromtimestamp(os.path.getctime(filepath)).date()
+        modified_date = datetime.fromtimestamp(os.path.getmtime(filepath)).date()
+        last_accessed_date = datetime.fromtimestamp(os.path.getatime(filepath)).date()
+        return {'file': str(file),
+                'size': size,
+                'creation_date': str(creation_date),
+                'modified_date': str(modified_date),
+                'last_accessed_date': str(last_accessed_date)}
+    try:
+        with open(filepath, 'r') as f:
+            content = f.read()
+        if clean:
+            # Define characters to replace and their replacements
+            replacements = {'\\': ' ', '\n': ' ', '\t': ' ', '  ': ' ', '\'': ' '}
+            for old, new in replacements.items():
+                content = content.replace(old, new)
+        metadata = _get_metadata()
+        return content, metadata
+    except Exception as e:
+        raise e
+
+def _file_to_chunks(input: Dict[str, Any],
+                    field: str = 'content',
+                    chunk_size: int = 1500,
+                    overlap: float = 0.1,
+                    threshold: int = 200) -> List[Dict[str, Any]]:
+    try:
+        out = {key: value for key, value in input.items() if key != field}
+        out.update({"chunk_overlap": overlap, "chunk_threshold": threshold})
+
+        chunks = chunk_text(input[field], chunk_size=chunk_size, overlap=overlap, threshold=threshold)
+        logs = []
+        for i, chunk in enumerate(chunks):
+            chunk_dict = out.copy()
+            chunk_dict.update({
+                'file_chunks': len(chunks),
+                'chunk_id': i + 1,
+                'chunk_size': len(chunk),
+                f'chunk_{field}': chunk
+            })
+            logs.append(chunk_dict)
+
+        return logs
+
+    except Exception as e:
+        raise ValueError(f"An error occurred while chunking the file. {e}")
+
+def file_to_chunks(input,
+                   # project='project',
+                   # output_dir='data/logs/sources/',
+                   chunk_func = _file_to_chunks, **kwargs):
+                   # out_to_csv=False,
+                   # filename=None,
+                   # verbose=True,
+                   # timestamp=True,
+                   # logger=None,
+    logs = to_list(lcall(input, chunk_func, **kwargs), flatten=True)
+    return logs
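For orientation, a short usage sketch of the new `load_utils` helpers follows. It assumes the module is importable as `lionagi.utils.load_utils`; the filler text, the `./docs` directory, and the `.txt` extension are made-up values for illustration and do not appear in the diff.

# Illustrative usage of the new load_utils helpers; directory, extension,
# and sizes are assumptions for demonstration, not values taken from the diff.
from lionagi.utils.load_utils import chunk_text, dir_to_path, read_text

# Split a long string into ~1500-character chunks; a trailing piece shorter
# than the 200-character threshold is merged into the previous chunk.
text = "lorem ipsum " * 400
chunks = chunk_text(text, chunk_size=1500, overlap=0.1, threshold=200)
print(len(chunks), [len(c) for c in chunks])

# Collect .txt files from a (hypothetical) ./docs directory and read the first
# one; read_text returns a (cleaned_content, metadata_dict) tuple.
paths = dir_to_path(dir='./docs', ext='.txt')
if paths:
    content, meta = read_text(str(paths[0]), clean=True)
    print(meta['file'], meta['size'], meta['modified_date'])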