lionagi 0.9.6__py3-none-any.whl → 0.9.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/libs/file/chunk.py +17 -0
- lionagi/libs/file/process.py +108 -7
- lionagi/tools/types.py +3 -1
- lionagi/version.py +1 -1
- {lionagi-0.9.6.dist-info → lionagi-0.9.8.dist-info}/METADATA +16 -1
- {lionagi-0.9.6.dist-info → lionagi-0.9.8.dist-info}/RECORD +8 -8
- {lionagi-0.9.6.dist-info → lionagi-0.9.8.dist-info}/WHEEL +0 -0
- {lionagi-0.9.6.dist-info → lionagi-0.9.8.dist-info}/licenses/LICENSE +0 -0
lionagi/libs/file/chunk.py
CHANGED
@@ -218,6 +218,7 @@ def chunk_content(
|
|
218
218
|
threshold: int = 256,
|
219
219
|
metadata: dict[str, Any] = {},
|
220
220
|
return_tokens: bool = False,
|
221
|
+
as_node: bool = False,
|
221
222
|
**kwargs: Any,
|
222
223
|
) -> list[dict[str, Any]]:
|
223
224
|
"""
|
@@ -257,6 +258,22 @@ def chunk_content(
|
|
257
258
|
threshold=threshold,
|
258
259
|
)
|
259
260
|
|
261
|
+
if as_node:
|
262
|
+
from lionagi.protocols.graph.node import Node
|
263
|
+
|
264
|
+
return [
|
265
|
+
Node(
|
266
|
+
content=chunk,
|
267
|
+
metadata={
|
268
|
+
"chunk_id": i + 1,
|
269
|
+
"total_chunks": len(chunks),
|
270
|
+
"chunk_size": len(chunk),
|
271
|
+
**metadata,
|
272
|
+
},
|
273
|
+
)
|
274
|
+
for i, chunk in enumerate(chunks)
|
275
|
+
]
|
276
|
+
|
260
277
|
return [
|
261
278
|
{
|
262
279
|
"chunk_content": chunk,
|
lionagi/libs/file/process.py
CHANGED
@@ -6,7 +6,9 @@ import logging
|
|
6
6
|
from collections.abc import Callable
|
7
7
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
8
8
|
from pathlib import Path
|
9
|
-
from typing import Any
|
9
|
+
from typing import Any, Literal
|
10
|
+
|
11
|
+
from lionagi.utils import lcall
|
10
12
|
|
11
13
|
from .chunk import chunk_content
|
12
14
|
from .save import save_chunks
|
@@ -87,7 +89,7 @@ def dir_to_files(
|
|
87
89
|
|
88
90
|
def file_to_chunks(
|
89
91
|
file_path: str | Path,
|
90
|
-
|
92
|
+
chunk_by: Literal["chars", "tokens"] = "chars",
|
91
93
|
chunk_size: int = 1500,
|
92
94
|
overlap: float = 0.1,
|
93
95
|
threshold: int = 200,
|
@@ -97,6 +99,7 @@ def file_to_chunks(
|
|
97
99
|
verbose: bool = False,
|
98
100
|
timestamp: bool = True,
|
99
101
|
random_hash_digits: int = 4,
|
102
|
+
as_node: bool = False,
|
100
103
|
) -> list[dict[str, Any]]:
|
101
104
|
"""
|
102
105
|
Process a file and split its content into chunks.
|
@@ -124,9 +127,10 @@ def file_to_chunks(
|
|
124
127
|
ValueError: If there's an error processing the file.
|
125
128
|
"""
|
126
129
|
try:
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
+
if isinstance(file_path, str):
|
131
|
+
file_path = Path(file_path)
|
132
|
+
|
133
|
+
text = file_path.read_text(encoding=encoding)
|
130
134
|
|
131
135
|
metadata = {
|
132
136
|
"file_path": str(file_path),
|
@@ -136,14 +140,111 @@ def file_to_chunks(
|
|
136
140
|
}
|
137
141
|
|
138
142
|
chunks = chunk_content(
|
139
|
-
|
143
|
+
text,
|
144
|
+
chunk_by=chunk_by,
|
145
|
+
chunk_size=chunk_size,
|
146
|
+
overlap=overlap,
|
147
|
+
threshold=threshold,
|
148
|
+
metadata=metadata,
|
149
|
+
as_node=as_node,
|
140
150
|
)
|
141
151
|
|
142
152
|
if output_dir:
|
143
153
|
save_chunks(
|
144
|
-
chunks,
|
154
|
+
chunks=chunks,
|
155
|
+
output_dir=output_dir,
|
156
|
+
verbose=verbose,
|
157
|
+
timestamp=timestamp,
|
158
|
+
random_hash_digits=random_hash_digits,
|
145
159
|
)
|
146
160
|
|
147
161
|
return chunks
|
148
162
|
except Exception as e:
|
149
163
|
raise ValueError(f"Error processing file {file_path}: {e}") from e
|
164
|
+
|
165
|
+
|
166
|
+
def chunk(
|
167
|
+
url_or_path: str | Path,
|
168
|
+
*,
|
169
|
+
file_types: list[str] | None = None, # only local files
|
170
|
+
recursive: bool = False, # only local files
|
171
|
+
chunk_by: Literal["chars", "tokens"] = "chars",
|
172
|
+
chunk_size: int = 1500,
|
173
|
+
overlap: float = 0.1,
|
174
|
+
threshold: int = 200,
|
175
|
+
output_file: str | Path | None = None,
|
176
|
+
metadata: dict[str, Any] | None = None,
|
177
|
+
reader_tool: Callable = None,
|
178
|
+
):
|
179
|
+
if isinstance(url_or_path, str):
|
180
|
+
url_or_path = Path(url_or_path)
|
181
|
+
|
182
|
+
chunks = None
|
183
|
+
files = None
|
184
|
+
if url_or_path.exists():
|
185
|
+
if url_or_path.is_dir():
|
186
|
+
files = dir_to_files(
|
187
|
+
directory=url_or_path,
|
188
|
+
file_types=file_types,
|
189
|
+
recursive=recursive,
|
190
|
+
)
|
191
|
+
elif url_or_path.is_file():
|
192
|
+
files = [url_or_path]
|
193
|
+
else:
|
194
|
+
files = (
|
195
|
+
[str(url_or_path)]
|
196
|
+
if not isinstance(url_or_path, list)
|
197
|
+
else url_or_path
|
198
|
+
)
|
199
|
+
|
200
|
+
if reader_tool is None:
|
201
|
+
reader_tool = lambda x: x.read_text(encoding="utf-8")
|
202
|
+
|
203
|
+
if reader_tool == "docling":
|
204
|
+
from lionagi.libs.package.imports import check_import
|
205
|
+
|
206
|
+
DocumentConverter = check_import(
|
207
|
+
"docling",
|
208
|
+
module_name="document_converter",
|
209
|
+
import_name="DocumentConverter",
|
210
|
+
)
|
211
|
+
converter = DocumentConverter()
|
212
|
+
reader_tool = lambda x: converter.convert(
|
213
|
+
x
|
214
|
+
).document.export_to_markdown()
|
215
|
+
|
216
|
+
texts = lcall(files, reader_tool)
|
217
|
+
chunks = lcall(
|
218
|
+
texts,
|
219
|
+
chunk_content,
|
220
|
+
chunk_by=chunk_by,
|
221
|
+
chunk_size=chunk_size,
|
222
|
+
overlap=overlap,
|
223
|
+
threshold=threshold,
|
224
|
+
metadata=metadata,
|
225
|
+
as_node=True,
|
226
|
+
flatten=True,
|
227
|
+
)
|
228
|
+
if threshold:
|
229
|
+
chunks = [c for c in chunks if len(c.content) > threshold]
|
230
|
+
|
231
|
+
if output_file:
|
232
|
+
from lionagi.protocols.generic.pile import Pile
|
233
|
+
|
234
|
+
output_file = Path(output_file)
|
235
|
+
if output_file.suffix == ".csv":
|
236
|
+
p = Pile(chunks)
|
237
|
+
p.to_csv_file(output_file)
|
238
|
+
|
239
|
+
elif output_file.suffix == ".json":
|
240
|
+
p = Pile(chunks)
|
241
|
+
p.to_json_file(output_file, use_pd=True)
|
242
|
+
|
243
|
+
elif output_file.suffix in Pile.list_adapters():
|
244
|
+
p = Pile(chunks)
|
245
|
+
p.adapt_to(output_file.suffix, fp=output_file)
|
246
|
+
|
247
|
+
else:
|
248
|
+
raise ValueError(f"Unsupported output file format: {output_file}")
|
249
|
+
|
250
|
+
return chunks
|
lionagi/tools/types.py
CHANGED
lionagi/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.9.6"
|
1
|
+
__version__ = "0.9.8"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: lionagi
|
3
|
-
Version: 0.9.6
|
3
|
+
Version: 0.9.8
|
4
4
|
Summary: An Intelligence Operating System.
|
5
5
|
Author-email: HaiyangLi <quantocean.li@gmail.com>
|
6
6
|
License: Apache License
|
@@ -227,6 +227,13 @@ Requires-Dist: pillow>=10.0.0
|
|
227
227
|
Requires-Dist: pydantic>=2.0.0
|
228
228
|
Requires-Dist: python-dotenv>=1.0.1
|
229
229
|
Requires-Dist: tiktoken>=0.8.0
|
230
|
+
Provides-Extra: llms
|
231
|
+
Requires-Dist: litellm>=1.59.5; extra == 'llms'
|
232
|
+
Provides-Extra: ollama
|
233
|
+
Requires-Dist: ollama>=0.4.7; extra == 'ollama'
|
234
|
+
Requires-Dist: openai>=1.60.0; extra == 'ollama'
|
235
|
+
Provides-Extra: tools
|
236
|
+
Requires-Dist: docling>=2.15.1; extra == 'tools'
|
230
237
|
Description-Content-Type: text/markdown
|
231
238
|
|
232
239
|

|
@@ -358,6 +365,14 @@ analysis = await branch.communicate("Analyze these stats", imodel=sonnet)
|
|
358
365
|
|
359
366
|
Seamlessly route to different models in the same workflow.
|
360
367
|
|
368
|
+
### optional dependencies
|
369
|
+
|
370
|
+
```
|
371
|
+
pip install "lionagi[tools]"
|
372
|
+
pip install "lionagi[llms]"
|
373
|
+
pip install "lionagi[ollama]"
|
374
|
+
```
|
375
|
+
|
361
376
|
## Community & Contributing
|
362
377
|
|
363
378
|
We welcome issues, ideas, and pull requests:
|
@@ -4,14 +4,14 @@ lionagi/_errors.py,sha256=JlBTFJnRWtVYcRxKb7fWFiJHLbykl1E19mSJ8sXYVxg,455
|
|
4
4
|
lionagi/_types.py,sha256=9g7iytvSj3UjZxD-jL06_fxuNfgZyWT3Qnp0XYp1wQU,63
|
5
5
|
lionagi/settings.py,sha256=W52mM34E6jXF3GyqCFzVREKZrmnUqtZm_BVDsUiDI_s,1627
|
6
6
|
lionagi/utils.py,sha256=K36D9AAGiMPR4eM9tYoiVgvH-NdPPSeMQPls09s7keQ,73223
|
7
|
-
lionagi/version.py,sha256=
|
7
|
+
lionagi/version.py,sha256=GI3Cr15wC6XhdahtMoSOf62d2gYve5G_PiR_xx_RGKc,22
|
8
8
|
lionagi/libs/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
|
9
9
|
lionagi/libs/parse.py,sha256=JRS3bql0InHJqATnAatl-hQv4N--XXw4P77JHhTFnrc,1011
|
10
10
|
lionagi/libs/file/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
|
11
|
-
lionagi/libs/file/chunk.py,sha256=
|
11
|
+
lionagi/libs/file/chunk.py,sha256=_C-b3M6Fe1O8Mb8MAromNpFqkFx7vfpPZvzqV5MUlus,9282
|
12
12
|
lionagi/libs/file/file_ops.py,sha256=HBiIh1EljIJ5VTIXuyvJM0ppSs0YYOPUWmgDMJT634U,3430
|
13
13
|
lionagi/libs/file/params.py,sha256=SZ5DkoffWfxWudOAYCfCxpL8UIm-1UjeyTtploo-Lqs,5824
|
14
|
-
lionagi/libs/file/process.py,sha256=
|
14
|
+
lionagi/libs/file/process.py,sha256=bIe4AdQ7eT0NHSMn0_Ail_-ltlM21YWqUWiPGXEPuHU,8264
|
15
15
|
lionagi/libs/file/save.py,sha256=TCxVlKxFFnr3xZ-HAXPpTomQoyiVrp6nKRoj-bcQt4k,2863
|
16
16
|
lionagi/libs/nested/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
|
17
17
|
lionagi/libs/nested/flatten.py,sha256=sB4jxZRoaUbjak9RbIWVWNKz2hzkhQJPFffV_Ws1GA0,5479
|
@@ -195,7 +195,7 @@ lionagi/session/prompts.py,sha256=AhuHL19s0TijVZX3tMKUKMi6l88xeVdpkuEn2vJSRyU,32
|
|
195
195
|
lionagi/session/session.py,sha256=8SuNMiJX6IAW6Ou8aDK0LsVG7zcD5yd22sakMyrd3pw,8987
|
196
196
|
lionagi/tools/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
|
197
197
|
lionagi/tools/base.py,sha256=R5T8hliDfJwXitNcgs2RPogE3yYserRjfRAVzAY2kM4,349
|
198
|
-
lionagi/tools/types.py,sha256=
|
198
|
+
lionagi/tools/types.py,sha256=XtJLY0m-Yi_ZLWhm0KycayvqMCZd--HxfQ0x9vFUYDE,230
|
199
199
|
lionagi/tools/browser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
200
200
|
lionagi/tools/browser/providers/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
|
201
201
|
lionagi/tools/browser/providers/browser_use_.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
|
@@ -213,7 +213,7 @@ lionagi/tools/file/writer.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,
|
|
213
213
|
lionagi/tools/file/providers/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
|
214
214
|
lionagi/tools/file/providers/docling_.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
|
215
215
|
lionagi/tools/query/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
|
216
|
-
lionagi-0.9.
|
217
|
-
lionagi-0.9.
|
218
|
-
lionagi-0.9.
|
219
|
-
lionagi-0.9.
|
216
|
+
lionagi-0.9.8.dist-info/METADATA,sha256=Ywnd678RVNXSuGJYVwjOs9LAO8Ec6PXR8zY8nM0oemc,18436
|
217
|
+
lionagi-0.9.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
218
|
+
lionagi-0.9.8.dist-info/licenses/LICENSE,sha256=VXFWsdoN5AAknBCgFqQNgPWYx7OPp-PFEP961zGdOjc,11288
|
219
|
+
lionagi-0.9.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|