lionagi 0.9.6__py3-none-any.whl → 0.9.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -218,6 +218,7 @@ def chunk_content(
218
218
  threshold: int = 256,
219
219
  metadata: dict[str, Any] = {},
220
220
  return_tokens: bool = False,
221
+ as_node: bool = False,
221
222
  **kwargs: Any,
222
223
  ) -> list[dict[str, Any]]:
223
224
  """
@@ -257,6 +258,22 @@ def chunk_content(
257
258
  threshold=threshold,
258
259
  )
259
260
 
261
+ if as_node:
262
+ from lionagi.protocols.graph.node import Node
263
+
264
+ return [
265
+ Node(
266
+ content=chunk,
267
+ metadata={
268
+ "chunk_id": i + 1,
269
+ "total_chunks": len(chunks),
270
+ "chunk_size": len(chunk),
271
+ **metadata,
272
+ },
273
+ )
274
+ for i, chunk in enumerate(chunks)
275
+ ]
276
+
260
277
  return [
261
278
  {
262
279
  "chunk_content": chunk,
@@ -6,7 +6,9 @@ import logging
6
6
  from collections.abc import Callable
7
7
  from concurrent.futures import ThreadPoolExecutor, as_completed
8
8
  from pathlib import Path
9
- from typing import Any
9
+ from typing import Any, Literal
10
+
11
+ from lionagi.utils import lcall
10
12
 
11
13
  from .chunk import chunk_content
12
14
  from .save import save_chunks
@@ -87,7 +89,7 @@ def dir_to_files(
87
89
 
88
90
  def file_to_chunks(
89
91
  file_path: str | Path,
90
- chunk_func: Callable[[str, int, float, int], list[str]],
92
+ chunk_by: Literal["chars", "tokens"] = "chars",
91
93
  chunk_size: int = 1500,
92
94
  overlap: float = 0.1,
93
95
  threshold: int = 200,
@@ -97,6 +99,7 @@ def file_to_chunks(
97
99
  verbose: bool = False,
98
100
  timestamp: bool = True,
99
101
  random_hash_digits: int = 4,
102
+ as_node: bool = False,
100
103
  ) -> list[dict[str, Any]]:
101
104
  """
102
105
  Process a file and split its content into chunks.
@@ -124,9 +127,10 @@ def file_to_chunks(
124
127
  ValueError: If there's an error processing the file.
125
128
  """
126
129
  try:
127
- file_path = Path(file_path)
128
- with open(file_path, encoding=encoding) as f:
129
- content = f.read()
130
+ if isinstance(file_path, str):
131
+ file_path = Path(file_path)
132
+
133
+ text = file_path.read_text(encoding=encoding)
130
134
 
131
135
  metadata = {
132
136
  "file_path": str(file_path),
@@ -136,14 +140,111 @@ def file_to_chunks(
136
140
  }
137
141
 
138
142
  chunks = chunk_content(
139
- content, chunk_func, chunk_size, overlap, threshold, metadata
143
+ text,
144
+ chunk_by=chunk_by,
145
+ chunk_size=chunk_size,
146
+ overlap=overlap,
147
+ threshold=threshold,
148
+ metadata=metadata,
149
+ as_node=as_node,
140
150
  )
141
151
 
142
152
  if output_dir:
143
153
  save_chunks(
144
- chunks, output_dir, verbose, timestamp, random_hash_digits
154
+ chunks=chunks,
155
+ output_dir=output_dir,
156
+ verbose=verbose,
157
+ timestamp=timestamp,
158
+ random_hash_digits=random_hash_digits,
145
159
  )
146
160
 
147
161
  return chunks
148
162
  except Exception as e:
149
163
  raise ValueError(f"Error processing file {file_path}: {e}") from e
164
+
165
+
166
def chunk(
    url_or_path: str | Path,
    *,
    file_types: list[str] | None = None,  # only local files
    recursive: bool = False,  # only local files
    chunk_by: Literal["chars", "tokens"] = "chars",
    chunk_size: int = 1500,
    overlap: float = 0.1,
    threshold: int = 200,
    output_file: str | Path | None = None,
    metadata: dict[str, Any] | None = None,
    reader_tool: Callable | str | None = None,
):
    """
    Read one or more sources, split their text into chunks, and optionally
    persist the chunks to a file.

    Args:
        url_or_path: A local file, a local directory, or any other source
            string (e.g. a URL). Sources that do not exist on disk are
            handed to ``reader_tool`` unchanged, as a string.
        file_types: Forwarded to ``dir_to_files`` when ``url_or_path`` is a
            directory; ignored otherwise.
        recursive: Forwarded to ``dir_to_files`` when ``url_or_path`` is a
            directory; ignored otherwise.
        chunk_by: Forwarded to ``chunk_content``.
        chunk_size: Forwarded to ``chunk_content``.
        overlap: Forwarded to ``chunk_content``.
        threshold: Forwarded to ``chunk_content``. When truthy, chunks whose
            ``len(content)`` is not greater than this value are also
            dropped from the result.
        output_file: When given, the chunks are written to this path. The
            suffix selects the writer: ``.csv``, ``.json``, or any suffix
            listed by ``Pile.list_adapters()``.
        metadata: Extra metadata forwarded to ``chunk_content``. ``None``
            is treated as an empty dict.
        reader_tool: How each source is turned into text. ``None`` reads
            the source as a UTF-8 text file, ``"docling"`` converts it to
            markdown with docling's ``DocumentConverter``, and any other
            value is called with the source and must return its text.

    Returns:
        The chunks produced by ``chunk_content`` with ``as_node=True``,
        after the threshold filter.

    Raises:
        ValueError: If ``output_file`` has an unsupported suffix.
    """
    # Keep the caller's value untouched: round-tripping a URL through Path
    # collapses "//" into "/" and would hand a corrupted URL to the reader.
    source = url_or_path
    path = Path(source) if isinstance(source, str) else source

    files = None
    if path.exists():
        if path.is_dir():
            files = dir_to_files(
                directory=path,
                file_types=file_types,
                recursive=recursive,
            )
        elif path.is_file():
            files = [path]
    else:
        # Not on disk: pass the source through as a string for the reader.
        files = [str(source)]

    if reader_tool is None:

        def read(src):
            # Path() accepts both str and Path, so a non-existent source
            # fails with FileNotFoundError rather than AttributeError.
            return Path(src).read_text(encoding="utf-8")

    elif reader_tool == "docling":
        from lionagi.libs.package.imports import check_import

        DocumentConverter = check_import(
            "docling",
            module_name="document_converter",
            import_name="DocumentConverter",
        )
        converter = DocumentConverter()

        def read(src):
            return converter.convert(src).document.export_to_markdown()

    else:
        read = reader_tool

    # chunk_content's as_node branch spreads ``**metadata`` into each node's
    # metadata, which raises TypeError for None, so normalize it here.
    if metadata is None:
        metadata = {}

    texts = lcall(files, read)
    chunks = lcall(
        texts,
        chunk_content,
        chunk_by=chunk_by,
        chunk_size=chunk_size,
        overlap=overlap,
        threshold=threshold,
        metadata=metadata,
        as_node=True,
        flatten=True,
    )
    if threshold:
        chunks = [c for c in chunks if len(c.content) > threshold]

    if output_file:
        from lionagi.protocols.generic.pile import Pile

        output_file = Path(output_file)
        suffix = output_file.suffix
        if suffix == ".csv":
            Pile(chunks).to_csv_file(output_file)
        elif suffix == ".json":
            Pile(chunks).to_json_file(output_file, use_pd=True)
        elif suffix in Pile.list_adapters():
            Pile(chunks).adapt_to(suffix, fp=output_file)
        else:
            raise ValueError(f"Unsupported output file format: {output_file}")

    return chunks
lionagi/tools/types.py CHANGED
@@ -3,6 +3,8 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
 
6
+ from lionagi.libs.file.process import chunk
7
+
6
8
  from .file.reader import ReaderTool
7
9
 
8
- __all__ = ("ReaderTool",)
10
+ __all__ = ("ReaderTool", "chunk")
lionagi/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.9.6"
1
+ __version__ = "0.9.8"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lionagi
3
- Version: 0.9.6
3
+ Version: 0.9.8
4
4
  Summary: An Intelligence Operating System.
5
5
  Author-email: HaiyangLi <quantocean.li@gmail.com>
6
6
  License: Apache License
@@ -227,6 +227,13 @@ Requires-Dist: pillow>=10.0.0
227
227
  Requires-Dist: pydantic>=2.0.0
228
228
  Requires-Dist: python-dotenv>=1.0.1
229
229
  Requires-Dist: tiktoken>=0.8.0
230
+ Provides-Extra: llms
231
+ Requires-Dist: litellm>=1.59.5; extra == 'llms'
232
+ Provides-Extra: ollama
233
+ Requires-Dist: ollama>=0.4.7; extra == 'ollama'
234
+ Requires-Dist: openai>=1.60.0; extra == 'ollama'
235
+ Provides-Extra: tools
236
+ Requires-Dist: docling>=2.15.1; extra == 'tools'
230
237
  Description-Content-Type: text/markdown
231
238
 
232
239
  ![PyPI - Version](https://img.shields.io/pypi/v/lionagi?labelColor=233476aa&color=231fc935)
@@ -358,6 +365,14 @@ analysis = await branch.communicate("Analyze these stats", imodel=sonnet)
358
365
 
359
366
  Seamlessly route to different models in the same workflow.
360
367
 
368
+ ### optional dependencies
369
+
370
+ ```
371
+ pip install "lionagi[tools]"
372
+ pip install "lionagi[llms]"
373
+ pip install "lionagi[ollama]"
374
+ ```
375
+
361
376
  ## Community & Contributing
362
377
 
363
378
  We welcome issues, ideas, and pull requests:
@@ -4,14 +4,14 @@ lionagi/_errors.py,sha256=JlBTFJnRWtVYcRxKb7fWFiJHLbykl1E19mSJ8sXYVxg,455
4
4
  lionagi/_types.py,sha256=9g7iytvSj3UjZxD-jL06_fxuNfgZyWT3Qnp0XYp1wQU,63
5
5
  lionagi/settings.py,sha256=W52mM34E6jXF3GyqCFzVREKZrmnUqtZm_BVDsUiDI_s,1627
6
6
  lionagi/utils.py,sha256=K36D9AAGiMPR4eM9tYoiVgvH-NdPPSeMQPls09s7keQ,73223
7
- lionagi/version.py,sha256=IgVHjr-TeioZYLJSkvpT80LLGi6U3ONzR1cfYfd5XNQ,22
7
+ lionagi/version.py,sha256=GI3Cr15wC6XhdahtMoSOf62d2gYve5G_PiR_xx_RGKc,22
8
8
  lionagi/libs/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
9
9
  lionagi/libs/parse.py,sha256=JRS3bql0InHJqATnAatl-hQv4N--XXw4P77JHhTFnrc,1011
10
10
  lionagi/libs/file/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
11
- lionagi/libs/file/chunk.py,sha256=UiN92z52tlK2fUlnOYpAzTrjZV8sVDmPTnutnWkWH8Y,8835
11
+ lionagi/libs/file/chunk.py,sha256=_C-b3M6Fe1O8Mb8MAromNpFqkFx7vfpPZvzqV5MUlus,9282
12
12
  lionagi/libs/file/file_ops.py,sha256=HBiIh1EljIJ5VTIXuyvJM0ppSs0YYOPUWmgDMJT634U,3430
13
13
  lionagi/libs/file/params.py,sha256=SZ5DkoffWfxWudOAYCfCxpL8UIm-1UjeyTtploo-Lqs,5824
14
- lionagi/libs/file/process.py,sha256=1nILukF_JXr03hVPQKmqOTwFYv9FhgZirLQuZG2u4KY,5486
14
+ lionagi/libs/file/process.py,sha256=bIe4AdQ7eT0NHSMn0_Ail_-ltlM21YWqUWiPGXEPuHU,8264
15
15
  lionagi/libs/file/save.py,sha256=TCxVlKxFFnr3xZ-HAXPpTomQoyiVrp6nKRoj-bcQt4k,2863
16
16
  lionagi/libs/nested/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
17
17
  lionagi/libs/nested/flatten.py,sha256=sB4jxZRoaUbjak9RbIWVWNKz2hzkhQJPFffV_Ws1GA0,5479
@@ -195,7 +195,7 @@ lionagi/session/prompts.py,sha256=AhuHL19s0TijVZX3tMKUKMi6l88xeVdpkuEn2vJSRyU,32
195
195
  lionagi/session/session.py,sha256=8SuNMiJX6IAW6Ou8aDK0LsVG7zcD5yd22sakMyrd3pw,8987
196
196
  lionagi/tools/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
197
197
  lionagi/tools/base.py,sha256=R5T8hliDfJwXitNcgs2RPogE3yYserRjfRAVzAY2kM4,349
198
- lionagi/tools/types.py,sha256=f4TgF9LJ86P5dHIXNAHMSnLDnjto45M8Q_UJlyI-G3Y,177
198
+ lionagi/tools/types.py,sha256=XtJLY0m-Yi_ZLWhm0KycayvqMCZd--HxfQ0x9vFUYDE,230
199
199
  lionagi/tools/browser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
200
200
  lionagi/tools/browser/providers/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
201
201
  lionagi/tools/browser/providers/browser_use_.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
@@ -213,7 +213,7 @@ lionagi/tools/file/writer.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,
213
213
  lionagi/tools/file/providers/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
214
214
  lionagi/tools/file/providers/docling_.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
215
215
  lionagi/tools/query/__init__.py,sha256=5y5joOZzfFWERl75auAcNcKC3lImVJ5ZZGvvHZUFCJM,112
216
- lionagi-0.9.6.dist-info/METADATA,sha256=WGUAokWhfSlKKpws48WJRgBmwQJ1MTBAQd3o_EVdPdg,18053
217
- lionagi-0.9.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
218
- lionagi-0.9.6.dist-info/licenses/LICENSE,sha256=VXFWsdoN5AAknBCgFqQNgPWYx7OPp-PFEP961zGdOjc,11288
219
- lionagi-0.9.6.dist-info/RECORD,,
216
+ lionagi-0.9.8.dist-info/METADATA,sha256=Ywnd678RVNXSuGJYVwjOs9LAO8Ec6PXR8zY8nM0oemc,18436
217
+ lionagi-0.9.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
218
+ lionagi-0.9.8.dist-info/licenses/LICENSE,sha256=VXFWsdoN5AAknBCgFqQNgPWYx7OPp-PFEP961zGdOjc,11288
219
+ lionagi-0.9.8.dist-info/RECORD,,