lionagi 0.15.8__py3-none-any.whl → 0.15.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +4 -6
- lionagi/adapters/async_postgres_adapter.py +55 -319
- lionagi/libs/file/_utils.py +10 -0
- lionagi/libs/file/process.py +16 -13
- lionagi/libs/file/save.py +3 -2
- lionagi/libs/schema/load_pydantic_model_from_schema.py +2 -1
- lionagi/libs/unstructured/pdf_to_image.py +2 -2
- lionagi/libs/validate/string_similarity.py +4 -4
- lionagi/ln/__init__.py +38 -0
- lionagi/ln/_extract_json.py +60 -0
- lionagi/ln/_fuzzy_json.py +116 -0
- lionagi/ln/_json_dump.py +75 -0
- lionagi/ln/_models.py +0 -1
- lionagi/models/field_model.py +8 -6
- lionagi/operations/__init__.py +3 -0
- lionagi/operations/builder.py +10 -0
- lionagi/protocols/generic/element.py +56 -53
- lionagi/protocols/generic/event.py +46 -67
- lionagi/protocols/generic/pile.py +56 -1
- lionagi/protocols/generic/progression.py +11 -11
- lionagi/protocols/graph/_utils.py +22 -0
- lionagi/protocols/graph/graph.py +17 -21
- lionagi/protocols/graph/node.py +23 -5
- lionagi/protocols/messages/manager.py +41 -45
- lionagi/protocols/messages/message.py +3 -1
- lionagi/protocols/operatives/step.py +2 -19
- lionagi/protocols/types.py +1 -2
- lionagi/service/connections/providers/claude_code_.py +9 -7
- lionagi/service/third_party/claude_code.py +3 -2
- lionagi/session/session.py +14 -2
- lionagi/tools/file/reader.py +5 -6
- lionagi/utils.py +8 -385
- lionagi/version.py +1 -1
- {lionagi-0.15.8.dist-info → lionagi-0.15.11.dist-info}/METADATA +2 -2
- {lionagi-0.15.8.dist-info → lionagi-0.15.11.dist-info}/RECORD +37 -37
- lionagi/libs/package/__init__.py +0 -3
- lionagi/libs/package/imports.py +0 -21
- lionagi/libs/package/management.py +0 -62
- lionagi/libs/package/params.py +0 -30
- lionagi/libs/package/system.py +0 -22
- {lionagi-0.15.8.dist-info → lionagi-0.15.11.dist-info}/WHEEL +0 -0
- {lionagi-0.15.8.dist-info → lionagi-0.15.11.dist-info}/licenses/LICENSE +0 -0
lionagi/__init__.py
CHANGED
@@ -7,9 +7,9 @@ import logging
 from pydantic import BaseModel, Field

 from . import _types as types
-from .
-from .operations import OperationGraphBuilder as Builder
-from .operations import
+from . import ln as ln
+from .operations.builder import OperationGraphBuilder as Builder
+from .operations.node import Operation
 from .service.imodel import iModel
 from .session.session import Branch, Session
 from .version import __version__
@@ -28,7 +28,5 @@ __all__ = (
     "logger",
     "Builder",
     "Operation",
-    "
-    "flow",
-    "plan",
+    "ln",
 )
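Net effect of these two hunks: `Builder` and `Operation` now resolve through explicit submodule paths, the old `flow`/`plan` top-level exports are gone, and the new `ln` utility namespace is re-exported at the top level. A minimal sketch of the resulting import surface, using only names visible in this diff (the `json_dumps` return type is an assumption):

from lionagi import Branch, Builder, Operation, iModel, ln

# Builder still aliases OperationGraphBuilder (now imported from
# lionagi.operations.builder); Operation comes from lionagi.operations.node.
serialized = ln.json_dumps({"hello": "world"})  # assumed to return a JSON string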
lionagi/adapters/async_postgres_adapter.py
CHANGED
@@ -1,16 +1,16 @@
 """
 Clean LionAGI async PostgreSQL adapter for integration into lionagi core.

-This adapter
-serialization while providing seamless async persistence.
+This adapter leverages pydapter v1.0.2+ CRUD operations.
 """

 from __future__ import annotations

-from
-from typing import Any, ClassVar, TypeVar
+from typing import ClassVar, TypeVar

-
+import sqlalchemy as sa
+from pydapter.extras.async_postgres_ import AsyncPostgresAdapter
+from sqlalchemy.ext.asyncio import create_async_engine

 from ._utils import check_async_postgres_available

@@ -19,61 +19,20 @@ _ASYNC_POSTGRES_AVAILABLE = check_async_postgres_available()
 if isinstance(_ASYNC_POSTGRES_AVAILABLE, ImportError):
     raise _ASYNC_POSTGRES_AVAILABLE

-import sqlalchemy as sa
-from pydapter.extras.async_postgres_ import AsyncPostgresAdapter
-from sqlalchemy.ext.asyncio import create_async_engine
-
 T = TypeVar("T")


 class LionAGIAsyncPostgresAdapter(AsyncPostgresAdapter[T]):
     """
-
-
-    Solves core issues:
-    1. SQLAlchemy async table inspection ("Inspection on an AsyncConnection is currently not supported")
-    2. LionAGI float timestamp serialization (created_at as float → datetime)
-    3. Datetime objects in JSON content (datetime → ISO strings)
-    4. Automatic metadata field mapping via LionAGIPostgresAdapter
+    Zero-config async PostgreSQL adapter for lionagi Nodes.

-
-    -
-    -
-    - Cross-database compatibility (PostgreSQL/SQLite)
-    - Handles all lionagi data serialization edge cases
+    - Auto-creates tables with lionagi schema
+    - Changes default adapt_meth to "to_dict" for lionagi Elements
+    - Everything else handled by parent AsyncPostgresAdapter
     """

     obj_key: ClassVar[str] = "lionagi_async_pg"

-    @classmethod
-    def _table(cls, meta: sa.MetaData, name: str) -> sa.Table:
-        """
-        Override parent's _table to avoid async inspection issues.
-
-        Uses JSON for SQLite compatibility, JSONB for PostgreSQL performance.
-        """
-        # Determine JSON type based on database (check connection URL if available)
-        json_type = sa.JSON  # Default safe option that works everywhere
-
-        # Try to detect PostgreSQL from the connection
-        if hasattr(meta, "bind") and meta.bind:
-            engine_url = str(meta.bind.engine.url)
-            if "postgresql" in engine_url and "sqlite" not in engine_url:
-                json_type = sa.dialects.postgresql.JSONB
-
-        return sa.Table(
-            name,
-            meta,
-            sa.Column("id", sa.String, primary_key=True),
-            sa.Column("content", json_type),
-            sa.Column(
-                "node_metadata", json_type
-            ),  # mapped from lionagi metadata
-            sa.Column("created_at", sa.DateTime),
-            sa.Column("embedding", json_type),
-            # Note: No autoload_with to avoid async inspection error
-        )
-
     @classmethod
     async def to_obj(
         cls,
@@ -81,282 +40,59 @@ class LionAGIAsyncPostgresAdapter(AsyncPostgresAdapter[T]):
         /,
         *,
         many: bool = True,
-        adapt_meth: str = "
+        adapt_meth: str = "as_jsonable",  # Default to to_dict for lionagi
         **kw,
     ):
-        """
-
-
-
-
-
-
-
-
-
-        table = kw.get("table")
-
-        if not engine_url or not table:
-            raise ValueError(
-                "Missing required 'dsn' and 'table' parameters"
-            )
-
-        # Ensure table exists with lionagi schema
-        await cls._ensure_table_exists(engine_url, table)
-
-        # Prepare data with lionagi fixes
-        items = subj if isinstance(subj, list) else [subj]
-        if not items:
-            return {"inserted_count": 0}
-
-        # Convert nodes to database rows with serialization fixes
-        rows = []
-        for item in items:
-            data = getattr(item, adapt_meth)()
-            fixed_data = cls._fix_lionagi_data(data)
-            rows.append(fixed_data)
-
-        # Execute async insert
-        engine = create_async_engine(engine_url, future=True)
-        async with engine.begin() as conn:
-            meta = sa.MetaData()
-            meta.bind = conn
-            table_obj = cls._table(meta, table)
-            await conn.execute(sa.insert(table_obj), rows)
-
-        return {"inserted_count": len(rows)}
-
-        except Exception as e:
-            raise QueryError(
-                f"Error in lionagi async adapter: {e}",
-                adapter="lionagi_async_pg",
-            ) from e
+        """Write lionagi Node(s) to PostgreSQL with CRUD support."""
+        # Auto-create table if needed
+        if table := kw.get("table"):
+            if engine_url := (kw.get("dsn") or kw.get("engine_url")):
+                await cls._ensure_table(engine_url, table)
+            elif engine := kw.get("engine"):
+                await cls._ensure_table(engine, table)
+
+        return await super().to_obj(
+            subj, many=many, adapt_meth=adapt_meth, **kw
+        )

     @classmethod
-    async def
+    async def _ensure_table(cls, engine_or_url, table_name: str):
         """Create table with lionagi schema if it doesn't exist."""
-
-
-
-
-
-
-        # Use the same _table method to ensure consistency
-        table = cls._table(meta, table_name)
-
-        # Create just this table
-        await conn.run_sync(table.create, checkfirst=True)
-
-        except Exception:
-            # Table might already exist, continue
-            pass
-
-    @classmethod
-    def _fix_lionagi_data(cls, data: dict) -> dict:
-        """
-        Fix lionagi Node data for database storage.
-
-        Handles:
-        1. Float timestamp → datetime for created_at
-        2. Datetime objects in content → ISO strings
-        """
-        # Fix created_at timestamp
-        if "created_at" in data and isinstance(
-            data["created_at"], (int, float)
-        ):
-            data["created_at"] = datetime.fromtimestamp(data["created_at"])
-
-        # Fix datetime objects in content
-        if "content" in data and isinstance(data["content"], dict):
-            data["content"] = cls._serialize_datetime_recursive(
-                data["content"]
-            )
-
-        return data
-
-    @classmethod
-    def _serialize_datetime_recursive(cls, obj: Any) -> Any:
-        """Recursively convert datetime objects to ISO strings."""
-        if isinstance(obj, datetime):
-            return obj.isoformat()
-        elif isinstance(obj, dict):
-            return {
-                k: cls._serialize_datetime_recursive(v) for k, v in obj.items()
-            }
-        elif isinstance(obj, list):
-            return [cls._serialize_datetime_recursive(item) for item in obj]
+        # Handle both engine and URL
+        should_dispose = None
+        if isinstance(engine_or_url, str):
+            engine = create_async_engine(engine_or_url, future=True)
+            should_dispose = True
         else:
-
-
-    @classmethod
-    async def from_obj(
-        cls,
-        node_cls: type[T],
-        obj: Any,
-        /,
-        *,
-        adapt_meth: str = "from_dict",
-        many: bool = True,
-        **kw,
-    ) -> T | list[T] | None:
-        """
-        Read lionagi Node(s) from database with automatic data reconstruction.
-
-        Handles:
-        1. Database querying with filters
-        2. Reverse metadata field mapping (node_metadata → metadata)
-        3. Reverse data serialization (ISO strings → datetime objects)
-        4. Node object reconstruction
+            engine = engine_or_url
+            should_dispose = False

-        Args:
-            node_cls: The Node class to instantiate
-            obj: Database connection parameters (dict with dsn, table, etc.)
-            adapt_meth: Adaptation method (unused but required by pydapter)
-            many: Whether to return list or single object
-            **kw: Additional query parameters (where, limit, order_by)
-
-        Returns:
-            Single Node, list of Nodes, or None if no results found
-        """
         try:
-            # Merge obj parameters with kw parameters
-            if isinstance(obj, dict):
-                params = {**obj, **kw}
-            else:
-                params = kw
-
-            # Validate required parameters
-            engine_url = params.get("dsn") or params.get("engine_url")
-            table = params.get("table")
-
-            if not engine_url or not table:
-                raise ValueError(
-                    "Missing required 'dsn' and 'table' parameters"
-                )
-
-            # Build query
-            engine = create_async_engine(engine_url, future=True)
             async with engine.begin() as conn:
-
-
-
-
-
-
-
-                # Add WHERE conditions if provided
-                where_conditions = params.get("where")
-                if where_conditions:
-                    if isinstance(where_conditions, dict):
-                        # Convert dict to column conditions
-                        for col_name, value in where_conditions.items():
-                            if hasattr(table_obj.c, col_name):
-                                query = query.where(
-                                    getattr(table_obj.c, col_name) == value
-                                )
-                    else:
-                        # Assume it's already a SQLAlchemy condition
-                        query = query.where(where_conditions)
-
-                # Add ordering if provided
-                order_by = params.get("order_by")
-                if order_by:
-                    if isinstance(order_by, str):
-                        if hasattr(table_obj.c, order_by):
-                            query = query.order_by(
-                                getattr(table_obj.c, order_by)
-                            )
-                    else:
-                        query = query.order_by(order_by)
-
-                # Add limit if provided
-                limit = params.get("limit")
-                if limit:
-                    query = query.limit(limit)
-
-                # Execute query
-                result = await conn.execute(query)
-                rows = result.fetchall()
-
-                # Use many parameter from params if provided, otherwise use method parameter
-                return_many = params.get("many", many)
-
-                if not rows:
-                    return [] if return_many else None
-
-                # Convert database rows back to Node objects
-                nodes = []
-                for row in rows:
-                    # Convert row to dict
-                    row_dict = dict(row._mapping)
-
-                    # Apply reverse lionagi data transformations
-                    node_data = cls._reverse_lionagi_data(row_dict)
-
-                    # Create Node instance
-                    node = node_cls(**node_data)
-                    nodes.append(node)
-
-                if return_many:
-                    return nodes
-                else:
-                    return nodes[-1] if nodes else None
-
-        except Exception as e:
-            raise QueryError(
-                f"Error reading from lionagi async adapter: {e}",
-                adapter="lionagi_async_pg",
-            ) from e
-
-    @classmethod
-    def _reverse_lionagi_data(cls, row_data: dict) -> dict:
-        """
-        Reverse lionagi data transformations from database storage.
-
-        Handles:
-        1. Database field mapping (node_metadata → metadata)
-        2. ISO string → datetime objects in content
-        3. Proper lionagi Node field structure
-        """
-        # Create a copy to avoid modifying original
-        data = row_data.copy()
-
-        # Reverse field mapping: node_metadata → metadata
-        if "node_metadata" in data:
-            data["metadata"] = data.pop("node_metadata")
-
-        # Reverse datetime serialization in content
-        if "content" in data and isinstance(data["content"], dict):
-            data["content"] = cls._deserialize_datetime_recursive(
-                data["content"]
-            )
-
-        return data
+                # Determine JSON type based on database
+                engine_url = str(engine.url)
+                json_type = (
+                    sa.dialects.postgresql.JSONB
+                    if "postgresql" in engine_url
+                    else sa.JSON
+                )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                k: cls._deserialize_datetime_recursive(v)
-                for k, v in obj.items()
-            }
-        elif isinstance(obj, list):
-            return [cls._deserialize_datetime_recursive(item) for item in obj]
-        else:
-            return obj
+                # Create table with lionagi schema
+                await conn.run_sync(
+                    lambda sync_conn: sa.Table(
+                        table_name,
+                        sa.MetaData(),
+                        sa.Column("id", sa.String, primary_key=True),
+                        sa.Column("content", json_type),
+                        sa.Column(
+                            "metadata", json_type
+                        ),  # Use metadata directly now
+                        sa.Column(
+                            "created_at", sa.Float
+                        ),  # Stored as float timestamp
+                        sa.Column("embedding", json_type, nullable=True),
+                    ).create(sync_conn, checkfirst=True)
+                )
+        finally:
+            if should_dispose:
+                await engine.dispose()
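Taken together, the rewrite keeps only table bootstrapping locally and defers serialization and the actual insert to pydapter. A hedged sketch of a write through the new adapter, assuming pydapter and a reachable asyncpg database; the DSN, table name, and Node content below are placeholders, not values from this diff:

import asyncio

from lionagi.adapters.async_postgres_adapter import LionAGIAsyncPostgresAdapter
from lionagi.protocols.graph.node import Node


async def main() -> None:
    node = Node(content={"text": "hello"})  # content shape is illustrative
    # kw carries dsn/table; to_obj auto-creates the table via _ensure_table,
    # then hands the insert to pydapter's AsyncPostgresAdapter.to_obj.
    await LionAGIAsyncPostgresAdapter.to_obj(
        node,
        dsn="postgresql+asyncpg://user:pass@localhost:5432/db",  # placeholder DSN
        table="lionagi_nodes",  # placeholder table name
    )


asyncio.run(main())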
lionagi/libs/file/_utils.py
ADDED
@@ -0,0 +1,10 @@
+def check_docling_available():
+    try:
+        from docling.document_converter import DocumentConverter  # noqa: F401
+
+        return True
+    except Exception:
+        return ImportError(
+            "The 'docling' package is required for this feature. "
+            "Please install it via 'pip install lionagi[reader]'."
+        )
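Note the helper returns either `True` or an un-raised `ImportError` instance instead of raising at import time, so callers can defer the failure until docling is actually needed; `process.py` below consumes it exactly this way. A minimal usage sketch of that pattern:

from lionagi.libs.file._utils import check_docling_available

_HAS_DOCLING = check_docling_available()  # True, or an ImportError instance


def convert_with_docling(path: str) -> str:
    # Surface the install hint only when the docling code path is taken.
    if _HAS_DOCLING is not True:
        raise _HAS_DOCLING
    from docling.document_converter import DocumentConverter

    return DocumentConverter().convert(path).document.export_to_markdown()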
lionagi/libs/file/process.py
CHANGED
@@ -8,11 +8,14 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from typing import Any, Literal

-from lionagi
+from lionagi import ln

+from ._utils import check_docling_available
 from .chunk import chunk_content
 from .save import save_chunks

+_HAS_DOCLING = check_docling_available()
+

 def dir_to_files(
     directory: str | Path,
@@ -206,24 +209,24 @@ def chunk(
         reader_tool = lambda x: Path(x).read_text(encoding="utf-8")

     if reader_tool == "docling":
-
+        if _HAS_DOCLING is not True:
+            raise _HAS_DOCLING

-
-
-            module_name="document_converter",
-            import_name="DocumentConverter",
+        from docling.document_converter import (  # noqa: F401
+            DocumentConverter,
         )
+
         converter = DocumentConverter()
         reader_tool = lambda x: converter.convert(
             x
         ).document.export_to_markdown()

-        texts = lcall(files, reader_tool)
+        texts = ln.lcall(files, reader_tool)

     else:
         texts = [text]

-    chunks = lcall(
+    chunks = ln.lcall(
         texts,
         chunk_content,
         chunk_by=chunk_by,
@@ -244,15 +247,15 @@ def chunk(
         output_file = Path(output_file)
         if output_file.suffix == ".csv":
             p = Pile(chunks)
-            p.
+            p.dump(output_file, "csv")

-
+        if output_file.suffix == "json":
             p = Pile(chunks)
-            p.
+            p.dump(output_file, "json")

-
+        if output_file.suffix == ".parquet":
             p = Pile(chunks)
-            p.
+            p.dump(output_file, "parquet")

         else:
             raise ValueError(f"Unsupported output file format: {output_file}")
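`ln.lcall` replaces the old top-level `lcall` import here; in this usage it is a plain list-map over the input files. A small sketch of the call shape used in `chunk()` above (the file paths are placeholders):

from pathlib import Path

from lionagi import ln

files = ["notes/a.md", "notes/b.md"]  # placeholder paths
reader_tool = lambda x: Path(x).read_text(encoding="utf-8")

# Mirrors `texts = ln.lcall(files, reader_tool)` in chunk(): apply the reader
# to each file and collect the results as a list.
texts = ln.lcall(files, reader_tool)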
lionagi/libs/file/save.py
CHANGED
@@ -2,7 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0

-import json
 import logging
 from pathlib import Path
 from typing import Any
@@ -78,6 +77,8 @@ def save_chunks(
     random_hash_digits: int,
 ) -> None:
     """Helper function to save chunks to files."""
+    from lionagi import ln
+
     output_path = Path(output_dir)
     for i, chunk in enumerate(chunks):
         file_path = create_path(
@@ -88,7 +89,7 @@
             random_hash_digits=random_hash_digits,
         )
         save_to_file(
-
+            ln.json_dumps(chunk),
             directory=file_path.parent,
             filename=file_path.name,
             verbose=verbose,
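`save_chunks` now serializes each chunk through the new `ln.json_dumps` (backed by `_json_dump.py` in this release) rather than the stdlib `json` import dropped above. A sketch of the call, assuming it returns a JSON string suitable for `save_to_file`:

from lionagi import ln

chunk = {"chunk_id": 1, "chunk_content": "some text"}  # illustrative shape
serialized = ln.json_dumps(chunk)  # assumed to return a str, not orjson-style bytes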
lionagi/libs/schema/load_pydantic_model_from_schema.py
CHANGED
@@ -11,6 +11,7 @@ from typing import Any, TypeVar

 from pydantic import BaseModel, PydanticUserError

+from lionagi import ln
 from lionagi.utils import is_import_installed

 _HAS_DATAMODEL_CODE_GENERATOR = is_import_installed("datamodel_code_generator")
@@ -98,7 +99,7 @@ def load_pydantic_model_from_schema(
             sanitized_title  # Update the name to use
         )
         schema_dict = schema
-        schema_input_data =
+        schema_input_data = ln.json_dumps(schema_dict)
     except TypeError as e:
         error_msg = "Invalid dictionary provided for schema"
         raise ValueError(error_msg) from e
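The same `ln.json_dumps` swap lands in the schema loader. From the hunk, `load_pydantic_model_from_schema` accepts a dict schema and converts a serialization `TypeError` into a `ValueError`. A hedged sketch of a call, assuming a single positional schema argument and `datamodel_code_generator` installed (the schema itself is illustrative):

from lionagi.libs.schema.load_pydantic_model_from_schema import (
    load_pydantic_model_from_schema,
)

schema = {  # illustrative JSON Schema
    "title": "User",
    "type": "object",
    "properties": {"name": {"type": "string"}},
}
UserModel = load_pydantic_model_from_schema(schema)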
lionagi/libs/unstructured/pdf_to_image.py
CHANGED
@@ -1,4 +1,4 @@
-from lionagi.utils import
+from lionagi.utils import import_module, is_import_installed

 _HAS_PDF2IMAGE = is_import_installed("pdf2image")

@@ -25,7 +25,7 @@ def pdf_to_images(

     import os

-    convert_from_path =
+    convert_from_path = import_module(
         "pdf2image", import_name="convert_from_path"
     )

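Here `pdf2image` moves behind lionagi's own `import_module` helper, so the dependency is resolved only when `pdf_to_images` runs. A sketch of the guarded lazy-import pattern as used in this file (the error message is illustrative; the helper's call shape is taken from the hunk above):

from lionagi.utils import import_module, is_import_installed

_HAS_PDF2IMAGE = is_import_installed("pdf2image")


def get_convert_from_path():
    if not _HAS_PDF2IMAGE:
        raise ImportError("pdf2image is required")  # illustrative message
    # Same call shape as the hunk above: resolve the function only on demand.
    return import_module("pdf2image", import_name="convert_from_path")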
lionagi/libs/validate/string_similarity.py
CHANGED
@@ -320,13 +320,13 @@ def string_similarity(
     # Sort by score (descending) and index (ascending) for stable ordering
     results.sort(key=lambda x: (-x.score, x.index))

-    # Return results
-    if return_most_similar:
-        return results[0].word
-
     # Filter exact matches for case sensitive comparisons
     if case_sensitive:
         max_score = results[0].score
         results = [r for r in results if r.score == max_score]

+    # Return results
+    if return_most_similar:
+        return results[0].word
+
     return [r.word for r in results]
lionagi/ln/__init__.py
CHANGED
@@ -1,5 +1,13 @@
 from ._async_call import AlcallParams, BcallParams, alcall, bcall
+from ._extract_json import extract_json
+from ._fuzzy_json import fuzzy_json
 from ._hash import hash_dict
+from ._json_dump import (
+    DEFAULT_SERIALIZER,
+    DEFAULT_SERIALIZER_OPTION,
+    get_orjson_default,
+    json_dumps,
+)
 from ._list_call import LcallParams, lcall
 from ._models import DataClass, Params
 from ._to_list import ToListParams, to_list
@@ -46,4 +54,34 @@ __all__ = (
     "bcall",
     "AlcallParams",
     "BcallParams",
+    "get_orjson_default",
+    "DEFAULT_SERIALIZER",
+    "DEFAULT_SERIALIZER_OPTION",
+    "json_dumps",
+    "TaskGroup",
+    "create_task_group",
+    "CancelScope",
+    "move_on_after",
+    "fail_after",
+    "ConnectionPool",
+    "WorkerPool",
+    "parallel_requests",
+    "retry_with_timeout",
+    "Lock",
+    "Semaphore",
+    "CapacityLimiter",
+    "Event",
+    "Condition",
+    "get_cancelled_exc_class",
+    "shield",
+    "ResourceTracker",
+    "resource_leak_detector",
+    "track_resource",
+    "untrack_resource",
+    "cleanup_check",
+    "get_global_tracker",
+    "is_coro_func",
+    "ConcurrencyEvent",
+    "fuzzy_json",
+    "extract_json",
 )
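Among the new exports, `fuzzy_json` and `extract_json` round out the JSON toolkit alongside `json_dumps`. Their signatures are not shown in this diff; the sketch below is a guess at intent based only on the module names (`_fuzzy_json.py`, `_extract_json.py`), so the tolerant-parsing and extraction behavior are assumptions:

from lionagi import ln

serialized = ln.json_dumps({"a": 1})  # orjson-backed serialization

# Assumed behavior: tolerant parsing of near-JSON such as single-quoted keys.
data = ln.fuzzy_json("{'a': 1}")

# Assumed behavior: pull a JSON payload out of surrounding text, e.g. a fenced
# block in an LLM response.
obj = ln.extract_json('reply: ```json\n{"a": 1}\n```')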
|