langroid 0.52.2__py3-none-any.whl → 0.52.4__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- langroid/parsing/file_attachment.py +34 -35
- langroid/parsing/pdf_utils.py +6 -3
- {langroid-0.52.2.dist-info → langroid-0.52.4.dist-info}/METADATA +1 -1
- {langroid-0.52.2.dist-info → langroid-0.52.4.dist-info}/RECORD +6 -6
- {langroid-0.52.2.dist-info → langroid-0.52.4.dist-info}/WHEEL +0 -0
- {langroid-0.52.2.dist-info → langroid-0.52.4.dist-info}/licenses/LICENSE +0 -0
@@ -209,39 +209,38 @@ class FileAttachment(BaseModel):
|
|
209
209
|
Returns:
|
210
210
|
Dictionary with file data
|
211
211
|
"""
|
212
|
-
if
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
#
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
# Otherwise use base64 data URI
|
228
|
-
else:
|
229
|
-
image_url_dict["url"] = self.to_data_uri()
|
230
|
-
|
231
|
-
# Add detail parameter if specified
|
232
|
-
if self.detail:
|
233
|
-
image_url_dict["detail"] = self.detail
|
234
|
-
|
235
|
-
return dict(
|
236
|
-
type="image_url",
|
237
|
-
image_url=image_url_dict,
|
238
|
-
)
|
212
|
+
if (
|
213
|
+
self.mime_type
|
214
|
+
and self.mime_type.startswith("image/")
|
215
|
+
or "gemini" in model.lower()
|
216
|
+
):
|
217
|
+
# for gemini models, we use `image_url` for both pdf-files and images
|
218
|
+
|
219
|
+
image_url_dict = {}
|
220
|
+
|
221
|
+
# If we have a URL and it's a full http/https URL, use it directly
|
222
|
+
if self.url and (
|
223
|
+
self.url.startswith("http://") or self.url.startswith("https://")
|
224
|
+
):
|
225
|
+
image_url_dict["url"] = self.url
|
226
|
+
# Otherwise use base64 data URI
|
239
227
|
else:
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
228
|
+
image_url_dict["url"] = self.to_data_uri()
|
229
|
+
|
230
|
+
# Add detail parameter if specified
|
231
|
+
if self.detail:
|
232
|
+
image_url_dict["detail"] = self.detail
|
233
|
+
|
234
|
+
return dict(
|
235
|
+
type="image_url",
|
236
|
+
image_url=image_url_dict,
|
237
|
+
)
|
238
|
+
else:
|
239
|
+
# For non-image files
|
240
|
+
return dict(
|
241
|
+
type="file",
|
242
|
+
file=dict(
|
243
|
+
filename=self.filename,
|
244
|
+
file_data=self.to_data_uri(),
|
245
|
+
),
|
246
|
+
)
|
langroid/parsing/pdf_utils.py
CHANGED
@@ -17,12 +17,12 @@ if fitz is None:
|
|
17
17
|
|
18
18
|
|
19
19
|
def pdf_split_pages(
|
20
|
-
input_pdf: Union[BytesIO, BinaryIO],
|
20
|
+
input_pdf: Union[BytesIO, BinaryIO, str],
|
21
21
|
) -> Tuple[List[Path], TemporaryDirectory[Any]]:
|
22
22
|
"""Splits a PDF into individual pages in a temporary directory.
|
23
23
|
|
24
24
|
Args:
|
25
|
-
input_pdf: Input PDF file in bytes or
|
25
|
+
input_pdf: Input PDF file in bytes, binary mode, or a file path
|
26
26
|
max_workers: Maximum number of concurrent workers for parallel processing
|
27
27
|
|
28
28
|
Returns:
|
@@ -36,7 +36,10 @@ def pdf_split_pages(
|
|
36
36
|
tmp_dir.cleanup() # Clean up temp files when done
|
37
37
|
"""
|
38
38
|
tmp_dir = tempfile.TemporaryDirectory()
|
39
|
-
|
39
|
+
if isinstance(input_pdf, str):
|
40
|
+
doc = fitz.open(input_pdf)
|
41
|
+
else:
|
42
|
+
doc = fitz.open(stream=input_pdf, filetype="pdf")
|
40
43
|
paths = []
|
41
44
|
|
42
45
|
for page_num in range(len(doc)):
|
@@ -83,12 +83,12 @@ langroid/parsing/__init__.py,sha256=2oUWJJAxIavq9Wtw5RGlkXLq3GF3zgXeVLLW4j7yeb8,
|
|
83
83
|
langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
|
84
84
|
langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
|
85
85
|
langroid/parsing/document_parser.py,sha256=cUcp4JKS_LpsjX7OqnGBhHorDHx7FG5pvKGjRBkQoMw,57685
|
86
|
-
langroid/parsing/file_attachment.py,sha256=
|
86
|
+
langroid/parsing/file_attachment.py,sha256=ryJVhVFOhINrfkf9Z0vWTTwCnm80z2qzXgp20_miP98,7362
|
87
87
|
langroid/parsing/md_parser.py,sha256=JUgsUpCaeAuBndmtDaJR9HMZaje1gmtXtaLXJHst3i8,21340
|
88
88
|
langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
|
89
89
|
langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
|
90
90
|
langroid/parsing/parser.py,sha256=uaAITarcGI2504zcP_dLhp3LjNdh9A6R_yS-o_VcaH8,15599
|
91
|
-
langroid/parsing/pdf_utils.py,sha256=
|
91
|
+
langroid/parsing/pdf_utils.py,sha256=9HnwhbZvpBUhW8WjY9OpGPKaIt3oe_a1AuqhWKqNQ6s,1616
|
92
92
|
langroid/parsing/repo_loader.py,sha256=NpysuyzRHvgL3F4BB_wGo5sCUnZ3FOlVCJmZ7CaUdbs,30202
|
93
93
|
langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
|
94
94
|
langroid/parsing/search.py,sha256=0NJ5-Rou_BbrHAD7O9b20bKjZJnbadjObvGm4Zq8Kis,9818
|
@@ -130,7 +130,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
|
|
130
130
|
langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
|
131
131
|
langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
|
132
132
|
langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
|
133
|
-
langroid-0.52.
|
134
|
-
langroid-0.52.
|
135
|
-
langroid-0.52.
|
136
|
-
langroid-0.52.
|
133
|
+
langroid-0.52.4.dist-info/METADATA,sha256=8q70bM9X5i9DkmZ9jg3zAEn5Lk-vUKaJRF46A2EnSvA,63642
|
134
|
+
langroid-0.52.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
135
|
+
langroid-0.52.4.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
136
|
+
langroid-0.52.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|