langroid 0.52.3__py3-none-any.whl → 0.52.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/parsing/pdf_utils.py +6 -3
- {langroid-0.52.3.dist-info → langroid-0.52.4.dist-info}/METADATA +1 -1
- {langroid-0.52.3.dist-info → langroid-0.52.4.dist-info}/RECORD +5 -5
- {langroid-0.52.3.dist-info → langroid-0.52.4.dist-info}/WHEEL +0 -0
- {langroid-0.52.3.dist-info → langroid-0.52.4.dist-info}/licenses/LICENSE +0 -0
langroid/parsing/pdf_utils.py
CHANGED
@@ -17,12 +17,12 @@ if fitz is None:
|
|
17
17
|
|
18
18
|
|
19
19
|
def pdf_split_pages(
|
20
|
-
input_pdf: Union[BytesIO, BinaryIO],
|
20
|
+
input_pdf: Union[BytesIO, BinaryIO, str],
|
21
21
|
) -> Tuple[List[Path], TemporaryDirectory[Any]]:
|
22
22
|
"""Splits a PDF into individual pages in a temporary directory.
|
23
23
|
|
24
24
|
Args:
|
25
|
-
input_pdf: Input PDF file in bytes or binary mode
|
25
|
+
input_pdf: Input PDF file in bytes, binary mode, or a file path
|
26
26
|
max_workers: Maximum number of concurrent workers for parallel processing
|
27
27
|
|
28
28
|
Returns:
|
@@ -36,7 +36,10 @@ def pdf_split_pages(
|
|
36
36
|
tmp_dir.cleanup() # Clean up temp files when done
|
37
37
|
"""
|
38
38
|
tmp_dir = tempfile.TemporaryDirectory()
|
39
|
-
|
39
|
+
if isinstance(input_pdf, str):
|
40
|
+
doc = fitz.open(input_pdf)
|
41
|
+
else:
|
42
|
+
doc = fitz.open(stream=input_pdf, filetype="pdf")
|
40
43
|
paths = []
|
41
44
|
|
42
45
|
for page_num in range(len(doc)):
|
@@ -88,7 +88,7 @@ langroid/parsing/md_parser.py,sha256=JUgsUpCaeAuBndmtDaJR9HMZaje1gmtXtaLXJHst3i8
|
|
88
88
|
langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
|
89
89
|
langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
|
90
90
|
langroid/parsing/parser.py,sha256=uaAITarcGI2504zcP_dLhp3LjNdh9A6R_yS-o_VcaH8,15599
|
91
|
-
langroid/parsing/pdf_utils.py,sha256=
|
91
|
+
langroid/parsing/pdf_utils.py,sha256=9HnwhbZvpBUhW8WjY9OpGPKaIt3oe_a1AuqhWKqNQ6s,1616
|
92
92
|
langroid/parsing/repo_loader.py,sha256=NpysuyzRHvgL3F4BB_wGo5sCUnZ3FOlVCJmZ7CaUdbs,30202
|
93
93
|
langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
|
94
94
|
langroid/parsing/search.py,sha256=0NJ5-Rou_BbrHAD7O9b20bKjZJnbadjObvGm4Zq8Kis,9818
|
@@ -130,7 +130,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
|
|
130
130
|
langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
|
131
131
|
langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
|
132
132
|
langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
|
133
|
-
langroid-0.52.
|
134
|
-
langroid-0.52.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
135
|
-
langroid-0.52.3.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
136
|
-
langroid-0.52.3.dist-info/RECORD,,
|
133
|
+
langroid-0.52.4.dist-info/METADATA,sha256=8q70bM9X5i9DkmZ9jg3zAEn5Lk-vUKaJRF46A2EnSvA,63642
|
134
|
+
langroid-0.52.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
135
|
+
langroid-0.52.4.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
136
|
+
langroid-0.52.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|