fmtr.tools 1.3.61__py3-none-any.whl → 1.3.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fmtr.tools might be problematic. Click here for more details.
- fmtr/tools/pdf_tools.py +28 -4
- fmtr/tools/version +1 -1
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.62.dist-info}/METADATA +48 -48
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.62.dist-info}/RECORD +8 -8
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.62.dist-info}/WHEEL +0 -0
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.62.dist-info}/entry_points.txt +0 -0
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.62.dist-info}/licenses/LICENSE +0 -0
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.62.dist-info}/top_level.txt +0 -0
fmtr/tools/pdf_tools.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from typing import List, Tuple, Dict, Any, Self
|
|
2
|
+
|
|
1
3
|
import pymupdf as pm
|
|
2
4
|
import pymupdf4llm
|
|
3
|
-
from typing import List, Tuple, Dict, Any, Self
|
|
4
5
|
|
|
5
6
|
from fmtr.tools import data_modelling_tools
|
|
6
7
|
|
|
@@ -179,10 +180,10 @@ class Document(pm.Document):
|
|
|
179
180
|
"""
|
|
180
181
|
return pymupdf4llm.to_markdown(self, **kwargs)
|
|
181
182
|
|
|
182
|
-
def
|
|
183
|
+
def to_text_pages(self) -> List[str]:
|
|
183
184
|
"""
|
|
184
185
|
|
|
185
|
-
Simple text output.
|
|
186
|
+
Simple text output per-page.
|
|
186
187
|
|
|
187
188
|
"""
|
|
188
189
|
lines = []
|
|
@@ -190,9 +191,32 @@ class Document(pm.Document):
|
|
|
190
191
|
text = page.get_text()
|
|
191
192
|
lines.append(text)
|
|
192
193
|
|
|
193
|
-
|
|
194
|
+
return lines
|
|
195
|
+
|
|
196
|
+
def to_text(self) -> str:
|
|
197
|
+
"""
|
|
198
|
+
|
|
199
|
+
Simple text output.
|
|
200
|
+
|
|
201
|
+
"""
|
|
202
|
+
|
|
203
|
+
text = '\n'.join(self.to_text_pages())
|
|
194
204
|
return text
|
|
195
205
|
|
|
206
|
+
def split(self) -> List[Self]:
|
|
207
|
+
"""
|
|
208
|
+
|
|
209
|
+
Split pages into individual documents.
|
|
210
|
+
|
|
211
|
+
"""
|
|
212
|
+
|
|
213
|
+
documents = []
|
|
214
|
+
for i, page in enumerate(self, start=1):
|
|
215
|
+
document = self.__class__()
|
|
216
|
+
document.insert_pdf(self, from_page=i, to_page=i)
|
|
217
|
+
documents.append(document)
|
|
218
|
+
|
|
219
|
+
return documents
|
|
196
220
|
|
|
197
221
|
if __name__ == '__main__':
|
|
198
222
|
from fmtr.tools.path_tools import Path
|
fmtr/tools/version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.3.61
|
|
1
|
+
1.3.62
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fmtr.tools
|
|
3
|
-
Version: 1.3.61
|
|
3
|
+
Version: 1.3.62
|
|
4
4
|
Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
|
|
5
5
|
Home-page: https://github.com/fmtr/fmtr.tools
|
|
6
6
|
Author: Frontmatter
|
|
@@ -154,68 +154,68 @@ Provides-Extra: db-document
|
|
|
154
154
|
Requires-Dist: beanie[odm]; extra == "db-document"
|
|
155
155
|
Requires-Dist: motor; extra == "db-document"
|
|
156
156
|
Provides-Extra: all
|
|
157
|
-
Requires-Dist: sre_yield; extra == "all"
|
|
158
|
-
Requires-Dist: dask[bag]; extra == "all"
|
|
159
|
-
Requires-Dist: bokeh; extra == "all"
|
|
160
|
-
Requires-Dist: tinynetrc; extra == "all"
|
|
161
|
-
Requires-Dist: python-on-whales; extra == "all"
|
|
162
157
|
Requires-Dist: json_repair; extra == "all"
|
|
158
|
+
Requires-Dist: flet[all]; extra == "all"
|
|
159
|
+
Requires-Dist: logfire[httpx]; extra == "all"
|
|
160
|
+
Requires-Dist: pydantic; extra == "all"
|
|
163
161
|
Requires-Dist: dnspython[doh]; extra == "all"
|
|
164
|
-
Requires-Dist:
|
|
165
|
-
Requires-Dist:
|
|
166
|
-
Requires-Dist:
|
|
167
|
-
Requires-Dist: cachetools; extra == "all"
|
|
168
|
-
Requires-Dist: pandas; extra == "all"
|
|
169
|
-
Requires-Dist: torchvision; extra == "all"
|
|
170
|
-
Requires-Dist: peft; extra == "all"
|
|
171
|
-
Requires-Dist: pydantic-settings; extra == "all"
|
|
172
|
-
Requires-Dist: httpx_retries; extra == "all"
|
|
173
|
-
Requires-Dist: torchaudio; extra == "all"
|
|
174
|
-
Requires-Dist: distributed; extra == "all"
|
|
175
|
-
Requires-Dist: flet-webview; extra == "all"
|
|
162
|
+
Requires-Dist: sentence_transformers; extra == "all"
|
|
163
|
+
Requires-Dist: pymupdf; extra == "all"
|
|
164
|
+
Requires-Dist: bokeh; extra == "all"
|
|
176
165
|
Requires-Dist: tokenizers; extra == "all"
|
|
177
|
-
Requires-Dist:
|
|
178
|
-
Requires-Dist:
|
|
179
|
-
Requires-Dist:
|
|
180
|
-
Requires-Dist:
|
|
181
|
-
Requires-Dist:
|
|
182
|
-
Requires-Dist: pyyaml; extra == "all"
|
|
166
|
+
Requires-Dist: deepmerge; extra == "all"
|
|
167
|
+
Requires-Dist: html2text; extra == "all"
|
|
168
|
+
Requires-Dist: transformers[sentencepiece]; extra == "all"
|
|
169
|
+
Requires-Dist: google-auth-httplib2; extra == "all"
|
|
170
|
+
Requires-Dist: dask[bag]; extra == "all"
|
|
183
171
|
Requires-Dist: pydantic-extra-types; extra == "all"
|
|
184
172
|
Requires-Dist: fastapi; extra == "all"
|
|
185
|
-
Requires-Dist:
|
|
186
|
-
Requires-Dist:
|
|
173
|
+
Requires-Dist: torchaudio; extra == "all"
|
|
174
|
+
Requires-Dist: deepdiff; extra == "all"
|
|
187
175
|
Requires-Dist: setuptools; extra == "all"
|
|
188
|
-
Requires-Dist:
|
|
176
|
+
Requires-Dist: flet-video; extra == "all"
|
|
189
177
|
Requires-Dist: uvicorn[standard]; extra == "all"
|
|
190
|
-
Requires-Dist:
|
|
191
|
-
Requires-Dist:
|
|
192
|
-
Requires-Dist:
|
|
193
|
-
Requires-Dist:
|
|
178
|
+
Requires-Dist: huggingface_hub; extra == "all"
|
|
179
|
+
Requires-Dist: contexttimer; extra == "all"
|
|
180
|
+
Requires-Dist: openpyxl; extra == "all"
|
|
181
|
+
Requires-Dist: pytest-cov; extra == "all"
|
|
182
|
+
Requires-Dist: flet-webview; extra == "all"
|
|
183
|
+
Requires-Dist: tinynetrc; extra == "all"
|
|
184
|
+
Requires-Dist: pandas; extra == "all"
|
|
185
|
+
Requires-Dist: httpx_retries; extra == "all"
|
|
186
|
+
Requires-Dist: peft; extra == "all"
|
|
194
187
|
Requires-Dist: google-api-python-client; extra == "all"
|
|
195
|
-
Requires-Dist: beanie[odm]; extra == "all"
|
|
196
|
-
Requires-Dist: logfire; extra == "all"
|
|
197
188
|
Requires-Dist: faker; extra == "all"
|
|
189
|
+
Requires-Dist: odfpy; extra == "all"
|
|
198
190
|
Requires-Dist: Unidecode; extra == "all"
|
|
199
|
-
Requires-Dist: huggingface_hub; extra == "all"
|
|
200
191
|
Requires-Dist: openai; extra == "all"
|
|
201
|
-
Requires-Dist:
|
|
202
|
-
Requires-Dist:
|
|
192
|
+
Requires-Dist: google-auth; extra == "all"
|
|
193
|
+
Requires-Dist: sre_yield; extra == "all"
|
|
194
|
+
Requires-Dist: diskcache; extra == "all"
|
|
195
|
+
Requires-Dist: torchvision; extra == "all"
|
|
203
196
|
Requires-Dist: filetype; extra == "all"
|
|
204
|
-
Requires-Dist:
|
|
205
|
-
Requires-Dist:
|
|
206
|
-
Requires-Dist:
|
|
197
|
+
Requires-Dist: pymupdf4llm; extra == "all"
|
|
198
|
+
Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
|
|
199
|
+
Requires-Dist: regex; extra == "all"
|
|
200
|
+
Requires-Dist: playwright; extra == "all"
|
|
201
|
+
Requires-Dist: semver; extra == "all"
|
|
202
|
+
Requires-Dist: logfire; extra == "all"
|
|
203
|
+
Requires-Dist: beanie[odm]; extra == "all"
|
|
204
|
+
Requires-Dist: pydantic-settings; extra == "all"
|
|
205
|
+
Requires-Dist: appdirs; extra == "all"
|
|
206
|
+
Requires-Dist: httpx; extra == "all"
|
|
207
207
|
Requires-Dist: motor; extra == "all"
|
|
208
|
-
Requires-Dist:
|
|
208
|
+
Requires-Dist: yamlscript; extra == "all"
|
|
209
|
+
Requires-Dist: cachetools; extra == "all"
|
|
210
|
+
Requires-Dist: google-auth-oauthlib; extra == "all"
|
|
209
211
|
Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
|
|
210
|
-
Requires-Dist: logfire[
|
|
211
|
-
Requires-Dist:
|
|
212
|
-
Requires-Dist:
|
|
213
|
-
Requires-Dist:
|
|
214
|
-
Requires-Dist: playwright; extra == "all"
|
|
212
|
+
Requires-Dist: logfire[fastapi]; extra == "all"
|
|
213
|
+
Requires-Dist: pyyaml; extra == "all"
|
|
214
|
+
Requires-Dist: ollama; extra == "all"
|
|
215
|
+
Requires-Dist: python-on-whales; extra == "all"
|
|
215
216
|
Requires-Dist: tabulate; extra == "all"
|
|
216
|
-
Requires-Dist:
|
|
217
|
-
Requires-Dist:
|
|
218
|
-
Requires-Dist: deepmerge; extra == "all"
|
|
217
|
+
Requires-Dist: distributed; extra == "all"
|
|
218
|
+
Requires-Dist: pycountry; extra == "all"
|
|
219
219
|
Dynamic: author
|
|
220
220
|
Dynamic: author-email
|
|
221
221
|
Dynamic: description
|
|
@@ -32,7 +32,7 @@ fmtr/tools/openai_tools.py,sha256=6SUgejgzUzmlKKct2_ePXntvMegu3FJgfk9x7aqtqYc,74
|
|
|
32
32
|
fmtr/tools/packaging_tools.py,sha256=FlgOTnDRHZWQL2iR-wucTsyGEHRE-MlddKL30MPmUqE,253
|
|
33
33
|
fmtr/tools/parallel_tools.py,sha256=QEb_gN1StkxsqYaH4HSjiJX8Y3gpb2uKNsOzG4uFpaM,3071
|
|
34
34
|
fmtr/tools/pattern_tools.py,sha256=DlEKzNJKhwFmU3-awoGkN5Xy-yLF_bsoj8eoSMCEytE,6018
|
|
35
|
-
fmtr/tools/pdf_tools.py,sha256=
|
|
35
|
+
fmtr/tools/pdf_tools.py,sha256=6XQCNyytQSnJSc38gdMOFVcPXnPwfOlk6y4QVqmJLp8,4810
|
|
36
36
|
fmtr/tools/platform_tools.py,sha256=7p69CmAHe_sF68Fx9uVhns1k5EewTHTWgUYzkl6ZQKA,308
|
|
37
37
|
fmtr/tools/process_tools.py,sha256=Ysh5Dk2QFBhXQerArjKdt7xZd3JrN5Ho02AaOjH0Nnw,1425
|
|
38
38
|
fmtr/tools/profiling_tools.py,sha256=jpXVjaNKPydTasEQVNXvxzGtMhXPit08AnJddkU8uIc,46
|
|
@@ -45,7 +45,7 @@ fmtr/tools/tabular_tools.py,sha256=mw6vOij1Ch-pVAyHMPtm5zj__ULZN_TKeBYOfj33wFM,1
|
|
|
45
45
|
fmtr/tools/tokenization_tools.py,sha256=me-IBzSLyNYejLybwjO9CNB6Mj2NYfKPaOVThXyaGNg,4268
|
|
46
46
|
fmtr/tools/tools.py,sha256=CAsApa1YwVdNE6H66Vjivs_mXYvOas3rh7fPELAnTpk,795
|
|
47
47
|
fmtr/tools/unicode_tools.py,sha256=yS_9wpu8ogNoiIL7s1G_8bETFFO_YQlo4LNPv1NLDeY,52
|
|
48
|
-
fmtr/tools/version,sha256=
|
|
48
|
+
fmtr/tools/version,sha256=FKKPeGwGcnITJYQNL1W7YAPGPvigwhisVg9K99HxWho,6
|
|
49
49
|
fmtr/tools/webhook_tools.py,sha256=q3pVJ1NCem2SrMuFcLxiWd7DibFs7Q-uGtojfXd3Qcg,380
|
|
50
50
|
fmtr/tools/yaml_tools.py,sha256=Bhhyd6GQVKO72Lp8ky7bAUjIB_65Hdh0Q45SKIEe6S8,1901
|
|
51
51
|
fmtr/tools/ai_tools/__init__.py,sha256=O8VRlPnnQCncg2ZZ2l_VdWLJf4jkKH6dkZFVbv6o7IM,388
|
|
@@ -85,9 +85,9 @@ fmtr/tools/tests/test_path.py,sha256=AkZQa6_8BQ-VaCyL_J-iKmdf2ZaM-xFYR37Kun3k4_g
|
|
|
85
85
|
fmtr/tools/tests/test_yaml.py,sha256=jc0TwwKu9eC0LvFGNMERdgBue591xwLxYXFbtsRwXVM,287
|
|
86
86
|
fmtr/tools/version_tools/__init__.py,sha256=cjE6nO6AoVOUp3RwgTbqL9wiw8J1l2pHJOz6Gn6bxjA,326
|
|
87
87
|
fmtr/tools/version_tools/version_tools.py,sha256=Hcc6yferZS1hHbugRTdiHhSNmXEEG0hjCiTTXKna-YY,1127
|
|
88
|
-
fmtr_tools-1.3.
|
|
89
|
-
fmtr_tools-1.3.
|
|
90
|
-
fmtr_tools-1.3.
|
|
91
|
-
fmtr_tools-1.3.
|
|
92
|
-
fmtr_tools-1.3.
|
|
93
|
-
fmtr_tools-1.3.
|
|
88
|
+
fmtr_tools-1.3.62.dist-info/licenses/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
|
|
89
|
+
fmtr_tools-1.3.62.dist-info/METADATA,sha256=ylVUFuwLtaNvhNbmm4P8w1MCEZ5Ddkmu6Bc_exClAW4,17455
|
|
90
|
+
fmtr_tools-1.3.62.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
91
|
+
fmtr_tools-1.3.62.dist-info/entry_points.txt,sha256=h-r__Xh5njtFqreMLg6cGuTFS4Qh-QqJPU1HB-_BS-Q,357
|
|
92
|
+
fmtr_tools-1.3.62.dist-info/top_level.txt,sha256=LXem9xCgNOD72tE2gRKESdiQTL902mfFkwWb6-dlwEE,5
|
|
93
|
+
fmtr_tools-1.3.62.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|