fmtr.tools 1.3.61__py3-none-any.whl → 1.3.63__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fmtr.tools might be problematic. Click here for more details.
- fmtr/tools/pdf_tools.py +28 -4
- fmtr/tools/string_tools.py +21 -5
- fmtr/tools/version +1 -1
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.63.dist-info}/METADATA +52 -52
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.63.dist-info}/RECORD +9 -9
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.63.dist-info}/WHEEL +0 -0
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.63.dist-info}/entry_points.txt +0 -0
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.63.dist-info}/licenses/LICENSE +0 -0
- {fmtr_tools-1.3.61.dist-info → fmtr_tools-1.3.63.dist-info}/top_level.txt +0 -0
fmtr/tools/pdf_tools.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from typing import List, Tuple, Dict, Any, Self
|
|
2
|
+
|
|
1
3
|
import pymupdf as pm
|
|
2
4
|
import pymupdf4llm
|
|
3
|
-
from typing import List, Tuple, Dict, Any, Self
|
|
4
5
|
|
|
5
6
|
from fmtr.tools import data_modelling_tools
|
|
6
7
|
|
|
@@ -179,10 +180,10 @@ class Document(pm.Document):
|
|
|
179
180
|
"""
|
|
180
181
|
return pymupdf4llm.to_markdown(self, **kwargs)
|
|
181
182
|
|
|
182
|
-
def
|
|
183
|
+
def to_text_pages(self) -> List[str]:
|
|
183
184
|
"""
|
|
184
185
|
|
|
185
|
-
Simple text output.
|
|
186
|
+
Simple text output per-page.
|
|
186
187
|
|
|
187
188
|
"""
|
|
188
189
|
lines = []
|
|
@@ -190,9 +191,32 @@ class Document(pm.Document):
|
|
|
190
191
|
text = page.get_text()
|
|
191
192
|
lines.append(text)
|
|
192
193
|
|
|
193
|
-
|
|
194
|
+
return lines
|
|
195
|
+
|
|
196
|
+
def to_text(self) -> str:
|
|
197
|
+
"""
|
|
198
|
+
|
|
199
|
+
Simple text output.
|
|
200
|
+
|
|
201
|
+
"""
|
|
202
|
+
|
|
203
|
+
text = '\n'.join(self.to_text_pages())
|
|
194
204
|
return text
|
|
195
205
|
|
|
206
|
+
def split(self) -> List[Self]:
|
|
207
|
+
"""
|
|
208
|
+
|
|
209
|
+
Split pages into individual documents.
|
|
210
|
+
|
|
211
|
+
"""
|
|
212
|
+
|
|
213
|
+
documents = []
|
|
214
|
+
for i, page in enumerate(self, start=1):
|
|
215
|
+
document = self.__class__()
|
|
216
|
+
document.insert_pdf(self, from_page=i, to_page=i)
|
|
217
|
+
documents.append(document)
|
|
218
|
+
|
|
219
|
+
return documents
|
|
196
220
|
|
|
197
221
|
if __name__ == '__main__':
|
|
198
222
|
from fmtr.tools.path_tools import Path
|
fmtr/tools/string_tools.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from collections import namedtuple
|
|
2
|
-
from string import Formatter
|
|
3
|
-
|
|
4
1
|
import re
|
|
5
2
|
from dataclasses import dataclass
|
|
6
3
|
from textwrap import dedent
|
|
7
4
|
from typing import List
|
|
8
5
|
|
|
6
|
+
from collections import namedtuple
|
|
7
|
+
from string import Formatter
|
|
8
|
+
|
|
9
9
|
from fmtr.tools.datatype_tools import is_none
|
|
10
10
|
|
|
11
11
|
ELLIPSIS = '…'
|
|
@@ -151,14 +151,14 @@ def truncate_mid(text, length=None, sep=ELLIPSIS, return_type=str):
|
|
|
151
151
|
)
|
|
152
152
|
|
|
153
153
|
|
|
154
|
-
def flatten(raw):
|
|
154
|
+
def flatten(raw, sep=' '):
|
|
155
155
|
"""
|
|
156
156
|
|
|
157
157
|
Flatten a multiline string to a single line
|
|
158
158
|
|
|
159
159
|
"""
|
|
160
160
|
lines = raw.splitlines()
|
|
161
|
-
text = ' '.join(lines)
|
|
161
|
+
text = sep.join(lines)
|
|
162
162
|
text = text.strip()
|
|
163
163
|
return text
|
|
164
164
|
|
|
@@ -175,6 +175,22 @@ def join(strings, sep=' '):
|
|
|
175
175
|
return text
|
|
176
176
|
|
|
177
177
|
|
|
178
|
+
def join_natural(items, sep=', ', conj='and'):
|
|
179
|
+
"""
|
|
180
|
+
|
|
181
|
+
Natural language list
|
|
182
|
+
|
|
183
|
+
"""
|
|
184
|
+
|
|
185
|
+
if not items:
|
|
186
|
+
return ""
|
|
187
|
+
if len(items) == 1:
|
|
188
|
+
return items[0]
|
|
189
|
+
firsts, last = items[:-1], items[-1]
|
|
190
|
+
firsts_str = join(firsts, sep=sep)
|
|
191
|
+
text = f"{firsts_str} {conj} {last}"
|
|
192
|
+
return text
|
|
193
|
+
|
|
178
194
|
class Mask:
|
|
179
195
|
"""
|
|
180
196
|
|
fmtr/tools/version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.3.61
|
|
1
|
+
1.3.63
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fmtr.tools
|
|
3
|
-
Version: 1.3.61
|
|
3
|
+
Version: 1.3.63
|
|
4
4
|
Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
|
|
5
5
|
Home-page: https://github.com/fmtr/fmtr.tools
|
|
6
6
|
Author: Frontmatter
|
|
@@ -154,68 +154,68 @@ Provides-Extra: db-document
|
|
|
154
154
|
Requires-Dist: beanie[odm]; extra == "db-document"
|
|
155
155
|
Requires-Dist: motor; extra == "db-document"
|
|
156
156
|
Provides-Extra: all
|
|
157
|
+
Requires-Dist: uvicorn[standard]; extra == "all"
|
|
158
|
+
Requires-Dist: contexttimer; extra == "all"
|
|
159
|
+
Requires-Dist: pymupdf; extra == "all"
|
|
160
|
+
Requires-Dist: motor; extra == "all"
|
|
161
|
+
Requires-Dist: huggingface_hub; extra == "all"
|
|
162
|
+
Requires-Dist: google-auth-httplib2; extra == "all"
|
|
157
163
|
Requires-Dist: sre_yield; extra == "all"
|
|
158
|
-
Requires-Dist: dask[bag]; extra == "all"
|
|
159
|
-
Requires-Dist: bokeh; extra == "all"
|
|
160
|
-
Requires-Dist: tinynetrc; extra == "all"
|
|
161
|
-
Requires-Dist: python-on-whales; extra == "all"
|
|
162
164
|
Requires-Dist: json_repair; extra == "all"
|
|
163
|
-
Requires-Dist:
|
|
164
|
-
Requires-Dist:
|
|
165
|
-
Requires-Dist: transformers[sentencepiece]; extra == "all"
|
|
166
|
-
Requires-Dist: httpx; extra == "all"
|
|
167
|
-
Requires-Dist: cachetools; extra == "all"
|
|
168
|
-
Requires-Dist: pandas; extra == "all"
|
|
169
|
-
Requires-Dist: torchvision; extra == "all"
|
|
170
|
-
Requires-Dist: peft; extra == "all"
|
|
171
|
-
Requires-Dist: pydantic-settings; extra == "all"
|
|
172
|
-
Requires-Dist: httpx_retries; extra == "all"
|
|
173
|
-
Requires-Dist: torchaudio; extra == "all"
|
|
174
|
-
Requires-Dist: distributed; extra == "all"
|
|
165
|
+
Requires-Dist: fastapi; extra == "all"
|
|
166
|
+
Requires-Dist: tabulate; extra == "all"
|
|
175
167
|
Requires-Dist: flet-webview; extra == "all"
|
|
176
|
-
Requires-Dist:
|
|
177
|
-
Requires-Dist:
|
|
178
|
-
Requires-Dist: pytest-cov; extra == "all"
|
|
179
|
-
Requires-Dist: pycountry; extra == "all"
|
|
180
|
-
Requires-Dist: regex; extra == "all"
|
|
181
|
-
Requires-Dist: semver; extra == "all"
|
|
182
|
-
Requires-Dist: pyyaml; extra == "all"
|
|
168
|
+
Requires-Dist: beanie[odm]; extra == "all"
|
|
169
|
+
Requires-Dist: openai; extra == "all"
|
|
183
170
|
Requires-Dist: pydantic-extra-types; extra == "all"
|
|
184
|
-
Requires-Dist:
|
|
185
|
-
Requires-Dist:
|
|
186
|
-
Requires-Dist:
|
|
187
|
-
Requires-Dist:
|
|
188
|
-
Requires-Dist:
|
|
189
|
-
Requires-Dist:
|
|
190
|
-
Requires-Dist:
|
|
171
|
+
Requires-Dist: torchaudio; extra == "all"
|
|
172
|
+
Requires-Dist: flet-video; extra == "all"
|
|
173
|
+
Requires-Dist: tinynetrc; extra == "all"
|
|
174
|
+
Requires-Dist: flet[all]; extra == "all"
|
|
175
|
+
Requires-Dist: pycountry; extra == "all"
|
|
176
|
+
Requires-Dist: torchvision; extra == "all"
|
|
177
|
+
Requires-Dist: deepdiff; extra == "all"
|
|
178
|
+
Requires-Dist: pydantic; extra == "all"
|
|
179
|
+
Requires-Dist: httpx_retries; extra == "all"
|
|
180
|
+
Requires-Dist: logfire[fastapi]; extra == "all"
|
|
181
|
+
Requires-Dist: diskcache; extra == "all"
|
|
182
|
+
Requires-Dist: dask[bag]; extra == "all"
|
|
191
183
|
Requires-Dist: yamlscript; extra == "all"
|
|
184
|
+
Requires-Dist: Unidecode; extra == "all"
|
|
185
|
+
Requires-Dist: openpyxl; extra == "all"
|
|
186
|
+
Requires-Dist: httpx; extra == "all"
|
|
187
|
+
Requires-Dist: transformers[sentencepiece]; extra == "all"
|
|
192
188
|
Requires-Dist: html2text; extra == "all"
|
|
193
|
-
Requires-Dist:
|
|
194
|
-
Requires-Dist: google-api-python-client; extra == "all"
|
|
195
|
-
Requires-Dist: beanie[odm]; extra == "all"
|
|
189
|
+
Requires-Dist: pytest-cov; extra == "all"
|
|
196
190
|
Requires-Dist: logfire; extra == "all"
|
|
191
|
+
Requires-Dist: appdirs; extra == "all"
|
|
192
|
+
Requires-Dist: python-on-whales; extra == "all"
|
|
193
|
+
Requires-Dist: semver; extra == "all"
|
|
194
|
+
Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
|
|
195
|
+
Requires-Dist: pandas; extra == "all"
|
|
196
|
+
Requires-Dist: distributed; extra == "all"
|
|
197
|
+
Requires-Dist: filetype; extra == "all"
|
|
197
198
|
Requires-Dist: faker; extra == "all"
|
|
198
|
-
Requires-Dist: Unidecode; extra == "all"
|
|
199
|
-
Requires-Dist: huggingface_hub; extra == "all"
|
|
200
|
-
Requires-Dist: openai; extra == "all"
|
|
201
199
|
Requires-Dist: sentence_transformers; extra == "all"
|
|
202
|
-
Requires-Dist:
|
|
203
|
-
Requires-Dist:
|
|
204
|
-
Requires-Dist:
|
|
205
|
-
Requires-Dist:
|
|
206
|
-
Requires-Dist:
|
|
207
|
-
Requires-Dist:
|
|
208
|
-
Requires-Dist:
|
|
209
|
-
Requires-Dist:
|
|
200
|
+
Requires-Dist: regex; extra == "all"
|
|
201
|
+
Requires-Dist: deepmerge; extra == "all"
|
|
202
|
+
Requires-Dist: google-api-python-client; extra == "all"
|
|
203
|
+
Requires-Dist: setuptools; extra == "all"
|
|
204
|
+
Requires-Dist: google-auth; extra == "all"
|
|
205
|
+
Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
|
|
206
|
+
Requires-Dist: pydantic-settings; extra == "all"
|
|
207
|
+
Requires-Dist: dnspython[doh]; extra == "all"
|
|
208
|
+
Requires-Dist: ollama; extra == "all"
|
|
209
|
+
Requires-Dist: cachetools; extra == "all"
|
|
210
|
+
Requires-Dist: pymupdf4llm; extra == "all"
|
|
211
|
+
Requires-Dist: pyyaml; extra == "all"
|
|
210
212
|
Requires-Dist: logfire[httpx]; extra == "all"
|
|
211
|
-
Requires-Dist: contexttimer; extra == "all"
|
|
212
|
-
Requires-Dist: diskcache; extra == "all"
|
|
213
|
-
Requires-Dist: flet[all]; extra == "all"
|
|
214
213
|
Requires-Dist: playwright; extra == "all"
|
|
215
|
-
Requires-Dist:
|
|
216
|
-
Requires-Dist:
|
|
217
|
-
Requires-Dist:
|
|
218
|
-
Requires-Dist:
|
|
214
|
+
Requires-Dist: odfpy; extra == "all"
|
|
215
|
+
Requires-Dist: tokenizers; extra == "all"
|
|
216
|
+
Requires-Dist: bokeh; extra == "all"
|
|
217
|
+
Requires-Dist: google-auth-oauthlib; extra == "all"
|
|
218
|
+
Requires-Dist: peft; extra == "all"
|
|
219
219
|
Dynamic: author
|
|
220
220
|
Dynamic: author-email
|
|
221
221
|
Dynamic: description
|
|
@@ -32,7 +32,7 @@ fmtr/tools/openai_tools.py,sha256=6SUgejgzUzmlKKct2_ePXntvMegu3FJgfk9x7aqtqYc,74
|
|
|
32
32
|
fmtr/tools/packaging_tools.py,sha256=FlgOTnDRHZWQL2iR-wucTsyGEHRE-MlddKL30MPmUqE,253
|
|
33
33
|
fmtr/tools/parallel_tools.py,sha256=QEb_gN1StkxsqYaH4HSjiJX8Y3gpb2uKNsOzG4uFpaM,3071
|
|
34
34
|
fmtr/tools/pattern_tools.py,sha256=DlEKzNJKhwFmU3-awoGkN5Xy-yLF_bsoj8eoSMCEytE,6018
|
|
35
|
-
fmtr/tools/pdf_tools.py,sha256=
|
|
35
|
+
fmtr/tools/pdf_tools.py,sha256=6XQCNyytQSnJSc38gdMOFVcPXnPwfOlk6y4QVqmJLp8,4810
|
|
36
36
|
fmtr/tools/platform_tools.py,sha256=7p69CmAHe_sF68Fx9uVhns1k5EewTHTWgUYzkl6ZQKA,308
|
|
37
37
|
fmtr/tools/process_tools.py,sha256=Ysh5Dk2QFBhXQerArjKdt7xZd3JrN5Ho02AaOjH0Nnw,1425
|
|
38
38
|
fmtr/tools/profiling_tools.py,sha256=jpXVjaNKPydTasEQVNXvxzGtMhXPit08AnJddkU8uIc,46
|
|
@@ -40,12 +40,12 @@ fmtr/tools/random_tools.py,sha256=4VlQdk5THbR8ka4pZaLbk_ZO_4yy6PF_lHZes_rgenY,22
|
|
|
40
40
|
fmtr/tools/semantic_tools.py,sha256=cxY9NSAHWj4nEc6Oj4qA1omR3dWbl2OuH7_PkINc6_E,1386
|
|
41
41
|
fmtr/tools/settings_tools.py,sha256=o11W3T60UZSvCTkh_eEQq1Mx74GycQ6JxUr0plBDbsk,2356
|
|
42
42
|
fmtr/tools/spaces_tools.py,sha256=D_he3mve6DruB3OPS6QyzqD05ChHnRTb4buViKPe7To,1099
|
|
43
|
-
fmtr/tools/string_tools.py,sha256=
|
|
43
|
+
fmtr/tools/string_tools.py,sha256=Lz_H9l25OOoxE48QBJ_Upkk5nno7dPA6G2Gc0Wo8rOk,5275
|
|
44
44
|
fmtr/tools/tabular_tools.py,sha256=mw6vOij1Ch-pVAyHMPtm5zj__ULZN_TKeBYOfj33wFM,1634
|
|
45
45
|
fmtr/tools/tokenization_tools.py,sha256=me-IBzSLyNYejLybwjO9CNB6Mj2NYfKPaOVThXyaGNg,4268
|
|
46
46
|
fmtr/tools/tools.py,sha256=CAsApa1YwVdNE6H66Vjivs_mXYvOas3rh7fPELAnTpk,795
|
|
47
47
|
fmtr/tools/unicode_tools.py,sha256=yS_9wpu8ogNoiIL7s1G_8bETFFO_YQlo4LNPv1NLDeY,52
|
|
48
|
-
fmtr/tools/version,sha256=
|
|
48
|
+
fmtr/tools/version,sha256=1-w7DNphgnwgQa3EckgnbD0Qx9ULcbjMgAOknBRyoWI,6
|
|
49
49
|
fmtr/tools/webhook_tools.py,sha256=q3pVJ1NCem2SrMuFcLxiWd7DibFs7Q-uGtojfXd3Qcg,380
|
|
50
50
|
fmtr/tools/yaml_tools.py,sha256=Bhhyd6GQVKO72Lp8ky7bAUjIB_65Hdh0Q45SKIEe6S8,1901
|
|
51
51
|
fmtr/tools/ai_tools/__init__.py,sha256=O8VRlPnnQCncg2ZZ2l_VdWLJf4jkKH6dkZFVbv6o7IM,388
|
|
@@ -85,9 +85,9 @@ fmtr/tools/tests/test_path.py,sha256=AkZQa6_8BQ-VaCyL_J-iKmdf2ZaM-xFYR37Kun3k4_g
|
|
|
85
85
|
fmtr/tools/tests/test_yaml.py,sha256=jc0TwwKu9eC0LvFGNMERdgBue591xwLxYXFbtsRwXVM,287
|
|
86
86
|
fmtr/tools/version_tools/__init__.py,sha256=cjE6nO6AoVOUp3RwgTbqL9wiw8J1l2pHJOz6Gn6bxjA,326
|
|
87
87
|
fmtr/tools/version_tools/version_tools.py,sha256=Hcc6yferZS1hHbugRTdiHhSNmXEEG0hjCiTTXKna-YY,1127
|
|
88
|
-
fmtr_tools-1.3.
|
|
89
|
-
fmtr_tools-1.3.
|
|
90
|
-
fmtr_tools-1.3.
|
|
91
|
-
fmtr_tools-1.3.
|
|
92
|
-
fmtr_tools-1.3.
|
|
93
|
-
fmtr_tools-1.3.
|
|
88
|
+
fmtr_tools-1.3.63.dist-info/licenses/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
|
|
89
|
+
fmtr_tools-1.3.63.dist-info/METADATA,sha256=nSjkJECziz3f5ss_9fCnnyyVJCoAKvOl0LzYDPShNzo,17455
|
|
90
|
+
fmtr_tools-1.3.63.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
91
|
+
fmtr_tools-1.3.63.dist-info/entry_points.txt,sha256=h-r__Xh5njtFqreMLg6cGuTFS4Qh-QqJPU1HB-_BS-Q,357
|
|
92
|
+
fmtr_tools-1.3.63.dist-info/top_level.txt,sha256=LXem9xCgNOD72tE2gRKESdiQTL902mfFkwWb6-dlwEE,5
|
|
93
|
+
fmtr_tools-1.3.63.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|