fmtr.tools 1.3.61__py3-none-any.whl → 1.3.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fmtr.tools might be problematic. Click here for more details.

fmtr/tools/pdf_tools.py CHANGED
@@ -1,6 +1,7 @@
1
+ from typing import List, Tuple, Dict, Any, Self
2
+
1
3
  import pymupdf as pm
2
4
  import pymupdf4llm
3
- from typing import List, Tuple, Dict, Any, Self
4
5
 
5
6
  from fmtr.tools import data_modelling_tools
6
7
 
@@ -179,10 +180,10 @@ class Document(pm.Document):
179
180
  """
180
181
  return pymupdf4llm.to_markdown(self, **kwargs)
181
182
 
182
- def to_text(self):
183
+ def to_text_pages(self) -> List[str]:
183
184
  """
184
185
 
185
- Simple text output.
186
+ Simple text output per-page.
186
187
 
187
188
  """
188
189
  lines = []
@@ -190,9 +191,32 @@ class Document(pm.Document):
190
191
  text = page.get_text()
191
192
  lines.append(text)
192
193
 
193
- text = '\n'.join(lines)
194
+ return lines
195
+
196
+ def to_text(self) -> str:
197
+ """
198
+
199
+ Simple text output.
200
+
201
+ """
202
+
203
+ text = '\n'.join(self.to_text_pages())
194
204
  return text
195
205
 
206
+ def split(self) -> List[Self]:
207
+ """
208
+
209
+ Split pages into individual documents.
210
+
211
+ """
212
+
213
+ documents = []
214
+ for i, page in enumerate(self, start=1):
215
+ document = self.__class__()
216
+ document.insert_pdf(self, from_page=i, to_page=i)
217
+ documents.append(document)
218
+
219
+ return documents
196
220
 
197
221
  if __name__ == '__main__':
198
222
  from fmtr.tools.path_tools import Path
fmtr/tools/version CHANGED
@@ -1 +1 @@
1
- 1.3.61
1
+ 1.3.62
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fmtr.tools
3
- Version: 1.3.61
3
+ Version: 1.3.62
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
@@ -154,68 +154,68 @@ Provides-Extra: db-document
154
154
  Requires-Dist: beanie[odm]; extra == "db-document"
155
155
  Requires-Dist: motor; extra == "db-document"
156
156
  Provides-Extra: all
157
- Requires-Dist: sre_yield; extra == "all"
158
- Requires-Dist: dask[bag]; extra == "all"
159
- Requires-Dist: bokeh; extra == "all"
160
- Requires-Dist: tinynetrc; extra == "all"
161
- Requires-Dist: python-on-whales; extra == "all"
162
157
  Requires-Dist: json_repair; extra == "all"
158
+ Requires-Dist: flet[all]; extra == "all"
159
+ Requires-Dist: logfire[httpx]; extra == "all"
160
+ Requires-Dist: pydantic; extra == "all"
163
161
  Requires-Dist: dnspython[doh]; extra == "all"
164
- Requires-Dist: pymupdf4llm; extra == "all"
165
- Requires-Dist: transformers[sentencepiece]; extra == "all"
166
- Requires-Dist: httpx; extra == "all"
167
- Requires-Dist: cachetools; extra == "all"
168
- Requires-Dist: pandas; extra == "all"
169
- Requires-Dist: torchvision; extra == "all"
170
- Requires-Dist: peft; extra == "all"
171
- Requires-Dist: pydantic-settings; extra == "all"
172
- Requires-Dist: httpx_retries; extra == "all"
173
- Requires-Dist: torchaudio; extra == "all"
174
- Requires-Dist: distributed; extra == "all"
175
- Requires-Dist: flet-webview; extra == "all"
162
+ Requires-Dist: sentence_transformers; extra == "all"
163
+ Requires-Dist: pymupdf; extra == "all"
164
+ Requires-Dist: bokeh; extra == "all"
176
165
  Requires-Dist: tokenizers; extra == "all"
177
- Requires-Dist: google-auth; extra == "all"
178
- Requires-Dist: pytest-cov; extra == "all"
179
- Requires-Dist: pycountry; extra == "all"
180
- Requires-Dist: regex; extra == "all"
181
- Requires-Dist: semver; extra == "all"
182
- Requires-Dist: pyyaml; extra == "all"
166
+ Requires-Dist: deepmerge; extra == "all"
167
+ Requires-Dist: html2text; extra == "all"
168
+ Requires-Dist: transformers[sentencepiece]; extra == "all"
169
+ Requires-Dist: google-auth-httplib2; extra == "all"
170
+ Requires-Dist: dask[bag]; extra == "all"
183
171
  Requires-Dist: pydantic-extra-types; extra == "all"
184
172
  Requires-Dist: fastapi; extra == "all"
185
- Requires-Dist: ollama; extra == "all"
186
- Requires-Dist: odfpy; extra == "all"
173
+ Requires-Dist: torchaudio; extra == "all"
174
+ Requires-Dist: deepdiff; extra == "all"
187
175
  Requires-Dist: setuptools; extra == "all"
188
- Requires-Dist: openpyxl; extra == "all"
176
+ Requires-Dist: flet-video; extra == "all"
189
177
  Requires-Dist: uvicorn[standard]; extra == "all"
190
- Requires-Dist: google-auth-oauthlib; extra == "all"
191
- Requires-Dist: yamlscript; extra == "all"
192
- Requires-Dist: html2text; extra == "all"
193
- Requires-Dist: appdirs; extra == "all"
178
+ Requires-Dist: huggingface_hub; extra == "all"
179
+ Requires-Dist: contexttimer; extra == "all"
180
+ Requires-Dist: openpyxl; extra == "all"
181
+ Requires-Dist: pytest-cov; extra == "all"
182
+ Requires-Dist: flet-webview; extra == "all"
183
+ Requires-Dist: tinynetrc; extra == "all"
184
+ Requires-Dist: pandas; extra == "all"
185
+ Requires-Dist: httpx_retries; extra == "all"
186
+ Requires-Dist: peft; extra == "all"
194
187
  Requires-Dist: google-api-python-client; extra == "all"
195
- Requires-Dist: beanie[odm]; extra == "all"
196
- Requires-Dist: logfire; extra == "all"
197
188
  Requires-Dist: faker; extra == "all"
189
+ Requires-Dist: odfpy; extra == "all"
198
190
  Requires-Dist: Unidecode; extra == "all"
199
- Requires-Dist: huggingface_hub; extra == "all"
200
191
  Requires-Dist: openai; extra == "all"
201
- Requires-Dist: sentence_transformers; extra == "all"
202
- Requires-Dist: logfire[fastapi]; extra == "all"
192
+ Requires-Dist: google-auth; extra == "all"
193
+ Requires-Dist: sre_yield; extra == "all"
194
+ Requires-Dist: diskcache; extra == "all"
195
+ Requires-Dist: torchvision; extra == "all"
203
196
  Requires-Dist: filetype; extra == "all"
204
- Requires-Dist: deepdiff; extra == "all"
205
- Requires-Dist: google-auth-httplib2; extra == "all"
206
- Requires-Dist: pymupdf; extra == "all"
197
+ Requires-Dist: pymupdf4llm; extra == "all"
198
+ Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
199
+ Requires-Dist: regex; extra == "all"
200
+ Requires-Dist: playwright; extra == "all"
201
+ Requires-Dist: semver; extra == "all"
202
+ Requires-Dist: logfire; extra == "all"
203
+ Requires-Dist: beanie[odm]; extra == "all"
204
+ Requires-Dist: pydantic-settings; extra == "all"
205
+ Requires-Dist: appdirs; extra == "all"
206
+ Requires-Dist: httpx; extra == "all"
207
207
  Requires-Dist: motor; extra == "all"
208
- Requires-Dist: flet-video; extra == "all"
208
+ Requires-Dist: yamlscript; extra == "all"
209
+ Requires-Dist: cachetools; extra == "all"
210
+ Requires-Dist: google-auth-oauthlib; extra == "all"
209
211
  Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
210
- Requires-Dist: logfire[httpx]; extra == "all"
211
- Requires-Dist: contexttimer; extra == "all"
212
- Requires-Dist: diskcache; extra == "all"
213
- Requires-Dist: flet[all]; extra == "all"
214
- Requires-Dist: playwright; extra == "all"
212
+ Requires-Dist: logfire[fastapi]; extra == "all"
213
+ Requires-Dist: pyyaml; extra == "all"
214
+ Requires-Dist: ollama; extra == "all"
215
+ Requires-Dist: python-on-whales; extra == "all"
215
216
  Requires-Dist: tabulate; extra == "all"
216
- Requires-Dist: pydantic; extra == "all"
217
- Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
218
- Requires-Dist: deepmerge; extra == "all"
217
+ Requires-Dist: distributed; extra == "all"
218
+ Requires-Dist: pycountry; extra == "all"
219
219
  Dynamic: author
220
220
  Dynamic: author-email
221
221
  Dynamic: description
@@ -32,7 +32,7 @@ fmtr/tools/openai_tools.py,sha256=6SUgejgzUzmlKKct2_ePXntvMegu3FJgfk9x7aqtqYc,74
32
32
  fmtr/tools/packaging_tools.py,sha256=FlgOTnDRHZWQL2iR-wucTsyGEHRE-MlddKL30MPmUqE,253
33
33
  fmtr/tools/parallel_tools.py,sha256=QEb_gN1StkxsqYaH4HSjiJX8Y3gpb2uKNsOzG4uFpaM,3071
34
34
  fmtr/tools/pattern_tools.py,sha256=DlEKzNJKhwFmU3-awoGkN5Xy-yLF_bsoj8eoSMCEytE,6018
35
- fmtr/tools/pdf_tools.py,sha256=9ElZRvXtYlvZkw8koB31xSGDM2B9JHQT7FVVIaLoXQI,4311
35
+ fmtr/tools/pdf_tools.py,sha256=6XQCNyytQSnJSc38gdMOFVcPXnPwfOlk6y4QVqmJLp8,4810
36
36
  fmtr/tools/platform_tools.py,sha256=7p69CmAHe_sF68Fx9uVhns1k5EewTHTWgUYzkl6ZQKA,308
37
37
  fmtr/tools/process_tools.py,sha256=Ysh5Dk2QFBhXQerArjKdt7xZd3JrN5Ho02AaOjH0Nnw,1425
38
38
  fmtr/tools/profiling_tools.py,sha256=jpXVjaNKPydTasEQVNXvxzGtMhXPit08AnJddkU8uIc,46
@@ -45,7 +45,7 @@ fmtr/tools/tabular_tools.py,sha256=mw6vOij1Ch-pVAyHMPtm5zj__ULZN_TKeBYOfj33wFM,1
45
45
  fmtr/tools/tokenization_tools.py,sha256=me-IBzSLyNYejLybwjO9CNB6Mj2NYfKPaOVThXyaGNg,4268
46
46
  fmtr/tools/tools.py,sha256=CAsApa1YwVdNE6H66Vjivs_mXYvOas3rh7fPELAnTpk,795
47
47
  fmtr/tools/unicode_tools.py,sha256=yS_9wpu8ogNoiIL7s1G_8bETFFO_YQlo4LNPv1NLDeY,52
48
- fmtr/tools/version,sha256=Z9USkquVEFCARqM5U6DLcxzsj6T0ruK_4fsrwxu4VXU,6
48
+ fmtr/tools/version,sha256=FKKPeGwGcnITJYQNL1W7YAPGPvigwhisVg9K99HxWho,6
49
49
  fmtr/tools/webhook_tools.py,sha256=q3pVJ1NCem2SrMuFcLxiWd7DibFs7Q-uGtojfXd3Qcg,380
50
50
  fmtr/tools/yaml_tools.py,sha256=Bhhyd6GQVKO72Lp8ky7bAUjIB_65Hdh0Q45SKIEe6S8,1901
51
51
  fmtr/tools/ai_tools/__init__.py,sha256=O8VRlPnnQCncg2ZZ2l_VdWLJf4jkKH6dkZFVbv6o7IM,388
@@ -85,9 +85,9 @@ fmtr/tools/tests/test_path.py,sha256=AkZQa6_8BQ-VaCyL_J-iKmdf2ZaM-xFYR37Kun3k4_g
85
85
  fmtr/tools/tests/test_yaml.py,sha256=jc0TwwKu9eC0LvFGNMERdgBue591xwLxYXFbtsRwXVM,287
86
86
  fmtr/tools/version_tools/__init__.py,sha256=cjE6nO6AoVOUp3RwgTbqL9wiw8J1l2pHJOz6Gn6bxjA,326
87
87
  fmtr/tools/version_tools/version_tools.py,sha256=Hcc6yferZS1hHbugRTdiHhSNmXEEG0hjCiTTXKna-YY,1127
88
- fmtr_tools-1.3.61.dist-info/licenses/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
89
- fmtr_tools-1.3.61.dist-info/METADATA,sha256=fqS-UiWsEhwxOhJPB3BHTE9apKaAHvujkNXfU-vSWTs,17455
90
- fmtr_tools-1.3.61.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
91
- fmtr_tools-1.3.61.dist-info/entry_points.txt,sha256=h-r__Xh5njtFqreMLg6cGuTFS4Qh-QqJPU1HB-_BS-Q,357
92
- fmtr_tools-1.3.61.dist-info/top_level.txt,sha256=LXem9xCgNOD72tE2gRKESdiQTL902mfFkwWb6-dlwEE,5
93
- fmtr_tools-1.3.61.dist-info/RECORD,,
88
+ fmtr_tools-1.3.62.dist-info/licenses/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
89
+ fmtr_tools-1.3.62.dist-info/METADATA,sha256=ylVUFuwLtaNvhNbmm4P8w1MCEZ5Ddkmu6Bc_exClAW4,17455
90
+ fmtr_tools-1.3.62.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
91
+ fmtr_tools-1.3.62.dist-info/entry_points.txt,sha256=h-r__Xh5njtFqreMLg6cGuTFS4Qh-QqJPU1HB-_BS-Q,357
92
+ fmtr_tools-1.3.62.dist-info/top_level.txt,sha256=LXem9xCgNOD72tE2gRKESdiQTL902mfFkwWb6-dlwEE,5
93
+ fmtr_tools-1.3.62.dist-info/RECORD,,