fmtr.tools 1.3.61__py3-none-any.whl → 1.3.63__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fmtr.tools might be problematic. Click here for more details.

fmtr/tools/pdf_tools.py CHANGED
@@ -1,6 +1,7 @@
1
+ from typing import List, Tuple, Dict, Any, Self
2
+
1
3
  import pymupdf as pm
2
4
  import pymupdf4llm
3
- from typing import List, Tuple, Dict, Any, Self
4
5
 
5
6
  from fmtr.tools import data_modelling_tools
6
7
 
@@ -179,10 +180,10 @@ class Document(pm.Document):
179
180
  """
180
181
  return pymupdf4llm.to_markdown(self, **kwargs)
181
182
 
182
- def to_text(self):
183
+ def to_text_pages(self) -> List[str]:
183
184
  """
184
185
 
185
- Simple text output.
186
+ Simple text output per-page.
186
187
 
187
188
  """
188
189
  lines = []
@@ -190,9 +191,32 @@ class Document(pm.Document):
190
191
  text = page.get_text()
191
192
  lines.append(text)
192
193
 
193
- text = '\n'.join(lines)
194
+ return lines
195
+
196
+ def to_text(self) -> str:
197
+ """
198
+
199
+ Simple text output.
200
+
201
+ """
202
+
203
+ text = '\n'.join(self.to_text_pages())
194
204
  return text
195
205
 
206
+ def split(self) -> List[Self]:
207
+ """
208
+
209
+ Split pages into individual documents.
210
+
211
+ """
212
+
213
+ documents = []
214
+ for i, page in enumerate(self, start=1):
215
+ document = self.__class__()
216
+ document.insert_pdf(self, from_page=i, to_page=i)
217
+ documents.append(document)
218
+
219
+ return documents
196
220
 
197
221
  if __name__ == '__main__':
198
222
  from fmtr.tools.path_tools import Path
fmtr/tools/string_tools.py CHANGED
@@ -1,11 +1,11 @@
1
- from collections import namedtuple
2
- from string import Formatter
3
-
4
1
  import re
5
2
  from dataclasses import dataclass
6
3
  from textwrap import dedent
7
4
  from typing import List
8
5
 
6
+ from collections import namedtuple
7
+ from string import Formatter
8
+
9
9
  from fmtr.tools.datatype_tools import is_none
10
10
 
11
11
  ELLIPSIS = '…'
@@ -151,14 +151,14 @@ def truncate_mid(text, length=None, sep=ELLIPSIS, return_type=str):
151
151
  )
152
152
 
153
153
 
154
- def flatten(raw):
154
+ def flatten(raw, sep=' '):
155
155
  """
156
156
 
157
157
  Flatten a multiline string to a single line
158
158
 
159
159
  """
160
160
  lines = raw.splitlines()
161
- text = ' '.join(lines)
161
+ text = sep.join(lines)
162
162
  text = text.strip()
163
163
  return text
164
164
 
@@ -175,6 +175,22 @@ def join(strings, sep=' '):
175
175
  return text
176
176
 
177
177
 
178
+ def join_natural(items, sep=', ', conj='and'):
179
+ """
180
+
181
+ Natural language list
182
+
183
+ """
184
+
185
+ if not items:
186
+ return ""
187
+ if len(items) == 1:
188
+ return items[0]
189
+ firsts, last = items[:-1], items[-1]
190
+ firsts_str = join(firsts, sep=sep)
191
+ text = f"{firsts_str} {conj} {last}"
192
+ return text
193
+
178
194
  class Mask:
179
195
  """
180
196
 
fmtr/tools/version CHANGED
@@ -1 +1 @@
1
- 1.3.61
1
+ 1.3.63
fmtr_tools-1.3.63.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fmtr.tools
3
- Version: 1.3.61
3
+ Version: 1.3.63
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
@@ -154,68 +154,68 @@ Provides-Extra: db-document
154
154
  Requires-Dist: beanie[odm]; extra == "db-document"
155
155
  Requires-Dist: motor; extra == "db-document"
156
156
  Provides-Extra: all
157
+ Requires-Dist: uvicorn[standard]; extra == "all"
158
+ Requires-Dist: contexttimer; extra == "all"
159
+ Requires-Dist: pymupdf; extra == "all"
160
+ Requires-Dist: motor; extra == "all"
161
+ Requires-Dist: huggingface_hub; extra == "all"
162
+ Requires-Dist: google-auth-httplib2; extra == "all"
157
163
  Requires-Dist: sre_yield; extra == "all"
158
- Requires-Dist: dask[bag]; extra == "all"
159
- Requires-Dist: bokeh; extra == "all"
160
- Requires-Dist: tinynetrc; extra == "all"
161
- Requires-Dist: python-on-whales; extra == "all"
162
164
  Requires-Dist: json_repair; extra == "all"
163
- Requires-Dist: dnspython[doh]; extra == "all"
164
- Requires-Dist: pymupdf4llm; extra == "all"
165
- Requires-Dist: transformers[sentencepiece]; extra == "all"
166
- Requires-Dist: httpx; extra == "all"
167
- Requires-Dist: cachetools; extra == "all"
168
- Requires-Dist: pandas; extra == "all"
169
- Requires-Dist: torchvision; extra == "all"
170
- Requires-Dist: peft; extra == "all"
171
- Requires-Dist: pydantic-settings; extra == "all"
172
- Requires-Dist: httpx_retries; extra == "all"
173
- Requires-Dist: torchaudio; extra == "all"
174
- Requires-Dist: distributed; extra == "all"
165
+ Requires-Dist: fastapi; extra == "all"
166
+ Requires-Dist: tabulate; extra == "all"
175
167
  Requires-Dist: flet-webview; extra == "all"
176
- Requires-Dist: tokenizers; extra == "all"
177
- Requires-Dist: google-auth; extra == "all"
178
- Requires-Dist: pytest-cov; extra == "all"
179
- Requires-Dist: pycountry; extra == "all"
180
- Requires-Dist: regex; extra == "all"
181
- Requires-Dist: semver; extra == "all"
182
- Requires-Dist: pyyaml; extra == "all"
168
+ Requires-Dist: beanie[odm]; extra == "all"
169
+ Requires-Dist: openai; extra == "all"
183
170
  Requires-Dist: pydantic-extra-types; extra == "all"
184
- Requires-Dist: fastapi; extra == "all"
185
- Requires-Dist: ollama; extra == "all"
186
- Requires-Dist: odfpy; extra == "all"
187
- Requires-Dist: setuptools; extra == "all"
188
- Requires-Dist: openpyxl; extra == "all"
189
- Requires-Dist: uvicorn[standard]; extra == "all"
190
- Requires-Dist: google-auth-oauthlib; extra == "all"
171
+ Requires-Dist: torchaudio; extra == "all"
172
+ Requires-Dist: flet-video; extra == "all"
173
+ Requires-Dist: tinynetrc; extra == "all"
174
+ Requires-Dist: flet[all]; extra == "all"
175
+ Requires-Dist: pycountry; extra == "all"
176
+ Requires-Dist: torchvision; extra == "all"
177
+ Requires-Dist: deepdiff; extra == "all"
178
+ Requires-Dist: pydantic; extra == "all"
179
+ Requires-Dist: httpx_retries; extra == "all"
180
+ Requires-Dist: logfire[fastapi]; extra == "all"
181
+ Requires-Dist: diskcache; extra == "all"
182
+ Requires-Dist: dask[bag]; extra == "all"
191
183
  Requires-Dist: yamlscript; extra == "all"
184
+ Requires-Dist: Unidecode; extra == "all"
185
+ Requires-Dist: openpyxl; extra == "all"
186
+ Requires-Dist: httpx; extra == "all"
187
+ Requires-Dist: transformers[sentencepiece]; extra == "all"
192
188
  Requires-Dist: html2text; extra == "all"
193
- Requires-Dist: appdirs; extra == "all"
194
- Requires-Dist: google-api-python-client; extra == "all"
195
- Requires-Dist: beanie[odm]; extra == "all"
189
+ Requires-Dist: pytest-cov; extra == "all"
196
190
  Requires-Dist: logfire; extra == "all"
191
+ Requires-Dist: appdirs; extra == "all"
192
+ Requires-Dist: python-on-whales; extra == "all"
193
+ Requires-Dist: semver; extra == "all"
194
+ Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
195
+ Requires-Dist: pandas; extra == "all"
196
+ Requires-Dist: distributed; extra == "all"
197
+ Requires-Dist: filetype; extra == "all"
197
198
  Requires-Dist: faker; extra == "all"
198
- Requires-Dist: Unidecode; extra == "all"
199
- Requires-Dist: huggingface_hub; extra == "all"
200
- Requires-Dist: openai; extra == "all"
201
199
  Requires-Dist: sentence_transformers; extra == "all"
202
- Requires-Dist: logfire[fastapi]; extra == "all"
203
- Requires-Dist: filetype; extra == "all"
204
- Requires-Dist: deepdiff; extra == "all"
205
- Requires-Dist: google-auth-httplib2; extra == "all"
206
- Requires-Dist: pymupdf; extra == "all"
207
- Requires-Dist: motor; extra == "all"
208
- Requires-Dist: flet-video; extra == "all"
209
- Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
200
+ Requires-Dist: regex; extra == "all"
201
+ Requires-Dist: deepmerge; extra == "all"
202
+ Requires-Dist: google-api-python-client; extra == "all"
203
+ Requires-Dist: setuptools; extra == "all"
204
+ Requires-Dist: google-auth; extra == "all"
205
+ Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
206
+ Requires-Dist: pydantic-settings; extra == "all"
207
+ Requires-Dist: dnspython[doh]; extra == "all"
208
+ Requires-Dist: ollama; extra == "all"
209
+ Requires-Dist: cachetools; extra == "all"
210
+ Requires-Dist: pymupdf4llm; extra == "all"
211
+ Requires-Dist: pyyaml; extra == "all"
210
212
  Requires-Dist: logfire[httpx]; extra == "all"
211
- Requires-Dist: contexttimer; extra == "all"
212
- Requires-Dist: diskcache; extra == "all"
213
- Requires-Dist: flet[all]; extra == "all"
214
213
  Requires-Dist: playwright; extra == "all"
215
- Requires-Dist: tabulate; extra == "all"
216
- Requires-Dist: pydantic; extra == "all"
217
- Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
218
- Requires-Dist: deepmerge; extra == "all"
214
+ Requires-Dist: odfpy; extra == "all"
215
+ Requires-Dist: tokenizers; extra == "all"
216
+ Requires-Dist: bokeh; extra == "all"
217
+ Requires-Dist: google-auth-oauthlib; extra == "all"
218
+ Requires-Dist: peft; extra == "all"
219
219
  Dynamic: author
220
220
  Dynamic: author-email
221
221
  Dynamic: description
fmtr_tools-1.3.63.dist-info/RECORD CHANGED
@@ -32,7 +32,7 @@ fmtr/tools/openai_tools.py,sha256=6SUgejgzUzmlKKct2_ePXntvMegu3FJgfk9x7aqtqYc,74
32
32
  fmtr/tools/packaging_tools.py,sha256=FlgOTnDRHZWQL2iR-wucTsyGEHRE-MlddKL30MPmUqE,253
33
33
  fmtr/tools/parallel_tools.py,sha256=QEb_gN1StkxsqYaH4HSjiJX8Y3gpb2uKNsOzG4uFpaM,3071
34
34
  fmtr/tools/pattern_tools.py,sha256=DlEKzNJKhwFmU3-awoGkN5Xy-yLF_bsoj8eoSMCEytE,6018
35
- fmtr/tools/pdf_tools.py,sha256=9ElZRvXtYlvZkw8koB31xSGDM2B9JHQT7FVVIaLoXQI,4311
35
+ fmtr/tools/pdf_tools.py,sha256=6XQCNyytQSnJSc38gdMOFVcPXnPwfOlk6y4QVqmJLp8,4810
36
36
  fmtr/tools/platform_tools.py,sha256=7p69CmAHe_sF68Fx9uVhns1k5EewTHTWgUYzkl6ZQKA,308
37
37
  fmtr/tools/process_tools.py,sha256=Ysh5Dk2QFBhXQerArjKdt7xZd3JrN5Ho02AaOjH0Nnw,1425
38
38
  fmtr/tools/profiling_tools.py,sha256=jpXVjaNKPydTasEQVNXvxzGtMhXPit08AnJddkU8uIc,46
@@ -40,12 +40,12 @@ fmtr/tools/random_tools.py,sha256=4VlQdk5THbR8ka4pZaLbk_ZO_4yy6PF_lHZes_rgenY,22
40
40
  fmtr/tools/semantic_tools.py,sha256=cxY9NSAHWj4nEc6Oj4qA1omR3dWbl2OuH7_PkINc6_E,1386
41
41
  fmtr/tools/settings_tools.py,sha256=o11W3T60UZSvCTkh_eEQq1Mx74GycQ6JxUr0plBDbsk,2356
42
42
  fmtr/tools/spaces_tools.py,sha256=D_he3mve6DruB3OPS6QyzqD05ChHnRTb4buViKPe7To,1099
43
- fmtr/tools/string_tools.py,sha256=On6YRLTAK1i6mmMpOUWVM618CykJiuaoyKIsU1cB_mA,4952
43
+ fmtr/tools/string_tools.py,sha256=Lz_H9l25OOoxE48QBJ_Upkk5nno7dPA6G2Gc0Wo8rOk,5275
44
44
  fmtr/tools/tabular_tools.py,sha256=mw6vOij1Ch-pVAyHMPtm5zj__ULZN_TKeBYOfj33wFM,1634
45
45
  fmtr/tools/tokenization_tools.py,sha256=me-IBzSLyNYejLybwjO9CNB6Mj2NYfKPaOVThXyaGNg,4268
46
46
  fmtr/tools/tools.py,sha256=CAsApa1YwVdNE6H66Vjivs_mXYvOas3rh7fPELAnTpk,795
47
47
  fmtr/tools/unicode_tools.py,sha256=yS_9wpu8ogNoiIL7s1G_8bETFFO_YQlo4LNPv1NLDeY,52
48
- fmtr/tools/version,sha256=Z9USkquVEFCARqM5U6DLcxzsj6T0ruK_4fsrwxu4VXU,6
48
+ fmtr/tools/version,sha256=1-w7DNphgnwgQa3EckgnbD0Qx9ULcbjMgAOknBRyoWI,6
49
49
  fmtr/tools/webhook_tools.py,sha256=q3pVJ1NCem2SrMuFcLxiWd7DibFs7Q-uGtojfXd3Qcg,380
50
50
  fmtr/tools/yaml_tools.py,sha256=Bhhyd6GQVKO72Lp8ky7bAUjIB_65Hdh0Q45SKIEe6S8,1901
51
51
  fmtr/tools/ai_tools/__init__.py,sha256=O8VRlPnnQCncg2ZZ2l_VdWLJf4jkKH6dkZFVbv6o7IM,388
@@ -85,9 +85,9 @@ fmtr/tools/tests/test_path.py,sha256=AkZQa6_8BQ-VaCyL_J-iKmdf2ZaM-xFYR37Kun3k4_g
85
85
  fmtr/tools/tests/test_yaml.py,sha256=jc0TwwKu9eC0LvFGNMERdgBue591xwLxYXFbtsRwXVM,287
86
86
  fmtr/tools/version_tools/__init__.py,sha256=cjE6nO6AoVOUp3RwgTbqL9wiw8J1l2pHJOz6Gn6bxjA,326
87
87
  fmtr/tools/version_tools/version_tools.py,sha256=Hcc6yferZS1hHbugRTdiHhSNmXEEG0hjCiTTXKna-YY,1127
88
- fmtr_tools-1.3.61.dist-info/licenses/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
89
- fmtr_tools-1.3.61.dist-info/METADATA,sha256=fqS-UiWsEhwxOhJPB3BHTE9apKaAHvujkNXfU-vSWTs,17455
90
- fmtr_tools-1.3.61.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
91
- fmtr_tools-1.3.61.dist-info/entry_points.txt,sha256=h-r__Xh5njtFqreMLg6cGuTFS4Qh-QqJPU1HB-_BS-Q,357
92
- fmtr_tools-1.3.61.dist-info/top_level.txt,sha256=LXem9xCgNOD72tE2gRKESdiQTL902mfFkwWb6-dlwEE,5
93
- fmtr_tools-1.3.61.dist-info/RECORD,,
88
+ fmtr_tools-1.3.63.dist-info/licenses/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
89
+ fmtr_tools-1.3.63.dist-info/METADATA,sha256=nSjkJECziz3f5ss_9fCnnyyVJCoAKvOl0LzYDPShNzo,17455
90
+ fmtr_tools-1.3.63.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
91
+ fmtr_tools-1.3.63.dist-info/entry_points.txt,sha256=h-r__Xh5njtFqreMLg6cGuTFS4Qh-QqJPU1HB-_BS-Q,357
92
+ fmtr_tools-1.3.63.dist-info/top_level.txt,sha256=LXem9xCgNOD72tE2gRKESdiQTL902mfFkwWb6-dlwEE,5
93
+ fmtr_tools-1.3.63.dist-info/RECORD,,