fmtr.tools 1.3.48__py3-none-any.whl → 1.3.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fmtr.tools might be problematic. Click here for more details.

@@ -64,3 +64,12 @@ def strip_none(*items):
64
64
 
65
65
  """
66
66
  return [item for item in items if not is_none(item)]
67
+
68
+
69
def dedupe(items):
    """

    Drop repeated items, keeping the first occurrence of each and the original ordering.

    """
    first_seen = {}
    for item in items:
        # Dict keys preserve insertion order; later duplicates leave the first key untouched.
        first_seen.setdefault(item, None)
    return list(first_seen)
@@ -145,6 +145,32 @@ class Path(type(Path())):
145
145
  """
146
146
  return self.mkdir(parents=True, exist_ok=True)
147
147
 
148
def with_suffix(self, suffix: str) -> 'Path':
    """

    Pathlib doesn't add a dot prefix, but then errors if you don't provide one, which feels rather obnoxious.
    A missing leading dot is therefore added before delegating to the parent implementation.

    An empty suffix is passed through unchanged, because that is how pathlib is asked to remove the existing
    suffix. Prefixing it would produce '.', which pathlib rejects as an invalid suffix.

    """
    if suffix and not suffix.startswith('.'):
        suffix = f'.{suffix}'
    return super().with_suffix(suffix)
157
+
158
def get_conversion_path(self, suffix: str) -> 'Path':
    """

    Fetch the equivalent path for a different format in the standard conversion directory structure.
    .../xyz/filename.xyz -> .../abc/filename.abc

    The suffix may be given with or without a leading dot.

    Raises ValueError if the parent directory is not named after this file's own extension.

    """

    old_dir = self.parent.name
    extension = self.suffix.removeprefix('.')

    if old_dir != extension:
        # Report the file's actual extension (not the requested target suffix) so the mismatch is readable.
        raise ValueError(f"Expected parent directory '{old_dir}' to match file extension '{extension}'")

    # Strip any leading dot so neither the directory name nor the filename ends up with a stray dot.
    target = suffix.removeprefix('.')
    return self.parent.parent / target / f'{self.stem}.{target}'
173
+
148
174
  @property
149
175
  def exist(self):
150
176
  """
fmtr/tools/pdf_tools.py CHANGED
@@ -179,6 +179,20 @@ class Document(pm.Document):
179
179
  """
180
180
  return pymupdf4llm.to_markdown(self, **kwargs)
181
181
 
182
def to_text(self):
    """

    Plain-text rendering: the text of every page, in iteration order, separated by newlines.

    """
    return '\n'.join(page.get_text() for page in self)
195
+
182
196
 
183
197
  if __name__ == '__main__':
184
198
  from fmtr.tools.path_tools import Path
@@ -1,7 +1,68 @@
1
+ import deepdiff
2
+ import numpy as np
1
3
  import pandas as pd
2
4
 
5
+ from fmtr.tools.iterator_tools import dedupe
6
+
3
7
  Table = DataFrame = pd.DataFrame
4
8
  Series = pd.Series
5
9
 
10
+ nan = np.nan
11
+
6
12
  CONCAT_HORIZONTALLY = 1
7
13
  CONCAT_VERTICALLY = 0
14
+
15
+
16
def normalize_nan(df, value=np.nan):
    """

    Replace each missing-value marker (pandas NA, None and NumPy NaN) with a single value.

    """
    missing_markers = (pd.NA, None, np.nan)
    replacements = dict.fromkeys(missing_markers, value)
    return df.replace(replacements)
18
+
19
+
20
class Differ:
    """

    Diff two dataframes via DeepDiff, after shape normalization, datatype simplification, etc.

    """

    def __init__(self, left: Table, right: Table):
        """

        Collect the deduplicated union of column and row labels from both frames, then normalize each frame
        against that union.

        """

        self.cols = dedupe(left.columns.tolist() + right.columns.tolist())
        self.rows = dedupe(left.index.values.tolist() + right.index.values.tolist())
        self.left = self.process(left)
        self.right = self.process(right)
        self.dfs = [self.left, self.right]

    def process(self, df: Table) -> Table:
        """

        Ensure same rows/columns, plus simplify datatypes via JSON round-trip.

        The frame passed in is not modified; a normalized copy is returned.

        """
        from io import StringIO

        # Work on a copy so padding rows/columns never leaks into the caller's frame.
        df = df.copy()

        df_rows = set(df.index.values.tolist())
        for row in self.rows:
            if row in df_rows:
                continue
            # Add the row under its own label, so both frames end up with the same index and no
            # existing row is overwritten.
            df.loc[row] = None

        df_cols = set(df.columns.tolist())
        for col in self.cols:
            if col in df_cols:
                continue
            df[col] = None

        # Wrap the payload in StringIO: passing a literal JSON string to read_json is deprecated in recent pandas.
        df = pd.read_json(StringIO(df.to_json(date_format='iso')))
        df = normalize_nan(df, value=None)

        return df

    def get_diff(self) -> deepdiff.DeepDiff:
        """

        Cast both processed frames to index-keyed dicts and diff them, ignoring order.

        """

        dicts = [df.to_dict(orient='index') for df in self.dfs]
        diff = deepdiff.DeepDiff(*dicts, ignore_order=True)
        return diff
fmtr/tools/version CHANGED
@@ -1 +1 @@
1
- 1.3.48
1
+ 1.3.49
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fmtr.tools
3
- Version: 1.3.48
3
+ Version: 1.3.49
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
@@ -81,16 +81,19 @@ Requires-Dist: pandas; extra == "semantic"
81
81
  Requires-Dist: tabulate; extra == "semantic"
82
82
  Requires-Dist: openpyxl; extra == "semantic"
83
83
  Requires-Dist: odfpy; extra == "semantic"
84
+ Requires-Dist: deepdiff; extra == "semantic"
84
85
  Provides-Extra: metric
85
86
  Requires-Dist: pandas; extra == "metric"
86
87
  Requires-Dist: tabulate; extra == "metric"
87
88
  Requires-Dist: openpyxl; extra == "metric"
88
89
  Requires-Dist: odfpy; extra == "metric"
90
+ Requires-Dist: deepdiff; extra == "metric"
89
91
  Provides-Extra: tabular
90
92
  Requires-Dist: pandas; extra == "tabular"
91
93
  Requires-Dist: tabulate; extra == "tabular"
92
94
  Requires-Dist: openpyxl; extra == "tabular"
93
95
  Requires-Dist: odfpy; extra == "tabular"
96
+ Requires-Dist: deepdiff; extra == "tabular"
94
97
  Provides-Extra: html
95
98
  Requires-Dist: html2text; extra == "html"
96
99
  Provides-Extra: interface
@@ -151,67 +154,68 @@ Provides-Extra: db-document
151
154
  Requires-Dist: beanie[odm]; extra == "db-document"
152
155
  Requires-Dist: motor; extra == "db-document"
153
156
  Provides-Extra: all
154
- Requires-Dist: dnspython[doh]; extra == "all"
155
- Requires-Dist: playwright; extra == "all"
156
- Requires-Dist: yamlscript; extra == "all"
157
- Requires-Dist: uvicorn[standard]; extra == "all"
158
- Requires-Dist: logfire[fastapi]; extra == "all"
159
- Requires-Dist: beanie[odm]; extra == "all"
160
- Requires-Dist: cachetools; extra == "all"
161
- Requires-Dist: google-auth-oauthlib; extra == "all"
162
- Requires-Dist: deepmerge; extra == "all"
163
- Requires-Dist: appdirs; extra == "all"
164
- Requires-Dist: semver; extra == "all"
165
- Requires-Dist: contexttimer; extra == "all"
166
- Requires-Dist: Unidecode; extra == "all"
167
- Requires-Dist: pycountry; extra == "all"
168
- Requires-Dist: html2text; extra == "all"
169
- Requires-Dist: setuptools; extra == "all"
170
- Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
171
- Requires-Dist: bokeh; extra == "all"
172
- Requires-Dist: pydantic-extra-types; extra == "all"
173
- Requires-Dist: pymupdf; extra == "all"
174
- Requires-Dist: openpyxl; extra == "all"
175
- Requires-Dist: flet-video; extra == "all"
176
157
  Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
158
+ Requires-Dist: torchaudio; extra == "all"
159
+ Requires-Dist: odfpy; extra == "all"
160
+ Requires-Dist: pymupdf; extra == "all"
177
161
  Requires-Dist: distributed; extra == "all"
178
162
  Requires-Dist: json_repair; extra == "all"
179
- Requires-Dist: dask[bag]; extra == "all"
180
- Requires-Dist: google-auth; extra == "all"
163
+ Requires-Dist: bokeh; extra == "all"
181
164
  Requires-Dist: tinynetrc; extra == "all"
182
- Requires-Dist: diskcache; extra == "all"
183
165
  Requires-Dist: fastapi; extra == "all"
184
- Requires-Dist: httpx; extra == "all"
185
- Requires-Dist: logfire[httpx]; extra == "all"
186
- Requires-Dist: motor; extra == "all"
187
- Requires-Dist: ollama; extra == "all"
188
- Requires-Dist: flet[all]; extra == "all"
189
- Requires-Dist: regex; extra == "all"
166
+ Requires-Dist: pycountry; extra == "all"
190
167
  Requires-Dist: sre_yield; extra == "all"
191
- Requires-Dist: transformers[sentencepiece]; extra == "all"
168
+ Requires-Dist: semver; extra == "all"
169
+ Requires-Dist: html2text; extra == "all"
192
170
  Requires-Dist: pytest-cov; extra == "all"
193
- Requires-Dist: faker; extra == "all"
194
171
  Requires-Dist: pyyaml; extra == "all"
195
- Requires-Dist: torchaudio; extra == "all"
196
- Requires-Dist: sentence_transformers; extra == "all"
197
- Requires-Dist: tabulate; extra == "all"
198
- Requires-Dist: google-auth-httplib2; extra == "all"
199
- Requires-Dist: pandas; extra == "all"
200
- Requires-Dist: google-api-python-client; extra == "all"
172
+ Requires-Dist: appdirs; extra == "all"
173
+ Requires-Dist: pydantic; extra == "all"
201
174
  Requires-Dist: httpx_retries; extra == "all"
175
+ Requires-Dist: ollama; extra == "all"
176
+ Requires-Dist: pandas; extra == "all"
177
+ Requires-Dist: pydantic-extra-types; extra == "all"
178
+ Requires-Dist: google-auth; extra == "all"
179
+ Requires-Dist: dask[bag]; extra == "all"
180
+ Requires-Dist: uvicorn[standard]; extra == "all"
181
+ Requires-Dist: httpx; extra == "all"
202
182
  Requires-Dist: pydantic-settings; extra == "all"
203
- Requires-Dist: peft; extra == "all"
183
+ Requires-Dist: sentence_transformers; extra == "all"
184
+ Requires-Dist: tokenizers; extra == "all"
185
+ Requires-Dist: deepdiff; extra == "all"
186
+ Requires-Dist: diskcache; extra == "all"
187
+ Requires-Dist: faker; extra == "all"
188
+ Requires-Dist: playwright; extra == "all"
189
+ Requires-Dist: contexttimer; extra == "all"
190
+ Requires-Dist: google-api-python-client; extra == "all"
204
191
  Requires-Dist: filetype; extra == "all"
205
- Requires-Dist: torchvision; extra == "all"
206
- Requires-Dist: flet-webview; extra == "all"
207
- Requires-Dist: huggingface_hub; extra == "all"
208
- Requires-Dist: docker; extra == "all"
209
- Requires-Dist: odfpy; extra == "all"
210
- Requires-Dist: pydantic; extra == "all"
192
+ Requires-Dist: motor; extra == "all"
193
+ Requires-Dist: deepmerge; extra == "all"
194
+ Requires-Dist: peft; extra == "all"
195
+ Requires-Dist: yamlscript; extra == "all"
196
+ Requires-Dist: transformers[sentencepiece]; extra == "all"
197
+ Requires-Dist: google-auth-oauthlib; extra == "all"
211
198
  Requires-Dist: pymupdf4llm; extra == "all"
199
+ Requires-Dist: setuptools; extra == "all"
200
+ Requires-Dist: tabulate; extra == "all"
212
201
  Requires-Dist: logfire; extra == "all"
213
- Requires-Dist: tokenizers; extra == "all"
202
+ Requires-Dist: beanie[odm]; extra == "all"
203
+ Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
204
+ Requires-Dist: flet-video; extra == "all"
205
+ Requires-Dist: logfire[httpx]; extra == "all"
206
+ Requires-Dist: regex; extra == "all"
214
207
  Requires-Dist: openai; extra == "all"
208
+ Requires-Dist: docker; extra == "all"
209
+ Requires-Dist: huggingface_hub; extra == "all"
210
+ Requires-Dist: dnspython[doh]; extra == "all"
211
+ Requires-Dist: google-auth-httplib2; extra == "all"
212
+ Requires-Dist: Unidecode; extra == "all"
213
+ Requires-Dist: logfire[fastapi]; extra == "all"
214
+ Requires-Dist: openpyxl; extra == "all"
215
+ Requires-Dist: torchvision; extra == "all"
216
+ Requires-Dist: cachetools; extra == "all"
217
+ Requires-Dist: flet-webview; extra == "all"
218
+ Requires-Dist: flet[all]; extra == "all"
215
219
  Dynamic: author
216
220
  Dynamic: author-email
217
221
  Dynamic: description
@@ -21,7 +21,7 @@ fmtr/tools/http_tools.py,sha256=RVwGrBNMyjfbpgAPCSnxEkXfSzXXWARb3ayq981ONQE,464
21
21
  fmtr/tools/import_tools.py,sha256=XJmiWLukRncJAcaGReDn4jIz1_IpVBjfYCQHH1hIg7c,588
22
22
  fmtr/tools/inherit_tools.py,sha256=gTGL4mRm5RsbFW76s25AbuAJ2vlymbh1c8Q4Hl2uJGU,646
23
23
  fmtr/tools/inspection_tools.py,sha256=tLTRvzy9XVomQPi0dfnF_cgwc7KiDVZAr7gPTk4S_bQ,278
24
- fmtr/tools/iterator_tools.py,sha256=ysNT2h39_ukEGrj8k7Z_CLKjWoguKBqVdjj4PLe7faE,1502
24
+ fmtr/tools/iterator_tools.py,sha256=ymxo2U9MrPhouIhWCVvh1TrP1bXJPm_p0Lqwgi5Jr6w,1628
25
25
  fmtr/tools/json_fix_tools.py,sha256=vNSlswVQnujPmKEqDjFJcO901mjMyv59q3awsT7mlhs,477
26
26
  fmtr/tools/json_tools.py,sha256=WkFc5q7oqMtcFejhN1K5zQFULa9TdLOup83Fr0saDRY,348
27
27
  fmtr/tools/logging_tools.py,sha256=M7I5igs_tX5SIRv4f-jfb75LOODclSdmEg5ziAAMSPE,2503
@@ -33,7 +33,7 @@ fmtr/tools/openai_tools.py,sha256=6SUgejgzUzmlKKct2_ePXntvMegu3FJgfk9x7aqtqYc,74
33
33
  fmtr/tools/packaging_tools.py,sha256=FlgOTnDRHZWQL2iR-wucTsyGEHRE-MlddKL30MPmUqE,253
34
34
  fmtr/tools/parallel_tools.py,sha256=QEb_gN1StkxsqYaH4HSjiJX8Y3gpb2uKNsOzG4uFpaM,3071
35
35
  fmtr/tools/pattern_tools.py,sha256=DlEKzNJKhwFmU3-awoGkN5Xy-yLF_bsoj8eoSMCEytE,6018
36
- fmtr/tools/pdf_tools.py,sha256=xvv9B84uAF81rFJRnXhSsxYuP42vY9ZdPVFrSMVe8G8,4069
36
+ fmtr/tools/pdf_tools.py,sha256=9ElZRvXtYlvZkw8koB31xSGDM2B9JHQT7FVVIaLoXQI,4311
37
37
  fmtr/tools/platform_tools.py,sha256=7p69CmAHe_sF68Fx9uVhns1k5EewTHTWgUYzkl6ZQKA,308
38
38
  fmtr/tools/process_tools.py,sha256=Ysh5Dk2QFBhXQerArjKdt7xZd3JrN5Ho02AaOjH0Nnw,1425
39
39
  fmtr/tools/profiling_tools.py,sha256=jpXVjaNKPydTasEQVNXvxzGtMhXPit08AnJddkU8uIc,46
@@ -42,11 +42,11 @@ fmtr/tools/semantic_tools.py,sha256=cxY9NSAHWj4nEc6Oj4qA1omR3dWbl2OuH7_PkINc6_E,
42
42
  fmtr/tools/settings_tools.py,sha256=o11W3T60UZSvCTkh_eEQq1Mx74GycQ6JxUr0plBDbsk,2356
43
43
  fmtr/tools/spaces_tools.py,sha256=D_he3mve6DruB3OPS6QyzqD05ChHnRTb4buViKPe7To,1099
44
44
  fmtr/tools/string_tools.py,sha256=On6YRLTAK1i6mmMpOUWVM618CykJiuaoyKIsU1cB_mA,4952
45
- fmtr/tools/tabular_tools.py,sha256=tpIpZzYku1HcJrHZJL6BC39LmN3WUWVhFbK2N7nDVmE,120
45
+ fmtr/tools/tabular_tools.py,sha256=mw6vOij1Ch-pVAyHMPtm5zj__ULZN_TKeBYOfj33wFM,1634
46
46
  fmtr/tools/tokenization_tools.py,sha256=me-IBzSLyNYejLybwjO9CNB6Mj2NYfKPaOVThXyaGNg,4268
47
47
  fmtr/tools/tools.py,sha256=CAsApa1YwVdNE6H66Vjivs_mXYvOas3rh7fPELAnTpk,795
48
48
  fmtr/tools/unicode_tools.py,sha256=yS_9wpu8ogNoiIL7s1G_8bETFFO_YQlo4LNPv1NLDeY,52
49
- fmtr/tools/version,sha256=kV64L6ux0lCm-Ikwzdf6Z3DVqpQjGnzt_RRQ-TAwSug,6
49
+ fmtr/tools/version,sha256=Gu0FxrnxSfSiaBEXNfjNzhDJ5oAjWBiF3qy9XGVZUJU,6
50
50
  fmtr/tools/webhook_tools.py,sha256=q3pVJ1NCem2SrMuFcLxiWd7DibFs7Q-uGtojfXd3Qcg,380
51
51
  fmtr/tools/yaml_tools.py,sha256=Bhhyd6GQVKO72Lp8ky7bAUjIB_65Hdh0Q45SKIEe6S8,1901
52
52
  fmtr/tools/ai_tools/__init__.py,sha256=O8VRlPnnQCncg2ZZ2l_VdWLJf4jkKH6dkZFVbv6o7IM,388
@@ -71,7 +71,7 @@ fmtr/tools/interface_tools/controls.py,sha256=oOl0_sZB8fkvYB-9A5yjArfQmFQLMCsVGg
71
71
  fmtr/tools/interface_tools/interface_tools.py,sha256=i1TqP_67pVGiRZotKdVxyH0b5OFwaTZ_5Tf60gip0ts,4214
72
72
  fmtr/tools/path_tools/__init__.py,sha256=XrJXt7Zzo90tYUVksMlDfKkWt775zJ9OSi2NbhnqMDI,459
73
73
  fmtr/tools/path_tools/app_path_tools.py,sha256=JrJvtTDd_gkCKcZtBCDTMktsM77PZwGV_hzQX0g5GU8,1722
74
- fmtr/tools/path_tools/path_tools.py,sha256=eh30PpmH0wopy0wNWuPT84cmXY1EvqsTSDT7AV_GPOY,8034
74
+ fmtr/tools/path_tools/path_tools.py,sha256=s3RTXsjnr2Ah7vXQGjpGs-4DlWaVGvChu0aTFXX3gsE,8867
75
75
  fmtr/tools/path_tools/type_path_tools.py,sha256=Zgs-ek-GXRKDIlVDGdg3muB0PIxTg2ba0NeHw6y8FWQ,40
76
76
  fmtr/tools/setup_tools/__init__.py,sha256=Ro_Qj3Xndv8Z68DeWPI7c6X-aWKsdDm0KcX_k1xDhgE,394
77
77
  fmtr/tools/setup_tools/setup_tools.py,sha256=7Z6jlU6UE8P4cntGQ_hJR7hGvoqwh15xzZY63cnxG7E,10363
@@ -85,9 +85,9 @@ fmtr/tools/tests/test_path.py,sha256=AkZQa6_8BQ-VaCyL_J-iKmdf2ZaM-xFYR37Kun3k4_g
85
85
  fmtr/tools/tests/test_yaml.py,sha256=jc0TwwKu9eC0LvFGNMERdgBue591xwLxYXFbtsRwXVM,287
86
86
  fmtr/tools/version_tools/__init__.py,sha256=cjE6nO6AoVOUp3RwgTbqL9wiw8J1l2pHJOz6Gn6bxjA,326
87
87
  fmtr/tools/version_tools/version_tools.py,sha256=Hcc6yferZS1hHbugRTdiHhSNmXEEG0hjCiTTXKna-YY,1127
88
- fmtr_tools-1.3.48.dist-info/licenses/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
89
- fmtr_tools-1.3.48.dist-info/METADATA,sha256=sSq6t3Hgu4AHxBdNAj4TKme9fmZsbJdI-HQxSLeeb_0,17257
90
- fmtr_tools-1.3.48.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
91
- fmtr_tools-1.3.48.dist-info/entry_points.txt,sha256=h-r__Xh5njtFqreMLg6cGuTFS4Qh-QqJPU1HB-_BS-Q,357
92
- fmtr_tools-1.3.48.dist-info/top_level.txt,sha256=LXem9xCgNOD72tE2gRKESdiQTL902mfFkwWb6-dlwEE,5
93
- fmtr_tools-1.3.48.dist-info/RECORD,,
88
+ fmtr_tools-1.3.49.dist-info/licenses/LICENSE,sha256=FW9aa6vVN5IjRQWLT43hs4_koYSmpcbIovlKeAJ0_cI,10757
89
+ fmtr_tools-1.3.49.dist-info/METADATA,sha256=1oejOVboa3Ol_fXNcxIa3WG49h9fMV8G8YgRWGqeJJo,17429
90
+ fmtr_tools-1.3.49.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
91
+ fmtr_tools-1.3.49.dist-info/entry_points.txt,sha256=h-r__Xh5njtFqreMLg6cGuTFS4Qh-QqJPU1HB-_BS-Q,357
92
+ fmtr_tools-1.3.49.dist-info/top_level.txt,sha256=LXem9xCgNOD72tE2gRKESdiQTL902mfFkwWb6-dlwEE,5
93
+ fmtr_tools-1.3.49.dist-info/RECORD,,