fmtr.tools 1.3.47__tar.gz → 1.3.49__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fmtr.tools might be problematic. Click here for more details.

Files changed (101) hide show
  1. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/PKG-INFO +59 -51
  2. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/iterator_tools.py +9 -0
  3. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/path_tools/path_tools.py +26 -0
  4. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/pdf_tools.py +14 -0
  5. fmtr_tools-1.3.49/fmtr/tools/tabular_tools.py +68 -0
  6. fmtr_tools-1.3.49/fmtr/tools/version +1 -0
  7. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/PKG-INFO +59 -51
  8. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/requires.txt +58 -50
  9. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/setup.py +1 -1
  10. fmtr_tools-1.3.47/fmtr/tools/tabular_tools.py +0 -7
  11. fmtr_tools-1.3.47/fmtr/tools/version +0 -1
  12. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/LICENSE +0 -0
  13. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/README.md +0 -0
  14. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/__init__.py +0 -0
  15. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/ai_tools/__init__.py +0 -0
  16. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/ai_tools/agentic_tools.py +0 -0
  17. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/ai_tools/inference_tools.py +0 -0
  18. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/api_tools.py +0 -0
  19. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/async_tools.py +0 -0
  20. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/augmentation_tools.py +0 -0
  21. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/caching_tools.py +0 -0
  22. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/constants.py +0 -0
  23. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/context_tools.py +0 -0
  24. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/data_modelling_tools.py +0 -0
  25. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/database_tools/__init__.py +0 -0
  26. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/database_tools/document.py +0 -0
  27. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/dataclass_tools.py +0 -0
  28. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/datatype_tools.py +0 -0
  29. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/datetime_tools.py +0 -0
  30. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/debugging_tools.py +0 -0
  31. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/__init__.py +0 -0
  32. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/client.py +0 -0
  33. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/dm.py +0 -0
  34. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/proxy.py +0 -0
  35. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/server.py +0 -0
  36. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/docker_tools.py +0 -0
  37. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/__init__.py +0 -0
  38. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/cache_hfh.py +0 -0
  39. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/ep_test.py +0 -0
  40. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/install_yamlscript.py +0 -0
  41. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/remote_debug_test.py +0 -0
  42. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/shell_debug.py +0 -0
  43. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/environment_tools.py +0 -0
  44. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/function_tools.py +0 -0
  45. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/google_api_tools.py +0 -0
  46. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/hash_tools.py +0 -0
  47. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/hfh_tools.py +0 -0
  48. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/html_tools.py +0 -0
  49. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/http_tools.py +0 -0
  50. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/import_tools.py +0 -0
  51. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/inherit_tools.py +0 -0
  52. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/inspection_tools.py +0 -0
  53. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/interface_tools/__init__.py +0 -0
  54. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/interface_tools/context.py +0 -0
  55. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/interface_tools/controls.py +0 -0
  56. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/interface_tools/interface_tools.py +0 -0
  57. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/json_fix_tools.py +0 -0
  58. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/json_tools.py +0 -0
  59. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/logging_tools.py +0 -0
  60. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/merging_tools.py +0 -0
  61. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/metric_tools.py +0 -0
  62. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/name_tools.py +0 -0
  63. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/netrc_tools.py +0 -0
  64. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/openai_tools.py +0 -0
  65. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/packaging_tools.py +0 -0
  66. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/parallel_tools.py +0 -0
  67. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/path_tools/__init__.py +0 -0
  68. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/path_tools/app_path_tools.py +0 -0
  69. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/path_tools/type_path_tools.py +0 -0
  70. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/pattern_tools.py +0 -0
  71. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/platform_tools.py +0 -0
  72. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/process_tools.py +0 -0
  73. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/profiling_tools.py +0 -0
  74. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/random_tools.py +0 -0
  75. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/semantic_tools.py +0 -0
  76. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/settings_tools.py +0 -0
  77. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/setup_tools/__init__.py +0 -0
  78. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/setup_tools/setup_tools.py +0 -0
  79. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/spaces_tools.py +0 -0
  80. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/string_tools.py +0 -0
  81. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tests/__init__.py +0 -0
  82. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tests/conftest.py +0 -0
  83. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tests/helpers.py +0 -0
  84. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_datatype.py +0 -0
  85. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_environment.py +0 -0
  86. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_json.py +0 -0
  87. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_path.py +0 -0
  88. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_yaml.py +0 -0
  89. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tokenization_tools.py +0 -0
  90. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/tools.py +0 -0
  91. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/unicode_tools.py +0 -0
  92. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/version_tools/__init__.py +0 -0
  93. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/version_tools/version_tools.py +0 -0
  94. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/webhook_tools.py +0 -0
  95. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr/tools/yaml_tools.py +0 -0
  96. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/SOURCES.txt +0 -0
  97. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/dependency_links.txt +0 -0
  98. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/entry_points.txt +0 -0
  99. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/top_level.txt +0 -0
  100. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/pyproject.toml +0 -0
  101. {fmtr_tools-1.3.47 → fmtr_tools-1.3.49}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fmtr.tools
3
- Version: 1.3.47
3
+ Version: 1.3.49
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
@@ -80,14 +80,20 @@ Requires-Dist: sentence_transformers; extra == "semantic"
80
80
  Requires-Dist: pandas; extra == "semantic"
81
81
  Requires-Dist: tabulate; extra == "semantic"
82
82
  Requires-Dist: openpyxl; extra == "semantic"
83
+ Requires-Dist: odfpy; extra == "semantic"
84
+ Requires-Dist: deepdiff; extra == "semantic"
83
85
  Provides-Extra: metric
84
86
  Requires-Dist: pandas; extra == "metric"
85
87
  Requires-Dist: tabulate; extra == "metric"
86
88
  Requires-Dist: openpyxl; extra == "metric"
89
+ Requires-Dist: odfpy; extra == "metric"
90
+ Requires-Dist: deepdiff; extra == "metric"
87
91
  Provides-Extra: tabular
88
92
  Requires-Dist: pandas; extra == "tabular"
89
93
  Requires-Dist: tabulate; extra == "tabular"
90
94
  Requires-Dist: openpyxl; extra == "tabular"
95
+ Requires-Dist: odfpy; extra == "tabular"
96
+ Requires-Dist: deepdiff; extra == "tabular"
91
97
  Provides-Extra: html
92
98
  Requires-Dist: html2text; extra == "html"
93
99
  Provides-Extra: interface
@@ -148,66 +154,68 @@ Provides-Extra: db-document
148
154
  Requires-Dist: beanie[odm]; extra == "db-document"
149
155
  Requires-Dist: motor; extra == "db-document"
150
156
  Provides-Extra: all
151
- Requires-Dist: pyyaml; extra == "all"
152
- Requires-Dist: docker; extra == "all"
153
- Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
154
- Requires-Dist: contexttimer; extra == "all"
155
- Requires-Dist: openpyxl; extra == "all"
156
- Requires-Dist: google-auth-oauthlib; extra == "all"
157
- Requires-Dist: beanie[odm]; extra == "all"
158
- Requires-Dist: huggingface_hub; extra == "all"
159
- Requires-Dist: tabulate; extra == "all"
160
- Requires-Dist: dnspython[doh]; extra == "all"
161
- Requires-Dist: cachetools; extra == "all"
162
157
  Requires-Dist: pycountry; extra == "all"
163
- Requires-Dist: regex; extra == "all"
164
- Requires-Dist: httpx_retries; extra == "all"
165
- Requires-Dist: diskcache; extra == "all"
166
- Requires-Dist: sre_yield; extra == "all"
167
- Requires-Dist: distributed; extra == "all"
168
- Requires-Dist: torchvision; extra == "all"
169
- Requires-Dist: Unidecode; extra == "all"
170
- Requires-Dist: dask[bag]; extra == "all"
171
- Requires-Dist: semver; extra == "all"
172
- Requires-Dist: transformers[sentencepiece]; extra == "all"
173
- Requires-Dist: filetype; extra == "all"
174
- Requires-Dist: deepmerge; extra == "all"
175
- Requires-Dist: flet-video; extra == "all"
176
- Requires-Dist: pydantic-settings; extra == "all"
177
158
  Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
159
+ Requires-Dist: pydantic-settings; extra == "all"
178
160
  Requires-Dist: tinynetrc; extra == "all"
179
- Requires-Dist: logfire[httpx]; extra == "all"
180
- Requires-Dist: pydantic-extra-types; extra == "all"
181
- Requires-Dist: peft; extra == "all"
182
- Requires-Dist: google-auth-httplib2; extra == "all"
183
- Requires-Dist: motor; extra == "all"
184
- Requires-Dist: torchaudio; extra == "all"
185
- Requires-Dist: pymupdf; extra == "all"
161
+ Requires-Dist: pytest-cov; extra == "all"
186
162
  Requires-Dist: sentence_transformers; extra == "all"
163
+ Requires-Dist: flet-video; extra == "all"
164
+ Requires-Dist: tokenizers; extra == "all"
165
+ Requires-Dist: transformers[sentencepiece]; extra == "all"
166
+ Requires-Dist: fastapi; extra == "all"
167
+ Requires-Dist: appdirs; extra == "all"
168
+ Requires-Dist: torchvision; extra == "all"
169
+ Requires-Dist: huggingface_hub; extra == "all"
170
+ Requires-Dist: semver; extra == "all"
171
+ Requires-Dist: pymupdf4llm; extra == "all"
172
+ Requires-Dist: odfpy; extra == "all"
173
+ Requires-Dist: ollama; extra == "all"
174
+ Requires-Dist: faker; extra == "all"
175
+ Requires-Dist: Unidecode; extra == "all"
176
+ Requires-Dist: logfire[httpx]; extra == "all"
177
+ Requires-Dist: regex; extra == "all"
178
+ Requires-Dist: distributed; extra == "all"
187
179
  Requires-Dist: yamlscript; extra == "all"
188
180
  Requires-Dist: openai; extra == "all"
189
- Requires-Dist: setuptools; extra == "all"
190
- Requires-Dist: pydantic; extra == "all"
191
- Requires-Dist: json_repair; extra == "all"
192
- Requires-Dist: google-api-python-client; extra == "all"
193
- Requires-Dist: pandas; extra == "all"
194
- Requires-Dist: google-auth; extra == "all"
181
+ Requires-Dist: torchaudio; extra == "all"
182
+ Requires-Dist: httpx_retries; extra == "all"
183
+ Requires-Dist: sre_yield; extra == "all"
184
+ Requires-Dist: dnspython[doh]; extra == "all"
185
+ Requires-Dist: google-auth-oauthlib; extra == "all"
186
+ Requires-Dist: dask[bag]; extra == "all"
187
+ Requires-Dist: logfire[fastapi]; extra == "all"
188
+ Requires-Dist: pyyaml; extra == "all"
195
189
  Requires-Dist: bokeh; extra == "all"
196
- Requires-Dist: pymupdf4llm; extra == "all"
197
- Requires-Dist: flet[all]; extra == "all"
190
+ Requires-Dist: uvicorn[standard]; extra == "all"
191
+ Requires-Dist: diskcache; extra == "all"
192
+ Requires-Dist: cachetools; extra == "all"
193
+ Requires-Dist: pymupdf; extra == "all"
194
+ Requires-Dist: docker; extra == "all"
195
+ Requires-Dist: deepdiff; extra == "all"
198
196
  Requires-Dist: httpx; extra == "all"
199
- Requires-Dist: playwright; extra == "all"
200
- Requires-Dist: logfire; extra == "all"
197
+ Requires-Dist: pydantic; extra == "all"
198
+ Requires-Dist: openpyxl; extra == "all"
201
199
  Requires-Dist: html2text; extra == "all"
202
- Requires-Dist: fastapi; extra == "all"
200
+ Requires-Dist: logfire; extra == "all"
201
+ Requires-Dist: peft; extra == "all"
202
+ Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
203
+ Requires-Dist: flet[all]; extra == "all"
204
+ Requires-Dist: filetype; extra == "all"
205
+ Requires-Dist: json_repair; extra == "all"
203
206
  Requires-Dist: flet-webview; extra == "all"
204
- Requires-Dist: faker; extra == "all"
205
- Requires-Dist: uvicorn[standard]; extra == "all"
206
- Requires-Dist: logfire[fastapi]; extra == "all"
207
- Requires-Dist: tokenizers; extra == "all"
208
- Requires-Dist: ollama; extra == "all"
209
- Requires-Dist: appdirs; extra == "all"
210
- Requires-Dist: pytest-cov; extra == "all"
207
+ Requires-Dist: google-auth; extra == "all"
208
+ Requires-Dist: setuptools; extra == "all"
209
+ Requires-Dist: beanie[odm]; extra == "all"
210
+ Requires-Dist: motor; extra == "all"
211
+ Requires-Dist: deepmerge; extra == "all"
212
+ Requires-Dist: google-api-python-client; extra == "all"
213
+ Requires-Dist: playwright; extra == "all"
214
+ Requires-Dist: tabulate; extra == "all"
215
+ Requires-Dist: google-auth-httplib2; extra == "all"
216
+ Requires-Dist: pydantic-extra-types; extra == "all"
217
+ Requires-Dist: contexttimer; extra == "all"
218
+ Requires-Dist: pandas; extra == "all"
211
219
  Dynamic: author
212
220
  Dynamic: author-email
213
221
  Dynamic: description
@@ -64,3 +64,12 @@ def strip_none(*items):
64
64
 
65
65
  """
66
66
  return [item for item in items if not is_none(item)]
67
+
68
+
69
def dedupe(items):
    """

    Drop repeated items, keeping each one at the position of its first occurrence.
    Items must be hashable, since they are used as dictionary keys.

    """
    first_seen = {}
    for item in items:
        # setdefault leaves an existing key (and its original position) untouched.
        first_seen.setdefault(item, None)
    return list(first_seen)
@@ -145,6 +145,32 @@ class Path(type(Path())):
145
145
  """
146
146
  return self.mkdir(parents=True, exist_ok=True)
147
147
 
148
def with_suffix(self, suffix: str) -> 'Path':
    """

    Return this path with its suffix replaced, accepting the new suffix with or without a leading dot.

    Pathlib doesn't add a dot prefix, but then errors if you don't provide one, so one is added here when missing.
    An empty suffix is passed through unchanged: in pathlib that means "remove the existing suffix", and
    prefixing it would turn it into the invalid suffix '.'.

    """
    if suffix and not suffix.startswith('.'):
        suffix = f'.{suffix}'
    return super().with_suffix(suffix)
157
+
158
def get_conversion_path(self, suffix: str) -> 'Path':
    """

    Fetch the equivalent path for a different format in the standard conversion directory structure.
    .../xyz/filename.xyz -> .../abc/filename.abc

    The target suffix may be given with or without its leading dot.
    Raises ValueError if this file's parent directory is not named after the file's own extension.

    """

    # Same leniency as `with_suffix`: a leading dot would otherwise produce '.abc/filename..abc'.
    suffix = suffix.removeprefix('.')

    old_dir = self.parent.name

    if old_dir != self.suffix.removeprefix('.'):
        # Report the file's actual extension, not the requested target format.
        raise ValueError(f"Expected parent directory '{old_dir}' to match file extension '{self.suffix}'")

    new = self.parent.parent / suffix / f'{self.stem}.{suffix}'
    return new
173
+
148
174
  @property
149
175
  def exist(self):
150
176
  """
@@ -179,6 +179,20 @@ class Document(pm.Document):
179
179
  """
180
180
  return pymupdf4llm.to_markdown(self, **kwargs)
181
181
 
182
def to_text(self):
    """

    Plain-text rendering: the `get_text()` output of each page, in iteration order, joined by newlines.

    """
    return '\n'.join(page.get_text() for page in self)
195
+
182
196
 
183
197
  if __name__ == '__main__':
184
198
  from fmtr.tools.path_tools import Path
@@ -0,0 +1,68 @@
1
+ import deepdiff
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+ from fmtr.tools.iterator_tools import dedupe
6
+
7
# pandas' DataFrame, exposed under both names used by this module, and its Series type.
DataFrame = pd.DataFrame
Table = DataFrame
Series = pd.Series

# Shorthand for numpy's NaN value.
nan = np.nan

# Integer flags for the two concatenation directions
# (presumably passed as a pandas `axis` argument; confirm against callers).
CONCAT_VERTICALLY = 0
CONCAT_HORIZONTALLY = 1
14
+
15
+
16
def normalize_nan(df, value=np.nan):
    """

    Replace each of the missing-value markers `pd.NA`, `None` and `np.nan` in the frame with one value (NaN by default).
    Returns the result of `df.replace`.

    """
    markers = (pd.NA, None, np.nan)
    replacements = dict.fromkeys(markers, value)
    return df.replace(replacements)
18
+
19
+
20
class Differ:
    """

    Diff two dataframes via DeepDiff, after shape normalization, datatype simplification, etc.

    """

    def __init__(self, left: Table, right: Table):
        """

        Collect the union of both frames' column and row labels (in first-seen order),
        then normalize each frame to that shared shape.

        """

        self.cols = dedupe(left.columns.tolist() + right.columns.tolist())
        self.rows = dedupe(left.index.values.tolist() + right.index.values.tolist())
        self.left = self.process(left)
        self.right = self.process(right)
        self.dfs = [self.left, self.right]

    def process(self, df: Table) -> Table:
        """

        Ensure same rows/columns, plus simplify datatypes via JSON round-trip.

        Returns a new frame; the frame passed in is left unmodified.

        """
        from io import StringIO

        # reindex pads every missing row/column *label* with an empty entry. Appending at
        # `len(df)` instead would create (or overwrite) an unrelated label whenever the index
        # is not a contiguous 0..n-1 range, and would mutate the caller's frame.
        df = df.reindex(index=self.rows, columns=self.cols)

        # Wrap the JSON in a file-like object: passing a literal JSON string to read_json is deprecated.
        df = pd.read_json(StringIO(df.to_json(date_format='iso')))
        df = normalize_nan(df, value=None)

        return df

    def get_diff(self) -> deepdiff.DeepDiff:
        """

        Cast both frames to dicts keyed by row label and return their DeepDiff (order ignored).

        """

        dicts = [df.to_dict(orient='index') for df in self.dfs]
        diff = deepdiff.DeepDiff(*dicts, ignore_order=True)
        return diff
@@ -0,0 +1 @@
1
+ 1.3.49
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fmtr.tools
3
- Version: 1.3.47
3
+ Version: 1.3.49
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
@@ -80,14 +80,20 @@ Requires-Dist: sentence_transformers; extra == "semantic"
80
80
  Requires-Dist: pandas; extra == "semantic"
81
81
  Requires-Dist: tabulate; extra == "semantic"
82
82
  Requires-Dist: openpyxl; extra == "semantic"
83
+ Requires-Dist: odfpy; extra == "semantic"
84
+ Requires-Dist: deepdiff; extra == "semantic"
83
85
  Provides-Extra: metric
84
86
  Requires-Dist: pandas; extra == "metric"
85
87
  Requires-Dist: tabulate; extra == "metric"
86
88
  Requires-Dist: openpyxl; extra == "metric"
89
+ Requires-Dist: odfpy; extra == "metric"
90
+ Requires-Dist: deepdiff; extra == "metric"
87
91
  Provides-Extra: tabular
88
92
  Requires-Dist: pandas; extra == "tabular"
89
93
  Requires-Dist: tabulate; extra == "tabular"
90
94
  Requires-Dist: openpyxl; extra == "tabular"
95
+ Requires-Dist: odfpy; extra == "tabular"
96
+ Requires-Dist: deepdiff; extra == "tabular"
91
97
  Provides-Extra: html
92
98
  Requires-Dist: html2text; extra == "html"
93
99
  Provides-Extra: interface
@@ -148,66 +154,68 @@ Provides-Extra: db-document
148
154
  Requires-Dist: beanie[odm]; extra == "db-document"
149
155
  Requires-Dist: motor; extra == "db-document"
150
156
  Provides-Extra: all
151
- Requires-Dist: pyyaml; extra == "all"
152
- Requires-Dist: docker; extra == "all"
153
- Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
154
- Requires-Dist: contexttimer; extra == "all"
155
- Requires-Dist: openpyxl; extra == "all"
156
- Requires-Dist: google-auth-oauthlib; extra == "all"
157
- Requires-Dist: beanie[odm]; extra == "all"
158
- Requires-Dist: huggingface_hub; extra == "all"
159
- Requires-Dist: tabulate; extra == "all"
160
- Requires-Dist: dnspython[doh]; extra == "all"
161
- Requires-Dist: cachetools; extra == "all"
162
157
  Requires-Dist: pycountry; extra == "all"
163
- Requires-Dist: regex; extra == "all"
164
- Requires-Dist: httpx_retries; extra == "all"
165
- Requires-Dist: diskcache; extra == "all"
166
- Requires-Dist: sre_yield; extra == "all"
167
- Requires-Dist: distributed; extra == "all"
168
- Requires-Dist: torchvision; extra == "all"
169
- Requires-Dist: Unidecode; extra == "all"
170
- Requires-Dist: dask[bag]; extra == "all"
171
- Requires-Dist: semver; extra == "all"
172
- Requires-Dist: transformers[sentencepiece]; extra == "all"
173
- Requires-Dist: filetype; extra == "all"
174
- Requires-Dist: deepmerge; extra == "all"
175
- Requires-Dist: flet-video; extra == "all"
176
- Requires-Dist: pydantic-settings; extra == "all"
177
158
  Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
159
+ Requires-Dist: pydantic-settings; extra == "all"
178
160
  Requires-Dist: tinynetrc; extra == "all"
179
- Requires-Dist: logfire[httpx]; extra == "all"
180
- Requires-Dist: pydantic-extra-types; extra == "all"
181
- Requires-Dist: peft; extra == "all"
182
- Requires-Dist: google-auth-httplib2; extra == "all"
183
- Requires-Dist: motor; extra == "all"
184
- Requires-Dist: torchaudio; extra == "all"
185
- Requires-Dist: pymupdf; extra == "all"
161
+ Requires-Dist: pytest-cov; extra == "all"
186
162
  Requires-Dist: sentence_transformers; extra == "all"
163
+ Requires-Dist: flet-video; extra == "all"
164
+ Requires-Dist: tokenizers; extra == "all"
165
+ Requires-Dist: transformers[sentencepiece]; extra == "all"
166
+ Requires-Dist: fastapi; extra == "all"
167
+ Requires-Dist: appdirs; extra == "all"
168
+ Requires-Dist: torchvision; extra == "all"
169
+ Requires-Dist: huggingface_hub; extra == "all"
170
+ Requires-Dist: semver; extra == "all"
171
+ Requires-Dist: pymupdf4llm; extra == "all"
172
+ Requires-Dist: odfpy; extra == "all"
173
+ Requires-Dist: ollama; extra == "all"
174
+ Requires-Dist: faker; extra == "all"
175
+ Requires-Dist: Unidecode; extra == "all"
176
+ Requires-Dist: logfire[httpx]; extra == "all"
177
+ Requires-Dist: regex; extra == "all"
178
+ Requires-Dist: distributed; extra == "all"
187
179
  Requires-Dist: yamlscript; extra == "all"
188
180
  Requires-Dist: openai; extra == "all"
189
- Requires-Dist: setuptools; extra == "all"
190
- Requires-Dist: pydantic; extra == "all"
191
- Requires-Dist: json_repair; extra == "all"
192
- Requires-Dist: google-api-python-client; extra == "all"
193
- Requires-Dist: pandas; extra == "all"
194
- Requires-Dist: google-auth; extra == "all"
181
+ Requires-Dist: torchaudio; extra == "all"
182
+ Requires-Dist: httpx_retries; extra == "all"
183
+ Requires-Dist: sre_yield; extra == "all"
184
+ Requires-Dist: dnspython[doh]; extra == "all"
185
+ Requires-Dist: google-auth-oauthlib; extra == "all"
186
+ Requires-Dist: dask[bag]; extra == "all"
187
+ Requires-Dist: logfire[fastapi]; extra == "all"
188
+ Requires-Dist: pyyaml; extra == "all"
195
189
  Requires-Dist: bokeh; extra == "all"
196
- Requires-Dist: pymupdf4llm; extra == "all"
197
- Requires-Dist: flet[all]; extra == "all"
190
+ Requires-Dist: uvicorn[standard]; extra == "all"
191
+ Requires-Dist: diskcache; extra == "all"
192
+ Requires-Dist: cachetools; extra == "all"
193
+ Requires-Dist: pymupdf; extra == "all"
194
+ Requires-Dist: docker; extra == "all"
195
+ Requires-Dist: deepdiff; extra == "all"
198
196
  Requires-Dist: httpx; extra == "all"
199
- Requires-Dist: playwright; extra == "all"
200
- Requires-Dist: logfire; extra == "all"
197
+ Requires-Dist: pydantic; extra == "all"
198
+ Requires-Dist: openpyxl; extra == "all"
201
199
  Requires-Dist: html2text; extra == "all"
202
- Requires-Dist: fastapi; extra == "all"
200
+ Requires-Dist: logfire; extra == "all"
201
+ Requires-Dist: peft; extra == "all"
202
+ Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
203
+ Requires-Dist: flet[all]; extra == "all"
204
+ Requires-Dist: filetype; extra == "all"
205
+ Requires-Dist: json_repair; extra == "all"
203
206
  Requires-Dist: flet-webview; extra == "all"
204
- Requires-Dist: faker; extra == "all"
205
- Requires-Dist: uvicorn[standard]; extra == "all"
206
- Requires-Dist: logfire[fastapi]; extra == "all"
207
- Requires-Dist: tokenizers; extra == "all"
208
- Requires-Dist: ollama; extra == "all"
209
- Requires-Dist: appdirs; extra == "all"
210
- Requires-Dist: pytest-cov; extra == "all"
207
+ Requires-Dist: google-auth; extra == "all"
208
+ Requires-Dist: setuptools; extra == "all"
209
+ Requires-Dist: beanie[odm]; extra == "all"
210
+ Requires-Dist: motor; extra == "all"
211
+ Requires-Dist: deepmerge; extra == "all"
212
+ Requires-Dist: google-api-python-client; extra == "all"
213
+ Requires-Dist: playwright; extra == "all"
214
+ Requires-Dist: tabulate; extra == "all"
215
+ Requires-Dist: google-auth-httplib2; extra == "all"
216
+ Requires-Dist: pydantic-extra-types; extra == "all"
217
+ Requires-Dist: contexttimer; extra == "all"
218
+ Requires-Dist: pandas; extra == "all"
211
219
  Dynamic: author
212
220
  Dynamic: author-email
213
221
  Dynamic: description
@@ -18,66 +18,68 @@ pydantic-ai[logfire,openai]
18
18
  ollama
19
19
 
20
20
  [all]
21
- pyyaml
22
- docker
23
- pydantic-ai[logfire,openai]
24
- contexttimer
25
- openpyxl
26
- google-auth-oauthlib
27
- beanie[odm]
28
- huggingface_hub
29
- tabulate
30
- dnspython[doh]
31
- cachetools
32
21
  pycountry
33
- regex
34
- httpx_retries
35
- diskcache
36
- sre_yield
37
- distributed
38
- torchvision
39
- Unidecode
40
- dask[bag]
41
- semver
42
- transformers[sentencepiece]
43
- filetype
44
- deepmerge
45
- flet-video
46
- pydantic-settings
47
22
  pydevd-pycharm~=251.25410.159
23
+ pydantic-settings
48
24
  tinynetrc
49
- logfire[httpx]
50
- pydantic-extra-types
51
- peft
52
- google-auth-httplib2
53
- motor
54
- torchaudio
55
- pymupdf
25
+ pytest-cov
56
26
  sentence_transformers
27
+ flet-video
28
+ tokenizers
29
+ transformers[sentencepiece]
30
+ fastapi
31
+ appdirs
32
+ torchvision
33
+ huggingface_hub
34
+ semver
35
+ pymupdf4llm
36
+ odfpy
37
+ ollama
38
+ faker
39
+ Unidecode
40
+ logfire[httpx]
41
+ regex
42
+ distributed
57
43
  yamlscript
58
44
  openai
59
- setuptools
60
- pydantic
61
- json_repair
62
- google-api-python-client
63
- pandas
64
- google-auth
45
+ torchaudio
46
+ httpx_retries
47
+ sre_yield
48
+ dnspython[doh]
49
+ google-auth-oauthlib
50
+ dask[bag]
51
+ logfire[fastapi]
52
+ pyyaml
65
53
  bokeh
66
- pymupdf4llm
67
- flet[all]
54
+ uvicorn[standard]
55
+ diskcache
56
+ cachetools
57
+ pymupdf
58
+ docker
59
+ deepdiff
68
60
  httpx
69
- playwright
70
- logfire
61
+ pydantic
62
+ openpyxl
71
63
  html2text
72
- fastapi
64
+ logfire
65
+ peft
66
+ pydantic-ai[logfire,openai]
67
+ flet[all]
68
+ filetype
69
+ json_repair
73
70
  flet-webview
74
- faker
75
- uvicorn[standard]
76
- logfire[fastapi]
77
- tokenizers
78
- ollama
79
- appdirs
80
- pytest-cov
71
+ google-auth
72
+ setuptools
73
+ beanie[odm]
74
+ motor
75
+ deepmerge
76
+ google-api-python-client
77
+ playwright
78
+ tabulate
79
+ google-auth-httplib2
80
+ pydantic-extra-types
81
+ contexttimer
82
+ pandas
81
83
 
82
84
  [api]
83
85
  fastapi
@@ -159,6 +161,8 @@ deepmerge
159
161
  pandas
160
162
  tabulate
161
163
  openpyxl
164
+ odfpy
165
+ deepdiff
162
166
 
163
167
  [netrc]
164
168
  tinynetrc
@@ -198,6 +202,8 @@ sentence_transformers
198
202
  pandas
199
203
  tabulate
200
204
  openpyxl
205
+ odfpy
206
+ deepdiff
201
207
 
202
208
  [sets]
203
209
  pydantic-settings
@@ -217,6 +223,8 @@ tinynetrc
217
223
  pandas
218
224
  tabulate
219
225
  openpyxl
226
+ odfpy
227
+ deepdiff
220
228
 
221
229
  [test]
222
230
  pytest-cov
@@ -26,7 +26,7 @@ DEPENDENCIES = {
26
26
  'json-fix': ['json_repair'],
27
27
  'semantic': ['sentence_transformers', 'metric'],
28
28
  'metric': ['tabular'],
29
- 'tabular': ['pandas', 'tabulate', 'openpyxl'],
29
+ 'tabular': ['pandas', 'tabulate', 'openpyxl', 'odfpy', 'deepdiff'],
30
30
  'html': ['html2text'],
31
31
  'interface': ['flet[all]', 'flet-video', 'flet-webview'],
32
32
  'google.api': ['google-auth', 'google-auth-oauthlib', 'google-auth-httplib2', 'google-api-python-client'],
@@ -1,7 +0,0 @@
1
- import pandas as pd
2
-
3
- Table = DataFrame = pd.DataFrame
4
- Series = pd.Series
5
-
6
- CONCAT_HORIZONTALLY = 1
7
- CONCAT_VERTICALLY = 0
@@ -1 +0,0 @@
1
- 1.3.47
File without changes
File without changes
File without changes
File without changes