fmtr.tools 1.3.48__tar.gz → 1.3.49__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fmtr.tools might be problematic. Click here for more details.

Files changed (101) hide show
  1. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/PKG-INFO +50 -46
  2. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/iterator_tools.py +9 -0
  3. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/path_tools/path_tools.py +26 -0
  4. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/pdf_tools.py +14 -0
  5. fmtr_tools-1.3.49/fmtr/tools/tabular_tools.py +68 -0
  6. fmtr_tools-1.3.49/fmtr/tools/version +1 -0
  7. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/PKG-INFO +50 -46
  8. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/requires.txt +49 -45
  9. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/setup.py +1 -1
  10. fmtr_tools-1.3.48/fmtr/tools/tabular_tools.py +0 -7
  11. fmtr_tools-1.3.48/fmtr/tools/version +0 -1
  12. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/LICENSE +0 -0
  13. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/README.md +0 -0
  14. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/__init__.py +0 -0
  15. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/ai_tools/__init__.py +0 -0
  16. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/ai_tools/agentic_tools.py +0 -0
  17. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/ai_tools/inference_tools.py +0 -0
  18. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/api_tools.py +0 -0
  19. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/async_tools.py +0 -0
  20. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/augmentation_tools.py +0 -0
  21. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/caching_tools.py +0 -0
  22. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/constants.py +0 -0
  23. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/context_tools.py +0 -0
  24. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/data_modelling_tools.py +0 -0
  25. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/database_tools/__init__.py +0 -0
  26. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/database_tools/document.py +0 -0
  27. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/dataclass_tools.py +0 -0
  28. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/datatype_tools.py +0 -0
  29. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/datetime_tools.py +0 -0
  30. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/debugging_tools.py +0 -0
  31. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/__init__.py +0 -0
  32. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/client.py +0 -0
  33. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/dm.py +0 -0
  34. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/proxy.py +0 -0
  35. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/dns_tools/server.py +0 -0
  36. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/docker_tools.py +0 -0
  37. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/__init__.py +0 -0
  38. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/cache_hfh.py +0 -0
  39. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/ep_test.py +0 -0
  40. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/install_yamlscript.py +0 -0
  41. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/remote_debug_test.py +0 -0
  42. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/entrypoints/shell_debug.py +0 -0
  43. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/environment_tools.py +0 -0
  44. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/function_tools.py +0 -0
  45. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/google_api_tools.py +0 -0
  46. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/hash_tools.py +0 -0
  47. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/hfh_tools.py +0 -0
  48. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/html_tools.py +0 -0
  49. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/http_tools.py +0 -0
  50. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/import_tools.py +0 -0
  51. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/inherit_tools.py +0 -0
  52. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/inspection_tools.py +0 -0
  53. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/interface_tools/__init__.py +0 -0
  54. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/interface_tools/context.py +0 -0
  55. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/interface_tools/controls.py +0 -0
  56. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/interface_tools/interface_tools.py +0 -0
  57. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/json_fix_tools.py +0 -0
  58. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/json_tools.py +0 -0
  59. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/logging_tools.py +0 -0
  60. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/merging_tools.py +0 -0
  61. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/metric_tools.py +0 -0
  62. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/name_tools.py +0 -0
  63. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/netrc_tools.py +0 -0
  64. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/openai_tools.py +0 -0
  65. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/packaging_tools.py +0 -0
  66. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/parallel_tools.py +0 -0
  67. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/path_tools/__init__.py +0 -0
  68. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/path_tools/app_path_tools.py +0 -0
  69. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/path_tools/type_path_tools.py +0 -0
  70. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/pattern_tools.py +0 -0
  71. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/platform_tools.py +0 -0
  72. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/process_tools.py +0 -0
  73. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/profiling_tools.py +0 -0
  74. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/random_tools.py +0 -0
  75. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/semantic_tools.py +0 -0
  76. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/settings_tools.py +0 -0
  77. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/setup_tools/__init__.py +0 -0
  78. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/setup_tools/setup_tools.py +0 -0
  79. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/spaces_tools.py +0 -0
  80. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/string_tools.py +0 -0
  81. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tests/__init__.py +0 -0
  82. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tests/conftest.py +0 -0
  83. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tests/helpers.py +0 -0
  84. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_datatype.py +0 -0
  85. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_environment.py +0 -0
  86. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_json.py +0 -0
  87. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_path.py +0 -0
  88. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tests/test_yaml.py +0 -0
  89. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tokenization_tools.py +0 -0
  90. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/tools.py +0 -0
  91. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/unicode_tools.py +0 -0
  92. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/version_tools/__init__.py +0 -0
  93. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/version_tools/version_tools.py +0 -0
  94. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/webhook_tools.py +0 -0
  95. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr/tools/yaml_tools.py +0 -0
  96. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/SOURCES.txt +0 -0
  97. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/dependency_links.txt +0 -0
  98. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/entry_points.txt +0 -0
  99. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/fmtr.tools.egg-info/top_level.txt +0 -0
  100. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/pyproject.toml +0 -0
  101. {fmtr_tools-1.3.48 → fmtr_tools-1.3.49}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fmtr.tools
3
- Version: 1.3.48
3
+ Version: 1.3.49
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
@@ -81,16 +81,19 @@ Requires-Dist: pandas; extra == "semantic"
81
81
  Requires-Dist: tabulate; extra == "semantic"
82
82
  Requires-Dist: openpyxl; extra == "semantic"
83
83
  Requires-Dist: odfpy; extra == "semantic"
84
+ Requires-Dist: deepdiff; extra == "semantic"
84
85
  Provides-Extra: metric
85
86
  Requires-Dist: pandas; extra == "metric"
86
87
  Requires-Dist: tabulate; extra == "metric"
87
88
  Requires-Dist: openpyxl; extra == "metric"
88
89
  Requires-Dist: odfpy; extra == "metric"
90
+ Requires-Dist: deepdiff; extra == "metric"
89
91
  Provides-Extra: tabular
90
92
  Requires-Dist: pandas; extra == "tabular"
91
93
  Requires-Dist: tabulate; extra == "tabular"
92
94
  Requires-Dist: openpyxl; extra == "tabular"
93
95
  Requires-Dist: odfpy; extra == "tabular"
96
+ Requires-Dist: deepdiff; extra == "tabular"
94
97
  Provides-Extra: html
95
98
  Requires-Dist: html2text; extra == "html"
96
99
  Provides-Extra: interface
@@ -151,67 +154,68 @@ Provides-Extra: db-document
151
154
  Requires-Dist: beanie[odm]; extra == "db-document"
152
155
  Requires-Dist: motor; extra == "db-document"
153
156
  Provides-Extra: all
154
- Requires-Dist: filetype; extra == "all"
155
- Requires-Dist: flet-webview; extra == "all"
157
+ Requires-Dist: pycountry; extra == "all"
156
158
  Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
157
- Requires-Dist: logfire; extra == "all"
159
+ Requires-Dist: pydantic-settings; extra == "all"
160
+ Requires-Dist: tinynetrc; extra == "all"
161
+ Requires-Dist: pytest-cov; extra == "all"
158
162
  Requires-Dist: sentence_transformers; extra == "all"
159
- Requires-Dist: motor; extra == "all"
160
- Requires-Dist: torchvision; extra == "all"
161
- Requires-Dist: google-auth-oauthlib; extra == "all"
162
- Requires-Dist: json_repair; extra == "all"
163
163
  Requires-Dist: flet-video; extra == "all"
164
- Requires-Dist: pycountry; extra == "all"
164
+ Requires-Dist: tokenizers; extra == "all"
165
165
  Requires-Dist: transformers[sentencepiece]; extra == "all"
166
- Requires-Dist: beanie[odm]; extra == "all"
167
- Requires-Dist: yamlscript; extra == "all"
166
+ Requires-Dist: fastapi; extra == "all"
168
167
  Requires-Dist: appdirs; extra == "all"
169
- Requires-Dist: pydantic-settings; extra == "all"
170
- Requires-Dist: httpx; extra == "all"
171
- Requires-Dist: contexttimer; extra == "all"
172
- Requires-Dist: bokeh; extra == "all"
173
- Requires-Dist: google-auth-httplib2; extra == "all"
168
+ Requires-Dist: torchvision; extra == "all"
174
169
  Requires-Dist: huggingface_hub; extra == "all"
170
+ Requires-Dist: semver; extra == "all"
171
+ Requires-Dist: pymupdf4llm; extra == "all"
172
+ Requires-Dist: odfpy; extra == "all"
173
+ Requires-Dist: ollama; extra == "all"
175
174
  Requires-Dist: faker; extra == "all"
176
- Requires-Dist: dnspython[doh]; extra == "all"
177
- Requires-Dist: html2text; extra == "all"
178
- Requires-Dist: tinynetrc; extra == "all"
179
- Requires-Dist: pydantic; extra == "all"
180
- Requires-Dist: torchaudio; extra == "all"
181
- Requires-Dist: distributed; extra == "all"
182
- Requires-Dist: flet[all]; extra == "all"
183
- Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
184
- Requires-Dist: diskcache; extra == "all"
185
- Requires-Dist: peft; extra == "all"
186
- Requires-Dist: pytest-cov; extra == "all"
187
- Requires-Dist: pandas; extra == "all"
175
+ Requires-Dist: Unidecode; extra == "all"
188
176
  Requires-Dist: logfire[httpx]; extra == "all"
189
177
  Requires-Dist: regex; extra == "all"
190
- Requires-Dist: google-auth; extra == "all"
191
- Requires-Dist: pyyaml; extra == "all"
192
- Requires-Dist: pymupdf4llm; extra == "all"
193
- Requires-Dist: ollama; extra == "all"
178
+ Requires-Dist: distributed; extra == "all"
179
+ Requires-Dist: yamlscript; extra == "all"
194
180
  Requires-Dist: openai; extra == "all"
195
- Requires-Dist: tokenizers; extra == "all"
196
- Requires-Dist: pymupdf; extra == "all"
181
+ Requires-Dist: torchaudio; extra == "all"
197
182
  Requires-Dist: httpx_retries; extra == "all"
198
- Requires-Dist: pydantic-extra-types; extra == "all"
199
- Requires-Dist: Unidecode; extra == "all"
200
- Requires-Dist: openpyxl; extra == "all"
201
183
  Requires-Dist: sre_yield; extra == "all"
202
- Requires-Dist: odfpy; extra == "all"
203
- Requires-Dist: cachetools; extra == "all"
184
+ Requires-Dist: dnspython[doh]; extra == "all"
185
+ Requires-Dist: google-auth-oauthlib; extra == "all"
204
186
  Requires-Dist: dask[bag]; extra == "all"
205
- Requires-Dist: deepmerge; extra == "all"
206
- Requires-Dist: fastapi; extra == "all"
207
- Requires-Dist: google-api-python-client; extra == "all"
208
- Requires-Dist: semver; extra == "all"
209
- Requires-Dist: tabulate; extra == "all"
210
187
  Requires-Dist: logfire[fastapi]; extra == "all"
211
- Requires-Dist: setuptools; extra == "all"
188
+ Requires-Dist: pyyaml; extra == "all"
189
+ Requires-Dist: bokeh; extra == "all"
190
+ Requires-Dist: uvicorn[standard]; extra == "all"
191
+ Requires-Dist: diskcache; extra == "all"
192
+ Requires-Dist: cachetools; extra == "all"
193
+ Requires-Dist: pymupdf; extra == "all"
212
194
  Requires-Dist: docker; extra == "all"
195
+ Requires-Dist: deepdiff; extra == "all"
196
+ Requires-Dist: httpx; extra == "all"
197
+ Requires-Dist: pydantic; extra == "all"
198
+ Requires-Dist: openpyxl; extra == "all"
199
+ Requires-Dist: html2text; extra == "all"
200
+ Requires-Dist: logfire; extra == "all"
201
+ Requires-Dist: peft; extra == "all"
202
+ Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
203
+ Requires-Dist: flet[all]; extra == "all"
204
+ Requires-Dist: filetype; extra == "all"
205
+ Requires-Dist: json_repair; extra == "all"
206
+ Requires-Dist: flet-webview; extra == "all"
207
+ Requires-Dist: google-auth; extra == "all"
208
+ Requires-Dist: setuptools; extra == "all"
209
+ Requires-Dist: beanie[odm]; extra == "all"
210
+ Requires-Dist: motor; extra == "all"
211
+ Requires-Dist: deepmerge; extra == "all"
212
+ Requires-Dist: google-api-python-client; extra == "all"
213
213
  Requires-Dist: playwright; extra == "all"
214
- Requires-Dist: uvicorn[standard]; extra == "all"
214
+ Requires-Dist: tabulate; extra == "all"
215
+ Requires-Dist: google-auth-httplib2; extra == "all"
216
+ Requires-Dist: pydantic-extra-types; extra == "all"
217
+ Requires-Dist: contexttimer; extra == "all"
218
+ Requires-Dist: pandas; extra == "all"
215
219
  Dynamic: author
216
220
  Dynamic: author-email
217
221
  Dynamic: description
@@ -64,3 +64,12 @@ def strip_none(*items):
64
64
 
65
65
  """
66
66
  return [item for item in items if not is_none(item)]
67
+
68
+
69
+ def dedupe(items):
70
+ """
71
+
72
+ Deduplicate a list of items, retaining order
73
+
74
+ """
75
+ return list(dict.fromkeys(items))
@@ -145,6 +145,32 @@ class Path(type(Path())):
145
145
  """
146
146
  return self.mkdir(parents=True, exist_ok=True)
147
147
 
148
+ def with_suffix(self, suffix: str) -> 'Path':
149
+ """
150
+
151
+ Pathlib doesn't add a dot prefix, but then errors if you don't provide one, which feels rather obnoxious.
152
+
153
+ """
154
+ if not suffix.startswith('.'):
155
+ suffix = f'.{suffix}'
156
+ return super().with_suffix(suffix)
157
+
158
+ def get_conversion_path(self, suffix: str) -> 'Path':
159
+ """
160
+
161
+ Fetch the equivalent path for a different format in the standard conversion directory structure.
162
+ .../xyz/filename.xyx -> ../abc/filename.abc
163
+
164
+ """
165
+
166
+ old_dir = self.parent.name
167
+
168
+ if old_dir != self.suffix.removeprefix('.'):
169
+ raise ValueError(f"Expected parent directory '{old_dir}' to match file extension '{suffix}'")
170
+
171
+ new = self.parent.parent / suffix / f'{self.stem}.{suffix}'
172
+ return new
173
+
148
174
  @property
149
175
  def exist(self):
150
176
  """
@@ -179,6 +179,20 @@ class Document(pm.Document):
179
179
  """
180
180
  return pymupdf4llm.to_markdown(self, **kwargs)
181
181
 
182
+ def to_text(self):
183
+ """
184
+
185
+ Simple text output.
186
+
187
+ """
188
+ lines = []
189
+ for page in self:
190
+ text = page.get_text()
191
+ lines.append(text)
192
+
193
+ text = '\n'.join(lines)
194
+ return text
195
+
182
196
 
183
197
  if __name__ == '__main__':
184
198
  from fmtr.tools.path_tools import Path
@@ -0,0 +1,68 @@
1
+ import deepdiff
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+ from fmtr.tools.iterator_tools import dedupe
6
+
7
+ Table = DataFrame = pd.DataFrame
8
+ Series = pd.Series
9
+
10
+ nan = np.nan
11
+
12
+ CONCAT_HORIZONTALLY = 1
13
+ CONCAT_VERTICALLY = 0
14
+
15
+
16
+ def normalize_nan(df, value=np.nan):
17
+ return df.replace({pd.NA: value, None: value, np.nan: value})
18
+
19
+
20
+ class Differ:
21
+ """
22
+
23
+ Diff two dataframes via DeepDiff, after shape normalization, datatype simplification, etc.
24
+
25
+ """
26
+
27
+ def __init__(self, left: Table, right: Table):
28
+
29
+ self.cols = dedupe(left.columns.tolist() + right.columns.tolist())
30
+ self.rows = dedupe(left.index.values.tolist() + right.index.values.tolist())
31
+ self.left = self.process(left)
32
+ self.right = self.process(right)
33
+ self.dfs = [self.left, self.right]
34
+
35
+ def process(self, df: Table) -> Table:
36
+ """
37
+
38
+ Ensure same rows/columns, plus simplify datatypes via JSON round-robin.
39
+
40
+ """
41
+
42
+ df_rows = set(df.index.values.tolist())
43
+ for row in self.rows:
44
+ if row in df_rows:
45
+ continue
46
+ df.loc[len(df)] = None
47
+
48
+ df_cols = set(df.columns.tolist())
49
+ for col in self.cols:
50
+ if col in df_cols:
51
+ continue
52
+ df[col] = None
53
+
54
+ df = pd.read_json(df.to_json(date_format='iso'))
55
+ df = normalize_nan(df, value=None)
56
+
57
+ return df
58
+
59
+ def get_diff(self) -> deepdiff.DeepDiff:
60
+ """
61
+
62
+ Cast to dicts and get diff
63
+
64
+ """
65
+
66
+ dicts = [df.to_dict(orient='index') for df in self.dfs]
67
+ diff = deepdiff.DeepDiff(*dicts, ignore_order=True)
68
+ return diff
@@ -0,0 +1 @@
1
+ 1.3.49
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fmtr.tools
3
- Version: 1.3.48
3
+ Version: 1.3.49
4
4
  Summary: Collection of high-level tools to simplify everyday development tasks, with a focus on AI/ML
5
5
  Home-page: https://github.com/fmtr/fmtr.tools
6
6
  Author: Frontmatter
@@ -81,16 +81,19 @@ Requires-Dist: pandas; extra == "semantic"
81
81
  Requires-Dist: tabulate; extra == "semantic"
82
82
  Requires-Dist: openpyxl; extra == "semantic"
83
83
  Requires-Dist: odfpy; extra == "semantic"
84
+ Requires-Dist: deepdiff; extra == "semantic"
84
85
  Provides-Extra: metric
85
86
  Requires-Dist: pandas; extra == "metric"
86
87
  Requires-Dist: tabulate; extra == "metric"
87
88
  Requires-Dist: openpyxl; extra == "metric"
88
89
  Requires-Dist: odfpy; extra == "metric"
90
+ Requires-Dist: deepdiff; extra == "metric"
89
91
  Provides-Extra: tabular
90
92
  Requires-Dist: pandas; extra == "tabular"
91
93
  Requires-Dist: tabulate; extra == "tabular"
92
94
  Requires-Dist: openpyxl; extra == "tabular"
93
95
  Requires-Dist: odfpy; extra == "tabular"
96
+ Requires-Dist: deepdiff; extra == "tabular"
94
97
  Provides-Extra: html
95
98
  Requires-Dist: html2text; extra == "html"
96
99
  Provides-Extra: interface
@@ -151,67 +154,68 @@ Provides-Extra: db-document
151
154
  Requires-Dist: beanie[odm]; extra == "db-document"
152
155
  Requires-Dist: motor; extra == "db-document"
153
156
  Provides-Extra: all
154
- Requires-Dist: filetype; extra == "all"
155
- Requires-Dist: flet-webview; extra == "all"
157
+ Requires-Dist: pycountry; extra == "all"
156
158
  Requires-Dist: pydevd-pycharm~=251.25410.159; extra == "all"
157
- Requires-Dist: logfire; extra == "all"
159
+ Requires-Dist: pydantic-settings; extra == "all"
160
+ Requires-Dist: tinynetrc; extra == "all"
161
+ Requires-Dist: pytest-cov; extra == "all"
158
162
  Requires-Dist: sentence_transformers; extra == "all"
159
- Requires-Dist: motor; extra == "all"
160
- Requires-Dist: torchvision; extra == "all"
161
- Requires-Dist: google-auth-oauthlib; extra == "all"
162
- Requires-Dist: json_repair; extra == "all"
163
163
  Requires-Dist: flet-video; extra == "all"
164
- Requires-Dist: pycountry; extra == "all"
164
+ Requires-Dist: tokenizers; extra == "all"
165
165
  Requires-Dist: transformers[sentencepiece]; extra == "all"
166
- Requires-Dist: beanie[odm]; extra == "all"
167
- Requires-Dist: yamlscript; extra == "all"
166
+ Requires-Dist: fastapi; extra == "all"
168
167
  Requires-Dist: appdirs; extra == "all"
169
- Requires-Dist: pydantic-settings; extra == "all"
170
- Requires-Dist: httpx; extra == "all"
171
- Requires-Dist: contexttimer; extra == "all"
172
- Requires-Dist: bokeh; extra == "all"
173
- Requires-Dist: google-auth-httplib2; extra == "all"
168
+ Requires-Dist: torchvision; extra == "all"
174
169
  Requires-Dist: huggingface_hub; extra == "all"
170
+ Requires-Dist: semver; extra == "all"
171
+ Requires-Dist: pymupdf4llm; extra == "all"
172
+ Requires-Dist: odfpy; extra == "all"
173
+ Requires-Dist: ollama; extra == "all"
175
174
  Requires-Dist: faker; extra == "all"
176
- Requires-Dist: dnspython[doh]; extra == "all"
177
- Requires-Dist: html2text; extra == "all"
178
- Requires-Dist: tinynetrc; extra == "all"
179
- Requires-Dist: pydantic; extra == "all"
180
- Requires-Dist: torchaudio; extra == "all"
181
- Requires-Dist: distributed; extra == "all"
182
- Requires-Dist: flet[all]; extra == "all"
183
- Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
184
- Requires-Dist: diskcache; extra == "all"
185
- Requires-Dist: peft; extra == "all"
186
- Requires-Dist: pytest-cov; extra == "all"
187
- Requires-Dist: pandas; extra == "all"
175
+ Requires-Dist: Unidecode; extra == "all"
188
176
  Requires-Dist: logfire[httpx]; extra == "all"
189
177
  Requires-Dist: regex; extra == "all"
190
- Requires-Dist: google-auth; extra == "all"
191
- Requires-Dist: pyyaml; extra == "all"
192
- Requires-Dist: pymupdf4llm; extra == "all"
193
- Requires-Dist: ollama; extra == "all"
178
+ Requires-Dist: distributed; extra == "all"
179
+ Requires-Dist: yamlscript; extra == "all"
194
180
  Requires-Dist: openai; extra == "all"
195
- Requires-Dist: tokenizers; extra == "all"
196
- Requires-Dist: pymupdf; extra == "all"
181
+ Requires-Dist: torchaudio; extra == "all"
197
182
  Requires-Dist: httpx_retries; extra == "all"
198
- Requires-Dist: pydantic-extra-types; extra == "all"
199
- Requires-Dist: Unidecode; extra == "all"
200
- Requires-Dist: openpyxl; extra == "all"
201
183
  Requires-Dist: sre_yield; extra == "all"
202
- Requires-Dist: odfpy; extra == "all"
203
- Requires-Dist: cachetools; extra == "all"
184
+ Requires-Dist: dnspython[doh]; extra == "all"
185
+ Requires-Dist: google-auth-oauthlib; extra == "all"
204
186
  Requires-Dist: dask[bag]; extra == "all"
205
- Requires-Dist: deepmerge; extra == "all"
206
- Requires-Dist: fastapi; extra == "all"
207
- Requires-Dist: google-api-python-client; extra == "all"
208
- Requires-Dist: semver; extra == "all"
209
- Requires-Dist: tabulate; extra == "all"
210
187
  Requires-Dist: logfire[fastapi]; extra == "all"
211
- Requires-Dist: setuptools; extra == "all"
188
+ Requires-Dist: pyyaml; extra == "all"
189
+ Requires-Dist: bokeh; extra == "all"
190
+ Requires-Dist: uvicorn[standard]; extra == "all"
191
+ Requires-Dist: diskcache; extra == "all"
192
+ Requires-Dist: cachetools; extra == "all"
193
+ Requires-Dist: pymupdf; extra == "all"
212
194
  Requires-Dist: docker; extra == "all"
195
+ Requires-Dist: deepdiff; extra == "all"
196
+ Requires-Dist: httpx; extra == "all"
197
+ Requires-Dist: pydantic; extra == "all"
198
+ Requires-Dist: openpyxl; extra == "all"
199
+ Requires-Dist: html2text; extra == "all"
200
+ Requires-Dist: logfire; extra == "all"
201
+ Requires-Dist: peft; extra == "all"
202
+ Requires-Dist: pydantic-ai[logfire,openai]; extra == "all"
203
+ Requires-Dist: flet[all]; extra == "all"
204
+ Requires-Dist: filetype; extra == "all"
205
+ Requires-Dist: json_repair; extra == "all"
206
+ Requires-Dist: flet-webview; extra == "all"
207
+ Requires-Dist: google-auth; extra == "all"
208
+ Requires-Dist: setuptools; extra == "all"
209
+ Requires-Dist: beanie[odm]; extra == "all"
210
+ Requires-Dist: motor; extra == "all"
211
+ Requires-Dist: deepmerge; extra == "all"
212
+ Requires-Dist: google-api-python-client; extra == "all"
213
213
  Requires-Dist: playwright; extra == "all"
214
- Requires-Dist: uvicorn[standard]; extra == "all"
214
+ Requires-Dist: tabulate; extra == "all"
215
+ Requires-Dist: google-auth-httplib2; extra == "all"
216
+ Requires-Dist: pydantic-extra-types; extra == "all"
217
+ Requires-Dist: contexttimer; extra == "all"
218
+ Requires-Dist: pandas; extra == "all"
215
219
  Dynamic: author
216
220
  Dynamic: author-email
217
221
  Dynamic: description
@@ -18,67 +18,68 @@ pydantic-ai[logfire,openai]
18
18
  ollama
19
19
 
20
20
  [all]
21
- filetype
22
- flet-webview
21
+ pycountry
23
22
  pydevd-pycharm~=251.25410.159
24
- logfire
23
+ pydantic-settings
24
+ tinynetrc
25
+ pytest-cov
25
26
  sentence_transformers
26
- motor
27
- torchvision
28
- google-auth-oauthlib
29
- json_repair
30
27
  flet-video
31
- pycountry
28
+ tokenizers
32
29
  transformers[sentencepiece]
33
- beanie[odm]
34
- yamlscript
30
+ fastapi
35
31
  appdirs
36
- pydantic-settings
37
- httpx
38
- contexttimer
39
- bokeh
40
- google-auth-httplib2
32
+ torchvision
41
33
  huggingface_hub
34
+ semver
35
+ pymupdf4llm
36
+ odfpy
37
+ ollama
42
38
  faker
43
- dnspython[doh]
44
- html2text
45
- tinynetrc
46
- pydantic
47
- torchaudio
48
- distributed
49
- flet[all]
50
- pydantic-ai[logfire,openai]
51
- diskcache
52
- peft
53
- pytest-cov
54
- pandas
39
+ Unidecode
55
40
  logfire[httpx]
56
41
  regex
57
- google-auth
58
- pyyaml
59
- pymupdf4llm
60
- ollama
42
+ distributed
43
+ yamlscript
61
44
  openai
62
- tokenizers
63
- pymupdf
45
+ torchaudio
64
46
  httpx_retries
65
- pydantic-extra-types
66
- Unidecode
67
- openpyxl
68
47
  sre_yield
69
- odfpy
70
- cachetools
48
+ dnspython[doh]
49
+ google-auth-oauthlib
71
50
  dask[bag]
72
- deepmerge
73
- fastapi
74
- google-api-python-client
75
- semver
76
- tabulate
77
51
  logfire[fastapi]
78
- setuptools
52
+ pyyaml
53
+ bokeh
54
+ uvicorn[standard]
55
+ diskcache
56
+ cachetools
57
+ pymupdf
79
58
  docker
59
+ deepdiff
60
+ httpx
61
+ pydantic
62
+ openpyxl
63
+ html2text
64
+ logfire
65
+ peft
66
+ pydantic-ai[logfire,openai]
67
+ flet[all]
68
+ filetype
69
+ json_repair
70
+ flet-webview
71
+ google-auth
72
+ setuptools
73
+ beanie[odm]
74
+ motor
75
+ deepmerge
76
+ google-api-python-client
80
77
  playwright
81
- uvicorn[standard]
78
+ tabulate
79
+ google-auth-httplib2
80
+ pydantic-extra-types
81
+ contexttimer
82
+ pandas
82
83
 
83
84
  [api]
84
85
  fastapi
@@ -161,6 +162,7 @@ pandas
161
162
  tabulate
162
163
  openpyxl
163
164
  odfpy
165
+ deepdiff
164
166
 
165
167
  [netrc]
166
168
  tinynetrc
@@ -201,6 +203,7 @@ pandas
201
203
  tabulate
202
204
  openpyxl
203
205
  odfpy
206
+ deepdiff
204
207
 
205
208
  [sets]
206
209
  pydantic-settings
@@ -221,6 +224,7 @@ pandas
221
224
  tabulate
222
225
  openpyxl
223
226
  odfpy
227
+ deepdiff
224
228
 
225
229
  [test]
226
230
  pytest-cov
@@ -26,7 +26,7 @@ DEPENDENCIES = {
26
26
  'json-fix': ['json_repair'],
27
27
  'semantic': ['sentence_transformers', 'metric'],
28
28
  'metric': ['tabular'],
29
- 'tabular': ['pandas', 'tabulate', 'openpyxl', 'odfpy'],
29
+ 'tabular': ['pandas', 'tabulate', 'openpyxl', 'odfpy', 'deepdiff'],
30
30
  'html': ['html2text'],
31
31
  'interface': ['flet[all]', 'flet-video', 'flet-webview'],
32
32
  'google.api': ['google-auth', 'google-auth-oauthlib', 'google-auth-httplib2', 'google-api-python-client'],
@@ -1,7 +0,0 @@
1
- import pandas as pd
2
-
3
- Table = DataFrame = pd.DataFrame
4
- Series = pd.Series
5
-
6
- CONCAT_HORIZONTALLY = 1
7
- CONCAT_VERTICALLY = 0
@@ -1 +0,0 @@
1
- 1.3.48
File without changes
File without changes
File without changes
File without changes