fmtr.tools 1.1.1__py3-none-any.whl → 1.4.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fmtr/tools/__init__.py +86 -52
- fmtr/tools/ai_tools/__init__.py +2 -2
- fmtr/tools/ai_tools/agentic_tools.py +151 -32
- fmtr/tools/ai_tools/inference_tools.py +2 -1
- fmtr/tools/api_tools.py +73 -12
- fmtr/tools/async_tools.py +4 -0
- fmtr/tools/av_tools.py +7 -0
- fmtr/tools/caching_tools.py +101 -3
- fmtr/tools/constants.py +41 -0
- fmtr/tools/context_tools.py +23 -0
- fmtr/tools/data_modelling_tools.py +227 -14
- fmtr/tools/database_tools/__init__.py +6 -0
- fmtr/tools/database_tools/document.py +51 -0
- fmtr/tools/datatype_tools.py +22 -2
- fmtr/tools/datetime_tools.py +12 -0
- fmtr/tools/debugging_tools.py +60 -1
- fmtr/tools/dns_tools/__init__.py +7 -0
- fmtr/tools/dns_tools/client.py +97 -0
- fmtr/tools/dns_tools/dm.py +257 -0
- fmtr/tools/dns_tools/proxy.py +66 -0
- fmtr/tools/dns_tools/server.py +138 -0
- fmtr/tools/docker_tools/__init__.py +6 -0
- fmtr/tools/entrypoints/__init__.py +0 -0
- fmtr/tools/entrypoints/cache_hfh.py +3 -0
- fmtr/tools/entrypoints/ep_test.py +2 -0
- fmtr/tools/entrypoints/install_yamlscript.py +8 -0
- fmtr/tools/{console_script_tools.py → entrypoints/remote_debug_test.py} +1 -6
- fmtr/tools/entrypoints/shell_debug.py +8 -0
- fmtr/tools/environment_tools.py +3 -2
- fmtr/tools/function_tools.py +77 -1
- fmtr/tools/google_api_tools.py +15 -4
- fmtr/tools/ha_tools/__init__.py +8 -0
- fmtr/tools/ha_tools/constants.py +9 -0
- fmtr/tools/ha_tools/core.py +16 -0
- fmtr/tools/ha_tools/supervisor.py +16 -0
- fmtr/tools/ha_tools/utils.py +46 -0
- fmtr/tools/http_tools.py +52 -0
- fmtr/tools/inherit_tools.py +27 -0
- fmtr/tools/interface_tools/__init__.py +8 -0
- fmtr/tools/interface_tools/context.py +13 -0
- fmtr/tools/interface_tools/controls.py +354 -0
- fmtr/tools/interface_tools/interface_tools.py +189 -0
- fmtr/tools/iterator_tools.py +122 -1
- fmtr/tools/logging_tools.py +99 -18
- fmtr/tools/mqtt_tools.py +89 -0
- fmtr/tools/networking_tools.py +73 -0
- fmtr/tools/packaging_tools.py +14 -0
- fmtr/tools/path_tools/__init__.py +12 -0
- fmtr/tools/path_tools/app_path_tools.py +40 -0
- fmtr/tools/{path_tools.py → path_tools/path_tools.py} +217 -14
- fmtr/tools/path_tools/type_path_tools.py +3 -0
- fmtr/tools/pattern_tools.py +277 -0
- fmtr/tools/pdf_tools.py +39 -1
- fmtr/tools/settings_tools.py +27 -6
- fmtr/tools/setup_tools/__init__.py +8 -0
- fmtr/tools/setup_tools/setup_tools.py +481 -0
- fmtr/tools/string_tools.py +92 -13
- fmtr/tools/tabular_tools.py +61 -0
- fmtr/tools/tools.py +27 -2
- fmtr/tools/version +1 -1
- fmtr/tools/version_tools/__init__.py +12 -0
- fmtr/tools/version_tools/version_tools.py +51 -0
- fmtr/tools/webhook_tools.py +17 -0
- fmtr/tools/yaml_tools.py +64 -5
- fmtr/tools/youtube_tools.py +128 -0
- fmtr_tools-1.4.37.data/scripts/add-service +14 -0
- fmtr_tools-1.4.37.data/scripts/add-user-path +8 -0
- fmtr_tools-1.4.37.data/scripts/apt-headless +23 -0
- fmtr_tools-1.4.37.data/scripts/compose-update +10 -0
- fmtr_tools-1.4.37.data/scripts/docker-sandbox +43 -0
- fmtr_tools-1.4.37.data/scripts/docker-sandbox-init +23 -0
- fmtr_tools-1.4.37.data/scripts/docs-deploy +6 -0
- fmtr_tools-1.4.37.data/scripts/docs-serve +5 -0
- fmtr_tools-1.4.37.data/scripts/download +9 -0
- fmtr_tools-1.4.37.data/scripts/fmtr-test-script +3 -0
- fmtr_tools-1.4.37.data/scripts/ftu +3 -0
- fmtr_tools-1.4.37.data/scripts/ha-addon-launch +16 -0
- fmtr_tools-1.4.37.data/scripts/install-browser +8 -0
- fmtr_tools-1.4.37.data/scripts/parse-args +43 -0
- fmtr_tools-1.4.37.data/scripts/set-password +5 -0
- fmtr_tools-1.4.37.data/scripts/snips-install +14 -0
- fmtr_tools-1.4.37.data/scripts/ssh-auth +28 -0
- fmtr_tools-1.4.37.data/scripts/ssh-serve +15 -0
- fmtr_tools-1.4.37.data/scripts/vlc-tn +10 -0
- fmtr_tools-1.4.37.data/scripts/vm-launch +17 -0
- {fmtr_tools-1.1.1.dist-info → fmtr_tools-1.4.37.dist-info}/METADATA +178 -54
- fmtr_tools-1.4.37.dist-info/RECORD +122 -0
- {fmtr_tools-1.1.1.dist-info → fmtr_tools-1.4.37.dist-info}/WHEEL +1 -1
- fmtr_tools-1.4.37.dist-info/entry_points.txt +6 -0
- fmtr_tools-1.4.37.dist-info/top_level.txt +1 -0
- fmtr/tools/docker_tools.py +0 -30
- fmtr/tools/interface_tools.py +0 -64
- fmtr/tools/version_tools.py +0 -62
- fmtr_tools-1.1.1.dist-info/RECORD +0 -65
- fmtr_tools-1.1.1.dist-info/entry_points.txt +0 -3
- fmtr_tools-1.1.1.dist-info/top_level.txt +0 -2
- {fmtr_tools-1.1.1.dist-info → fmtr_tools-1.4.37.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
1
3
|
import re
|
|
2
4
|
import subprocess
|
|
3
|
-
from pathlib import Path
|
|
4
5
|
from tempfile import gettempdir
|
|
5
|
-
from typing import Union, Any
|
|
6
|
+
from typing import Union, Any, Self
|
|
6
7
|
|
|
7
8
|
from fmtr.tools.constants import Constants
|
|
8
9
|
from fmtr.tools.platform_tools import is_wsl
|
|
@@ -145,19 +146,134 @@ class Path(type(Path())):
|
|
|
145
146
|
"""
|
|
146
147
|
return self.mkdir(parents=True, exist_ok=True)
|
|
147
148
|
|
|
149
|
+
def with_suffix(self, suffix: str) -> 'Path':
    """

    Return this path with its suffix replaced, adding the leading dot when the caller omits it.

    Pathlib doesn't add a dot prefix, but then errors if you don't provide one, which feels rather obnoxious.
    An empty suffix is passed through untouched, so `with_suffix('')` still removes the suffix as in pathlib.

    """
    # Only prefix non-empty suffixes: '' must stay '' because pathlib rejects a bare '.'.
    if suffix and not suffix.startswith('.'):
        suffix = f'.{suffix}'
    return super().with_suffix(suffix)
|
|
158
|
+
|
|
159
|
+
def get_conversion_path(self, suffix: str) -> 'Path':
    """

    Fetch the equivalent path for a different format in the standard conversion directory structure.
    .../xyz/filename.xyz -> .../abc/filename.abc

    The parent directory must be named after this file's own extension. `suffix` is the target format
    and may be given with or without a leading dot.

    Raises ValueError if the parent directory name differs from this file's extension.

    """
    suffix = suffix.removeprefix('.')
    old_dir = self.parent.name
    old_suffix = self.suffix.removeprefix('.')

    if old_dir != old_suffix:
        # Report the file's actual extension here, not the requested target suffix.
        raise ValueError(f"Expected parent directory '{old_dir}' to match file extension '{old_suffix}'")

    return self.parent.parent / suffix / f'{self.stem}.{suffix}'
|
|
174
|
+
|
|
175
|
+
@property
def exist(self):
    """

    Property form of `exists()`, evaluated through the parent class implementation.

    """
    found = super().exists()
    return found
|
|
183
|
+
|
|
184
|
+
@classmethod
def app(cls):
    """

    Convenience method for getting application paths: returns a new `AppPaths` object from `fmtr.tools.path`.

    """
    # Imported inside the method rather than at module level, as before.
    from fmtr.tools import path as path_module
    app_paths = path_module.AppPaths()
    return app_paths
|
|
193
|
+
|
|
194
|
+
@property
def type(self):
    """

    Infer file type, extension, etc.

    Returns None when the path does not exist, otherwise the result of `path.guess` for the absolute path string.

    """
    if self.exists():
        from fmtr.tools import path as path_module
        return path_module.guess(str(self.absolute()))
    return None
|
|
206
|
+
|
|
207
|
+
@property
|
|
208
|
+
def children(self) -> list[Self]:
|
|
209
|
+
"""
|
|
210
|
+
|
|
211
|
+
Recursive children property
|
|
212
|
+
|
|
213
|
+
"""
|
|
214
|
+
if not self.is_dir():
|
|
215
|
+
return None
|
|
216
|
+
return sorted(self.iterdir(), key=lambda x: x.is_dir(), reverse=True)
|
|
217
|
+
|
|
218
|
+
@classmethod
def __get_pydantic_core_schema__(cls, source, handler):
    """

    Support Pydantic de/serialization and validation

    Builds a plain validator schema around `__deserialize_pydantic__`, with `__serialize_pydantic__` as serializer.

    TODO: Ideally these would be a mixin in dm, but then we'd need Pydantic to use it. Split dm module into Pydantic depts and other utils and import from there.

    """
    from pydantic_core import core_schema

    serializer = core_schema.plain_serializer_function_ser_schema(cls.__serialize_pydantic__)
    schema = core_schema.no_info_plain_validator_function(
        cls.__deserialize_pydantic__,
        serialization=serializer,
    )
    return schema
|
|
232
|
+
|
|
233
|
+
@classmethod
def __serialize_pydantic__(cls, self) -> str:
    """

    Serialize to string: returns `str()` of the given value.

    """
    text = str(self)
    return text
|
|
241
|
+
|
|
242
|
+
@classmethod
def __deserialize_pydantic__(cls, data) -> Self:
    """

    Deserialize from string

    Values that are already instances of this class are returned as-is; anything else is passed to the constructor.

    """
    return data if isinstance(data, cls) else cls(data)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class FromCallerMixin:
    """

    Mixin providing `from_caller`, which derives a directory from the call stack.

    """

    def from_caller(self):
        """

        Return the parent directory of the path reported by `get_call_path(offset=3)`.

        """
        from fmtr.tools.inspection_tools import get_call_path
        # Called directly from this method so the stack offset of 3 keeps its meaning.
        return get_call_path(offset=3).parent
|
|
148
264
|
|
|
149
|
-
|
|
265
|
+
|
|
266
|
+
class PackagePaths(FromCallerMixin):
|
|
150
267
|
"""
|
|
151
268
|
|
|
152
269
|
Canonical paths for a package.
|
|
153
270
|
|
|
154
271
|
"""
|
|
155
272
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
FILENAME_VERSION = 'version'
|
|
273
|
+
dev = Path('/') / 'opt' / 'dev'
|
|
274
|
+
data_global = dev / Constants.DIR_NAME_DATA
|
|
159
275
|
|
|
160
|
-
def __init__(self, path=None, org_singleton=None,
|
|
276
|
+
def __init__(self, path=None, org_singleton=None, dir_name_data=Constants.DIR_NAME_DATA, filename_config=Constants.FILENAME_CONFIG, file_version=Constants.FILENAME_VERSION):
|
|
161
277
|
|
|
162
278
|
"""
|
|
163
279
|
|
|
@@ -165,15 +281,23 @@ class PackagePaths:
|
|
|
165
281
|
|
|
166
282
|
"""
|
|
167
283
|
if not path:
|
|
168
|
-
|
|
169
|
-
path = get_call_path(offset=2).parent
|
|
284
|
+
path = self.from_caller()
|
|
170
285
|
|
|
171
286
|
self.path = Path(path)
|
|
172
287
|
self.org_singleton = org_singleton
|
|
173
|
-
self.
|
|
288
|
+
self.dir_name_data = dir_name_data
|
|
174
289
|
self.filename_config = filename_config
|
|
175
290
|
self.filename_version = file_version
|
|
176
291
|
|
|
292
|
+
@property
def is_dev(self) -> bool:
    """

    Whether the package path lies under the dev directory - as opposed to `site-packages` etc.

    """
    dev_root = self.dev
    return self.path.is_relative_to(dev_root)
|
|
300
|
+
|
|
177
301
|
@property
|
|
178
302
|
def is_namespace(self) -> bool:
|
|
179
303
|
"""
|
|
@@ -192,6 +316,19 @@ class PackagePaths:
|
|
|
192
316
|
"""
|
|
193
317
|
return self.path.stem
|
|
194
318
|
|
|
319
|
+
@property
def name_ns(self) -> str:
    """

    Name of namespace package: `org.name` for namespace packages, plain `name` otherwise.

    """
    if not self.is_namespace:
        return self.name
    return f'{self.org}.{self.name}'
|
|
331
|
+
|
|
195
332
|
@property
|
|
196
333
|
def org(self) -> str:
|
|
197
334
|
"""
|
|
@@ -226,13 +363,44 @@ class PackagePaths:
|
|
|
226
363
|
return self.path / self.filename_version
|
|
227
364
|
|
|
228
365
|
@property
|
|
229
|
-
def
|
|
366
|
+
def data(self) -> Path:
|
|
367
|
+
"""
|
|
368
|
+
|
|
369
|
+
Path of project-specific data directory.
|
|
370
|
+
|
|
371
|
+
"""
|
|
372
|
+
|
|
373
|
+
return self.dev / Constants.DIR_NAME_REPO / self.name_ns / self.dir_name_data
|
|
374
|
+
|
|
375
|
+
@property
|
|
376
|
+
def cache(self) -> Path:
|
|
230
377
|
"""
|
|
231
378
|
|
|
232
|
-
Path of
|
|
379
|
+
Path of cache directory.
|
|
233
380
|
|
|
234
381
|
"""
|
|
235
|
-
|
|
382
|
+
|
|
383
|
+
return self.data / Constants.DIR_NAME_CACHE
|
|
384
|
+
|
|
385
|
+
@property
def artifact(self) -> Path:
    """

    Path of project-specific artifact directory, located under the data directory.

    """
    data_dir = self.data
    return data_dir / Constants.DIR_NAME_ARTIFACT
|
|
394
|
+
|
|
395
|
+
@property
def source(self) -> Path:
    """

    Path of project-specific source directory, located under the data directory.

    """
    data_dir = self.data
    return data_dir / Constants.DIR_NAME_SOURCE
|
|
236
404
|
|
|
237
405
|
@property
|
|
238
406
|
def settings(self) -> Path:
|
|
@@ -241,7 +409,34 @@ class PackagePaths:
|
|
|
241
409
|
Path of settings file.
|
|
242
410
|
|
|
243
411
|
"""
|
|
244
|
-
return self.
|
|
412
|
+
return self.data / self.filename_config
|
|
413
|
+
|
|
414
|
+
@property
def hf(self) -> Path:
    """

    Path of HuggingFace directory, located under the artifact directory.

    """
    artifact_dir = self.artifact
    return artifact_dir / Constants.DIR_NAME_HF
|
|
422
|
+
|
|
423
|
+
@property
def docs(self) -> Path:
    """

    Path of docs directory, located under the repo path.

    """
    repo_dir = self.repo
    return repo_dir / Constants.DOCS_DIR
|
|
431
|
+
|
|
432
|
+
@property
def docs_config(self) -> Path:
    """

    Path of docs config file, located under the repo path.

    """
    repo_dir = self.repo
    return repo_dir / Constants.DOCS_CONFIG_FILENAME
|
|
245
440
|
|
|
246
441
|
def __repr__(self) -> str:
|
|
247
442
|
"""
|
|
@@ -250,3 +445,11 @@ class PackagePaths:
|
|
|
250
445
|
|
|
251
446
|
"""
|
|
252
447
|
return f'{self.__class__.__name__}("{self.path}")'
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
root = Path('/')
|
|
451
|
+
|
|
452
|
+
if __name__ == "__main__":
|
|
453
|
+
path = Path('/usr/bin/bash').absolute()
|
|
454
|
+
path.type
|
|
455
|
+
path
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
import regex as re
|
|
2
|
+
from dataclasses import dataclass, asdict
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
from typing import List, Any
|
|
5
|
+
|
|
6
|
+
from fmtr.tools import Constants
|
|
7
|
+
from fmtr.tools.logging_tools import logger
|
|
8
|
+
from fmtr.tools.string_tools import join
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RewriteCircularLoopError(Exception):
    """

    Raised when recursive rewriting arrives back at a key it has already visited.

    """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
MASK_GROUP = '(?:{pattern})'
|
|
20
|
+
MASK_NAMED = r"(?P<{key}>{pattern})"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def alt(*patterns):
    """

    Combine patterns into a single non-capturing alternation group, longest pattern string first.

    """
    ordered = sorted(patterns, key=len, reverse=True)
    return MASK_GROUP.format(pattern='|'.join(ordered))
|
|
28
|
+
|
|
29
|
+
@dataclass
class Key:
    """

    Base class for structured keys whose fields double as regex pattern templates.

    Field values are formatted with `str.format_map`, so they are expected to be strings (presumably declared by
    dataclass subclasses). FILLS may map placeholder names to regex patterns used for `{placeholder}` references.

    """
    RECORD_SEP = '␞'  # Separator placed between field values when flattening.
    FILLS = None  # Optional mapping of placeholder name -> pattern. None is treated as empty.

    def flatten(self, data):
        """

        Flatten/serialise dictionary data

        Joins the values (keys are ignored) in dictionary order, separated by RECORD_SEP.

        """
        return self.RECORD_SEP.join(f'{value}' for value in data.values())

    @cached_property
    def pattern(self):
        """

        Serialise to pattern

        Each field becomes a named group (named after the field) around its value with placeholders filled in.

        """
        fills = self.fills
        data = {
            key: MASK_NAMED.format(key=key, pattern=value.format_map(fills))
            for key, value in asdict(self).items()
        }
        return self.flatten(data)

    @cached_property
    def rx(self):
        """

        Compile to Regular Expression

        """
        return re.compile(self.pattern)

    @cached_property
    def string(self):
        """

        Serialise to string

        """
        return self.flatten(asdict(self))

    @cached_property
    def fills(self):
        """

        Add key names as regex group names

        Returns an empty mapping when FILLS has not been set, instead of failing on `None.items()`.

        """
        fills = self.FILLS or {}
        return {key: MASK_NAMED.format(key=key, pattern=value) for key, value in fills.items()}

    def transform(self, match: re.Match):
        """

        Transform match object into a new object of the same type.

        Each field of this key is formatted with the match's named groups to build the new object's fields.

        """
        fills = match.groupdict()
        data = {key: value.format_map(fills) for key, value in asdict(self).items()}
        return self.__class__(**data)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass
class Item:
    """

    Key-value pair: one rule pairing a `source` key with the `target` it maps to.

    """
    source: Key
    target: Key
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass(kw_only=True)
class Transformer:
    """

    Pattern-based, dictionary-like mapper.
    Compiles a complex set of rules into a single regex pattern, and determines which rule matched.
    Inputs are then transformed according to the matching rule.
    Works like a pattern-based dictionary when is_recursive==False.
    Works something like an FSA/transducer when is_recursive=True.

    """
    PREFIX_GROUP = '__'  # Prefix of the per-rule named groups (`__0`, `__1`, ...).
    items: List[Item]
    default: Any = None
    is_recursive: bool = False

    def __post_init__(self):
        """

        Compile on init

        """
        return self.compile(clear=False)

    def compile(self, clear=True):
        """

        Re/compile regex pattern, invalidating existing caches if recompile.

        """
        if clear:
            # cached_property stores its value in the instance dict; pop() tolerates a cache that was never filled.
            self.__dict__.pop('pattern', None)
            self.__dict__.pop('rx', None)

        with logger.span(f'Compiling expression {len(self.items)=}'):
            rx = self.rx
            logger.debug(f'Compiled successfully {rx.groups=}')

    @cached_property
    def pattern(self) -> str:
        """

        Dynamically generated regex pattern based on the rules provided.

        Each rule's source pattern is wrapped in a named group `__<index>` and the groups are alternated.

        """
        patterns = [
            MASK_NAMED.format(key=f'{self.PREFIX_GROUP}{i}', pattern=item.source.pattern)
            for i, item in enumerate(self.items)
        ]
        return alt(*patterns)

    @cached_property
    def rx(self) -> re.Pattern:
        """

        Regex object.

        """
        return re.compile(self.pattern)

    def get_default(self, key: Key) -> Any:
        """

        Define what to return in case of no match

        In recursive mode the key itself is returned; otherwise the configured default.

        """
        if self.is_recursive:
            return key
        else:
            return self.default

    def get(self, key: Key) -> Key | Any:
        """

        Use recursive or single lookup pass, depending on whether recursive lookups have been specified.

        """
        if self.is_recursive:
            with logger.span(f'Transforming recursively {key=}...'):
                return self.get_recursive(key)
        else:
            with logger.span(f'Transforming linearly {key=}...'):
                return self.get_one(key)

    def get_one(self, key: Key) -> Key | Any:
        """

        Single lookup pass.
        Lookup the source string based on the matching rule.

        Raises ValueError if the match cannot be attributed to exactly one rule.

        """

        match = self.rx.fullmatch(key.string)

        if not match:
            value = self.get_default(key)
            logger.debug(f'No match for {key=}. Returning {self.get_default(key)=}')
        else:

            # A group took part in the match unless it is None; an empty-string capture still counts.
            matched_names = {name for name, captured in match.groupdict().items() if captured is not None}
            rule_ids = {
                int(name.removeprefix(self.PREFIX_GROUP))
                for name in matched_names if name.startswith(self.PREFIX_GROUP)
            }

            if len(rule_ids) != 1:
                msg = f'Expected exactly one rule group to match, got: {rule_ids}'
                raise ValueError(msg)

            rule_id = next(iter(rule_ids))
            rule = self.items[rule_id]

            logger.debug(f'Matched using {rule_id=}: {rule.source=}')

            if isinstance(rule.target, Key):
                value = rule.target.transform(match)
            else:
                value = rule.target

            logger.debug(f'Transformed using {rule_id=}: {key=} → {value=}')

        return value

    def get_recursive(self, key: Key) -> Key | Any:
        """

        Lookup the provided key by continuously applying transforms until no changes are made
        or a circular loop is detected.

        Raises RewriteCircularLoopError when a key is revisited.

        """
        history = []
        previous = key

        def get_history_str():
            return join(history, sep=Constants.ARROW_SEP)

        while True:
            if previous in history:
                history.append(previous)
                msg = f'Loop detected on node "{previous}": {get_history_str()}'
                raise RewriteCircularLoopError(msg)

            history.append(previous)
            new = self.get_one(previous)
            if new == previous:
                break
            previous = new

            # A non-Key result cannot be looked up again, so it ends the chain.
            if not isinstance(new, Key):
                history.append(previous)
                break

        if len(history) == 1:
            history_str = 'No transforms performed.'
        else:
            history_str = get_history_str()
        logger.debug(f'Finished transforming: {history_str}')

        return previous
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
if __name__ == '__main__':
|
|
277
|
+
...
|
fmtr/tools/pdf_tools.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from typing import List, Tuple, Dict, Any, Self
|
|
2
|
+
|
|
1
3
|
import pymupdf as pm
|
|
2
4
|
import pymupdf4llm
|
|
3
|
-
from typing import List, Tuple, Dict, Any, Self
|
|
4
5
|
|
|
5
6
|
from fmtr.tools import data_modelling_tools
|
|
6
7
|
|
|
@@ -179,6 +180,43 @@ class Document(pm.Document):
|
|
|
179
180
|
"""
|
|
180
181
|
return pymupdf4llm.to_markdown(self, **kwargs)
|
|
181
182
|
|
|
183
|
+
def to_text_pages(self) -> List[str]:
    """

    Simple text output per-page: one `get_text()` result for each page, in iteration order.

    """
    return [page.get_text() for page in self]
|
|
195
|
+
|
|
196
|
+
def to_text(self) -> str:
    """

    Simple text output: the per-page texts joined with newlines.

    """
    pages = self.to_text_pages()
    return '\n'.join(pages)
|
|
205
|
+
|
|
206
|
+
def split(self) -> List[Self]:
|
|
207
|
+
"""
|
|
208
|
+
|
|
209
|
+
Split pages into individual documents.
|
|
210
|
+
|
|
211
|
+
"""
|
|
212
|
+
|
|
213
|
+
documents = []
|
|
214
|
+
for i, page in enumerate(self, start=1):
|
|
215
|
+
document = self.__class__()
|
|
216
|
+
document.insert_pdf(self, from_page=i, to_page=i)
|
|
217
|
+
documents.append(document)
|
|
218
|
+
|
|
219
|
+
return documents
|
|
182
220
|
|
|
183
221
|
if __name__ == '__main__':
|
|
184
222
|
from fmtr.tools.path_tools import Path
|
fmtr/tools/settings_tools.py
CHANGED
|
@@ -1,10 +1,30 @@
|
|
|
1
|
+
from typing import ClassVar, Any
|
|
2
|
+
|
|
1
3
|
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, YamlConfigSettingsSource, EnvSettingsSource, CliSettingsSource
|
|
2
|
-
from typing import ClassVar
|
|
3
4
|
|
|
4
|
-
from fmtr.tools
|
|
5
|
+
from fmtr.tools import Constants
|
|
6
|
+
from fmtr.tools.data_modelling_tools import CliRunMixin
|
|
7
|
+
from fmtr.tools.path_tools import PackagePaths, Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class YamlScriptConfigSettingsSource(YamlConfigSettingsSource):
    """

    Custom source for reading YAML *Script* (as opposed to plain YAML) configuration files.

    """

    def _read_file(self, file_path: Path) -> dict[str, Any]:
        """

        Use our own Path class to read YAML Script.

        Falls back to an empty dict when `read_yaml()` returns a falsy value.

        """
        loaded = Path(file_path).read_yaml()
        return loaded or {}
|
|
5
25
|
|
|
6
26
|
|
|
7
|
-
class Base(BaseSettings):
|
|
27
|
+
class Base(BaseSettings, CliRunMixin):
|
|
8
28
|
"""
|
|
9
29
|
|
|
10
30
|
Base class for settings configuration using Pydantic BaseSettings.
|
|
@@ -13,6 +33,7 @@ class Base(BaseSettings):
|
|
|
13
33
|
|
|
14
34
|
"""
|
|
15
35
|
|
|
36
|
+
ENV_NESTED_DELIMITER: ClassVar = Constants.ENV_NESTED_DELIMITER
|
|
16
37
|
paths: ClassVar = PackagePaths()
|
|
17
38
|
|
|
18
39
|
@classmethod
|
|
@@ -33,8 +54,8 @@ class Base(BaseSettings):
|
|
|
33
54
|
sources = (
|
|
34
55
|
init_settings,
|
|
35
56
|
CliSettingsSource(settings_cls, cli_parse_args=True),
|
|
36
|
-
EnvSettingsSource(settings_cls, env_prefix=cls.get_env_prefix()),
|
|
37
|
-
|
|
57
|
+
EnvSettingsSource(settings_cls, env_prefix=cls.get_env_prefix(), env_nested_delimiter=cls.ENV_NESTED_DELIMITER),
|
|
58
|
+
YamlScriptConfigSettingsSource(settings_cls, yaml_file=cls.paths.settings),
|
|
38
59
|
)
|
|
39
60
|
|
|
40
61
|
return sources
|
|
@@ -51,7 +72,7 @@ class Base(BaseSettings):
|
|
|
51
72
|
else:
|
|
52
73
|
stem = f'{cls.paths.name}'
|
|
53
74
|
|
|
54
|
-
prefix = f'{stem}
|
|
75
|
+
prefix = f'{stem}{cls.ENV_NESTED_DELIMITER}'.upper()
|
|
55
76
|
return prefix
|
|
56
77
|
|
|
57
78
|
@property
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from fmtr.tools.import_tools import MissingExtraMockModule
|
|
2
|
+
|
|
3
|
+
from fmtr.tools.setup_tools.setup_tools import Setup, SetupPaths, Dependencies, Tools
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from setuptools import find_namespace_packages, find_packages, setup as setup_setuptools
|
|
7
|
+
except ModuleNotFoundError as exception:
|
|
8
|
+
find_namespace_packages = find_packages = setup_setuptools = MissingExtraMockModule('setup', exception)
|