fmtr.tools 1.1.1__py3-none-any.whl → 1.3.81__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fmtr/tools/__init__.py +68 -52
- fmtr/tools/ai_tools/__init__.py +2 -2
- fmtr/tools/ai_tools/agentic_tools.py +151 -32
- fmtr/tools/ai_tools/inference_tools.py +2 -1
- fmtr/tools/api_tools.py +8 -5
- fmtr/tools/caching_tools.py +101 -3
- fmtr/tools/constants.py +33 -0
- fmtr/tools/context_tools.py +23 -0
- fmtr/tools/data_modelling_tools.py +227 -14
- fmtr/tools/database_tools/__init__.py +6 -0
- fmtr/tools/database_tools/document.py +51 -0
- fmtr/tools/datatype_tools.py +21 -1
- fmtr/tools/datetime_tools.py +12 -0
- fmtr/tools/debugging_tools.py +60 -0
- fmtr/tools/dns_tools/__init__.py +7 -0
- fmtr/tools/dns_tools/client.py +97 -0
- fmtr/tools/dns_tools/dm.py +257 -0
- fmtr/tools/dns_tools/proxy.py +66 -0
- fmtr/tools/dns_tools/server.py +138 -0
- fmtr/tools/docker_tools/__init__.py +6 -0
- fmtr/tools/entrypoints/__init__.py +0 -0
- fmtr/tools/entrypoints/cache_hfh.py +3 -0
- fmtr/tools/entrypoints/ep_test.py +2 -0
- fmtr/tools/entrypoints/install_yamlscript.py +8 -0
- fmtr/tools/{console_script_tools.py → entrypoints/remote_debug_test.py} +1 -6
- fmtr/tools/entrypoints/shell_debug.py +8 -0
- fmtr/tools/environment_tools.py +2 -2
- fmtr/tools/function_tools.py +77 -1
- fmtr/tools/google_api_tools.py +15 -4
- fmtr/tools/http_tools.py +26 -0
- fmtr/tools/inherit_tools.py +27 -0
- fmtr/tools/interface_tools/__init__.py +8 -0
- fmtr/tools/interface_tools/context.py +13 -0
- fmtr/tools/interface_tools/controls.py +354 -0
- fmtr/tools/interface_tools/interface_tools.py +189 -0
- fmtr/tools/iterator_tools.py +29 -0
- fmtr/tools/logging_tools.py +43 -16
- fmtr/tools/packaging_tools.py +14 -0
- fmtr/tools/path_tools/__init__.py +12 -0
- fmtr/tools/path_tools/app_path_tools.py +40 -0
- fmtr/tools/{path_tools.py → path_tools/path_tools.py} +156 -12
- fmtr/tools/path_tools/type_path_tools.py +3 -0
- fmtr/tools/pattern_tools.py +260 -0
- fmtr/tools/pdf_tools.py +39 -1
- fmtr/tools/settings_tools.py +23 -4
- fmtr/tools/setup_tools/__init__.py +8 -0
- fmtr/tools/setup_tools/setup_tools.py +447 -0
- fmtr/tools/string_tools.py +92 -13
- fmtr/tools/tabular_tools.py +61 -0
- fmtr/tools/tools.py +27 -2
- fmtr/tools/version +1 -1
- fmtr/tools/version_tools/__init__.py +12 -0
- fmtr/tools/version_tools/version_tools.py +51 -0
- fmtr/tools/webhook_tools.py +17 -0
- fmtr/tools/yaml_tools.py +66 -5
- {fmtr_tools-1.1.1.dist-info → fmtr_tools-1.3.81.dist-info}/METADATA +136 -54
- fmtr_tools-1.3.81.dist-info/RECORD +93 -0
- {fmtr_tools-1.1.1.dist-info → fmtr_tools-1.3.81.dist-info}/WHEEL +1 -1
- fmtr_tools-1.3.81.dist-info/entry_points.txt +6 -0
- fmtr_tools-1.3.81.dist-info/top_level.txt +1 -0
- fmtr/tools/docker_tools.py +0 -30
- fmtr/tools/interface_tools.py +0 -64
- fmtr/tools/version_tools.py +0 -62
- fmtr_tools-1.1.1.dist-info/RECORD +0 -65
- fmtr_tools-1.1.1.dist-info/entry_points.txt +0 -3
- fmtr_tools-1.1.1.dist-info/top_level.txt +0 -2
- {fmtr_tools-1.1.1.dist-info → fmtr_tools-1.3.81.dist-info}/licenses/LICENSE +0 -0
fmtr/tools/logging_tools.py
CHANGED
@@ -4,23 +4,34 @@ import os
 from fmtr.tools import environment_tools
 from fmtr.tools.constants import Constants
 
-
-
-
-
+if environment_tools.IS_DEV:
+    STREAM_DEFAULT = ENVIRONMENT_DEFAULT = Constants.DEVELOPMENT
+else:
+    STREAM_DEFAULT = None
+    ENVIRONMENT_DEFAULT = Constants.PRODUCTION
 
-
+IS_DEBUG = environment_tools.get(Constants.FMTR_LOG_LEVEL_KEY, None, converter=str.upper) == 'DEBUG'
+LEVEL_DEFAULT = logging.DEBUG if IS_DEBUG else logging.INFO
 
 
+def null_scrubber(match):
+    """
+
+    Effectively disable scrubbing
+
+    """
+    return match.value
+
 def get_logger(name, version=None, host=Constants.FMTR_OBS_HOST, key=None, org=Constants.ORG_NAME,
-               stream=STREAM_DEFAULT,
-               environment=ENVIRONMENT_DEFAULT, level=LEVEL_DEFAULT):
+               stream=STREAM_DEFAULT, environment=ENVIRONMENT_DEFAULT, level=LEVEL_DEFAULT):
     """
 
     Get a pre-configured logfire logger, if dependency is present, otherwise default to native logger.
 
     """
 
+    stream = stream or name
+
     try:
         import logfire
     except ImportError:
@@ -30,30 +41,45 @@ def get_logger(name, version=None, host=Constants.FMTR_OBS_HOST, key=None, org=C
 
         return logger
 
+    logger = logfire
+
     if key is None:
-        key = environment_tools.get(Constants.FMTR_OBS_API_KEY_KEY)
-
-
+        key = environment_tools.get(Constants.FMTR_OBS_API_KEY_KEY, default=None)
+
+    if key:
+        url = f"https://{host}/api/{org}/v1/traces"
+        headers = f"Authorization=Basic {key},stream-name={stream}"
 
-
-
-
+        os.environ["OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"] = url
+        os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = headers
+        os.environ["OTEL_EXPORTER_OTLP_INSECURE"] = str(False).lower()
 
     if not version:
         from fmtr.tools import version_tools
         version = version_tools.read()
 
+    # Rigmarole to translate native levels to logfire/otel ones.
+    lev_num_otel = logfire._internal.constants.LOGGING_TO_OTEL_LEVEL_NUMBERS[level]
+    lev_name_otel = logfire._internal.constants.NUMBER_TO_LEVEL[lev_num_otel]
+
+    console_opts = logfire.ConsoleOptions(
+        colors='always',
+        min_log_level=lev_name_otel,
+    )
+
     logfire.configure(
         service_name=name,
         service_version=version,
         environment=environment,
         send_to_logfire=False,
-        console=
+        console=console_opts,
+        scrubbing=logfire.ScrubbingOptions(callback=null_scrubber)
     )
 
-
+    if key is None:
+        msg = f'Observability dependencies installed, but "{Constants.FMTR_OBS_API_KEY_KEY}" not set. Cloud observability will be disabled.'
+        logger.warning(msg)
 
-    logger = logfire
     return logger
 
 
@@ -63,3 +89,4 @@ if __name__ == '__main__':
     logger.info('Hello World')
     logger.warning('test warning')
     logger.debug('Hello World')
+    logger
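For orientation, here is a minimal usage sketch of the reworked `get_logger`; the service name and version are illustrative, and the behaviour summarised in the comments assumes the optional `logfire` dependency is installed (otherwise the function falls back to a native logger).

```python
# Hypothetical usage sketch; names and values are illustrative, not from the diff.
# With logfire installed, the updated get_logger:
#   * defaults stream/environment to Constants.DEVELOPMENT when environment_tools.IS_DEV is true,
#   * derives the console level from FMTR_LOG_LEVEL (DEBUG -> logging.DEBUG, otherwise INFO),
#   * only sets the OTEL_EXPORTER_OTLP_* variables when an API key is available,
#   * warns instead of failing when the key is missing.
from fmtr.tools.logging_tools import get_logger

logger = get_logger('example-service', version='0.0.1')
logger.info('Hello World')
```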
fmtr/tools/path_tools/__init__.py
ADDED
@@ -0,0 +1,12 @@
+from fmtr.tools.import_tools import MissingExtraMockModule
+from fmtr.tools.path_tools.path_tools import Path, PackagePaths
+
+try:
+    from fmtr.tools.path_tools.app_path_tools import AppPaths
+except ModuleNotFoundError as exception:
+    AppPaths = MissingExtraMockModule('path.app', exception)
+
+try:
+    from fmtr.tools.path_tools.type_path_tools import guess
+except ModuleNotFoundError as exception:
+    guess = MissingExtraMockModule('path.type', exception)
fmtr/tools/path_tools/app_path_tools.py
ADDED
@@ -0,0 +1,40 @@
+import appdirs
+
+from fmtr.tools.path_tools import Path
+
+
+class AppPaths:
+    """
+
+    Wrap appdirs to return Path objects
+
+    """
+    PathType = Path
+
+    def user_data_dir(self, appname=None, appauthor=None, version=None, roaming=False):
+        path_str = appdirs.user_data_dir(appname=appname, appauthor=appauthor, version=version, roaming=roaming)
+        return self.PathType(path_str)
+
+    def user_config_dir(self, appname=None, appauthor=None, version=None, roaming=False):
+        path_str = appdirs.user_config_dir(appname=appname, appauthor=appauthor, version=version, roaming=roaming)
+        return self.PathType(path_str)
+
+    def site_config_dir(self, appname=None, appauthor=None, version=None):
+        path_str = appdirs.site_config_dir(appname=appname, appauthor=appauthor, version=version, multipath=False)
+        return self.PathType(path_str)
+
+    def site_data_dir(self, appname=None, appauthor=None, version=None):
+        path_str = appdirs.site_data_dir(appname=appname, appauthor=appauthor, version=version, multipath=False)
+        return self.PathType(path_str)
+
+    def user_cache_dir(self, appname=None, appauthor=None, version=None):
+        path_str = appdirs.user_cache_dir(appname=appname, appauthor=appauthor, version=version)
+        return self.PathType(path_str)
+
+    def user_state_dir(self, appname=None, appauthor=None, version=None):
+        path_str = appdirs.user_state_dir(appname=appname, appauthor=appauthor, version=version)
+        return self.PathType(path_str)
+
+    def user_log_dir(self, appname=None, appauthor=None, version=None):
+        path_str = appdirs.user_log_dir(appname=appname, appauthor=appauthor, version=version)
+        return self.PathType(path_str)
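A short usage sketch of the new AppPaths wrapper follows; the application name is hypothetical, and the exact directories returned follow appdirs' per-platform conventions.

```python
# Hypothetical sketch: AppPaths mirrors the appdirs API but returns the package's Path type.
from fmtr.tools.path_tools.app_path_tools import AppPaths

paths = AppPaths()
config_dir = paths.user_config_dir(appname='example-app', appauthor='fmtr')  # e.g. ~/.config/example-app on Linux
config_dir.mkdir(parents=True, exist_ok=True)  # standard pathlib methods still apply to the returned Path
```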
fmtr/tools/path_tools/path_tools.py
CHANGED
@@ -145,19 +145,94 @@ class Path(type(Path())):
         """
         return self.mkdir(parents=True, exist_ok=True)
 
+    def with_suffix(self, suffix: str) -> 'Path':
+        """
+
+        Pathlib doesn't add a dot prefix, but then errors if you don't provide one, which feels rather obnoxious.
+
+        """
+        if not suffix.startswith('.'):
+            suffix = f'.{suffix}'
+        return super().with_suffix(suffix)
+
+    def get_conversion_path(self, suffix: str) -> 'Path':
+        """
+
+        Fetch the equivalent path for a different format in the standard conversion directory structure.
+        .../xyz/filename.xyx -> ../abc/filename.abc
+
+        """
+
+        old_dir = self.parent.name
+
+        if old_dir != self.suffix.removeprefix('.'):
+            raise ValueError(f"Expected parent directory '{old_dir}' to match file extension '{suffix}'")
+
+        new = self.parent.parent / suffix / f'{self.stem}.{suffix}'
+        return new
+
+    @property
+    def exist(self):
+        """
+
+        Exists as property
+
+        """
+        return super().exists()
+
+    @classmethod
+    def app(cls):
+        """
+
+        Convenience method for getting application paths
+
+        """
+        from fmtr.tools import path
+        return path.AppPaths()
+
+    @property
+    def type(self):
+        """
+
+        Infer file type, extension, etc.
+
+        """
+        if not self.exists():
+            return None
+        from fmtr.tools import path
+        kind = path.guess(str(self.absolute()))
+        return kind
+
+    @property
+    def children(self):
+        if not self.is_dir():
+            return None
+        return sorted(self.iterdir(), key=lambda x: x.is_dir(), reverse=True)
+
+
+class FromCallerMixin:
+    """
+
 
-
+    """
+
+    def from_caller(self):
+        from fmtr.tools.inspection_tools import get_call_path
+        path = get_call_path(offset=3).parent
+        return path
+
+
+class PackagePaths(FromCallerMixin):
     """
 
     Canonical paths for a package.
 
     """
 
-
-
-    FILENAME_VERSION = 'version'
+    dev = Path('/') / 'opt' / 'dev'
+    data_global = dev / Constants.DIR_NAME_DATA
 
-    def __init__(self, path=None, org_singleton=None,
+    def __init__(self, path=None, org_singleton=None, dir_name_data=Constants.DIR_NAME_DATA, filename_config=Constants.FILENAME_CONFIG, file_version=Constants.FILENAME_VERSION):
 
         """
 
@@ -165,15 +240,23 @@ class PackagePaths:
 
         """
         if not path:
-
-            path = get_call_path(offset=2).parent
+            path = self.from_caller()
 
         self.path = Path(path)
         self.org_singleton = org_singleton
-        self.
+        self.dir_name_data = dir_name_data
         self.filename_config = filename_config
         self.filename_version = file_version
 
+    @property
+    def is_dev(self) -> bool:
+        """
+
+        Is the package in the dev directory - as opposed to `site-packages` etc?
+
+        """
+        return self.path.is_relative_to(self.dev)
+
     @property
     def is_namespace(self) -> bool:
         """
@@ -192,6 +275,19 @@ class PackagePaths:
         """
         return self.path.stem
 
+    @property
+    def name_ns(self) -> str:
+        """
+
+        Name of namespace package.
+
+        """
+
+        if self.is_namespace:
+            return f'{self.org}.{self.name}'
+        else:
+            return self.name
+
     @property
     def org(self) -> str:
         """
@@ -226,13 +322,46 @@ class PackagePaths:
         return self.path / self.filename_version
 
     @property
-    def
+    def data(self) -> Path:
+        """
+
+        Path of project-specific data directory.
+
+        """
+
+        return self.dev / Constants.DIR_NAME_REPO / self.name_ns / self.dir_name_data
+
+    @property
+    def cache(self) -> Path:
+        """
+
+        Path of cache directory.
+
+        """
+
+        return self.data / Constants.DIR_NAME_CACHE
+
+    @property
+    def artifact(self) -> Path:
+        """
+
+        Path of project-specific artifact directory
+
+        """
+
+        return self.data / Constants.DIR_NAME_ARTIFACT
+
+    @property
+    def source(self) -> Path:
         """
 
-        Path of
+        Path of project-specific source directory
 
         """
-
+
+        return self.data / Constants.DIR_NAME_SOURCE
+
+
 
     @property
     def settings(self) -> Path:
@@ -241,7 +370,16 @@ class PackagePaths:
         Path of settings file.
 
         """
-        return self.
+        return self.data / self.filename_config
+
+    @property
+    def hf(self) -> Path:
+        """
+
+        Path of HuggingFace directory
+
+        """
+        return self.artifact / Constants.DIR_NAME_HF
 
     def __repr__(self) -> str:
         """
@@ -250,3 +388,9 @@ class PackagePaths:
 
         """
         return f'{self.__class__.__name__}("{self.path}")'
+
+
+if __name__ == "__main__":
+    path = Path('/usr/bin/bash').absolute()
+    path.type
+    path
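A brief sketch of the two new Path helpers; the file names below are hypothetical.

```python
# Hypothetical sketch of the new Path helpers; paths are illustrative, not from the diff.
from fmtr.tools.path_tools import Path

doc = Path('/data/pdf/report.pdf')

# with_suffix now tolerates a missing leading dot:
doc.with_suffix('txt')          # -> /data/pdf/report.txt

# get_conversion_path swaps both the per-format parent directory and the extension,
# raising ValueError if the parent directory name doesn't match the current extension:
doc.get_conversion_path('txt')  # -> /data/txt/report.txt
```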
fmtr/tools/pattern_tools.py
ADDED
@@ -0,0 +1,260 @@
+import regex as re
+from dataclasses import dataclass, asdict
+from functools import cached_property
+from typing import List, Any
+
+from fmtr.tools import Constants
+from fmtr.tools.logging_tools import logger
+from fmtr.tools.string_tools import join
+
+
+class RewriteCircularLoopError(Exception):
+    """
+
+    Circular loop error
+
+    """
+
+
+MASK_GROUP = '(?:{pattern})'
+MASK_NAMED = r"(?P<{key}>{pattern})"
+
+
+def alt(*patterns):
+    patterns = sorted(patterns, key=len, reverse=True)
+    pattern = '|'.join(patterns)
+    pattern = MASK_GROUP.format(pattern=pattern)
+    return pattern
+
+@dataclass
+class Key:
+    RECORD_SEP = '␞'
+    FILLS = None
+
+    def flatten(self, data):
+        """
+
+        Flatten/serialise dictionary data
+
+        """
+        pairs = [f'{value}' for key, value in data.items()]
+        string = self.RECORD_SEP.join(pairs)
+        return string
+
+    @cached_property
+    def pattern(self):
+        """
+
+        Serialise to pattern
+
+        """
+        data = {
+            key:
+                MASK_NAMED.format(
+                    key=key,
+                    pattern=value.format_map(self.fills))
+            for key, value in asdict(self).items()
+        }
+        pattern = self.flatten(data)
+        return pattern
+
+    @cached_property
+    def rx(self):
+        """
+
+        Compile to Regular Expression
+
+        """
+        return re.compile(self.pattern)
+
+    @cached_property
+    def string(self):
+        """
+
+        Serialise to string
+
+        """
+        string = self.flatten(asdict(self))
+        return string
+
+    @cached_property
+    def fills(self):
+        """
+
+        Add key names as regex group names
+
+        """
+        return {key: MASK_NAMED.format(key=key, pattern=value) for key, value in self.FILLS.items()}
+
+
+    def transform(self, match: re.Match):
+        """
+
+        Transform match object into a new object of the same type.
+
+        """
+        fills = match.groupdict()
+        data = {key: value.format_map(fills) for key, value in asdict(self).items()}
+        obj = self.__class__(**data)
+        return obj
+
+
+@dataclass
+class Item:
+    """
+
+    Key-value pair
+
+    """
+    source: Key
+    target: Key
+
+
+@dataclass(kw_only=True)
+class Transformer:
+    """
+
+    Pattern-based, dictionary-like mapper.
+    Compiles an complex set of rules into single regex pattern, and determines which rule matched.
+    Inputs are then transformed according to the matching rule.
+    Works like a pattern-based dictionary when is_recursive==False.
+    Works something like an FSA/transducer when is_recursive=True.
+
+    """
+    PREFIX_GROUP = '__'
+    items: List[Item]
+    default: Any = None
+    is_recursive: bool = False
+
+    def __post_init__(self):
+        with logger.span(f'Compiling expression {len(self.items)=}'):
+            rx = self.rx
+            logger.debug(f'Compiled successfully {rx.groups=}')
+
+    @cached_property
+    def pattern(self) -> str:
+        """
+
+        Dynamically generated regex pattern based on the rules provided.
+
+        """
+        patterns = [
+            MASK_NAMED.format(key=f'{self.PREFIX_GROUP}{i}', pattern=item.source.pattern)
+            for i, item in enumerate(self.items)
+        ]
+        pattern = alt(*patterns)
+        return pattern
+
+    @cached_property
+    def rx(self) -> re.Pattern:
+        """
+
+        Regex object.
+
+        """
+        return re.compile(self.pattern)
+
+    def get_default(self, key: Key) -> Any:
+        """
+
+        Define what to return in case of no match
+
+        """
+        if self.is_recursive:
+            return key
+        else:
+            return self.default
+
+    def get(self, key: Key) -> Key | Any:
+        """
+
+        Use recursive or single lookup pass, depending on whether recursive lookups have been specified.
+
+        """
+        if self.is_recursive:
+            with logger.span(f'Transforming recursively {key=}...'):
+                return self.get_recursive(key)
+        else:
+            with logger.span(f'Transforming linearly {key=}...'):
+                return self.get_one(key)
+
+    def get_one(self, key: Key) -> Key | Any:
+        """
+
+        Single lookup pass.
+        Lookup the source string based on the matching rule.
+
+        """
+
+        match = self.rx.fullmatch(key.string)
+
+        if not match:
+            value = self.get_default(key)
+            logger.debug(f'No match for {key=}. Returning {self.get_default(key)=}')
+        else:
+
+            match_ids = {name: v for name, v in match.groupdict().items() if v}
+            rule_ids = {
+                int(id.removeprefix(self.PREFIX_GROUP))
+                for id in match_ids.keys() if id.startswith(self.PREFIX_GROUP)
+            }
+
+            if len(rule_ids) != 1:
+                msg = f'Multiple group matches: {rule_ids}'
+                raise ValueError(msg)
+
+            rule_id = next(iter(rule_ids))
+            rule = self.items[rule_id]
+
+            logger.debug(f'Matched using {rule_id=}: {rule.source=}')
+
+            if isinstance(rule.target, Key):
+                value = rule.target.transform(match)
+            else:
+                value = rule.target
+
+            logger.debug(f'Transformed using {rule_id=}: {key=} → {value=}')
+
+        return value
+
+    def get_recursive(self, key: Key) -> Key | Any:
+        """
+
+        Lookup the provided key by continuously applying transforms until no changes are made
+        or a circular loop is detected.
+
+        """
+        history = []
+        previous = key
+
+        def get_history_str():
+            return join(history, sep=Constants.ARROW_SEP)
+
+        while True:
+            if previous in history:
+                history.append(previous)
+                msg = f'Loop detected on node "{previous}": {get_history_str()}'
+                raise RewriteCircularLoopError(msg)
+
+            history.append(previous)
+            new = previous
+            new = self.get_one(new)
+            if new == previous:
+                break
+            previous = new
+
+            if not isinstance(new, Key):
+                history.append(previous)
+                break
+
+        if len(history) == 1:
+            history_str = 'No transforms performed.'
+        else:
+            history_str = get_history_str()
+        logger.debug(f'Finished transforming: {history_str}')
+
+        return previous
+
+
+if __name__ == '__main__':
+    ...
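To make the Key/Item/Transformer machinery concrete, here is a hypothetical sketch of a single rewrite rule. The Service dataclass, its FILLS fragments and the hostnames are invented for illustration; only Key, Item and Transformer come from the diff, and the sketch assumes a configured fmtr logger, since Transformer logs while compiling and looking up.

```python
# Hypothetical sketch; only Key, Item and Transformer are from the diff.
from dataclasses import dataclass

from fmtr.tools.pattern_tools import Key, Item, Transformer


@dataclass
class Service(Key):
    host: str
    port: str

    # Placeholder name -> regex fragment; Key.fills wraps each in a named group.
    FILLS = {'h': r'[a-z0-9.-]+'}


# Source fields are regex templates; target fields are format templates over the captured groups.
rule = Item(
    source=Service(host='{h}', port='8080'),
    target=Service(host='{h}', port='9090'),
)

transformer = Transformer(items=[rule])
result = transformer.get(Service(host='db01.example.com', port='8080'))
# result == Service(host='db01.example.com', port='9090'); an unmatched key returns `default` (None here).
```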