pythagoras 0.24.4__py3-none-any.whl → 0.24.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pythagoras/_060_autonomous_code_portals/autonomous_decorators.py +31 -4
- pythagoras/_060_autonomous_code_portals/autonomous_portal_core_classes.py +94 -14
- pythagoras/_060_autonomous_code_portals/names_usage_analyzer.py +133 -4
- pythagoras/_070_protected_code_portals/basic_pre_validators.py +130 -15
- pythagoras/_070_protected_code_portals/fn_arg_names_checker.py +20 -18
- pythagoras/_070_protected_code_portals/list_flattener.py +45 -7
- pythagoras/_070_protected_code_portals/package_manager.py +99 -24
- pythagoras/_070_protected_code_portals/protected_decorators.py +59 -1
- pythagoras/_070_protected_code_portals/protected_portal_core_classes.py +239 -4
- pythagoras/_070_protected_code_portals/system_utils.py +85 -12
- pythagoras/_070_protected_code_portals/validation_succesful_const.py +12 -7
- pythagoras/_080_pure_code_portals/pure_core_classes.py +178 -25
- pythagoras/_080_pure_code_portals/pure_decorator.py +37 -0
- pythagoras/_080_pure_code_portals/recursion_pre_validator.py +39 -0
- pythagoras/_090_swarming_portals/output_suppressor.py +32 -3
- pythagoras/_090_swarming_portals/swarming_portals.py +165 -19
- pythagoras/_100_top_level_API/__init__.py +11 -0
- pythagoras/_800_signatures_and_converters/__init__.py +17 -0
- pythagoras/_800_signatures_and_converters/base_16_32_convertors.py +55 -20
- pythagoras/_800_signatures_and_converters/current_date_gmt_str.py +20 -5
- pythagoras/_800_signatures_and_converters/hash_signatures.py +46 -10
- pythagoras/_800_signatures_and_converters/node_signature.py +27 -12
- pythagoras/_800_signatures_and_converters/random_signatures.py +14 -3
- pythagoras/core/__init__.py +54 -0
- {pythagoras-0.24.4.dist-info → pythagoras-0.24.7.dist-info}/METADATA +1 -1
- {pythagoras-0.24.4.dist-info → pythagoras-0.24.7.dist-info}/RECORD +27 -27
- {pythagoras-0.24.4.dist-info → pythagoras-0.24.7.dist-info}/WHEEL +0 -0
|
@@ -45,6 +45,29 @@ BACKGROUND_WORKERS_TXT = "Background workers"
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
class SwarmingPortal(PureCodePortal):
|
|
48
|
+
"""Portal for asynchronous swarming execution of pure functions.
|
|
49
|
+
|
|
50
|
+
The SwarmingPortal distributes execution of registered pure functions
|
|
51
|
+
across background worker processes (potentially on different machines)
|
|
52
|
+
in a best-effort, eventually-executed manner. It manages a child process
|
|
53
|
+
that maintains a pool of background workers and randomly dispatches
|
|
54
|
+
execution requests. No strong guarantees are provided regarding which
|
|
55
|
+
worker runs a task, how many times it runs, or when it will run; only that
|
|
56
|
+
eligible requests will eventually be executed at least once.
|
|
57
|
+
|
|
58
|
+
Notes:
|
|
59
|
+
- Parent/child: The portal instance created by user code is the parent.
|
|
60
|
+
It may spawn a separate child process whose responsibility is to keep
|
|
61
|
+
background workers alive. Child processes created by the portal do not
|
|
62
|
+
spawn further workers (max_n_workers is forced to 0 in children).
|
|
63
|
+
- Resources: The effective number of workers is automatically bounded by
|
|
64
|
+
currently available CPU cores and RAM at runtime.
|
|
65
|
+
- Environment logging: Runtime environment summary is stored under compute_nodes
|
|
66
|
+
for debugging purposes to help describe where the parent process is running.
|
|
67
|
+
|
|
68
|
+
See also: OutputSuppressor for silencing worker output, PureCodePortal for
|
|
69
|
+
the base API and lifecycle management, and tests in tests/_090_swarming_portals.
|
|
70
|
+
"""
|
|
48
71
|
_compute_nodes: OverlappingMultiDict | None
|
|
49
72
|
_node_id: str | None
|
|
50
73
|
|
|
@@ -62,6 +85,31 @@ class SwarmingPortal(PureCodePortal):
|
|
|
62
85
|
, parent_process_id: int | None = None
|
|
63
86
|
, parent_process_start_time: float | None = None
|
|
64
87
|
):
|
|
88
|
+
"""Initialize a swarming portal.
|
|
89
|
+
|
|
90
|
+
Args:
|
|
91
|
+
root_dict: Persistent dictionary or path used to back the portal's
|
|
92
|
+
state. If None, a default dictionary will be used.
|
|
93
|
+
p_consistency_checks: Probability or Joker controlling internal
|
|
94
|
+
consistency checks. Passed to PureCodePortal.
|
|
95
|
+
excessive_logging: Whether to enable verbose logging. Passed to
|
|
96
|
+
PureCodePortal.
|
|
97
|
+
max_n_workers: Desired maximum number of background workers for the
|
|
98
|
+
parent process. Children must pass 0 here.
|
|
99
|
+
The effective value may be reduced based on available CPUs and
|
|
100
|
+
RAM at runtime.
|
|
101
|
+
parent_process_id: ID of the parent process when this portal is
|
|
102
|
+
constructed inside a child process. For parent portals, it
|
|
103
|
+
must be None.
|
|
104
|
+
parent_process_start_time: Start time of the parent process, used to
|
|
105
|
+
detect PID reuse. For parents, it must be None.
|
|
106
|
+
|
|
107
|
+
Notes:
|
|
108
|
+
- When parent_process_id or parent_process_start_time is provided,
|
|
109
|
+
both must be provided and max_n_workers must be 0.
|
|
110
|
+
- Initializes compute_nodes storage and captures a unique
|
|
111
|
+
node signature for this runtime.
|
|
112
|
+
"""
|
|
65
113
|
PureCodePortal.__init__(self
|
|
66
114
|
, root_dict=root_dict
|
|
67
115
|
, p_consistency_checks=p_consistency_checks
|
|
@@ -95,6 +143,13 @@ class SwarmingPortal(PureCodePortal):
|
|
|
95
143
|
|
|
96
144
|
|
|
97
145
|
def get_params(self) -> dict:
|
|
146
|
+
"""Return portal parameters including parent process metadata.
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
dict: Sorted dictionary of portal parameters inherited from
|
|
150
|
+
PureCodePortal plus parent_process_id and
|
|
151
|
+
parent_process_start_time.
|
|
152
|
+
"""
|
|
98
153
|
params = super().get_params()
|
|
99
154
|
params["parent_process_id"] = self._parent_process_id
|
|
100
155
|
params["parent_process_start_time"] = self._parent_process_start_time
|
|
@@ -104,13 +159,26 @@ class SwarmingPortal(PureCodePortal):
|
|
|
104
159
|
|
|
105
160
|
@property
|
|
106
161
|
def is_parent(self) -> bool:
|
|
107
|
-
"""
|
|
162
|
+
"""Whether this portal instance represents the parent process.
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
bool: True if this process created the portal instance on the node
|
|
166
|
+
(no parent metadata set); False if this is a child portal
|
|
167
|
+
instantiated inside the background worker process.
|
|
168
|
+
"""
|
|
108
169
|
if self._parent_process_id is None:
|
|
109
170
|
return True
|
|
110
171
|
return False
|
|
111
172
|
|
|
112
173
|
|
|
113
174
|
def _post_init_hook(self) -> None:
|
|
175
|
+
"""Lifecycle hook invoked after initialization.
|
|
176
|
+
|
|
177
|
+
Registers a global atexit handler once per process to terminate any
|
|
178
|
+
child processes spawned by portals. For parent portals with a positive
|
|
179
|
+
max_n_workers, spawns a child process that manages the pool of
|
|
180
|
+
background worker processes.
|
|
181
|
+
"""
|
|
114
182
|
super()._post_init_hook()
|
|
115
183
|
|
|
116
184
|
if not SwarmingPortal._atexit_is_registered:
|
|
@@ -132,7 +200,12 @@ class SwarmingPortal(PureCodePortal):
|
|
|
132
200
|
|
|
133
201
|
|
|
134
202
|
def _terminate_child_process(self):
|
|
135
|
-
"""Terminate the child process if it is running.
|
|
203
|
+
"""Terminate the child process if it is running.
|
|
204
|
+
|
|
205
|
+
This method is idempotent and safe to call from atexit handlers.
|
|
206
|
+
It attempts a graceful termination, then escalates to kill if the
|
|
207
|
+
child does not stop within a short grace period.
|
|
208
|
+
"""
|
|
136
209
|
if self._child_process is not None:
|
|
137
210
|
if self._child_process.is_alive():
|
|
138
211
|
self._child_process.terminate()
|
|
@@ -145,14 +218,28 @@ class SwarmingPortal(PureCodePortal):
|
|
|
145
218
|
|
|
146
219
|
@property
|
|
147
220
|
def _execution_environment_address(self) -> list[str]: #TODO: move to Logs
|
|
148
|
-
"""
|
|
221
|
+
"""Address path for storing execution environment summary.
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
list[str]: A hierarchical key used in the compute_nodes storage of
|
|
225
|
+
the form [node_id, parent_pid_and_start, "execution_environment"].
|
|
226
|
+
"""
|
|
149
227
|
s = str(self._parent_process_id) + "_" + str(self._parent_process_start_time)
|
|
150
228
|
return [self._node_id, s, "execution_environment"]
|
|
151
229
|
|
|
152
230
|
|
|
153
231
|
@property
|
|
154
232
|
def max_n_workers(self) -> int:
|
|
155
|
-
"""
|
|
233
|
+
"""Effective cap on background worker processes.
|
|
234
|
+
|
|
235
|
+
The configured max_n_workers value is adjusted down by runtime
|
|
236
|
+
resource availability: currently unused CPU cores and available RAM.
|
|
237
|
+
The result is cached in RAM for the life of the portal process
|
|
238
|
+
until the cache is invalidated.
|
|
239
|
+
|
|
240
|
+
Returns:
|
|
241
|
+
int: Effective maximum number of worker processes to use.
|
|
242
|
+
"""
|
|
156
243
|
if not hasattr(self, "_max_n_workers_cache"):
|
|
157
244
|
n = self._get_config_setting("max_n_workers")
|
|
158
245
|
if n in (None, KEEP_CURRENT):
|
|
@@ -166,7 +253,13 @@ class SwarmingPortal(PureCodePortal):
|
|
|
166
253
|
|
|
167
254
|
|
|
168
255
|
def describe(self) -> pd.DataFrame:
|
|
169
|
-
"""
|
|
256
|
+
"""Describe the current portal configuration and runtime.
|
|
257
|
+
|
|
258
|
+
Returns:
|
|
259
|
+
pandas.DataFrame: A table combining PureCodePortal description with
|
|
260
|
+
swarming-specific characteristics such as effective number of
|
|
261
|
+
background workers.
|
|
262
|
+
"""
|
|
170
263
|
all_params = [super().describe()]
|
|
171
264
|
all_params.append(_describe_runtime_characteristic(
|
|
172
265
|
BACKGROUND_WORKERS_TXT, self.max_n_workers))
|
|
@@ -177,6 +270,12 @@ class SwarmingPortal(PureCodePortal):
|
|
|
177
270
|
|
|
178
271
|
|
|
179
272
|
def parent_runtime_is_live(self):
|
|
273
|
+
"""Check that the recorded parent process is still alive.
|
|
274
|
+
|
|
275
|
+
Returns:
|
|
276
|
+
bool: True if the parent PID exists and its start time matches the
|
|
277
|
+
recorded start time (to avoid PID reuse issues); False otherwise.
|
|
278
|
+
"""
|
|
180
279
|
if not process_is_active(self._parent_process_id):
|
|
181
280
|
return False
|
|
182
281
|
if get_process_start_time(self._parent_process_id) != self._parent_process_start_time:
|
|
@@ -185,6 +284,12 @@ class SwarmingPortal(PureCodePortal):
|
|
|
185
284
|
|
|
186
285
|
|
|
187
286
|
def _clear(self):
|
|
287
|
+
"""Release resources and clear internal state.
|
|
288
|
+
|
|
289
|
+
Side Effects:
|
|
290
|
+
Terminates the child process if present and clears references to
|
|
291
|
+
compute node metadata before delegating to the base implementation.
|
|
292
|
+
"""
|
|
188
293
|
self._compute_nodes = None
|
|
189
294
|
self._terminate_child_process()
|
|
190
295
|
super()._clear()
|
|
@@ -195,18 +300,28 @@ class SwarmingPortal(PureCodePortal):
|
|
|
195
300
|
, min_delay:float = 0.02
|
|
196
301
|
, max_delay:float = 0.22
|
|
197
302
|
) -> None:
|
|
198
|
-
"""
|
|
303
|
+
"""Introduce randomized backoff to reduce contention.
|
|
304
|
+
|
|
305
|
+
With probability p, sleeps for a random delay uniformly drawn from
|
|
306
|
+
[min_delay, max_delay]. Uses the portal's entropy_infuser to remain
|
|
307
|
+
deterministic when seeded in tests.
|
|
308
|
+
|
|
309
|
+
Args:
|
|
310
|
+
p: Probability of applying the delay.
|
|
311
|
+
min_delay: Minimum sleep duration in seconds.
|
|
312
|
+
max_delay: Maximum sleep duration in seconds.
|
|
313
|
+
"""
|
|
199
314
|
if self.entropy_infuser.uniform(0, 1) < p:
|
|
200
315
|
delay = self.entropy_infuser.uniform(min_delay, max_delay)
|
|
201
316
|
sleep(delay)
|
|
202
317
|
|
|
203
318
|
|
|
204
319
|
def _invalidate_cache(self):
|
|
205
|
-
"""
|
|
320
|
+
"""Drop cached computed attributes and delegate to base class.
|
|
206
321
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
322
|
+
This implementation removes any attributes used as local caches by this
|
|
323
|
+
class (currently _max_n_workers_cache) and then calls the base
|
|
324
|
+
implementation to allow it to clear its own caches.
|
|
210
325
|
"""
|
|
211
326
|
if hasattr(self, "_max_n_workers_cache"):
|
|
212
327
|
del self._max_n_workers_cache
|
|
@@ -214,7 +329,19 @@ class SwarmingPortal(PureCodePortal):
|
|
|
214
329
|
|
|
215
330
|
|
|
216
331
|
def _launch_many_background_workers(portal_init_jsparams:JsonSerializedObject) -> None:
|
|
217
|
-
"""
|
|
332
|
+
"""Spawn and maintain a pool of background worker processes.
|
|
333
|
+
|
|
334
|
+
This function is executed inside a dedicated child process created by the
|
|
335
|
+
parent portal. It spawns up to max_n_workers worker processes, restarts
|
|
336
|
+
any that exit unexpectedly, and records an execution environment summary
|
|
337
|
+
under the portal's compute_nodes.
|
|
338
|
+
|
|
339
|
+
Args:
|
|
340
|
+
portal_init_jsparams: Serialized initialization parameters for
|
|
341
|
+
reconstructing a SwarmingPortal. The parameters are adjusted to
|
|
342
|
+
indicate this is a child context (max_n_workers=0) and to record
|
|
343
|
+
the parent process metadata.
|
|
344
|
+
"""
|
|
218
345
|
|
|
219
346
|
|
|
220
347
|
n_workers_to_launch = access_jsparams(portal_init_jsparams
|
|
@@ -264,7 +391,16 @@ def _launch_many_background_workers(portal_init_jsparams:JsonSerializedObject) -
|
|
|
264
391
|
|
|
265
392
|
|
|
266
393
|
def _background_worker(portal_init_jsparams:JsonSerializedObject) -> None:
|
|
267
|
-
"""
|
|
394
|
+
"""Worker loop that processes random execution requests serially.
|
|
395
|
+
|
|
396
|
+
Runs indefinitely until the parent process is detected as dead.
|
|
397
|
+
Within the loop, each individual request is handled in a subprocess to
|
|
398
|
+
isolate failures and to reduce the risk of state leakage.
|
|
399
|
+
|
|
400
|
+
Args:
|
|
401
|
+
portal_init_jsparams: Serialized initialization parameters for
|
|
402
|
+
reconstructing a SwarmingPortal in child context.
|
|
403
|
+
"""
|
|
268
404
|
portal = parameterizable.loadjs(portal_init_jsparams)
|
|
269
405
|
assert isinstance(portal, SwarmingPortal)
|
|
270
406
|
with portal:
|
|
@@ -282,9 +418,18 @@ def _background_worker(portal_init_jsparams:JsonSerializedObject) -> None:
|
|
|
282
418
|
|
|
283
419
|
|
|
284
420
|
def _process_random_execution_request(portal_init_jsparams:JsonSerializedObject):
|
|
285
|
-
"""Process
|
|
286
|
-
|
|
287
|
-
|
|
421
|
+
"""Process a single pending execution request, if any.
|
|
422
|
+
|
|
423
|
+
The function reconstructs a child-context portal, selects a random pending
|
|
424
|
+
execution request (if available), and validates readiness. If validation
|
|
425
|
+
yields a PureFnCallSignature, it continues with it; otherwise, it executes
|
|
426
|
+
the request when validation returns VALIDATION_SUCCESSFUL. Output during
|
|
427
|
+
execution is suppressed by the caller to keep workers quiet.
|
|
428
|
+
|
|
429
|
+
Args:
|
|
430
|
+
portal_init_jsparams: Serialized initialization parameters for
|
|
431
|
+
reconstructing a SwarmingPortal in child context.
|
|
432
|
+
"""
|
|
288
433
|
portal_init_jsparams = update_jsparams(
|
|
289
434
|
portal_init_jsparams, max_n_workers=0)
|
|
290
435
|
portal = parameterizable.loadjs(portal_init_jsparams)
|
|
@@ -326,13 +471,14 @@ def _process_random_execution_request(portal_init_jsparams:JsonSerializedObject)
|
|
|
326
471
|
|
|
327
472
|
|
|
328
473
|
def _terminate_all_portals_child_processes():
|
|
329
|
-
"""
|
|
474
|
+
"""Terminate child processes for all known portals.
|
|
330
475
|
|
|
331
|
-
|
|
332
|
-
|
|
476
|
+
Registered with atexit the first time a SwarmingPortal is initialized.
|
|
477
|
+
Ensures that any child processes are terminated to avoid orphaned workers.
|
|
333
478
|
"""
|
|
334
479
|
for portal in get_all_known_portals():
|
|
335
480
|
try:
|
|
336
481
|
portal._terminate_child_process()
|
|
337
|
-
except:
|
|
482
|
+
except Exception:
|
|
483
|
+
# Best-effort cleanup; ignore errors during shutdown.
|
|
338
484
|
pass
|
|
@@ -1,2 +1,13 @@
|
|
|
1
|
+
"""Top-level, user-facing API shortcuts.
|
|
2
|
+
|
|
3
|
+
This subpackage exposes the easiest entry points for application authors,
|
|
4
|
+
primarily constructors and helpers to obtain a portal and interact with it.
|
|
5
|
+
|
|
6
|
+
Exports:
|
|
7
|
+
top_level_API: Functions that construct and return a portal (e.g., get_portal).
|
|
8
|
+
default_local_portal: Defaults and helpers for creating a local portal
|
|
9
|
+
when not explicitly created/provided by an application that uses Pythagoras.
|
|
10
|
+
"""
|
|
11
|
+
|
|
1
12
|
from .top_level_API import *
|
|
2
13
|
from .default_local_portal import *
|
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
"""Signatures and conversion utilities.
|
|
2
|
+
|
|
3
|
+
This subpackage provides helpers for generating stable identifiers and
|
|
4
|
+
converting between common textual/byte representations used across
|
|
5
|
+
Pythagoras. It re-exports frequently used helpers for convenience.
|
|
6
|
+
|
|
7
|
+
The modules exposed here are intentionally lightweight and side-effect free
|
|
8
|
+
so they can be used in hashing and address computations.
|
|
9
|
+
|
|
10
|
+
Exports:
|
|
11
|
+
base_16_32_convertors: Base-16/32 encoding and decoding helpers.
|
|
12
|
+
current_date_gmt_str: Utilities to format current date/time in GMT.
|
|
13
|
+
hash_signatures: Functions to compute content hash/signature strings.
|
|
14
|
+
node_signature: Functions to derive signatures for the current node.
|
|
15
|
+
random_signatures: Helpers to generate random, collision-resistant IDs.
|
|
16
|
+
"""
|
|
17
|
+
|
|
1
18
|
from .base_16_32_convertors import *
|
|
2
19
|
from .current_date_gmt_str import *
|
|
3
20
|
from .hash_signatures import *
|
|
@@ -1,48 +1,83 @@
|
|
|
1
|
+
from typing import Final
|
|
1
2
|
|
|
2
|
-
|
|
3
|
-
|
|
3
|
+
_BASE32_ALPHABET: Final[str] = '0123456789abcdefghijklmnopqrstuv'
|
|
4
|
+
_BASE32_ALPHABET_MAP: Final[dict[str, int]] = {
|
|
5
|
+
char:index for index,char in enumerate(_BASE32_ALPHABET)}
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
def convert_base16_to_base32(hexdigest: str) -> str:
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
+
"""Convert a hexadecimal (base16) string to this project's base32.
|
|
10
|
+
|
|
11
|
+
Args:
|
|
12
|
+
hexdigest (str): A hexadecimal string (case-insensitive). Must be
|
|
13
|
+
non-empty ("" raises ValueError); use "0" to represent zero.
|
|
9
14
|
|
|
10
|
-
:
|
|
11
|
-
|
|
15
|
+
Returns:
|
|
16
|
+
str: The corresponding value encoded with the custom base32 alphabet
|
|
17
|
+
(digits 0-9 then letters a-v).
|
|
18
|
+
|
|
19
|
+
Examples:
|
|
20
|
+
>>> convert_base16_to_base32("ff")
|
|
21
|
+
'7v'
|
|
12
22
|
"""
|
|
13
23
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
24
|
+
try:
|
|
25
|
+
num = int(hexdigest, 16)
|
|
26
|
+
except ValueError as e:
|
|
27
|
+
raise ValueError(f"Invalid hexadecimal string: {hexdigest}") from e
|
|
28
|
+
|
|
17
29
|
base32_str = convert_int_to_base32(num)
|
|
18
30
|
|
|
19
31
|
return base32_str
|
|
20
32
|
|
|
21
33
|
|
|
22
34
|
def convert_int_to_base32(n: int) -> str:
|
|
23
|
-
"""
|
|
24
|
-
|
|
35
|
+
"""Convert a non-negative integer to Pythagoras' base32 string.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
n (int): Non-negative integer to encode.
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
str: The base32 representation.
|
|
25
42
|
|
|
26
|
-
:
|
|
27
|
-
|
|
43
|
+
Raises:
|
|
44
|
+
ValueError: If n is negative.
|
|
28
45
|
"""
|
|
46
|
+
if n < 0:
|
|
47
|
+
raise ValueError("n must be non-negative")
|
|
48
|
+
|
|
49
|
+
if n == 0:
|
|
50
|
+
return "0"
|
|
51
|
+
|
|
29
52
|
base32_str = ''
|
|
30
53
|
while n > 0:
|
|
31
|
-
base32_str =
|
|
54
|
+
base32_str = _BASE32_ALPHABET[n & 31] + base32_str
|
|
32
55
|
n >>= 5
|
|
33
56
|
|
|
34
57
|
return base32_str
|
|
35
58
|
|
|
36
59
|
def convert_base_32_to_int(digest: str) -> int:
|
|
37
|
-
"""
|
|
38
|
-
Convert a base 32 string to an integer.
|
|
60
|
+
"""Convert a base32 string (custom alphabet) to an integer.
|
|
39
61
|
|
|
40
|
-
:
|
|
41
|
-
|
|
62
|
+
Args:
|
|
63
|
+
digest (str): String encoded with Pythagoras' base32 alphabet.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
int: The decoded non-negative integer value.
|
|
67
|
+
|
|
68
|
+
Raises:
|
|
69
|
+
ValueError: If digest is empty or contains a character outside the supported
|
|
70
|
+
base32 alphabet (0-9, a-v).
|
|
42
71
|
"""
|
|
72
|
+
if not digest:
|
|
73
|
+
raise ValueError("Digest cannot be empty")
|
|
74
|
+
|
|
43
75
|
digest = digest.lower()
|
|
44
76
|
num = 0
|
|
45
|
-
|
|
46
|
-
|
|
77
|
+
try:
|
|
78
|
+
for char in digest:
|
|
79
|
+
num = num * 32 + _BASE32_ALPHABET_MAP[char]
|
|
80
|
+
except KeyError as e:
|
|
81
|
+
raise ValueError(f"Invalid character '{e.args[0]}' in base32 digest: {digest}") from e
|
|
47
82
|
return num
|
|
48
83
|
|
|
@@ -1,11 +1,26 @@
|
|
|
1
1
|
from datetime import datetime, timezone
|
|
2
|
+
from typing import Final
|
|
2
3
|
|
|
4
|
+
_MONTH_ABBREVIATIONS: Final[tuple[str, ...]] = (
|
|
5
|
+
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
|
6
|
+
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
|
|
3
7
|
|
|
4
|
-
|
|
5
|
-
|
|
8
|
+
|
|
9
|
+
def current_date_gmt_string() -> str:
|
|
10
|
+
"""Get the current UTC date as a compact string.
|
|
11
|
+
|
|
12
|
+
Produces an underscore-delimited UTC date string suitable for
|
|
13
|
+
stable file names and log records.
|
|
14
|
+
|
|
15
|
+
The format is: "YYYY_MMMonAbbrev_dd_utc" (e.g., "2024_12Dec_11_utc").
|
|
16
|
+
|
|
17
|
+
Returns:
|
|
18
|
+
str: The formatted UTC date string, for the current moment.
|
|
6
19
|
"""
|
|
7
20
|
|
|
8
21
|
utc_now = datetime.now(timezone.utc)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
22
|
+
month_abbrev = _MONTH_ABBREVIATIONS[utc_now.month - 1]
|
|
23
|
+
# locale-independent month abbreviation (fixed English table, not strftime)
|
|
24
|
+
result = (f"{utc_now.year}_{utc_now.month:02d}{month_abbrev}" +
|
|
25
|
+
f"_{utc_now.day:02d}_utc")
|
|
26
|
+
return result
|
|
@@ -1,33 +1,69 @@
|
|
|
1
1
|
import sys
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any, Final
|
|
3
3
|
|
|
4
4
|
import joblib.hashing
|
|
5
5
|
|
|
6
6
|
from .base_16_32_convertors import convert_base16_to_base32
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
_HASH_TYPE: Final[str] = "sha256"
|
|
10
|
+
_MAX_SIGNATURE_LENGTH: Final[int] = 22
|
|
11
11
|
|
|
12
12
|
def get_base16_hash_signature(x:Any) -> str:
|
|
13
|
-
"""
|
|
13
|
+
"""Compute a hexadecimal (base16) hash for an arbitrary Python object.
|
|
14
14
|
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
This function delegates to joblib's hashing utilities. If NumPy is
|
|
16
|
+
imported in the current process, it uses NumpyHasher for efficient and
|
|
17
|
+
stable hashing of NumPy arrays; otherwise it uses the generic Hasher.
|
|
18
|
+
|
|
19
|
+
Args:
|
|
20
|
+
x (Any): The object to hash. Must be picklable by joblib unless a
|
|
21
|
+
specialized routine (e.g., for NumPy arrays) is available.
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
str: A hexadecimal string digest computed with the configured
|
|
25
|
+
algorithm (sha256 by default).
|
|
26
|
+
|
|
27
|
+
Notes:
|
|
28
|
+
- joblib relies on pickle for most Python objects; ensure that custom
|
|
29
|
+
objects are picklable for stable results.
|
|
30
|
+
- The digest is deterministic for the same object content.
|
|
17
31
|
"""
|
|
18
32
|
if 'numpy' in sys.modules:
|
|
19
|
-
hasher = joblib.hashing.NumpyHasher(hash_name=
|
|
33
|
+
hasher = joblib.hashing.NumpyHasher(hash_name=_HASH_TYPE)
|
|
20
34
|
else:
|
|
21
|
-
hasher = joblib.hashing.Hasher(hash_name=
|
|
35
|
+
hasher = joblib.hashing.Hasher(hash_name=_HASH_TYPE)
|
|
22
36
|
hash_signature = hasher.hash(x)
|
|
23
37
|
return str(hash_signature)
|
|
24
38
|
|
|
25
39
|
def get_base32_hash_signature(x:Any) -> str:
|
|
26
|
-
"""
|
|
40
|
+
"""Compute a base32-encoded hash for an arbitrary Python object.
|
|
41
|
+
|
|
42
|
+
Internally computes a hexadecimal digest first, then converts it to the
|
|
43
|
+
custom base32 alphabet used by Pythagoras.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
x (Any): The object to hash.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
str: The full-length base32 digest string (not truncated).
|
|
50
|
+
"""
|
|
27
51
|
base_16_hash = get_base16_hash_signature(x)
|
|
28
52
|
base_32_hash = convert_base16_to_base32(base_16_hash)
|
|
29
53
|
return base_32_hash
|
|
30
54
|
|
|
31
55
|
def get_hash_signature(x:Any) -> str:
|
|
32
|
-
|
|
56
|
+
"""Compute a short, URL-safe hash signature for an object.
|
|
57
|
+
|
|
58
|
+
This is a convenience wrapper that returns the first _MAX_SIGNATURE_LENGTH (22)
|
|
59
|
+
characters of the base32 digest, which is typically sufficient for
|
|
60
|
+
collision-resistant identifiers in logs and filenames.
|
|
61
|
+
|
|
62
|
+
Args:
|
|
63
|
+
x (Any): The object to hash.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
str: The truncated base32 digest string.
|
|
67
|
+
"""
|
|
68
|
+
return get_base32_hash_signature(x)[:_MAX_SIGNATURE_LENGTH]
|
|
33
69
|
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
import
|
|
1
|
+
import getpass
|
|
2
|
+
import platform
|
|
3
|
+
import uuid
|
|
2
4
|
from functools import cache
|
|
3
5
|
|
|
4
6
|
from .hash_signatures import get_hash_signature
|
|
@@ -6,15 +8,28 @@ from .hash_signatures import get_hash_signature
|
|
|
6
8
|
|
|
7
9
|
@cache
|
|
8
10
|
def get_node_signature() -> str:
|
|
9
|
-
"""
|
|
11
|
+
"""Return a stable signature for the current computing node and user.
|
|
12
|
+
|
|
13
|
+
The signature is derived from a concatenation of multiple system- and
|
|
14
|
+
user-specific attributes (MAC address, OS info, CPU, username) and then
|
|
15
|
+
hashed using Pythagoras' short base32 digest. The result is intended to
|
|
16
|
+
uniquely identify the node within logs and distributed systems.
|
|
17
|
+
|
|
18
|
+
Caching:
|
|
19
|
+
The result is cached for the lifetime of the process using
|
|
20
|
+
functools.cache, as the underlying attributes are not expected to
|
|
21
|
+
change while the process is running.
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
str: A short base32 signature string representing this node.
|
|
10
25
|
"""
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
return get_hash_signature(
|
|
26
|
+
id_parts = [
|
|
27
|
+
str(uuid.getnode()),
|
|
28
|
+
platform.system(),
|
|
29
|
+
platform.release(),
|
|
30
|
+
platform.version(),
|
|
31
|
+
platform.machine(),
|
|
32
|
+
platform.processor(),
|
|
33
|
+
getpass.getuser(),
|
|
34
|
+
]
|
|
35
|
+
return get_hash_signature("".join(id_parts))
|
|
@@ -1,11 +1,22 @@
|
|
|
1
1
|
import uuid
|
|
2
|
+
from typing import Final
|
|
2
3
|
|
|
3
4
|
from .base_16_32_convertors import convert_int_to_base32
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
_MAX_SIGNATURE_LENGTH: Final[int] = 22
|
|
6
7
|
|
|
7
8
|
def get_random_signature() -> str:
|
|
8
|
-
|
|
9
|
+
"""Generate a short, random base32 signature string.
|
|
10
|
+
|
|
11
|
+
The randomness is sourced from uuid.uuid4(), which uses a cryptographically
|
|
12
|
+
strong RNG provided by the OS. The resulting large integer is encoded with
|
|
13
|
+
Pythagoras' base32 alphabet and truncated to _MAX_SIGNATURE_LENGTH (22)
|
|
14
|
+
characters.
|
|
15
|
+
|
|
16
|
+
Returns:
|
|
17
|
+
str: A random, URL-safe base32 string of length up to
|
|
18
|
+
_MAX_SIGNATURE_LENGTH (22 characters).
|
|
19
|
+
"""
|
|
9
20
|
random_int = uuid.uuid4().int
|
|
10
21
|
random_str = convert_int_to_base32(random_int)
|
|
11
|
-
return random_str[:
|
|
22
|
+
return random_str[:_MAX_SIGNATURE_LENGTH]
|