pythagoras 0.24.4__py3-none-any.whl → 0.24.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. pythagoras/_060_autonomous_code_portals/autonomous_decorators.py +31 -4
  2. pythagoras/_060_autonomous_code_portals/autonomous_portal_core_classes.py +94 -14
  3. pythagoras/_060_autonomous_code_portals/names_usage_analyzer.py +133 -4
  4. pythagoras/_070_protected_code_portals/basic_pre_validators.py +130 -15
  5. pythagoras/_070_protected_code_portals/fn_arg_names_checker.py +20 -18
  6. pythagoras/_070_protected_code_portals/list_flattener.py +45 -7
  7. pythagoras/_070_protected_code_portals/package_manager.py +99 -24
  8. pythagoras/_070_protected_code_portals/protected_decorators.py +59 -1
  9. pythagoras/_070_protected_code_portals/protected_portal_core_classes.py +239 -4
  10. pythagoras/_070_protected_code_portals/system_utils.py +85 -12
  11. pythagoras/_070_protected_code_portals/validation_succesful_const.py +12 -7
  12. pythagoras/_080_pure_code_portals/pure_core_classes.py +178 -25
  13. pythagoras/_080_pure_code_portals/pure_decorator.py +37 -0
  14. pythagoras/_080_pure_code_portals/recursion_pre_validator.py +39 -0
  15. pythagoras/_090_swarming_portals/output_suppressor.py +32 -3
  16. pythagoras/_090_swarming_portals/swarming_portals.py +165 -19
  17. pythagoras/_100_top_level_API/__init__.py +11 -0
  18. pythagoras/_800_signatures_and_converters/__init__.py +17 -0
  19. pythagoras/_800_signatures_and_converters/base_16_32_convertors.py +55 -20
  20. pythagoras/_800_signatures_and_converters/current_date_gmt_str.py +20 -5
  21. pythagoras/_800_signatures_and_converters/hash_signatures.py +46 -10
  22. pythagoras/_800_signatures_and_converters/node_signature.py +27 -12
  23. pythagoras/_800_signatures_and_converters/random_signatures.py +14 -3
  24. pythagoras/core/__init__.py +54 -0
  25. {pythagoras-0.24.4.dist-info → pythagoras-0.24.7.dist-info}/METADATA +1 -1
  26. {pythagoras-0.24.4.dist-info → pythagoras-0.24.7.dist-info}/RECORD +27 -27
  27. {pythagoras-0.24.4.dist-info → pythagoras-0.24.7.dist-info}/WHEEL +0 -0
@@ -45,6 +45,29 @@ BACKGROUND_WORKERS_TXT = "Background workers"
45
45
 
46
46
 
47
47
  class SwarmingPortal(PureCodePortal):
48
+ """Portal for asynchronous swarming execution of pure functions.
49
+
50
+ The SwarmingPortal distributes execution of registered pure functions
51
+ across background worker processes (potentially on different machines)
52
+ in a best-effort, eventually-executed manner. It manages a child process
53
+ that maintains a pool of background workers and randomly dispatches
54
+ execution requests. No strong guarantees are provided regarding which
55
+ worker runs a task, how many times it runs, or when it will run; only that
56
+ eligible requests will eventually be executed at least once.
57
+
58
+ Notes:
59
+ - Parent/child: The portal instance created by user code is the parent.
60
+ It may spawn a separate child process whose responsibility is to keep
61
+ background workers alive. Child processes created by the portal do not
62
+ spawn further workers (max_n_workers is forced to 0 in children).
63
+ - Resources: The effective number of workers is automatically bounded by
64
+ currently available CPU cores and RAM at runtime.
65
+ - Environment logging: Runtime environment summary is stored under compute_nodes
66
+ for debugging purposes to help describe where the parent process is running.
67
+
68
+ See also: OutputSuppressor for silencing worker output, PureCodePortal for
69
+ the base API and lifecycle management, and tests in tests/_090_swarming_portals.
70
+ """
48
71
  _compute_nodes: OverlappingMultiDict | None
49
72
  _node_id: str | None
50
73
 
@@ -62,6 +85,31 @@ class SwarmingPortal(PureCodePortal):
62
85
  , parent_process_id: int | None = None
63
86
  , parent_process_start_time: float | None = None
64
87
  ):
88
+ """Initialize a swarming portal.
89
+
90
+ Args:
91
+ root_dict: Persistent dictionary or path used to back the portal's
92
+ state. If None, a default dictionary will be used.
93
+ p_consistency_checks: Probability or Joker controlling internal
94
+ consistency checks. Passed to PureCodePortal.
95
+ excessive_logging: Whether to enable verbose logging. Passed to
96
+ PureCodePortal.
97
+ max_n_workers: Desired maximum number of background workers for the
98
+ parent process. Children must pass 0 here.
99
+ The effective value may be reduced based on available CPUs and
100
+ RAM at runtime.
101
+ parent_process_id: ID of the parent process when this portal is
102
+ constructed inside a child process. For parent portals, it
103
+ must be None.
104
+ parent_process_start_time: Start time of the parent process, used to
105
+ detect PID reuse. For parents, it must be None.
106
+
107
+ Notes:
108
+ - When parent_process_id or parent_process_start_time is provided,
109
+ both must be provided and max_n_workers must be 0.
110
+ - Initializes compute_nodes storage and captures a unique
111
+ node signature for this runtime.
112
+ """
65
113
  PureCodePortal.__init__(self
66
114
  , root_dict=root_dict
67
115
  , p_consistency_checks=p_consistency_checks
@@ -95,6 +143,13 @@ class SwarmingPortal(PureCodePortal):
95
143
 
96
144
 
97
145
  def get_params(self) -> dict:
146
+ """Return portal parameters including parent process metadata.
147
+
148
+ Returns:
149
+ dict: Sorted dictionary of portal parameters inherited from
150
+ PureCodePortal plus parent_process_id and
151
+ parent_process_start_time.
152
+ """
98
153
  params = super().get_params()
99
154
  params["parent_process_id"] = self._parent_process_id
100
155
  params["parent_process_start_time"] = self._parent_process_start_time
@@ -104,13 +159,26 @@ class SwarmingPortal(PureCodePortal):
104
159
 
105
160
  @property
106
161
  def is_parent(self) -> bool:
107
- """Check if this portal is the parent process."""
162
+ """Whether this portal instance represents the parent process.
163
+
164
+ Returns:
165
+ bool: True if this process created the portal instance on the node
166
+ (no parent metadata set); False if this is a child portal
167
+ instantiated inside the background worker process.
168
+ """
108
169
  if self._parent_process_id is None:
109
170
  return True
110
171
  return False
111
172
 
112
173
 
113
174
  def _post_init_hook(self) -> None:
175
+ """Lifecycle hook invoked after initialization.
176
+
177
+ Registers a global atexit handler once per process to terminate any
178
+ child processes spawned by portals. For parent portals with a positive
179
+ max_n_workers, spawns a child process that manages the pool of
180
+ background worker processes.
181
+ """
114
182
  super()._post_init_hook()
115
183
 
116
184
  if not SwarmingPortal._atexit_is_registered:
@@ -132,7 +200,12 @@ class SwarmingPortal(PureCodePortal):
132
200
 
133
201
 
134
202
  def _terminate_child_process(self):
135
- """Terminate the child process if it is running."""
203
+ """Terminate the child process if it is running.
204
+
205
+ This method is idempotent and safe to call from atexit handlers.
206
+ It attempts a graceful termination, then escalates to kill if the
207
+ child does not stop within a short grace period.
208
+ """
136
209
  if self._child_process is not None:
137
210
  if self._child_process.is_alive():
138
211
  self._child_process.terminate()
@@ -145,14 +218,28 @@ class SwarmingPortal(PureCodePortal):
145
218
 
146
219
  @property
147
220
  def _execution_environment_address(self) -> list[str]: #TODO: move to Logs
148
- """Get the address of the execution environment in the compute nodes."""
221
+ """Address path for storing execution environment summary.
222
+
223
+ Returns:
224
+ list[str]: A hierarchical key used in the compute_nodes storage of
225
+ the form [node_id, parent_pid_and_start, "execution_environment"].
226
+ """
149
227
  s = str(self._parent_process_id) + "_" + str(self._parent_process_start_time)
150
228
  return [self._node_id, s, "execution_environment"]
151
229
 
152
230
 
153
231
  @property
154
232
  def max_n_workers(self) -> int:
155
- """Get the maximum number of background workers"""
233
+ """Effective cap on background worker processes.
234
+
235
+ The configured max_n_workers value is adjusted down by runtime
236
+ resource availability: currently unused CPU cores and available RAM.
237
+ The result is cached in RAM for the life of the portal process
238
+ until the cache is invalidated.
239
+
240
+ Returns:
241
+ int: Effective maximum number of worker processes to use.
242
+ """
156
243
  if not hasattr(self, "_max_n_workers_cache"):
157
244
  n = self._get_config_setting("max_n_workers")
158
245
  if n in (None, KEEP_CURRENT):
@@ -166,7 +253,13 @@ class SwarmingPortal(PureCodePortal):
166
253
 
167
254
 
168
255
  def describe(self) -> pd.DataFrame:
169
- """Get a DataFrame describing the portal's current state"""
256
+ """Describe the current portal configuration and runtime.
257
+
258
+ Returns:
259
+ pandas.DataFrame: A table combining PureCodePortal description with
260
+ swarming-specific characteristics such as effective number of
261
+ background workers.
262
+ """
170
263
  all_params = [super().describe()]
171
264
  all_params.append(_describe_runtime_characteristic(
172
265
  BACKGROUND_WORKERS_TXT, self.max_n_workers))
@@ -177,6 +270,12 @@ class SwarmingPortal(PureCodePortal):
177
270
 
178
271
 
179
272
  def parent_runtime_is_live(self):
273
+ """Check that the recorded parent process is still alive.
274
+
275
+ Returns:
276
+ bool: True if the parent PID exists and its start time matches the
277
+ recorded start time (to avoid PID reuse issues); False otherwise.
278
+ """
180
279
  if not process_is_active(self._parent_process_id):
181
280
  return False
182
281
  if get_process_start_time(self._parent_process_id) != self._parent_process_start_time:
@@ -185,6 +284,12 @@ class SwarmingPortal(PureCodePortal):
185
284
 
186
285
 
187
286
  def _clear(self):
287
+ """Release resources and clear internal state.
288
+
289
+ Side Effects:
290
+ Terminates the child process if present and clears references to
291
+ compute node metadata before delegating to the base implementation.
292
+ """
188
293
  self._compute_nodes = None
189
294
  self._terminate_child_process()
190
295
  super()._clear()
@@ -195,18 +300,28 @@ class SwarmingPortal(PureCodePortal):
195
300
  , min_delay:float = 0.02
196
301
  , max_delay:float = 0.22
197
302
  ) -> None:
198
- """Randomly delay execution by a given probability."""
303
+ """Introduce randomized backoff to reduce contention.
304
+
305
+ With probability p, sleeps for a random delay uniformly drawn from
306
+ [min_delay, max_delay]. Uses the portal's entropy_infuser to remain
307
+ deterministic when seeded in tests.
308
+
309
+ Args:
310
+ p: Probability of applying the delay.
311
+ min_delay: Minimum sleep duration in seconds.
312
+ max_delay: Maximum sleep duration in seconds.
313
+ """
199
314
  if self.entropy_infuser.uniform(0, 1) < p:
200
315
  delay = self.entropy_infuser.uniform(min_delay, max_delay)
201
316
  sleep(delay)
202
317
 
203
318
 
204
319
  def _invalidate_cache(self):
205
- """Invalidate the object's attribute cache.
320
+ """Drop cached computed attributes and delegate to base class.
206
321
 
207
- If the object's attribute named ATTR is cached,
208
- its cached value will be stored in an attribute named _ATTR_cache
209
- This method should delete all such attributes.
322
+ This implementation removes any attributes used as local caches by this
323
+ class (currently _max_n_workers_cache) and then calls the base
324
+ implementation to allow it to clear its own caches.
210
325
  """
211
326
  if hasattr(self, "_max_n_workers_cache"):
212
327
  del self._max_n_workers_cache
@@ -214,7 +329,19 @@ class SwarmingPortal(PureCodePortal):
214
329
 
215
330
 
216
331
  def _launch_many_background_workers(portal_init_jsparams:JsonSerializedObject) -> None:
217
- """Launch many background worker processes."""
332
+ """Spawn and maintain a pool of background worker processes.
333
+
334
+ This function is executed inside a dedicated child process created by the
335
+ parent portal. It spawns up to max_n_workers worker processes, restarts
336
+ any that exit unexpectedly, and records an execution environment summary
337
+ under the portal's compute_nodes.
338
+
339
+ Args:
340
+ portal_init_jsparams: Serialized initialization parameters for
341
+ reconstructing a SwarmingPortal. The parameters are adjusted to
342
+ indicate this is a child context (max_n_workers=0) and to record
343
+ the parent process metadata.
344
+ """
218
345
 
219
346
 
220
347
  n_workers_to_launch = access_jsparams(portal_init_jsparams
@@ -264,7 +391,16 @@ def _launch_many_background_workers(portal_init_jsparams:JsonSerializedObject) -
264
391
 
265
392
 
266
393
  def _background_worker(portal_init_jsparams:JsonSerializedObject) -> None:
267
- """Background worker that keeps processing random execution requests."""
394
+ """Worker loop that processes random execution requests serially.
395
+
396
+ Runs indefinitely until the parent process is detected as dead.
397
+ Within the loop, each individual request is handled in a subprocess to
398
+ isolate failures and to reduce the risk of state leakage.
399
+
400
+ Args:
401
+ portal_init_jsparams: Serialized initialization parameters for
402
+ reconstructing a SwarmingPortal in child context.
403
+ """
268
404
  portal = parameterizable.loadjs(portal_init_jsparams)
269
405
  assert isinstance(portal, SwarmingPortal)
270
406
  with portal:
@@ -282,9 +418,18 @@ def _background_worker(portal_init_jsparams:JsonSerializedObject) -> None:
282
418
 
283
419
 
284
420
  def _process_random_execution_request(portal_init_jsparams:JsonSerializedObject):
285
- """Process one random execution request."""
286
- # portal = parameterizable.get_object_from_portable_params(
287
- # portal_init_params)
421
+ """Process a single pending execution request, if any.
422
+
423
+ The function reconstructs a child-context portal, selects a random pending
424
+ execution request (if available), and validates readiness. If validation
425
+ yields a PureFnCallSignature, it continues with it; otherwise, it executes
426
+ the request when validation returns VALIDATION_SUCCESSFUL. Output during
427
+ execution is suppressed by the caller to keep workers quiet.
428
+
429
+ Args:
430
+ portal_init_jsparams: Serialized initialization parameters for
431
+ reconstructing a SwarmingPortal in child context.
432
+ """
288
433
  portal_init_jsparams = update_jsparams(
289
434
  portal_init_jsparams, max_n_workers=0)
290
435
  portal = parameterizable.loadjs(portal_init_jsparams)
@@ -326,13 +471,14 @@ def _process_random_execution_request(portal_init_jsparams:JsonSerializedObject)
326
471
 
327
472
 
328
473
  def _terminate_all_portals_child_processes():
329
- """ Clean runtime id.
474
+ """Terminate child processes for all known portals.
330
475
 
331
- This function is called at the end of the program execution.
332
- It deletes the principal_runtime_id record from all portals.
476
+ Registered with atexit the first time a SwarmingPortal is initialized.
477
+ Ensures that any child processes are terminated to avoid orphaned workers.
333
478
  """
334
479
  for portal in get_all_known_portals():
335
480
  try:
336
481
  portal._terminate_child_process()
337
- except:
482
+ except Exception:
483
+ # Best-effort cleanup; ignore errors during shutdown.
338
484
  pass
@@ -1,2 +1,13 @@
1
+ """Top-level, user-facing API shortcuts.
2
+
3
+ This subpackage exposes the easiest entry points for application authors,
4
+ primarily constructors and helpers to obtain a portal and interact with it.
5
+
6
+ Exports:
7
+ top_level_API: Functions that construct and return a portal (e.g., get_portal).
8
+ default_local_portal: Defaults and helpers for creating a local portal
9
+ when not explicitly created/provided by an application that uses Pythagoras.
10
+ """
11
+
1
12
  from .top_level_API import *
2
13
  from .default_local_portal import *
@@ -1,3 +1,20 @@
1
+ """Signatures and conversion utilities.
2
+
3
+ This subpackage provides helpers for generating stable identifiers and
4
+ converting between common textual/byte representations used across
5
+ Pythagoras. It re-exports frequently used helpers for convenience.
6
+
7
+ The modules exposed here are intentionally lightweight and side-effect free
8
+ so they can be used in hashing and address computations.
9
+
10
+ Exports:
11
+ base_16_32_convertors: Base-16/32 encoding and decoding helpers.
12
+ current_date_gmt_str: Utilities to format current date/time in GMT.
13
+ hash_signatures: Functions to compute content hash/signature strings.
14
+ node_signature: Functions to derive signatures for the current node.
15
+ random_signatures: Helpers to generate random, collision-resistant IDs.
16
+ """
17
+
1
18
  from .base_16_32_convertors import *
2
19
  from .current_date_gmt_str import *
3
20
  from .hash_signatures import *
@@ -1,48 +1,83 @@
1
+ from typing import Final
1
2
 
2
- base32_alphabet = '0123456789abcdefghijklmnopqrstuv'
3
- base32_alphabet_map = {char:index for index,char in enumerate(base32_alphabet)}
3
+ _BASE32_ALPHABET: Final[str] = '0123456789abcdefghijklmnopqrstuv'
4
+ _BASE32_ALPHABET_MAP: Final[dict[str, int]] = {
5
+ char:index for index,char in enumerate(_BASE32_ALPHABET)}
4
6
 
5
7
 
6
8
  def convert_base16_to_base32(hexdigest: str) -> str:
7
- """
8
- Convert a hexadecimal (base 16) string to a base 32 string.
9
+ """Convert a hexadecimal (base16) string to this project's base32.
10
+
11
+ Args:
12
+ hexdigest (str): A hexadecimal string (case-insensitive). Must be
13
+ non-empty (an empty string raises ValueError); use "0" for zero.
9
14
 
10
- :name hexdigest: A string representing a hexadecimal number.
11
- :return: A string representing the equivalent number in base 32.
15
+ Returns:
16
+ str: The corresponding value encoded with the custom base32 alphabet
17
+ (digits 0-9 then letters a-v).
18
+
19
+ Examples:
20
+ >>> convert_base16_to_base32("ff")
21
+ '7v'
12
22
  """
13
23
 
14
- if not hexdigest:
15
- return '0'
16
- num = int(hexdigest,16)
24
+ try:
25
+ num = int(hexdigest, 16)
26
+ except ValueError as e:
27
+ raise ValueError(f"Invalid hexadecimal string: {hexdigest}") from e
28
+
17
29
  base32_str = convert_int_to_base32(num)
18
30
 
19
31
  return base32_str
20
32
 
21
33
 
22
34
  def convert_int_to_base32(n: int) -> str:
23
- """
24
- Convert an integer to a base 32 string.
35
+ """Convert a non-negative integer to Pythagoras' base32 string.
36
+
37
+ Args:
38
+ n (int): Non-negative integer to encode.
39
+
40
+ Returns:
41
+ str: The base32 representation.
25
42
 
26
- :name n: An integer.
27
- :return: A string representing the equivalent number in base 32.
43
+ Raises:
44
+ ValueError: If n is negative.
28
45
  """
46
+ if n < 0:
47
+ raise ValueError("n must be non-negative")
48
+
49
+ if n == 0:
50
+ return "0"
51
+
29
52
  base32_str = ''
30
53
  while n > 0:
31
- base32_str = base32_alphabet[n & 31] + base32_str
54
+ base32_str = _BASE32_ALPHABET[n & 31] + base32_str
32
55
  n >>= 5
33
56
 
34
57
  return base32_str
35
58
 
36
59
  def convert_base_32_to_int(digest: str) -> int:
37
- """
38
- Convert a base 32 string to an integer.
60
+ """Convert a base32 string (custom alphabet) to an integer.
39
61
 
40
- :name digest: A string representing a number in base 32.
41
- :return: An integer representing the equivalent number.
62
+ Args:
63
+ digest (str): String encoded with Pythagoras' base32 alphabet.
64
+
65
+ Returns:
66
+ int: The decoded non-negative integer value.
67
+
68
+ Raises:
69
+ ValueError: If digest is empty or contains a character outside the
70
+ supported base32 alphabet (0-9, a-v).
42
71
  """
72
+ if not digest:
73
+ raise ValueError("Digest cannot be empty")
74
+
43
75
  digest = digest.lower()
44
76
  num = 0
45
- for char in digest:
46
- num = num * 32 + base32_alphabet_map[char]
77
+ try:
78
+ for char in digest:
79
+ num = num * 32 + _BASE32_ALPHABET_MAP[char]
80
+ except KeyError as e:
81
+ raise ValueError(f"Invalid character '{e.args[0]}' in base32 digest: {digest}") from e
47
82
  return num
48
83
 
@@ -1,11 +1,26 @@
1
1
  from datetime import datetime, timezone
2
+ from typing import Final
2
3
 
4
+ _MONTH_ABBREVIATIONS: Final[tuple[str, ...]] = (
5
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
6
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
3
7
 
4
- def current_date_gmt_string():
5
- """ Return the current date and time in the format '2024_12Jan_22_utc'
8
+
9
+ def current_date_gmt_string() -> str:
10
+ """Get the current UTC date as a compact string.
11
+
12
+ Produces an underscore-delimited UTC date string suitable for
13
+ stable file names and log records.
14
+
15
+ The format is: "YYYY_MMMonAbbrev_dd_utc" (e.g., "2024_12Dec_11_utc").
16
+
17
+ Returns:
18
+ str: The formatted UTC date string, for the current moment.
6
19
  """
7
20
 
8
21
  utc_now = datetime.now(timezone.utc)
9
- date_str = utc_now.strftime("%Y_%m%b_%d_utc")
10
-
11
- return date_str
22
+ month_abbrev = _MONTH_ABBREVIATIONS[utc_now.month - 1]
23
+ # Use the fixed English table: strftime("%b") is locale-dependent.
24
+ result = (f"{utc_now.year}_{utc_now.month:02d}{month_abbrev}" +
25
+ f"_{utc_now.day:02d}_utc")
26
+ return result
@@ -1,33 +1,69 @@
1
1
  import sys
2
- from typing import Any
2
+ from typing import Any, Final
3
3
 
4
4
  import joblib.hashing
5
5
 
6
6
  from .base_16_32_convertors import convert_base16_to_base32
7
7
 
8
8
 
9
- hash_type: str = "sha256"
10
- max_signature_length: int = 22
9
+ _HASH_TYPE: Final[str] = "sha256"
10
+ _MAX_SIGNATURE_LENGTH: Final[int] = 22
11
11
 
12
12
  def get_base16_hash_signature(x:Any) -> str:
13
- """Return base16 hash signature of an object.
13
+ """Compute a hexadecimal (base16) hash for an arbitrary Python object.
14
14
 
15
- Uses joblib's Hasher (or NumpyHasher). It uses Pickle for serialization,
16
- except for NumPy arrays, which use optimized custom routines.
15
+ This function delegates to joblib's hashing utilities. If NumPy is
16
+ imported in the current process, it uses NumpyHasher for efficient and
17
+ stable hashing of NumPy arrays; otherwise it uses the generic Hasher.
18
+
19
+ Args:
20
+ x (Any): The object to hash. Must be picklable by joblib unless a
21
+ specialized routine (e.g., for NumPy arrays) is available.
22
+
23
+ Returns:
24
+ str: A hexadecimal string digest computed with the configured
25
+ algorithm (sha256 by default).
26
+
27
+ Notes:
28
+ - joblib relies on pickle for most Python objects; ensure that custom
29
+ objects are picklable for stable results.
30
+ - The digest is deterministic for the same object content.
17
31
  """
18
32
  if 'numpy' in sys.modules:
19
- hasher = joblib.hashing.NumpyHasher(hash_name=hash_type)
33
+ hasher = joblib.hashing.NumpyHasher(hash_name=_HASH_TYPE)
20
34
  else:
21
- hasher = joblib.hashing.Hasher(hash_name=hash_type)
35
+ hasher = joblib.hashing.Hasher(hash_name=_HASH_TYPE)
22
36
  hash_signature = hasher.hash(x)
23
37
  return str(hash_signature)
24
38
 
25
39
  def get_base32_hash_signature(x:Any) -> str:
26
- """Return base32 hash signature of an object"""
40
+ """Compute a base32-encoded hash for an arbitrary Python object.
41
+
42
+ Internally computes a hexadecimal digest first, then converts it to the
43
+ custom base32 alphabet used by Pythagoras.
44
+
45
+ Args:
46
+ x (Any): The object to hash.
47
+
48
+ Returns:
49
+ str: The full-length base32 digest string (not truncated).
50
+ """
27
51
  base_16_hash = get_base16_hash_signature(x)
28
52
  base_32_hash = convert_base16_to_base32(base_16_hash)
29
53
  return base_32_hash
30
54
 
31
55
  def get_hash_signature(x:Any) -> str:
32
- return get_base32_hash_signature(x)[:max_signature_length]
56
+ """Compute a short, URL-safe hash signature for an object.
57
+
58
+ This is a convenience wrapper that returns the first _MAX_SIGNATURE_LENGTH
59
+ characters of the base32 digest, which is typically sufficient for
60
+ collision-resistant identifiers in logs and filenames.
61
+
62
+ Args:
63
+ x (Any): The object to hash.
64
+
65
+ Returns:
66
+ str: The truncated base32 digest string.
67
+ """
68
+ return get_base32_hash_signature(x)[:_MAX_SIGNATURE_LENGTH]
33
69
 
@@ -1,4 +1,6 @@
1
- import uuid, platform, getpass
1
+ import getpass
2
+ import platform
3
+ import uuid
2
4
  from functools import cache
3
5
 
4
6
  from .hash_signatures import get_hash_signature
@@ -6,15 +8,28 @@ from .hash_signatures import get_hash_signature
6
8
 
7
9
  @cache
8
10
  def get_node_signature() -> str:
9
- """Returns a globally-unique signature for the current computing node.
11
+ """Return a stable signature for the current computing node and user.
12
+
13
+ The signature is derived from a concatenation of multiple system- and
14
+ user-specific attributes (MAC address, OS info, CPU, username) and then
15
+ hashed using Pythagoras' short base32 digest. The result is intended to
16
+ uniquely identify the node within logs and distributed systems.
17
+
18
+ Caching:
19
+ The result is cached for the lifetime of the process using
20
+ functools.cache, as the underlying attributes are not expected to
21
+ change while the process is running.
22
+
23
+ Returns:
24
+ str: A short base32 signature string representing this node.
10
25
  """
11
- mac = uuid.getnode()
12
- system = platform.system()
13
- release = platform.release()
14
- version = platform.version()
15
- machine = platform.machine()
16
- processor = platform.processor()
17
- user = getpass.getuser()
18
- id_string = f"{mac}{system}{release}{version}"
19
- id_string += f"{machine}{processor}{user}"
20
- return get_hash_signature(id_string)
26
+ id_parts = [
27
+ str(uuid.getnode()),
28
+ platform.system(),
29
+ platform.release(),
30
+ platform.version(),
31
+ platform.machine(),
32
+ platform.processor(),
33
+ getpass.getuser(),
34
+ ]
35
+ return get_hash_signature("".join(id_parts))
@@ -1,11 +1,22 @@
1
1
  import uuid
2
+ from typing import Final
2
3
 
3
4
  from .base_16_32_convertors import convert_int_to_base32
4
5
 
5
- max_signature_length: int = 22
6
+ _MAX_SIGNATURE_LENGTH: Final[int] = 22
6
7
 
7
8
  def get_random_signature() -> str:
8
- # random_int = uuid.uuid4().int + uuid.uuid1().int
9
+ """Generate a short, random base32 signature string.
10
+
11
+ The randomness is sourced from uuid.uuid4(), which uses a cryptographically
12
+ strong RNG provided by the OS. The resulting large integer is encoded with
13
+ Pythagoras' base32 alphabet and truncated to _MAX_SIGNATURE_LENGTH
14
+ characters.
15
+
16
+ Returns:
17
+ str: A random, URL-safe base32 string of length up to
18
+ _MAX_SIGNATURE_LENGTH (22) characters.
19
+ """
9
20
  random_int = uuid.uuid4().int
10
21
  random_str = convert_int_to_base32(random_int)
11
- return random_str[:max_signature_length]
22
+ return random_str[:_MAX_SIGNATURE_LENGTH]