PyPI - permid64 - Versions diffs - 0.1.0__tar.gz - Mend

permid64 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

permid64-0.1.0/PKG-INFO +267 -0
permid64-0.1.0/README.md +240 -0
permid64-0.1.0/permid64/__init__.py +17 -0
permid64-0.1.0/permid64/generator.py +122 -0
permid64-0.1.0/permid64/layout.py +54 -0
permid64-0.1.0/permid64/permutation.py +175 -0
permid64-0.1.0/permid64/source.py +105 -0
permid64-0.1.0/permid64/types.py +12 -0
permid64-0.1.0/permid64.egg-info/PKG-INFO +267 -0
permid64-0.1.0/permid64.egg-info/SOURCES.txt +18 -0
permid64-0.1.0/permid64.egg-info/dependency_links.txt +1 -0
permid64-0.1.0/permid64.egg-info/requires.txt +7 -0
permid64-0.1.0/permid64.egg-info/top_level.txt +1 -0
permid64-0.1.0/pyproject.toml +46 -0
permid64-0.1.0/setup.cfg +4 -0
permid64-0.1.0/tests/test_counter.py +101 -0
permid64-0.1.0/tests/test_id64_e2e.py +147 -0
permid64-0.1.0/tests/test_layout.py +52 -0
permid64-0.1.0/tests/test_permutation.py +86 -0
permid64-0.1.0/tests/test_properties.py +185 -0

permid64-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,267 @@
+Metadata-Version: 2.4
+Name: permid64
+Version: 0.1.0
+Summary: Clock-free, persistent, reversible-permutation 64-bit ID generation
+License: MIT
+Project-URL: Repository, https://github.com/erickh826/permid64
+Project-URL: Issues, https://github.com/erickh826/permid64/issues
+Keywords: id,uuid,unique-id,feistel,permutation,counter,clock-free
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: pytest-xdist; extra == "dev"
+Requires-Dist: hypothesis; extra == "dev"
+Requires-Dist: ruff; extra == "dev"
+Requires-Dist: mypy; extra == "dev"
+# permid64
+**Clock-free, persistent, reversible-permutation 64-bit ID generation.**
+> *Counter in, permutation out.*
+permid64 generates unique 64-bit integer IDs without relying on wall-clock time. It combines a crash-safe persistent counter with an invertible permutation to produce IDs that look random but carry recoverable metadata.
+```
+# Raw counter (leaks business volume at a glance)
+1001, 1002, 1003 ...
+# permid64 (shuffled surface, recoverable structure)
+12609531668580943872, 7349201938475629, 3847291038012847 ...
+# decode(12609531668580943872)  →  instance_id=42, sequence=0
+```
+---
+## What it is
+- A **clock-free** 64-bit ID generator — no timestamp, no NTP dependency
+- IDs are **unique** because the source is a monotonically increasing counter
+- IDs **look shuffled** because they pass through a reversible permutation
+- The permutation is **invertible** — `decode()` recovers the original metadata
+## What it is not
+- **Not a timestamp-based scheme** — there is no time component in the ID
+- **Not a UUID replacement for every scenario** — if you need a globally unique random token with no infrastructure at all, UUID v4 is simpler
+- **Not cryptographic encryption** — the permutation is an obfuscation layer, not authenticated encryption; do not use IDs as secrets or security tokens
+- **Not safe for multiple processes sharing one state file** — `PersistentCounterSource` is single-process only; concurrent writes from multiple processes to the same state file will cause duplicates (see [Limitations](#limitations))
+---
+## Design
+```
+seq  = source.next()                    # monotonic counter (persistent)
+raw  = layout.compose(instance_id, seq) # pack 16-bit shard + 48-bit seq
+id64 = permutation.forward(raw)         # obfuscate with invertible bijection
+```
+**Layout** — default 64-bit split:
+```
+[ instance_id : 16 bits ][ sequence : 48 bits ]
+```
+- Up to **65 536** independent shards (`instance_id` 0 – 65 535)
+- Up to **281 trillion** IDs per shard
+**Permutations** — both are bijections over `[0, 2^64)`:
+| Mode | Formula | Speed | Mixing |
+|---|---|---|---|
+| `multiplicative` | `f(x) = (a·x + b) mod 2^64` | ~500 M/s | Good |
+| `feistel` | 64-bit Feistel network | ~150 M/s | Excellent |
+**Persistence** — block reservation strategy:
+1. On startup, read high-water mark from state file.
+2. Reserve a block of N sequence numbers, write new high-water mark.
+3. Serve IDs from memory until block exhausted.
+4. If the process crashes, the unused block is lost (gap), but **no duplicate is ever issued**.
+---
+## Quick start
+```python
+from permid64 import Id64
+# Multiplicative (fastest)
+gen = Id64.multiplicative(
+    instance_id=42,
+    state_file="permid64.state",
+    block_size=4096,
+)
+uid = gen.next_u64()          # e.g. 12609531668580943872
+meta = gen.decode(uid)
+# DecodedId(raw=11821949021847552, instance_id=42, sequence=0)   (raw == 42 << 48)
+print(meta.instance_id, meta.sequence)
+# Feistel (better statistical mixing)
+gen2 = Id64.feistel(
+    instance_id=42,
+    state_file="permid64.state",
+    block_size=4096,
+    key=0xDEADBEEFCAFEBABE,
+    rounds=6,
+)
+```
+### Why decode() matters
+In production, when an anomalous ID appears in a log or alert, you can decode it instantly — no DB lookup needed:
+```python
+meta = gen.decode(12609531668580943872)
+print(f"Issued by instance {meta.instance_id}, sequence #{meta.sequence}")
+# Issued by instance 42, sequence #0
+```
+This makes incident tracing dramatically faster: you immediately know which shard issued the ID and its approximate position in the issuance history.
+### Assigning instance_id
+Assign each process or deployment unit a distinct `instance_id`. Common patterns:
+```python
+import os
+# From environment variable (works in Docker / K8s)
+instance_id = int(os.environ.get("INSTANCE_ID", "0"))
+# From K8s StatefulSet pod name (e.g. "worker-3" -> 3)
+import re
+pod_name = os.environ.get("POD_NAME", "worker-0")
+instance_id = int(re.search(r"(\d+)$", pod_name).group(1))
+```
+Each `instance_id` gets its own independent sequence space — no coordination needed between shards.
+---
+## Installation
+```bash
+pip install permid64          # once published to PyPI
+# or from source:
+pip install -e ".[dev]"
+```
+---
+## Running tests
+```bash
+pytest
+```
+Five acceptance criteria are checked:
+1. **Uniqueness** — 1 million IDs, zero duplicates
+2. **Invertibility** — `decode(next_u64())` recovers `instance_id` and `sequence`
+3. **Restart safety** — sequence never resets across process restarts
+4. **Gap tolerance** — crash causes a gap, never a duplicate
+5. **Thread safety** — concurrent generation remains unique
+---
+## Benchmark
+```bash
+python benchmarks/bench_id64.py
+```
+Sample output (Apple M2):
+```
+[Permutation comparison — block_size=4096]
+  multiplicative (default keys)          ~480,000,000 IDs/sec
+  feistel (6 rounds)                     ~140,000,000 IDs/sec
+  feistel (12 rounds)                     ~80,000,000 IDs/sec
+```
+---
+## Guarantees
+| Guarantee | Notes |
+|---|---|
+| No duplicate IDs within a shard | Strict |
+| No duplicates across restarts | Strict — state file must be on durable storage |
+| Decodable | Only with the same permutation key / params |
+| Gaps allowed | After a crash, some sequence numbers are skipped |
+| No global coordination | Each `instance_id` is fully independent |
+---
+## Limitations
+### Single-process only
+`PersistentCounterSource` is **not safe for concurrent use across multiple processes** sharing the same state file. A best-effort `fcntl.flock` advisory lock is applied during block reservation on POSIX systems, but this is not a hard guarantee — do not rely on it as a substitute for proper shard isolation.
+The correct pattern for multiple processes is to assign each a **distinct `instance_id`** and a **distinct state file**. Multi-process coordination via a central allocator is planned for v0.3.
+### Feistel is obfuscation, not encryption
+The Feistel permutation provides strong mixing and is reversible, but it is not a formally audited cryptographic primitive. Do not rely on it for access control, token authentication, or any security-sensitive use case.
+### instance_id must be assigned manually
+There is no automatic shard coordination. Assign `instance_id` values via config or environment variables and ensure they are unique across your deployment.
+### Sequence space is large but finite
+The default 48-bit sequence space supports ~281 trillion IDs per shard. This is enough for virtually all workloads, but it is not infinite.
+---
+## Architecture
+```
+permid64/
+  __init__.py       # public exports: Id64, DecodedId, Permutation64Protocol
+  generator.py      # Id64 façade
+  source.py         # PersistentCounterSource
+  layout.py         # Layout64 — pack/unpack 64-bit raw value
+  permutation.py    # MultiplyOddPermutation, Feistel64Permutation
+  types.py          # DecodedId dataclass
+tests/
+  test_counter.py
+  test_layout.py
+  test_permutation.py
+  test_id64_e2e.py   # the 5 MVP acceptance tests
+benchmarks/
+  bench_id64.py
+```
+---
+## Roadmap
+| Version | Focus |
+|---|---|
+| v0.1 (current) | Core: counter + permutation + decode |
+| v0.2 | `IdentityPermutation`, Base32/Base62 encoding, `Id64Config` |
+| v0.3 | Multi-process file locking, `ReservedBlockSource` (central allocator) |
+| v0.4+ | Rust/Go reference implementations, formal cross-language spec |
+---
+## License
+MIT

permid64-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,240 @@
+# permid64
+**Clock-free, persistent, reversible-permutation 64-bit ID generation.**
+> *Counter in, permutation out.*
+permid64 generates unique 64-bit integer IDs without relying on wall-clock time. It combines a crash-safe persistent counter with an invertible permutation to produce IDs that look random but carry recoverable metadata.
+```
+# Raw counter (leaks business volume at a glance)
+1001, 1002, 1003 ...
+# permid64 (shuffled surface, recoverable structure)
+12609531668580943872, 7349201938475629, 3847291038012847 ...
+# decode(12609531668580943872)  →  instance_id=42, sequence=0
+```
+---
+## What it is
+- A **clock-free** 64-bit ID generator — no timestamp, no NTP dependency
+- IDs are **unique** because the source is a monotonically increasing counter
+- IDs **look shuffled** because they pass through a reversible permutation
+- The permutation is **invertible** — `decode()` recovers the original metadata
+## What it is not
+- **Not a timestamp-based scheme** — there is no time component in the ID
+- **Not a UUID replacement for every scenario** — if you need a globally unique random token with no infrastructure at all, UUID v4 is simpler
+- **Not cryptographic encryption** — the permutation is an obfuscation layer, not authenticated encryption; do not use IDs as secrets or security tokens
+- **Not safe for multiple processes sharing one state file** — `PersistentCounterSource` is single-process only; concurrent writes from multiple processes to the same state file will cause duplicates (see [Limitations](#limitations))
+---
+## Design
+```
+seq  = source.next()                    # monotonic counter (persistent)
+raw  = layout.compose(instance_id, seq) # pack 16-bit shard + 48-bit seq
+id64 = permutation.forward(raw)         # obfuscate with invertible bijection
+```
+**Layout** — default 64-bit split:
+```
+[ instance_id : 16 bits ][ sequence : 48 bits ]
+```
+- Up to **65 536** independent shards (`instance_id` 0 – 65 535)
+- Up to **281 trillion** IDs per shard
+**Permutations** — both are bijections over `[0, 2^64)`:
+| Mode | Formula | Speed | Mixing |
+|---|---|---|---|
+| `multiplicative` | `f(x) = (a·x + b) mod 2^64` | ~500 M/s | Good |
+| `feistel` | 64-bit Feistel network | ~150 M/s | Excellent |
+**Persistence** — block reservation strategy:
+1. On startup, read high-water mark from state file.
+2. Reserve a block of N sequence numbers, write new high-water mark.
+3. Serve IDs from memory until block exhausted.
+4. If the process crashes, the unused block is lost (gap), but **no duplicate is ever issued**.
+---
+## Quick start
+```python
+from permid64 import Id64
+# Multiplicative (fastest)
+gen = Id64.multiplicative(
+    instance_id=42,
+    state_file="permid64.state",
+    block_size=4096,
+)
+uid = gen.next_u64()          # e.g. 12609531668580943872
+meta = gen.decode(uid)
+# DecodedId(raw=11821949021847552, instance_id=42, sequence=0)   (raw == 42 << 48)
+print(meta.instance_id, meta.sequence)
+# Feistel (better statistical mixing)
+gen2 = Id64.feistel(
+    instance_id=42,
+    state_file="permid64.state",
+    block_size=4096,
+    key=0xDEADBEEFCAFEBABE,
+    rounds=6,
+)
+```
+### Why decode() matters
+In production, when an anomalous ID appears in a log or alert, you can decode it instantly — no DB lookup needed:
+```python
+meta = gen.decode(12609531668580943872)
+print(f"Issued by instance {meta.instance_id}, sequence #{meta.sequence}")
+# Issued by instance 42, sequence #0
+```
+This makes incident tracing dramatically faster: you immediately know which shard issued the ID and its approximate position in the issuance history.
+### Assigning instance_id
+Assign each process or deployment unit a distinct `instance_id`. Common patterns:
+```python
+import os
+# From environment variable (works in Docker / K8s)
+instance_id = int(os.environ.get("INSTANCE_ID", "0"))
+# From K8s StatefulSet pod name (e.g. "worker-3" -> 3)
+import re
+pod_name = os.environ.get("POD_NAME", "worker-0")
+instance_id = int(re.search(r"(\d+)$", pod_name).group(1))
+```
+Each `instance_id` gets its own independent sequence space — no coordination needed between shards.
+---
+## Installation
+```bash
+pip install permid64          # once published to PyPI
+# or from source:
+pip install -e ".[dev]"
+```
+---
+## Running tests
+```bash
+pytest
+```
+Five acceptance criteria are checked:
+1. **Uniqueness** — 1 million IDs, zero duplicates
+2. **Invertibility** — `decode(next_u64())` recovers `instance_id` and `sequence`
+3. **Restart safety** — sequence never resets across process restarts
+4. **Gap tolerance** — crash causes a gap, never a duplicate
+5. **Thread safety** — concurrent generation remains unique
+---
+## Benchmark
+```bash
+python benchmarks/bench_id64.py
+```
+Sample output (Apple M2):
+```
+[Permutation comparison — block_size=4096]
+  multiplicative (default keys)          ~480,000,000 IDs/sec
+  feistel (6 rounds)                     ~140,000,000 IDs/sec
+  feistel (12 rounds)                     ~80,000,000 IDs/sec
+```
+---
+## Guarantees
+| Guarantee | Notes |
+|---|---|
+| No duplicate IDs within a shard | Strict |
+| No duplicates across restarts | Strict — state file must be on durable storage |
+| Decodable | Only with the same permutation key / params |
+| Gaps allowed | After a crash, some sequence numbers are skipped |
+| No global coordination | Each `instance_id` is fully independent |
+---
+## Limitations
+### Single-process only
+`PersistentCounterSource` is **not safe for concurrent use across multiple processes** sharing the same state file. A best-effort `fcntl.flock` advisory lock is applied during block reservation on POSIX systems, but this is not a hard guarantee — do not rely on it as a substitute for proper shard isolation.
+The correct pattern for multiple processes is to assign each a **distinct `instance_id`** and a **distinct state file**. Multi-process coordination via a central allocator is planned for v0.3.
+### Feistel is obfuscation, not encryption
+The Feistel permutation provides strong mixing and is reversible, but it is not a formally audited cryptographic primitive. Do not rely on it for access control, token authentication, or any security-sensitive use case.
+### instance_id must be assigned manually
+There is no automatic shard coordination. Assign `instance_id` values via config or environment variables and ensure they are unique across your deployment.
+### Sequence space is large but finite
+The default 48-bit sequence space supports ~281 trillion IDs per shard. This is enough for virtually all workloads, but it is not infinite.
+---
+## Architecture
+```
+permid64/
+  __init__.py       # public exports: Id64, DecodedId, Permutation64Protocol
+  generator.py      # Id64 façade
+  source.py         # PersistentCounterSource
+  layout.py         # Layout64 — pack/unpack 64-bit raw value
+  permutation.py    # MultiplyOddPermutation, Feistel64Permutation
+  types.py          # DecodedId dataclass
+tests/
+  test_counter.py
+  test_layout.py
+  test_permutation.py
+  test_id64_e2e.py   # the 5 MVP acceptance tests
+benchmarks/
+  bench_id64.py
+```
+---
+## Roadmap
+| Version | Focus |
+|---|---|
+| v0.1 (current) | Core: counter + permutation + decode |
+| v0.2 | `IdentityPermutation`, Base32/Base62 encoding, `Id64Config` |
+| v0.3 | Multi-process file locking, `ReservedBlockSource` (central allocator) |
+| v0.4+ | Rust/Go reference implementations, formal cross-language spec |
+---
+## License
+MIT

permid64-0.1.0/permid64/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""
+permid64 — Clock-free, persistent, obfuscated 64-bit ID generation.
+Public API
+----------
+    from permid64 import Id64, DecodedId
+    gen = Id64.multiplicative(instance_id=1, state_file="id64.state")
+    uid = gen.next_u64()
+    meta = gen.decode(uid)   # DecodedId(raw=..., instance_id=1, sequence=0)
+"""
+from .generator import Id64
+from .permutation import Permutation64Protocol
+from .types import DecodedId
+__all__ = ["Id64", "DecodedId", "Permutation64Protocol"]
+__version__ = "0.1.0"

permid64-0.1.0/permid64/generator.py ADDED Viewed

@@ -0,0 +1,122 @@
+"""
+generator.py — Id64: the main public façade.
+Usage
+-----
+    # Multiplicative (fast, simpler)
+    gen = Id64.multiplicative(
+        instance_id=42,
+        state_file="id64.state",
+        block_size=4096,
+        a=0x9E3779B185EBCA87,
+        b=0x6A09E667F3BCC909,
+    )
+    # Feistel (better statistical mixing)
+    gen = Id64.feistel(
+        instance_id=42,
+        state_file="id64.state",
+        block_size=4096,
+        key=0xDEADBEEFCAFEBABE,
+        rounds=6,
+    )
+    id_val = gen.next_u64()      # -> int  (unsigned 64-bit)
+    meta   = gen.decode(id_val)  # -> DecodedId(raw, instance_id, sequence)
+"""
+from __future__ import annotations
+from .layout import Layout64
+from .permutation import Feistel64Permutation, MultiplyOddPermutation, Permutation64Protocol
+from .source import PersistentCounterSource
+from .types import DecodedId
+class Id64:
+    """
+    Clock-free 64-bit ID generator.
+    Architecture
+    ------------
+    seq  = source.next()                    # monotonic counter (persistent)
+    raw  = layout.compose(instance_id, seq) # pack bits
+    id64 = permutation.forward(raw)         # obfuscate
+    Decode
+    ------
+    raw  = permutation.inverse(id64)
+    meta = layout.decompose(raw)
+    """
+    def __init__(
+        self,
+        instance_id: int,
+        source: PersistentCounterSource,
+        permutation: Permutation64Protocol,
+        layout: Layout64 | None = None,
+    ) -> None:
+        self.instance_id = instance_id
+        self.source = source
+        self.permutation = permutation
+        self.layout = layout or Layout64()
+    # ------------------------------------------------------------------
+    # Factory constructors
+    # ------------------------------------------------------------------
+    @classmethod
+    def multiplicative(
+        cls,
+        instance_id: int,
+        state_file: str,
+        block_size: int = 4096,
+        a: int = 0x9E3779B185EBCA87,
+        b: int = 0x6A09E667F3BCC909,
+    ) -> "Id64":
+        """
+        Create a generator backed by a multiply-odd (affine) permutation.
+        ``a`` defaults to the 64-bit golden-ratio constant; ``b`` adds a
+        second independent mixing constant.  Both can be overridden.
+        """
+        return cls(
+            instance_id=instance_id,
+            source=PersistentCounterSource(state_file, block_size),
+            permutation=MultiplyOddPermutation(a=a, b=b),
+        )
+    @classmethod
+    def feistel(
+        cls,
+        instance_id: int,
+        state_file: str,
+        block_size: int = 4096,
+        key: int = 0xDEADBEEFCAFEBABE,
+        rounds: int = 6,
+    ) -> "Id64":
+        """
+        Create a generator backed by a 64-bit Feistel-network permutation.
+        ``key`` is a 64-bit seed from which round keys are derived.
+        ``rounds`` defaults to 6 (good mixing / speed balance).
+        """
+        return cls(
+            instance_id=instance_id,
+            source=PersistentCounterSource(state_file, block_size),
+            permutation=Feistel64Permutation(key=key, rounds=rounds),
+        )
+    # ------------------------------------------------------------------
+    # Core API
+    # ------------------------------------------------------------------
+    def next_u64(self) -> int:
+        """Return the next unique, obfuscated 64-bit ID."""
+        seq = self.source.next()
+        raw = self.layout.compose(self.instance_id, seq)
+        return self.permutation.forward(raw)
+    def decode(self, id64: int) -> DecodedId:
+        """Reverse a previously generated ID back to its metadata."""
+        raw = self.permutation.inverse(id64)
+        return self.layout.decompose(raw)

permid64-0.1.0/permid64/layout.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""
+layout.py — Pack / unpack instance_id + sequence into a single 64-bit integer.
+Default split: 16 bits for instance_id  (up to 65 536 shards, ids 0..65 535)
+               48 bits for sequence      (up to 281 trillion IDs per shard)
+"""
+from .types import DecodedId
+MASK64 = 0xFFFFFFFFFFFFFFFF
+class Layout64:
+    """
+    Bit-layout for the 64-bit raw value.
+    instance_id occupies the top `instance_bits` bits.
+    sequence    occupies the bottom `sequence_bits` bits.
+    """
+    def __init__(self, instance_bits: int = 16, sequence_bits: int = 48) -> None:
+        if instance_bits + sequence_bits != 64:
+            raise ValueError(
+                f"instance_bits ({instance_bits}) + sequence_bits ({sequence_bits}) must equal 64"
+            )
+        self.instance_bits = instance_bits
+        self.sequence_bits = sequence_bits
+        self.sequence_mask = (1 << sequence_bits) - 1
+        self.instance_mask = (1 << instance_bits) - 1
+    def compose(self, instance_id: int, sequence: int) -> int:
+        """
+        Pack instance_id and sequence into a single 64-bit integer.
+        Both values are silently masked to their configured bit widths.
+        Values exceeding the field width (e.g. instance_id >= 2^instance_bits)
+        will have their high bits truncated without raising an error.
+        Use ``instance_mask`` / ``sequence_mask`` to validate inputs if
+        strict overflow detection is required.
+        """
+        if sequence > self.sequence_mask:
+            raise OverflowError(
+                f"sequence {sequence} exceeds {self.sequence_bits}-bit maximum "
+                f"({self.sequence_mask}). The ID space for this shard is exhausted."
+            )
+        return (
+            ((instance_id & self.instance_mask) << self.sequence_bits)
+            | (sequence & self.sequence_mask)
+        ) & MASK64
+    def decompose(self, raw: int) -> DecodedId:
+        """Unpack a raw 64-bit integer back into instance_id and sequence."""
+        seq = raw & self.sequence_mask
+        instance_id = (raw >> self.sequence_bits) & self.instance_mask
+        return DecodedId(raw=raw, instance_id=instance_id, sequence=seq)