PyPI - chimp-encoding - Versions diffs - 0.1.0__tar.gz - Mend

chimp-encoding 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

chimp_encoding-0.1.0/.gitignore +10 -0
chimp_encoding-0.1.0/.python-version +1 -0
chimp_encoding-0.1.0/LICENSE +21 -0
chimp_encoding-0.1.0/PKG-INFO +105 -0
chimp_encoding-0.1.0/README.md +87 -0
chimp_encoding-0.1.0/pyproject.toml +47 -0
chimp_encoding-0.1.0/src/chimp_encoding/__init__.py +17 -0
chimp_encoding-0.1.0/src/chimp_encoding/_bit_stream.py +130 -0
chimp_encoding-0.1.0/src/chimp_encoding/_tables.py +143 -0
chimp_encoding-0.1.0/src/chimp_encoding/chimp.py +191 -0
chimp_encoding-0.1.0/src/chimp_encoding/chimp32.py +175 -0
chimp_encoding-0.1.0/src/chimp_encoding/chimp_n.py +235 -0
chimp_encoding-0.1.0/src/chimp_encoding/chimp_n32.py +226 -0
chimp_encoding-0.1.0/src/chimp_encoding/cli.py +176 -0
chimp_encoding-0.1.0/src/chimp_encoding/py.typed +0 -0
chimp_encoding-0.1.0/tests/__init__.py +0 -0
chimp_encoding-0.1.0/tests/test_bit_stream.py +124 -0
chimp_encoding-0.1.0/tests/test_chimp.py +109 -0
chimp_encoding-0.1.0/tests/test_chimp32.py +79 -0
chimp_encoding-0.1.0/tests/test_chimp_n.py +116 -0
chimp_encoding-0.1.0/tests/test_chimp_n32.py +85 -0
chimp_encoding-0.1.0/uv.lock +141 -0

chimp_encoding-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,10 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+# Virtual environments
+.venv

chimp_encoding-0.1.0/.python-version ADDED Viewed

@@ -0,0 +1 @@
+3.12

chimp_encoding-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Hugo
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

chimp_encoding-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,105 @@
+Metadata-Version: 2.4
+Name: chimp-encoding
+Version: 0.1.0
+Summary: Chimp time-series floating point compression for Python
+Author: Hugo
+License-Expression: MIT
+License-File: LICENSE
+Keywords: chimp,compression,floating-point,time-series
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering
+Classifier: Typing :: Typed
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+# chimp-encoding
+Chimp time-series floating point compression for Python.
+A Python port of the [Chimp](https://github.com/panagiotisl/chimp) algorithm for lossless compression of floating-point time-series data. Supports both 64-bit (double) and 32-bit (float) variants, in base mode and history-based (Chimp128/ChimpN) mode.
+## Installation
+```bash
+pip install chimp-encoding
+```
+## Usage
+### Encoding and decoding 64-bit doubles
+```python
+from chimp_encoding import ChimpEncoder, ChimpDecoder
+# Encode
+encoder = ChimpEncoder()
+for value in [23.5, 23.6, 23.4, 24.0, 22.9]:
+    encoder.add_value(value)
+encoder.close()
+compressed = encoder.get_bytes()
+print(f"Compressed to {len(compressed)} bytes")
+# Decode
+decoder = ChimpDecoder(compressed)
+values = decoder.get_values()
+print(values)  # [23.5, 23.6, 23.4, 24.0, 22.9]
+```
+### Using Chimp128 (history-based, better compression)
+```python
+from chimp_encoding import ChimpNEncoder, ChimpNDecoder
+encoder = ChimpNEncoder(previous_values=128)
+for value in data:
+    encoder.add_value(value)
+encoder.close()
+decoder = ChimpNDecoder(encoder.get_bytes(), previous_values=128)
+values = decoder.get_values()
+```
+### 32-bit float variants
+```python
+from chimp_encoding import Chimp32Encoder, Chimp32Decoder
+from chimp_encoding import ChimpN32Encoder, ChimpN32Decoder
+```
+### CLI
+```bash
+# Encode values to hex
+chimp encode 23.5 23.6 23.4 24.0 22.9
+# Encode to file
+chimp encode 23.5 23.6 23.4 -o compressed.bin
+# Decode from hex
+chimp decode --hex "4037800000000000..."
+# Decode from file
+chimp decode compressed.bin
+# Use different variants
+chimp encode --variant chimp128 23.5 23.6 23.4
+chimp decode --variant chimp128 --hex "..."
+```
+## Variants
+| Variant | Class | Description |
+|---------|-------|-------------|
+| Chimp | `ChimpEncoder` / `ChimpDecoder` | Base 64-bit, XOR with previous value |
+| Chimp32 | `Chimp32Encoder` / `Chimp32Decoder` | Base 32-bit |
+| ChimpN | `ChimpNEncoder` / `ChimpNDecoder` | 64-bit with N-value history (default N=128) |
+| ChimpN32 | `ChimpN32Encoder` / `ChimpN32Decoder` | 32-bit with N-value history (default N=64) |
+## License
+MIT

chimp_encoding-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,87 @@
+# chimp-encoding
+Chimp time-series floating point compression for Python.
+A Python port of the [Chimp](https://github.com/panagiotisl/chimp) algorithm for lossless compression of floating-point time-series data. Supports both 64-bit (double) and 32-bit (float) variants, in base mode and history-based (Chimp128/ChimpN) mode.
+## Installation
+```bash
+pip install chimp-encoding
+```
+## Usage
+### Encoding and decoding 64-bit doubles
+```python
+from chimp_encoding import ChimpEncoder, ChimpDecoder
+# Encode
+encoder = ChimpEncoder()
+for value in [23.5, 23.6, 23.4, 24.0, 22.9]:
+    encoder.add_value(value)
+encoder.close()
+compressed = encoder.get_bytes()
+print(f"Compressed to {len(compressed)} bytes")
+# Decode
+decoder = ChimpDecoder(compressed)
+values = decoder.get_values()
+print(values)  # [23.5, 23.6, 23.4, 24.0, 22.9]
+```
+### Using Chimp128 (history-based, better compression)
+```python
+from chimp_encoding import ChimpNEncoder, ChimpNDecoder
+encoder = ChimpNEncoder(previous_values=128)
+for value in data:
+    encoder.add_value(value)
+encoder.close()
+decoder = ChimpNDecoder(encoder.get_bytes(), previous_values=128)
+values = decoder.get_values()
+```
+### 32-bit float variants
+```python
+from chimp_encoding import Chimp32Encoder, Chimp32Decoder
+from chimp_encoding import ChimpN32Encoder, ChimpN32Decoder
+```
+### CLI
+```bash
+# Encode values to hex
+chimp encode 23.5 23.6 23.4 24.0 22.9
+# Encode to file
+chimp encode 23.5 23.6 23.4 -o compressed.bin
+# Decode from hex
+chimp decode --hex "4037800000000000..."
+# Decode from file
+chimp decode compressed.bin
+# Use different variants
+chimp encode --variant chimp128 23.5 23.6 23.4
+chimp decode --variant chimp128 --hex "..."
+```
+## Variants
+| Variant | Class | Description |
+|---------|-------|-------------|
+| Chimp | `ChimpEncoder` / `ChimpDecoder` | Base 64-bit, XOR with previous value |
+| Chimp32 | `Chimp32Encoder` / `Chimp32Decoder` | Base 32-bit |
+| ChimpN | `ChimpNEncoder` / `ChimpNDecoder` | 64-bit with N-value history (default N=128) |
+| ChimpN32 | `ChimpN32Encoder` / `ChimpN32Decoder` | 32-bit with N-value history (default N=64) |
+## License
+MIT

chimp_encoding-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,47 @@
+[project]
+name = "chimp-encoding"
+version = "0.1.0"
+description = "Chimp time-series floating point compression for Python"
+readme = "README.md"
+requires-python = ">=3.12"
+license = "MIT"
+authors = [{ name = "Hugo" }]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering",
+    "Typing :: Typed",
+]
+keywords = ["compression", "time-series", "floating-point", "chimp"]
+dependencies = []
+[project.scripts]
+chimp = "chimp_encoding.cli:main"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src/chimp_encoding"]
+[dependency-groups]
+dev = ["pytest", "ruff", "pyright"]
+[tool.ruff]
+line-length = 100
+target-version = "py312"
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP", "B", "SIM", "TCH"]
+[tool.pyright]
+pythonVersion = "3.12"
+typeCheckingMode = "strict"
+include = ["src"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]

chimp_encoding-0.1.0/src/chimp_encoding/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""Chimp time-series floating point compression for Python."""
+from chimp_encoding.chimp import ChimpDecoder, ChimpEncoder
+from chimp_encoding.chimp32 import Chimp32Decoder, Chimp32Encoder
+from chimp_encoding.chimp_n import ChimpNDecoder, ChimpNEncoder
+from chimp_encoding.chimp_n32 import ChimpN32Decoder, ChimpN32Encoder
+__all__ = [
+    "ChimpEncoder",
+    "ChimpDecoder",
+    "Chimp32Encoder",
+    "Chimp32Decoder",
+    "ChimpNEncoder",
+    "ChimpNDecoder",
+    "ChimpN32Encoder",
+    "ChimpN32Decoder",
+]

chimp_encoding-0.1.0/src/chimp_encoding/_bit_stream.py ADDED Viewed

@@ -0,0 +1,130 @@
+"""Big-endian bit-level I/O streams for Chimp compression."""
+from __future__ import annotations
class OutputBitStream:
    """Big-endian bit-level output stream over a growable byte buffer.

    Bits are packed most-significant-bit first.  Completed bytes are stored
    in an internal ``bytearray``; the bits of the byte still being assembled
    are held in ``_current`` until that byte fills up or :meth:`flush` is
    called.
    """

    def __init__(self, initial_capacity: int = 8000) -> None:
        """Create an empty stream.

        Args:
            initial_capacity: Number of bytes to pre-allocate.  The buffer
                grows on demand, so any non-negative value (including 0)
                is acceptable.
        """
        # Pre-sized backing store; only the first ``_pos`` bytes hold output.
        self._buffer: bytearray = bytearray(initial_capacity)
        # Byte under construction; pending bits sit at its high-order end.
        self._current: int = 0
        # Number of bit positions still unused in ``_current`` (1..8).
        self._free: int = 8
        # Index in ``_buffer`` where the next completed byte will be stored.
        self._pos: int = 0

    def _ensure_capacity(self, needed: int) -> None:
        """Grow the backing buffer until ``_pos + needed`` is a valid index."""
        required = self._pos + needed + 1
        while len(self._buffer) < required:
            # Double the buffer, but always add at least one byte: doubling
            # an empty buffer would add nothing and never terminate.
            self._buffer.extend(bytes(max(len(self._buffer), 1)))

    def _write_in_current(self, value: int, length: int) -> None:
        """Append the low ``length`` bits of ``value`` to the pending byte.

        The caller must guarantee ``length <= self._free``.  When the pending
        byte becomes full it is stored in the buffer and a new one is started.
        """
        self._current |= (value & ((1 << length) - 1)) << (self._free - length)
        self._free -= length
        if self._free == 0:
            self._ensure_capacity(1)
            self._buffer[self._pos] = self._current & 0xFF
            self._pos += 1
            self._free = 8
            self._current = 0

    def write_bit(self, bit: bool) -> None:
        """Append a single bit (1 if ``bit`` is truthy, otherwise 0)."""
        self._write_in_current(1 if bit else 0, 1)

    def write_int(self, value: int, length: int) -> None:
        """Append the low ``length`` bits of ``value``, most significant first.

        Args:
            value: Integer supplying the bits; bits above ``length`` are
                ignored.
            length: Number of bits to write.  Zero writes nothing.

        Raises:
            ValueError: If ``length`` is negative.
        """
        if length < 0:
            raise ValueError("length must be non-negative")
        if length == 0:
            return
        if length <= self._free:
            self._write_in_current(value, length)
            return
        # Top up the pending byte.  ``_free`` is always in 1..8 here, so this
        # completes the byte and leaves the stream byte-aligned.
        remaining = length - self._free
        self._write_in_current(value >> remaining, self._free)
        # Whole bytes can be stored directly while the stream is aligned.
        while remaining >= 8:
            remaining -= 8
            self._ensure_capacity(1)
            self._buffer[self._pos] = (value >> remaining) & 0xFF
            self._pos += 1
        # Whatever is left (fewer than 8 bits) starts a new pending byte.
        if remaining > 0:
            self._write_in_current(value, remaining)

    def write_long(self, value: int, length: int) -> None:
        """Alias of :meth:`write_int`; Python integers have no width limit."""
        self.write_int(value, length)

    def flush(self) -> None:
        """Store the pending partial byte, zero-padded on the right, if any."""
        if self._free < 8:
            self._ensure_capacity(1)
            self._buffer[self._pos] = self._current & 0xFF
            self._pos += 1
            self._current = 0
            self._free = 8

    @property
    def buffer(self) -> bytes:
        """Bytes written so far, including any unflushed partial byte.

        A pending partial byte is returned zero-padded on the right, exactly
        as :meth:`flush` would store it, so the result is the same whether or
        not :meth:`flush` has been called.
        """
        written = bytes(self._buffer[: self._pos])
        if self._free < 8:
            # The pending bits live only in ``_current``; the matching buffer
            # slot has not been written yet, so take the byte from there.
            return written + bytes((self._current & 0xFF,))
        return written
class InputBitStream:
    """Big-endian bit-level input stream over a byte buffer.

    Bits are consumed most-significant-bit first.  Reading past the end of
    the data does not raise; the missing bytes are read as zero.
    """

    def __init__(self, data: bytes | bytearray) -> None:
        """Create a stream positioned at the first bit of ``data``."""
        # Immutable private copy of the input.
        self._buffer: bytes = bytes(data)
        # Bit cache; its low ``_fill`` bits are the bits not yet consumed.
        self._current: int = 0
        # Number of unconsumed bits held in ``_current``.
        self._fill: int = 0
        # Index of the next byte of ``_buffer`` to load.
        self._pos: int = 0

    def _read_byte(self) -> int:
        """Return the next input byte, or 0 once the input is exhausted."""
        if self._pos >= len(self._buffer):
            return 0
        b = self._buffer[self._pos]
        self._pos += 1
        return b

    def _refill(self) -> None:
        """Load bytes into the cache until it holds 16+ bits or input ends."""
        while self._fill < 16 and self._pos < len(self._buffer):
            self._current = (self._current << 8) | self._read_byte()
            self._fill += 8

    def _read_from_current(self, length: int) -> int:
        """Consume ``length`` bits from the cache (``length <= self._fill``)."""
        if length == 0:
            return 0
        self._fill -= length
        result = (self._current >> self._fill) & ((1 << length) - 1)
        # Drop the consumed bits.  Without this the cache integer would keep
        # every byte ever loaded and grow without bound, making each shift
        # proportional to the amount of data already read.
        self._current &= (1 << self._fill) - 1
        return result

    def read_bit(self) -> int:
        """Read one bit and return it as 0 or 1."""
        if self._fill == 0:
            self._current = self._read_byte()
            self._fill = 8
        return self._read_from_current(1)

    def read_int(self, length: int) -> int:
        """Read ``length`` bits and return them as an unsigned integer.

        Args:
            length: Number of bits to read.  Zero returns 0 without
                consuming anything.

        Raises:
            ValueError: If ``length`` is negative.
        """
        if length < 0:
            # Reject before touching the cache so the stream stays usable.
            raise ValueError("length must be non-negative")
        if length == 0:
            return 0
        if self._fill < 16:
            self._refill()
        if length <= self._fill:
            return self._read_from_current(length)
        # Drain the cache first; everything else comes straight from bytes.
        remaining = length - self._fill
        result = self._read_from_current(self._fill)
        # Whole bytes: the cache is empty, so the stream is byte-aligned.
        while remaining >= 8:
            result = (result << 8) | self._read_byte()
            remaining -= 8
        # Trailing bits: load one more byte into the (empty) cache and take
        # its high-order ``remaining`` bits.
        if remaining > 0:
            self._current = self._read_byte()
            self._fill = 8
            result = (result << remaining) | self._read_from_current(remaining)
        return result

    def read_long(self, length: int) -> int:
        """Alias of :meth:`read_int`; Python integers have no width limit."""
        return self.read_int(length)

chimp_encoding-0.1.0/src/chimp_encoding/_tables.py ADDED Viewed

@@ -0,0 +1,143 @@
+"""Shared lookup tables for Chimp encoding/decoding."""
# Inverse decode table: maps a 3-bit code (0..7) back to the leading-zero
# count it stands for.  Used by decoders to reconstruct leading zero counts.
# The two encoder tables below are derived from it so that the three tables
# cannot drift apart.
LEADING_REPRESENTATION_DECODE: tuple[int, ...] = (0, 8, 12, 16, 18, 20, 22, 24)

# Maps a leading-zero count (0..63) to a 3-bit representation code (0..7):
# the index of the largest representable count that does not exceed it.
# Used by encoders to quantize leading zero counts.
LEADING_REPRESENTATION_ENCODE: tuple[int, ...] = tuple(
    # Each representable count at or below ``count`` advances the code by one.
    sum(count >= threshold for threshold in LEADING_REPRESENTATION_DECODE[1:])
    for count in range(64)
)

# Rounds a leading-zero count (0..63) DOWN to the largest representable value
# that does not exceed it (e.g. 11 -> 8, 17 -> 16, 63 -> 24).
# Used by encoders before storing leading zeros.
LEADING_ROUND: tuple[int, ...] = tuple(
    LEADING_REPRESENTATION_DECODE[code] for code in LEADING_REPRESENTATION_ENCODE
)