chimp-encoding 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Hugo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,105 @@
1
+ Metadata-Version: 2.4
2
+ Name: chimp-encoding
3
+ Version: 0.1.0
4
+ Summary: Chimp time-series floating point compression for Python
5
+ Author: Hugo
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: chimp,compression,floating-point,time-series
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Scientific/Engineering
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: >=3.12
17
+ Description-Content-Type: text/markdown
18
+
19
+ # chimp-encoding
20
+
21
+ Chimp time-series floating point compression for Python.
22
+
23
+ A Python port of the [Chimp](https://github.com/panagiotisl/chimp) algorithm for lossless compression of floating-point time-series data. Supports both 64-bit (double) and 32-bit (float) variants, in base mode and history-based (Chimp128/ChimpN) mode.
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ pip install chimp-encoding
29
+ ```
30
+
31
+ ## Usage
32
+
33
+ ### Encoding and decoding 64-bit doubles
34
+
35
+ ```python
36
+ from chimp_encoding import ChimpEncoder, ChimpDecoder
37
+
38
+ # Encode
39
+ encoder = ChimpEncoder()
40
+ for value in [23.5, 23.6, 23.4, 24.0, 22.9]:
41
+ encoder.add_value(value)
42
+ encoder.close()
43
+
44
+ compressed = encoder.get_bytes()
45
+ print(f"Compressed to {len(compressed)} bytes")
46
+
47
+ # Decode
48
+ decoder = ChimpDecoder(compressed)
49
+ values = decoder.get_values()
50
+ print(values) # [23.5, 23.6, 23.4, 24.0, 22.9]
51
+ ```
52
+
53
+ ### Using Chimp128 (history-based, better compression)
54
+
55
+ ```python
56
+ from chimp_encoding import ChimpNEncoder, ChimpNDecoder
57
+
58
+ encoder = ChimpNEncoder(previous_values=128)
59
+ for value in data:
60
+ encoder.add_value(value)
61
+ encoder.close()
62
+
63
+ decoder = ChimpNDecoder(encoder.get_bytes(), previous_values=128)
64
+ values = decoder.get_values()
65
+ ```
66
+
67
+ ### 32-bit float variants
68
+
69
+ ```python
70
+ from chimp_encoding import Chimp32Encoder, Chimp32Decoder
71
+ from chimp_encoding import ChimpN32Encoder, ChimpN32Decoder
72
+ ```
73
+
74
+ ### CLI
75
+
76
+ ```bash
77
+ # Encode values to hex
78
+ chimp encode 23.5 23.6 23.4 24.0 22.9
79
+
80
+ # Encode to file
81
+ chimp encode 23.5 23.6 23.4 -o compressed.bin
82
+
83
+ # Decode from hex
84
+ chimp decode --hex "4037800000000000..."
85
+
86
+ # Decode from file
87
+ chimp decode compressed.bin
88
+
89
+ # Use different variants
90
+ chimp encode --variant chimp128 23.5 23.6 23.4
91
+ chimp decode --variant chimp128 --hex "..."
92
+ ```
93
+
94
+ ## Variants
95
+
96
+ | Variant | Class | Description |
97
+ |---------|-------|-------------|
98
+ | Chimp | `ChimpEncoder` / `ChimpDecoder` | Base 64-bit, XOR with previous value |
99
+ | Chimp32 | `Chimp32Encoder` / `Chimp32Decoder` | Base 32-bit |
100
+ | ChimpN | `ChimpNEncoder` / `ChimpNDecoder` | 64-bit with N-value history (default N=128) |
101
+ | ChimpN32 | `ChimpN32Encoder` / `ChimpN32Decoder` | 32-bit with N-value history (default N=64) |
102
+
103
+ ## License
104
+
105
+ MIT
@@ -0,0 +1,87 @@
1
+ # chimp-encoding
2
+
3
+ Chimp time-series floating point compression for Python.
4
+
5
+ A Python port of the [Chimp](https://github.com/panagiotisl/chimp) algorithm for lossless compression of floating-point time-series data. Supports both 64-bit (double) and 32-bit (float) variants, in base mode and history-based (Chimp128/ChimpN) mode.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install chimp-encoding
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ### Encoding and decoding 64-bit doubles
16
+
17
+ ```python
18
+ from chimp_encoding import ChimpEncoder, ChimpDecoder
19
+
20
+ # Encode
21
+ encoder = ChimpEncoder()
22
+ for value in [23.5, 23.6, 23.4, 24.0, 22.9]:
23
+ encoder.add_value(value)
24
+ encoder.close()
25
+
26
+ compressed = encoder.get_bytes()
27
+ print(f"Compressed to {len(compressed)} bytes")
28
+
29
+ # Decode
30
+ decoder = ChimpDecoder(compressed)
31
+ values = decoder.get_values()
32
+ print(values) # [23.5, 23.6, 23.4, 24.0, 22.9]
33
+ ```
34
+
35
+ ### Using Chimp128 (history-based, better compression)
36
+
37
+ ```python
38
+ from chimp_encoding import ChimpNEncoder, ChimpNDecoder
39
+
40
+ encoder = ChimpNEncoder(previous_values=128)
41
+ for value in data:
42
+ encoder.add_value(value)
43
+ encoder.close()
44
+
45
+ decoder = ChimpNDecoder(encoder.get_bytes(), previous_values=128)
46
+ values = decoder.get_values()
47
+ ```
48
+
49
+ ### 32-bit float variants
50
+
51
+ ```python
52
+ from chimp_encoding import Chimp32Encoder, Chimp32Decoder
53
+ from chimp_encoding import ChimpN32Encoder, ChimpN32Decoder
54
+ ```
55
+
56
+ ### CLI
57
+
58
+ ```bash
59
+ # Encode values to hex
60
+ chimp encode 23.5 23.6 23.4 24.0 22.9
61
+
62
+ # Encode to file
63
+ chimp encode 23.5 23.6 23.4 -o compressed.bin
64
+
65
+ # Decode from hex
66
+ chimp decode --hex "4037800000000000..."
67
+
68
+ # Decode from file
69
+ chimp decode compressed.bin
70
+
71
+ # Use different variants
72
+ chimp encode --variant chimp128 23.5 23.6 23.4
73
+ chimp decode --variant chimp128 --hex "..."
74
+ ```
75
+
76
+ ## Variants
77
+
78
+ | Variant | Class | Description |
79
+ |---------|-------|-------------|
80
+ | Chimp | `ChimpEncoder` / `ChimpDecoder` | Base 64-bit, XOR with previous value |
81
+ | Chimp32 | `Chimp32Encoder` / `Chimp32Decoder` | Base 32-bit |
82
+ | ChimpN | `ChimpNEncoder` / `ChimpNDecoder` | 64-bit with N-value history (default N=128) |
83
+ | ChimpN32 | `ChimpN32Encoder` / `ChimpN32Decoder` | 32-bit with N-value history (default N=64) |
84
+
85
+ ## License
86
+
87
+ MIT
@@ -0,0 +1,47 @@
1
+ [project]
2
+ name = "chimp-encoding"
3
+ version = "0.1.0"
4
+ description = "Chimp time-series floating point compression for Python"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ license = "MIT"
8
+ authors = [{ name = "Hugo" }]
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "Intended Audience :: Developers",
12
+ "License :: OSI Approved :: MIT License",
13
+ "Programming Language :: Python :: 3",
14
+ "Programming Language :: Python :: 3.12",
15
+ "Topic :: Scientific/Engineering",
16
+ "Typing :: Typed",
17
+ ]
18
+ keywords = ["compression", "time-series", "floating-point", "chimp"]
19
+ dependencies = []
20
+
21
+ [project.scripts]
22
+ chimp = "chimp_encoding.cli:main"
23
+
24
+ [build-system]
25
+ requires = ["hatchling"]
26
+ build-backend = "hatchling.build"
27
+
28
+ [tool.hatch.build.targets.wheel]
29
+ packages = ["src/chimp_encoding"]
30
+
31
+ [dependency-groups]
32
+ dev = ["pytest", "ruff", "pyright"]
33
+
34
+ [tool.ruff]
35
+ line-length = 100
36
+ target-version = "py312"
37
+
38
+ [tool.ruff.lint]
39
+ select = ["E", "F", "I", "UP", "B", "SIM", "TCH"]
40
+
41
+ [tool.pyright]
42
+ pythonVersion = "3.12"
43
+ typeCheckingMode = "strict"
44
+ include = ["src"]
45
+
46
+ [tool.pytest.ini_options]
47
+ testpaths = ["tests"]
@@ -0,0 +1,17 @@
1
+ """Chimp time-series floating point compression for Python."""
2
+
3
+ from chimp_encoding.chimp import ChimpDecoder, ChimpEncoder
4
+ from chimp_encoding.chimp32 import Chimp32Decoder, Chimp32Encoder
5
+ from chimp_encoding.chimp_n import ChimpNDecoder, ChimpNEncoder
6
+ from chimp_encoding.chimp_n32 import ChimpN32Decoder, ChimpN32Encoder
7
+
8
+ __all__ = [
9
+ "ChimpEncoder",
10
+ "ChimpDecoder",
11
+ "Chimp32Encoder",
12
+ "Chimp32Decoder",
13
+ "ChimpNEncoder",
14
+ "ChimpNDecoder",
15
+ "ChimpN32Encoder",
16
+ "ChimpN32Decoder",
17
+ ]
@@ -0,0 +1,130 @@
1
+ """Big-endian bit-level I/O streams for Chimp compression."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class OutputBitStream:
    """Big-endian (most-significant-bit first) bit writer over a growable buffer.

    Bits are staged in a single pending byte (``_current``) that is filled from
    its high end. Each time the pending byte becomes full it is committed to the
    backing ``bytearray``. ``flush()`` commits a partially filled pending byte,
    leaving its unused low bits as zero.
    """

    def __init__(self, initial_capacity: int = 8000) -> None:
        """Create a stream whose backing buffer starts at ``initial_capacity`` bytes."""
        self._buffer: bytearray = bytearray(initial_capacity)
        self._current: int = 0  # pending byte, filled from the high bit down
        self._free: int = 8  # number of unused bits left in the pending byte
        self._pos: int = 0  # index of the next byte to commit in _buffer

    def _ensure_capacity(self, needed: int) -> None:
        """Grow the buffer until ``_pos + needed`` is strictly below its length.

        Growth is by doubling. A zero-length buffer is treated as length one so
        that doubling always makes progress (doubling zero would never finish).
        """
        required = self._pos + needed + 1
        size = len(self._buffer)
        if size >= required:
            return
        new_size = max(size, 1)
        while new_size < required:
            new_size *= 2
        self._buffer.extend(bytes(new_size - size))

    def _write_in_current(self, value: int, length: int) -> None:
        """Place the low ``length`` bits of ``value`` into the pending byte.

        The caller must guarantee ``length <= self._free``. When the pending
        byte fills up it is committed to the buffer and reset.
        """
        self._current |= (value & ((1 << length) - 1)) << (self._free - length)
        self._free -= length
        if self._free == 0:
            self._ensure_capacity(1)
            self._buffer[self._pos] = self._current & 0xFF
            self._pos += 1
            self._free = 8
            self._current = 0

    def write_bit(self, bit: bool) -> None:
        """Write a single bit (1 when ``bit`` is truthy, otherwise 0)."""
        self._write_in_current(1 if bit else 0, 1)

    def write_int(self, value: int, length: int) -> None:
        """Write the low ``length`` bits of ``value``, most significant bit first.

        A ``length`` of zero writes nothing.
        """
        if length == 0:
            return
        if length <= self._free:
            self._write_in_current(value, length)
            return

        # Top up the pending byte. _free is always in 1..8 here because a full
        # pending byte is committed (and _free reset to 8) as soon as it fills.
        remaining = length - self._free
        self._write_in_current(value >> remaining, self._free)

        # The pending byte is now empty, so whole bytes can go straight to the
        # buffer without passing through it.
        while remaining >= 8:
            remaining -= 8
            self._ensure_capacity(1)
            self._buffer[self._pos] = (value >> remaining) & 0xFF
            self._pos += 1

        # Whatever is left (fewer than 8 bits) starts a new pending byte.
        if remaining > 0:
            self._write_in_current(value, remaining)

    def write_long(self, value: int, length: int) -> None:
        """Alias of ``write_int``; Python integers need no separate wide path."""
        self.write_int(value, length)

    def flush(self) -> None:
        """Commit a partially filled pending byte; no-op if nothing is pending."""
        if self._free < 8:
            self._ensure_capacity(1)
            self._buffer[self._pos] = self._current & 0xFF
            self._pos += 1
            self._current = 0
            self._free = 8

    @property
    def buffer(self) -> bytes:
        """Return everything written so far as an immutable ``bytes`` object.

        If bits are still pending (not yet flushed), they are included as a
        final byte with the unused low bits set to zero. The pending bits live
        in ``_current`` rather than in the backing array, so they are appended
        from there.
        """
        committed = bytes(self._buffer[: self._pos])
        if self._free < 8:
            return committed + bytes((self._current & 0xFF,))
        return committed
69
+
70
+
71
class InputBitStream:
    """Big-endian (most-significant-bit first) bit reader over a byte buffer.

    Unread bits are held in ``_current``; ``_fill`` says how many of its low
    bits are still unread. Reading past the end of the data does not raise:
    missing bytes are read as zero.
    """

    def __init__(self, data: bytes | bytearray) -> None:
        """Create a reader over an immutable copy of ``data``."""
        self._buffer: bytes = bytes(data)
        self._current: int = 0  # staged bits; only the low _fill bits are unread
        self._fill: int = 0  # number of unread bits staged in _current
        self._pos: int = 0  # index of the next byte to fetch from _buffer

    def _read_byte(self) -> int:
        """Fetch the next byte from the buffer, or 0 once the data is exhausted."""
        if self._pos >= len(self._buffer):
            return 0
        b = self._buffer[self._pos]
        self._pos += 1
        return b

    def _refill(self) -> None:
        """Stage whole bytes until at least 16 bits are unread or data runs out."""
        while self._fill < 16 and self._pos < len(self._buffer):
            self._current = (self._current << 8) | self._read_byte()
            self._fill += 8

    def _read_from_current(self, length: int) -> int:
        """Consume ``length`` staged bits and return them as an integer.

        The caller must guarantee ``length <= self._fill``.
        """
        if length == 0:
            return 0
        self._fill -= length
        value = (self._current >> self._fill) & ((1 << length) - 1)
        # Drop the bits just consumed. Without this, _refill keeps shifting new
        # bytes onto an ever-growing integer and every read gets slower as the
        # stream advances.
        self._current &= (1 << self._fill) - 1
        return value

    def read_bit(self) -> int:
        """Read one bit and return it as 0 or 1."""
        if self._fill == 0:
            self._current = self._read_byte()
            self._fill = 8
        return self._read_from_current(1)

    def read_int(self, length: int) -> int:
        """Read ``length`` bits, most significant first, as an unsigned integer.

        A ``length`` of zero returns 0 without consuming anything.
        """
        if length == 0:
            return 0
        self._refill()  # no-op when 16 or more bits are already staged
        if length <= self._fill:
            return self._read_from_current(length)

        # Take everything that is staged, then continue from the raw buffer.
        remaining = length - self._fill
        result = self._read_from_current(self._fill)

        # Nothing is staged now, so whole bytes can be taken directly.
        while remaining >= 8:
            result = (result << 8) | self._read_byte()
            remaining -= 8

        # A tail of fewer than 8 bits: stage one more byte and take its top bits.
        if remaining > 0:
            if self._fill == 0:
                self._current = self._read_byte()
                self._fill = 8
            result = (result << remaining) | self._read_from_current(remaining)

        return result

    def read_long(self, length: int) -> int:
        """Alias of ``read_int``; Python integers need no separate wide path."""
        return self.read_int(length)
@@ -0,0 +1,143 @@
1
+ """Shared lookup tables for Chimp encoding/decoding."""
2
+
3
# Encoder-side quantization table. Index: leading-zero count (0..63).
# Value: the 3-bit bucket code (0..7) that represents that count.
LEADING_REPRESENTATION_ENCODE: tuple[int, ...] = (
    (0,) * 8  # counts 0..7
    + (1,) * 4  # counts 8..11
    + (2,) * 4  # counts 12..15
    + (3,) * 2  # counts 16..17
    + (4,) * 2  # counts 18..19
    + (5,) * 2  # counts 20..21
    + (6,) * 2  # counts 22..23
    + (7,) * 40  # counts 24..63
)
71
+
72
# Encoder-side rounding table. Index: leading-zero count (0..63).
# Value: that count rounded down to one of the eight representable
# counts (0, 8, 12, 16, 18, 20, 22, 24).
LEADING_ROUND: tuple[int, ...] = (
    (0,) * 8  # counts 0..7
    + (8,) * 4  # counts 8..11
    + (12,) * 4  # counts 12..15
    + (16,) * 2  # counts 16..17
    + (18,) * 2  # counts 18..19
    + (20,) * 2  # counts 20..21
    + (22,) * 2  # counts 22..23
    + (24,) * 40  # counts 24..63
)
140
+
141
# Decoder-side table. Index: 3-bit bucket code (0..7).
# Value: the leading-zero count that code stands for.
LEADING_REPRESENTATION_DECODE: tuple[int, ...] = (
    0,  # code 0
    8,  # code 1
    12,  # code 2
    16,  # code 3
    18,  # code 4
    20,  # code 5
    22,  # code 6
    24,  # code 7
)