dipcompress 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. dipcompress-0.1.0/PKG-INFO +26 -0
  2. dipcompress-0.1.0/README.md +1 -0
  3. dipcompress-0.1.0/dipcompress/__init__.py +12 -0
  4. dipcompress-0.1.0/dipcompress/__main__.py +3 -0
  5. dipcompress-0.1.0/dipcompress/_c_ext/build.py +42 -0
  6. dipcompress-0.1.0/dipcompress/_c_ext/compress.c +98 -0
  7. dipcompress-0.1.0/dipcompress/_c_ext/setup.py +0 -0
  8. dipcompress-0.1.0/dipcompress/bitstream.py +0 -0
  9. dipcompress-0.1.0/dipcompress/cli.py +108 -0
  10. dipcompress-0.1.0/dipcompress/decoder.py +70 -0
  11. dipcompress-0.1.0/dipcompress/encoder.py +95 -0
  12. dipcompress-0.1.0/dipcompress/filters.py +128 -0
  13. dipcompress-0.1.0/dipcompress/format.py +99 -0
  14. dipcompress-0.1.0/dipcompress/huffman.py +122 -0
  15. dipcompress-0.1.0/dipcompress/image_io.py +36 -0
  16. dipcompress-0.1.0/dipcompress/lz77.py +97 -0
  17. dipcompress-0.1.0/dipcompress/lz77_fast.py +23 -0
  18. dipcompress-0.1.0/dipcompress/rle.py +48 -0
  19. dipcompress-0.1.0/dipcompress.egg-info/PKG-INFO +26 -0
  20. dipcompress-0.1.0/dipcompress.egg-info/SOURCES.txt +31 -0
  21. dipcompress-0.1.0/dipcompress.egg-info/dependency_links.txt +1 -0
  22. dipcompress-0.1.0/dipcompress.egg-info/entry_points.txt +2 -0
  23. dipcompress-0.1.0/dipcompress.egg-info/requires.txt +5 -0
  24. dipcompress-0.1.0/dipcompress.egg-info/top_level.txt +1 -0
  25. dipcompress-0.1.0/pyproject.toml +56 -0
  26. dipcompress-0.1.0/setup.cfg +4 -0
  27. dipcompress-0.1.0/setup.py +25 -0
  28. dipcompress-0.1.0/tests/test_filters.py +52 -0
  29. dipcompress-0.1.0/tests/test_huffman.py +41 -0
  30. dipcompress-0.1.0/tests/test_image_io.py +30 -0
  31. dipcompress-0.1.0/tests/test_lz77.py +50 -0
  32. dipcompress-0.1.0/tests/test_rle.py +39 -0
  33. dipcompress-0.1.0/tests/test_roundtrip.py +64 -0
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.4
2
+ Name: dipcompress
3
+ Version: 0.1.0
4
+ Summary: Lossless image compression using LZ77 + Huffman coding, with an optional C extension.
5
+ Author-email: Subhadip Mondal <subhadipmondal789@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/16bitOni/dipcompress
8
+ Project-URL: Repository, https://github.com/16bitOni/dipcompress
9
+ Project-URL: Issues, https://github.com/16bitOni/dipcompress/issues
10
+ Keywords: image,compression,lz77,huffman,lossless
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: C
17
+ Classifier: Topic :: Multimedia :: Graphics
18
+ Classifier: Topic :: System :: Archiving :: Compression
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: numpy>=2.0
22
+ Requires-Dist: pillow>=10.0
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=9.0; extra == "dev"
25
+
26
+ Dipcompress is my attempt to implement PNG-style image compression from scratch.
@@ -0,0 +1 @@
1
+ Dipcompress is my attempt to implement PNG-style image compression from scratch.
@@ -0,0 +1,12 @@
1
+ # dipcompress/__init__.py
2
+ # This is what users see when they do: import dipcompress
3
+
4
+ __version__ = "0.1.0"
5
+ __author__ = "Subhadip"
6
+
7
+ from .encoder import compress
8
+ from .decoder import decompress
9
+ from .image_io import load_image, save_image, pixels_to_bytes, bytes_to_pixels
10
+ from .rle import rle_encode, rle_decode
11
+
12
+ __all__ = ["compress", "decompress", "load_image", "save_image", "pixels_to_bytes", "bytes_to_pixels", "rle_encode", "rle_decode"]
@@ -0,0 +1,3 @@
1
+ from .cli import main
2
+
3
+ main()
@@ -0,0 +1,42 @@
1
+ """
2
+ Build the lz77_cext Python extension module.
3
+
4
+ Usage (run from the repo root):
5
+ python dipcompress/_c_ext/build.py
6
+
7
+ Output: dipcompress/lz77_cext.<platform-tag>.pyd (Windows)
8
+ dipcompress/lz77_cext.<platform-tag>.so (Linux/macOS)
9
+
10
+ The built module is placed in-place inside the dipcompress package so that
11
+ `from dipcompress import lz77_cext` works immediately without installation.
12
+ """
13
+ import os
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ # Run from the repo root so setuptools resolves relative source paths correctly.
18
+ REPO_ROOT = Path(__file__).resolve().parent.parent.parent
19
+ os.chdir(REPO_ROOT)
20
+
21
+
22
def build():
    """Compile the ``dipcompress.lz77_cext`` extension in place.

    Drives setuptools' ``build_ext`` command programmatically with
    ``inplace`` enabled, so the compiled module is written next to the
    package sources instead of into a separate build directory.
    """
    # setuptools is imported lazily so importing this module does not need it.
    from setuptools import Extension
    from setuptools.dist import Distribution
    from setuptools.command.build_ext import build_ext

    lz77_extension = Extension(
        'dipcompress.lz77_cext',
        sources=['dipcompress/_c_ext/compress.c'],
    )
    distribution = Distribution(attrs={'ext_modules': [lz77_extension]})

    command = build_ext(distribution)
    command.inplace = True  # place .pyd/.so next to the package source
    command.ensure_finalized()
    command.run()

    print('\nBuild complete — lz77_cext is ready.')


if __name__ == '__main__':
    build()
@@ -0,0 +1,98 @@
1
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
5
+
6
#define WINDOW_SIZE 4096
#define LOOKAHEAD 18

/*
 * Core LZ77 encode (the original author notes it mirrors lz77.py).
 *
 * Output: raw token stream, 4 bytes per token:
 *   [distance_hi, distance_lo, length, next_byte]
 * Literals have distance=0 and length=0.
 *
 * input/input_len   : bytes to encode.
 * output/output_max : destination buffer and its capacity in bytes.
 *
 * Returns the number of bytes written, or -1 if the next token would not
 * fit in the output buffer (nothing is written past output_max).
 */
static int
_lz77_encode(const uint8_t *input, int input_len,
             uint8_t *output, int output_max)
{
    int pos = 0, out_pos = 0;

    while (pos < input_len) {
        /* Every token is exactly 4 bytes; never write past the buffer. */
        if (output_max - out_pos < 4)
            return -1;

        /* Search window: at most WINDOW_SIZE bytes immediately before pos. */
        int wstart = pos - WINDOW_SIZE;
        if (wstart < 0) wstart = 0;

        /* Look-ahead: at most LOOKAHEAD bytes starting at pos. */
        int lend = pos + LOOKAHEAD;
        if (lend > input_len) lend = input_len;
        int llen = lend - pos;

        int best_dist = 0, best_len = 0;

        for (int i = wstart; i < pos; i++) {
            int ml = 0;
            /* "i + ml < pos" keeps the match from running into the
             * look-ahead itself (no overlapping matches). */
            while (ml < llen && i + ml < pos && input[i + ml] == input[pos + ml])
                ml++;
            if (ml > best_len) {
                best_len = ml;
                best_dist = pos - i;
                /* A match covering the whole look-ahead cannot be beaten. */
                if (best_len == llen)
                    break;
            }
        }

        if (best_len >= 3) {
            int np = pos + best_len;
            /* The token always carries a trailing literal; if the match runs
             * to the end of the input, shorten it by one so that literal
             * exists. */
            if (np >= input_len) { best_len--; np--; }

            output[out_pos++] = (best_dist >> 8) & 0xFF;
            output[out_pos++] = best_dist & 0xFF;
            output[out_pos++] = (uint8_t)best_len;
            output[out_pos++] = input[np];
            pos += best_len + 1;
        } else {
            /* Matches shorter than 3 bytes are emitted as a plain literal. */
            output[out_pos++] = 0;
            output[out_pos++] = 0;
            output[out_pos++] = 0;
            output[out_pos++] = input[pos];
            pos++;
        }
    }
    return out_pos;
}
61
+
62
+ /* ── Python wrapper ───────────────────────────────────────────────────── */
63
+
64
+ static PyObject *
65
+ py_lz77_encode(PyObject *self, PyObject *args)
66
+ {
67
+ const uint8_t *input;
68
+ Py_ssize_t input_len;
69
+
70
+ if (!PyArg_ParseTuple(args, "y#", &input, &input_len))
71
+ return NULL;
72
+
73
+ int output_max = (int)input_len * 4 + 4;
74
+ uint8_t *output = (uint8_t *)malloc(output_max);
75
+ if (!output)
76
+ return PyErr_NoMemory();
77
+
78
+ int n = _lz77_encode(input, (int)input_len, output, output_max);
79
+ PyObject *result = PyBytes_FromStringAndSize((char *)output, n);
80
+ free(output);
81
+ return result;
82
+ }
83
+
84
+ static PyMethodDef lz77_methods[] = {
85
+ {"lz77_encode", py_lz77_encode, METH_VARARGS,
86
+ "lz77_encode(data: bytes) -> bytes — raw 4-byte-per-token stream"},
87
+ {NULL, NULL, 0, NULL}
88
+ };
89
+
90
+ static struct PyModuleDef lz77_module = {
91
+ PyModuleDef_HEAD_INIT, "lz77_cext", NULL, -1, lz77_methods
92
+ };
93
+
94
+ PyMODINIT_FUNC
95
+ PyInit_lz77_cext(void)
96
+ {
97
+ return PyModule_Create(&lz77_module);
98
+ }
File without changes
File without changes
@@ -0,0 +1,108 @@
1
+ import argparse
2
+ import sys
3
+ import os
4
+
5
+ from . import __version__
6
+
7
+
8
def _cmd_compress(args):
    """Handle the ``compress`` sub-command.

    Exits with status 1 when ``args.input`` is not an existing file.
    Otherwise runs the encoder and, unless ``args.verbose`` is set (the
    encoder then prints its own report), prints a one-line size summary.
    """
    from .encoder import compress

    source = args.input
    if not os.path.isfile(source):
        print(f"error: input file not found: {source}", file=sys.stderr)
        sys.exit(1)

    stats = compress(source, args.output, verbose=args.verbose)
    if args.verbose:
        return

    savings = stats['savings_pct']
    # A positive saving means the file shrank, shown as a negative change.
    if savings >= 0:
        sign = '-'
    else:
        sign = '+'
    dimensions = f"{stats['width']}x{stats['height']} "
    sizes = f"{stats['original_size']:,} B -> {stats['compressed_size']:,} B "
    change = f"({sign}{abs(savings):.1f}%)"
    print(dimensions + sizes + change)
22
+
23
+
24
def _cmd_decompress(args):
    """Handle the ``decompress`` sub-command.

    Exits with status 1 when ``args.input`` is not an existing file.
    Otherwise runs the decoder and, unless ``args.verbose`` is set,
    prints where the image was saved.
    """
    from .decoder import decompress

    source = args.input
    if not os.path.isfile(source):
        print(f"error: input file not found: {source}", file=sys.stderr)
        sys.exit(1)

    decompress(source, args.output, verbose=args.verbose)

    if args.verbose:
        return
    print(f"saved -> {args.output}")
34
+
35
+
36
def _cmd_info(args):
    """Handle the ``info`` sub-command: print metadata stored in a .dip file.

    Reads the whole file, parses the header and the per-row filter table
    that follows it, and prints dimensions, sizes, the size ratio, the
    filter usage counts and the format version.

    Exits with status 1 when ``args.file`` is not an existing file.
    Errors raised by ``DipHeader.from_bytes`` are not caught here.
    """
    from .format import DipHeader, HEADER_SIZE, decode_filter_table
    from collections import Counter

    if not os.path.isfile(args.file):
        print(f"error: file not found: {args.file}", file=sys.stderr)
        sys.exit(1)

    with open(args.file, 'rb') as f:
        raw = f.read()

    header = DipHeader.from_bytes(raw)
    # The filter table is one byte per image row, right after the header.
    filter_bytes = raw[HEADER_SIZE: HEADER_SIZE + header.height]
    filter_types = decode_filter_table(filter_bytes)
    filter_names = {0: 'None', 1: 'Sub', 2: 'Up', 3: 'Average', 4: 'Paeth'}
    filter_counts = {filter_names.get(k, k): v
                     for k, v in Counter(filter_types).most_common()}

    channels_label = {1: 'grayscale', 3: 'RGB', 4: 'RGBA'}.get(header.channels, str(header.channels))
    raw_size = header.width * header.height * header.channels
    stored_size = len(raw)

    print(f"File : {args.file}")
    print(f"Dimensions : {header.width} x {header.height} ({channels_label})")
    print(f"Raw size : {raw_size:,} bytes ({raw_size/1024:.1f} KB)")
    print(f"Stored size : {stored_size:,} bytes ({stored_size/1024:.1f} KB)")
    if raw_size:
        print(f"Ratio : {stored_size/raw_size:.3f} ({(1-stored_size/raw_size)*100:.1f}% smaller)")
    else:
        # A header declaring zero pixels would otherwise divide by zero.
        print("Ratio : n/a (header declares zero pixels)")
    print(f"Row filters : {filter_counts}")
    print(f"Format ver : {header.version}")
64
+
65
+
66
def build_parser():
    """Create the ``dipcompress`` argument parser.

    The parser has a ``--version`` flag and three mandatory-choice
    sub-commands (``compress``/``c``, ``decompress``/``d``, ``info``/``i``).
    Each sub-command stores its handler in ``func`` via ``set_defaults``.
    """
    parser = argparse.ArgumentParser(
        prog='dipcompress',
        description='DipCompress — lossless image compression.',
    )
    parser.add_argument('--version', action='version', version=f'dipcompress {__version__}')

    commands = parser.add_subparsers(dest='command', metavar='command', required=True)

    # compress
    compress_cmd = commands.add_parser(
        'compress', aliases=['c'],
        help='compress an image to .dip format',
    )
    compress_cmd.add_argument('input', help='input image (PNG, JPEG, …)')
    compress_cmd.add_argument('output', help='output .dip file')
    compress_cmd.add_argument('-v', '--verbose', action='store_true')
    compress_cmd.set_defaults(func=_cmd_compress)

    # decompress
    decompress_cmd = commands.add_parser(
        'decompress', aliases=['d'],
        help='decompress a .dip file back to an image',
    )
    decompress_cmd.add_argument('input', help='input .dip file')
    decompress_cmd.add_argument('output', help='output image (PNG recommended)')
    decompress_cmd.add_argument('-v', '--verbose', action='store_true')
    decompress_cmd.set_defaults(func=_cmd_decompress)

    # info
    info_cmd = commands.add_parser(
        'info', aliases=['i'],
        help='show metadata stored in a .dip file',
    )
    info_cmd.add_argument('file', help='.dip file to inspect')
    info_cmd.set_defaults(func=_cmd_info)

    return parser
99
+
100
+
101
def main():
    """CLI entry point: parse the command line and run the chosen handler."""
    args = build_parser().parse_args()
    handler = args.func
    handler(args)


if __name__ == '__main__':
    main()
@@ -0,0 +1,70 @@
1
+ # dipcompress/decoder.py
2
+
3
+ import numpy as np
4
+ from .image_io import save_image, bytes_to_pixels
5
+ from .filters import remove_filters
6
+ from .lz77 import bytes_to_tokens, lz77_decode
7
+ from .huffman import huffman_decode
8
+ from .format import DipHeader, HEADER_SIZE, decode_filter_table, decode_huffman_table
9
+
10
+
11
def decompress(input_path: str, output_path: str, verbose: bool = False) -> None:
    """
    Decompress a .dip file back to an image.

    The file is read as: fixed-size header, filter table (one byte per
    row), Huffman table, Huffman-coded LZ77 token stream. The stages are
    undone in reverse order and the pixels are written with ``save_image``.

    Args:
        input_path: path to the .dip file
        output_path: path to write the reconstructed image (PNG recommended)
        verbose: print progress information

    Raises:
        ValueError: if the filter table is truncated or the decoded data
            is shorter than the header's dimensions require (in addition
            to whatever the individual decoding stages raise).
    """
    with open(input_path, 'rb') as f:
        raw = f.read()

    # Step 1: Parse header
    if verbose: print("Parsing header...")
    header = DipHeader.from_bytes(raw)
    pos = HEADER_SIZE

    width = header.width
    height = header.height
    channels = header.channels

    # Step 2: Read filter table (one byte per row)
    filter_types = decode_filter_table(raw[pos:pos+height])
    if len(filter_types) != height:
        # Slicing past the end of the file silently yields a short table.
        raise ValueError(
            f"Truncated .dip file: expected {height} filter bytes, "
            f"found {len(filter_types)}"
        )
    pos += height

    # Step 3: Read and parse Huffman table from payload
    if verbose: print("Reading Huffman table...")
    payload = raw[pos:]
    codes, original_length, padding, table_size = decode_huffman_table(payload)

    huffman_compressed = payload[table_size:]

    # Step 4: Huffman decode
    if verbose: print("Huffman decoding...")
    huffman_meta = {
        'codes': codes,
        'original_length': original_length,
        'padding': padding,
    }
    lz77_bytes = huffman_decode(huffman_compressed, huffman_meta)

    # Step 5: LZ77 decode back to the filtered scanlines
    if verbose: print("LZ77 decoding...")
    lz77_tokens = bytes_to_tokens(lz77_bytes)
    filtered_data = lz77_decode(lz77_tokens)

    # Step 6: Split into rows and undo the per-row filters
    if verbose: print("Removing filters...")
    row_width = width * channels
    expected_size = row_width * height
    if len(filtered_data) < expected_size:
        # Fail here with a clear message instead of a reshape/index error
        # deep inside the filter removal.
        raise ValueError(
            f"Corrupt .dip file: decoded {len(filtered_data)} bytes, "
            f"expected {expected_size} for a {width}x{height}x{channels} image"
        )
    filtered_rows_data = [
        (ft, filtered_data[i*row_width:(i+1)*row_width])
        for i, ft in enumerate(filter_types)
    ]
    pixels = remove_filters(filtered_rows_data, width, channels)

    # Channel counts other than 1/3/4 fall back to 'RGB'.
    mode = {1: 'L', 3: 'RGB', 4: 'RGBA'}.get(channels, 'RGB')
    save_image(pixels, output_path, mode=mode)

    if verbose:
        print("\n✓ Decompression complete!")
        print(f" Image: {width}x{height} {mode}")
        print(f" Saved to: {output_path}")
@@ -0,0 +1,95 @@
1
+ # dipcompress/encoder.py
2
+
3
+ import numpy as np
4
+ from .image_io import load_image, pixels_to_bytes
5
+ from .filters import apply_filters
6
+ from .lz77_fast import lz77_encode, using_c as _lz77_using_c
7
+ from .lz77 import tokens_to_bytes
8
+ from .huffman import huffman_encode
9
+ from .format import DipHeader, COMPRESS_DEFLATE, encode_filter_table, encode_huffman_table
10
+
11
+
12
def compress(input_path: str, output_path: str, verbose: bool = False) -> dict:
    """
    Compress an image file to .dip format.

    Pipeline: load pixels, apply a per-row filter, LZ77-encode the filtered
    bytes, Huffman-code the token stream, then write header + filter table
    + Huffman table + compressed data.

    Args:
        input_path: path to PNG/JPEG/any PIL-supported image
        output_path: path to write the .dip file
        verbose: print progress information

    Returns:
        dict with compression statistics: 'original_size',
        'compressed_size', 'ratio', 'savings_pct', 'width', 'height',
        'channels' and 'filter_stats'.
    """

    if verbose: print(f"Loading {input_path}...")
    pixels, meta = load_image(input_path)
    width = meta['width']
    height = meta['height']
    channels = meta['channels']
    original_size = width * height * channels

    if verbose: print("Applying filters...")
    filtered_rows = apply_filters(pixels)
    filter_types = [ft for ft, _ in filtered_rows]
    filtered_data = b''.join(row for _, row in filtered_rows)

    if verbose:
        backend = "C" if _lz77_using_c else "Python"
        print(f"Running LZ77 ({backend})...")
    lz77_tokens = lz77_encode(filtered_data)
    lz77_bytes = tokens_to_bytes(lz77_tokens)

    if verbose: print("Running Huffman coding...")
    huffman_compressed, huffman_meta = huffman_encode(lz77_bytes)
    codes = huffman_meta['codes']
    original_length = huffman_meta['original_length']
    padding = huffman_meta['padding']

    if verbose: print("Assembling .dip file...")

    filter_table_bytes = encode_filter_table(filter_types)
    huffman_table_bytes = encode_huffman_table(codes, original_length, padding)

    payload = huffman_table_bytes + huffman_compressed

    header = DipHeader(
        width=width,
        height=height,
        channels=channels,
        compression_mode=COMPRESS_DEFLATE,
        compressed_length=len(payload)
    )
    # Serialize once; the same bytes are written and counted.
    header_bytes = header.to_bytes()

    with open(output_path, 'wb') as f:
        f.write(header_bytes)
        f.write(filter_table_bytes)
        f.write(payload)

    compressed_size = len(header_bytes) + len(filter_table_bytes) + len(payload)

    stats = {
        'original_size': original_size,
        'compressed_size': compressed_size,
        'ratio': compressed_size / original_size,
        'savings_pct': (1 - compressed_size / original_size) * 100,
        'width': width,
        'height': height,
        'channels': channels,
        'filter_stats': _count_filters(filter_types),
    }

    if verbose:
        print("\n✓ Compression complete!")
        print(f" Original: {original_size:,} bytes ({original_size/1024:.1f} KB)")
        print(f" Compressed: {compressed_size:,} bytes ({compressed_size/1024:.1f} KB)")
        print(f" Ratio: {stats['ratio']:.3f} ({stats['savings_pct']:.1f}% smaller)")

    return stats
89
+
90
+
91
def _count_filters(filter_types: list) -> dict:
    """Count how often each filter type occurs, keyed by filter name.

    Known types 0-4 are reported under their names; any other value is
    kept as its own key.
    """
    from collections import Counter

    labels = {0: 'None', 1: 'Sub', 2: 'Up', 3: 'Average', 4: 'Paeth'}
    tally = Counter(filter_types)
    return {
        labels.get(kind, kind): occurrences
        for kind, occurrences in tally.items()
    }
@@ -0,0 +1,128 @@
1
+ import numpy as np
2
+
3
def filter_sub(row: bytes) -> bytes:
    """Apply the Sub filter to one row.

    The first byte is kept as-is; every later byte is replaced by its
    difference from the preceding byte, modulo 256.

    Args:
        row: raw bytes of one row.

    Returns:
        The filtered row, same length as ``row`` (empty for an empty row).
    """
    values = list(row)
    if not values:
        # Nothing to predict from; the original indexed values[0] and crashed.
        return b''
    deltas = ((cur - prev) % 256 for prev, cur in zip(values, values[1:]))
    return bytes([values[0], *deltas])
10
+
11
def unfilter_sub(row: bytes) -> bytes:
    """Undo the Sub filter on one row.

    The first byte is kept as-is; every later byte is the stored delta
    plus the previously reconstructed byte, modulo 256.

    Args:
        row: Sub-filtered bytes of one row.

    Returns:
        The reconstructed row, same length as ``row`` (empty for an empty row).
    """
    from itertools import accumulate

    # A running sum modulo 256 is exactly the inverse of the Sub filter,
    # and accumulate() yields nothing for an empty row instead of crashing.
    return bytes(accumulate(row, lambda prev, delta: (prev + delta) % 256))
18
+
19
def filter_up(row: bytes, prev_row: bytes) -> bytes:
    """Apply the Up filter: each byte minus the byte above it, modulo 256.

    Bytes are paired with ``zip``, so the result is as long as the
    shorter of ``row`` and ``prev_row``.
    """
    return bytes((current - above) % 256 for current, above in zip(row, prev_row))
25
+
26
+
27
def unfilter_up(row: bytes, prev_row: bytes) -> bytes:
    """Undo the Up filter: each stored delta plus the byte above it, modulo 256.

    Bytes are paired with ``zip``, so the result is as long as the
    shorter of ``row`` and ``prev_row``.
    """
    return bytes((delta + above) % 256 for delta, above in zip(row, prev_row))
33
+
34
def paeth_predictor(a: int, b: int, c: int) -> int:
    """Return whichever of ``a``, ``b``, ``c`` is closest to ``a + b - c``.

    Ties are resolved in the order ``a``, then ``b``, then ``c``.
    """
    estimate = a + b - c
    dist_a = abs(estimate - a)
    dist_b = abs(estimate - b)
    dist_c = abs(estimate - c)

    if dist_a <= dist_b and dist_a <= dist_c:
        return a
    return b if dist_b <= dist_c else c
46
+
47
def filter_paeth(row: bytes, prev_row: bytes) -> bytes:
    """Apply the Paeth filter to one row.

    Each byte is replaced by its difference (modulo 256) from the Paeth
    prediction computed from the byte to its left, the byte above, and the
    byte above-left. The left and above-left neighbours are 0 for the
    first byte.
    """
    filtered = bytearray()
    left = upper_left = 0
    for index, value in enumerate(row):
        above = prev_row[index]
        prediction = paeth_predictor(left, above, upper_left)
        filtered.append((value - prediction) % 256)
        # The current column becomes the "left" column of the next byte.
        left, upper_left = value, above
    return bytes(filtered)
57
+
58
def unfilter_paeth(row: bytes, prev_row: bytes) -> bytes:
    """Undo the Paeth filter on one row.

    Each byte is the stored residual plus the Paeth prediction (modulo 256),
    where the prediction uses the already reconstructed byte to the left,
    the byte above, and the byte above-left. The left and above-left
    neighbours are 0 for the first byte.
    """
    restored = bytearray()
    left = upper_left = 0
    for index, residual in enumerate(row):
        above = prev_row[index]
        value = (residual + paeth_predictor(left, above, upper_left)) % 256
        restored.append(value)
        # The reconstructed byte is the left neighbour of the next one.
        left, upper_left = value, above
    return bytes(restored)
68
+
69
def best_filter_for_row(row: bytes, prev_row: bytes) -> tuple[int, bytes]:
    """Pick the filter that gives the smallest residuals for one row.

    Candidates are None (0), Sub (1), Up (2) and Paeth (4). Each is scored
    by the sum of ``min(v, 256 - v)`` over its bytes; the lowest score
    wins, with ties going to the lower filter type.

    Args:
        row: raw bytes of the current row.
        prev_row: raw bytes of the row above.

    Returns:
        ``(filter_type, filtered_row)`` for the winning filter.
    """
    candidates = {
        0: row,
        1: filter_sub(row),
        2: filter_up(row, prev_row),
        4: filter_paeth(row, prev_row),
    }

    def score(filtered: bytes) -> int:
        # Treat each byte as a signed delta: 255 counts as 1, not 255.
        return sum(min(v, 256 - v) for v in filtered)

    best_type = min(candidates, key=lambda t: score(candidates[t]))
    return best_type, candidates[best_type]
82
+
83
def apply_filters(pixels: np.ndarray) -> list[tuple[int, bytes]]:
    """Filter an image row by row, choosing the best filter for each row.

    Args:
        pixels: image array; 2-D arrays are used row by row, arrays with
            more dimensions have each row flattened first.

    Returns:
        One ``(filter_type, filtered_row)`` tuple per row, in row order.
        An image with no rows yields an empty list.
    """
    if pixels.ndim == 2:
        rows = [bytes(row) for row in pixels]
    else:
        rows = [bytes(row.flatten()) for row in pixels]

    if not rows:
        # No first row to size the zero row from; nothing to filter.
        return []

    result = []
    # The row "above" the first row is treated as all zeros.
    prev_row = bytes(len(rows[0]))

    for row in rows:
        filter_type, filtered = best_filter_for_row(row, prev_row)
        result.append((filter_type, filtered))
        # Prediction always uses the raw (unfiltered) previous row.
        prev_row = row

    return result
100
+
101
def remove_filters(filtered_rows: list[tuple[int, bytes]], width: int, channels: int) -> np.ndarray:
    """Undo the per-row filters and rebuild the pixel array.

    Args:
        filtered_rows: ``(filter_type, filtered_row)`` tuples in row order.
        width: image width in pixels.
        channels: number of channels per pixel.

    Returns:
        A uint8 array shaped ``(rows, width)`` when ``channels == 1``,
        otherwise ``(rows, width, channels)``.

    Raises:
        ValueError: if a row uses a filter type other than 0, 1, 2 or 4.
    """
    # Each handler takes (filtered_row, reconstructed_row_above).
    undo = {
        0: lambda data, above: data,
        1: lambda data, above: unfilter_sub(data),
        2: lambda data, above: unfilter_up(data, above),
        4: lambda data, above: unfilter_paeth(data, above),
    }

    restored = []
    above = bytes(width * channels)  # the row above the first row is all zeros

    for filter_type, row_data in filtered_rows:
        handler = undo.get(filter_type)
        if handler is None:
            raise ValueError(f"Unknown filter type: {filter_type}")
        above = handler(row_data, above)
        restored.append(above)

    flat = np.frombuffer(b''.join(restored), dtype=np.uint8)
    shape = (len(restored), width) if channels == 1 else (len(restored), width, channels)
    return flat.reshape(shape)
@@ -0,0 +1,99 @@
1
+ import struct
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
MAGIC = b'DIP\x01'
VERSION = 1

COMPRESS_RLE = 0x01
COMPRESS_DEFLATE = 0x02

# Big-endian layout: magic (4s), version (H), width (I), height (I),
# channels (B), compression mode (B), compressed length (I).
HEADER_FORMAT = '>4sHIIBBI'

HEADER_SIZE = struct.calcsize(HEADER_FORMAT)


@dataclass
class DipHeader:
    """Fixed-size header stored at the start of a .dip file."""

    width: int
    height: int
    channels: int
    compression_mode: int
    compressed_length: int
    version: int = VERSION

    def to_bytes(self) -> bytes:
        """Serialize the header to ``HEADER_SIZE`` bytes, magic first."""
        return struct.pack(
            HEADER_FORMAT,
            MAGIC,
            self.version,
            self.width,
            self.height,
            self.channels,
            self.compression_mode,
            self.compressed_length,
        )

    @staticmethod
    def from_bytes(data: bytes) -> 'DipHeader':
        """Parse a header from the first ``HEADER_SIZE`` bytes of ``data``.

        Bytes after the header are ignored.

        Raises:
            ValueError: if ``data`` is shorter than a header or does not
                start with the expected magic.
        """
        if len(data) < HEADER_SIZE:
            # Report truncation the same way as a bad magic, instead of
            # leaking a bare struct.error to the caller.
            raise ValueError(
                f"Not a DipCompress file! Header needs {HEADER_SIZE} bytes, "
                f"got {len(data)}"
            )
        magic, version, width, height, channels, mode, comp_len = struct.unpack(
            HEADER_FORMAT, data[:HEADER_SIZE]
        )
        if magic != MAGIC:
            raise ValueError(f"Not a DipCompress file! Magic: {magic}")
        return DipHeader(
            width=width,
            height=height,
            channels=channels,
            compression_mode=mode,
            compressed_length=comp_len,
            version=version,
        )
51
+
52
+
53
def encode_filter_table(filter_types: list[int]) -> bytes:
    """Pack the per-row filter types into a byte string, one byte per row."""
    table = bytes(filter_types)
    return table
55
+
56
+
57
def decode_filter_table(data: bytes) -> list[int]:
    """Unpack a filter table into a list with one filter type per byte."""
    return [filter_type for filter_type in data]
59
+
60
+
61
def encode_huffman_table(codes: dict, original_length: int, padding: int) -> bytes:
    """Serialize a Huffman code table.

    Layout: ``original_length`` (4 bytes, big-endian), ``padding`` (1 byte),
    entry count (2 bytes, big-endian), then for each symbol in sorted order
    a symbol byte, a code-length byte and the code bits left-aligned and
    zero-padded to whole bytes.

    Args:
        codes: mapping of symbol to its code as a string of '0'/'1'.
        original_length: stored verbatim in the table header.
        padding: stored verbatim in the table header.
    """
    chunks = [
        struct.pack('>IB', original_length, padding),  # 4 bytes length + 1 byte padding
        struct.pack('>H', len(codes)),                 # number of entries
    ]

    for symbol, bits in sorted(codes.items()):
        bit_count = len(bits)
        padded_width = (bit_count + 7) // 8 * 8
        aligned = bits.ljust(padded_width, '0')
        packed = bytes(
            int(aligned[start:start + 8], 2)
            for start in range(0, len(aligned), 8)
        )
        chunks.append(struct.pack('BB', symbol, bit_count))
        chunks.append(packed)

    return b''.join(chunks)
75
+
76
+
77
def decode_huffman_table(data: bytes) -> tuple[dict, int, int, int]:
    """Parse a Huffman code table from the start of ``data``.

    Returns:
        ``(codes, original_length, padding, table_size)`` where ``codes``
        maps each symbol to its code as a string of '0'/'1' and
        ``table_size`` is the number of bytes the table occupied.
    """
    # Table header: length (I) + padding (B) + entry count (H), big-endian.
    original_length, padding, entry_count = struct.unpack_from('>IBH', data, 0)
    cursor = 7

    codes = {}
    for _ in range(entry_count):
        symbol, bit_count = struct.unpack('BB', data[cursor:cursor + 2])
        cursor += 2

        byte_count = (bit_count + 7) // 8
        packed = data[cursor:cursor + byte_count]
        cursor += byte_count

        # Codes are stored left-aligned; drop the zero padding on the right.
        bits = ''.join(format(byte, '08b') for byte in packed)
        codes[symbol] = bits[:bit_count]

    return codes, original_length, padding, cursor