dipcompress 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dipcompress-0.1.0/PKG-INFO +26 -0
- dipcompress-0.1.0/README.md +1 -0
- dipcompress-0.1.0/dipcompress/__init__.py +12 -0
- dipcompress-0.1.0/dipcompress/__main__.py +3 -0
- dipcompress-0.1.0/dipcompress/_c_ext/build.py +42 -0
- dipcompress-0.1.0/dipcompress/_c_ext/compress.c +98 -0
- dipcompress-0.1.0/dipcompress/_c_ext/setup.py +0 -0
- dipcompress-0.1.0/dipcompress/bitstream.py +0 -0
- dipcompress-0.1.0/dipcompress/cli.py +108 -0
- dipcompress-0.1.0/dipcompress/decoder.py +70 -0
- dipcompress-0.1.0/dipcompress/encoder.py +95 -0
- dipcompress-0.1.0/dipcompress/filters.py +128 -0
- dipcompress-0.1.0/dipcompress/format.py +99 -0
- dipcompress-0.1.0/dipcompress/huffman.py +122 -0
- dipcompress-0.1.0/dipcompress/image_io.py +36 -0
- dipcompress-0.1.0/dipcompress/lz77.py +97 -0
- dipcompress-0.1.0/dipcompress/lz77_fast.py +23 -0
- dipcompress-0.1.0/dipcompress/rle.py +48 -0
- dipcompress-0.1.0/dipcompress.egg-info/PKG-INFO +26 -0
- dipcompress-0.1.0/dipcompress.egg-info/SOURCES.txt +31 -0
- dipcompress-0.1.0/dipcompress.egg-info/dependency_links.txt +1 -0
- dipcompress-0.1.0/dipcompress.egg-info/entry_points.txt +2 -0
- dipcompress-0.1.0/dipcompress.egg-info/requires.txt +5 -0
- dipcompress-0.1.0/dipcompress.egg-info/top_level.txt +1 -0
- dipcompress-0.1.0/pyproject.toml +56 -0
- dipcompress-0.1.0/setup.cfg +4 -0
- dipcompress-0.1.0/setup.py +25 -0
- dipcompress-0.1.0/tests/test_filters.py +52 -0
- dipcompress-0.1.0/tests/test_huffman.py +41 -0
- dipcompress-0.1.0/tests/test_image_io.py +30 -0
- dipcompress-0.1.0/tests/test_lz77.py +50 -0
- dipcompress-0.1.0/tests/test_rle.py +39 -0
- dipcompress-0.1.0/tests/test_roundtrip.py +64 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dipcompress
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lossless image compression using LZ77 + Huffman coding, with an optional C extension.
|
|
5
|
+
Author-email: Subhadip Mondal <subhadipmondal789@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/16bitOni/dipcompress
|
|
8
|
+
Project-URL: Repository, https://github.com/16bitOni/dipcompress
|
|
9
|
+
Project-URL: Issues, https://github.com/16bitOni/dipcompress/issues
|
|
10
|
+
Keywords: image,compression,lz77,huffman,lossless
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: C
|
|
17
|
+
Classifier: Topic :: Multimedia :: Graphics
|
|
18
|
+
Classifier: Topic :: System :: Archiving :: Compression
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: numpy>=2.0
|
|
22
|
+
Requires-Dist: pillow>=10.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest>=9.0; extra == "dev"
|
|
25
|
+
|
|
26
|
+
DipCompress is my attempt to implement PNG-style image compression from scratch.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
DipCompress is my attempt to implement PNG-style image compression from scratch.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# dipcompress/__init__.py
|
|
2
|
+
# This is what users see when they do: import dipcompress
|
|
3
|
+
|
|
4
|
+
__version__ = "0.1.0"
|
|
5
|
+
__author__ = "Subhadip"
|
|
6
|
+
|
|
7
|
+
from .encoder import compress
|
|
8
|
+
from .decoder import decompress
|
|
9
|
+
from .image_io import load_image, save_image, pixels_to_bytes, bytes_to_pixels
|
|
10
|
+
from .rle import rle_encode, rle_decode
|
|
11
|
+
|
|
12
|
+
__all__ = ["compress", "decompress", "load_image", "save_image", "pixels_to_bytes", "bytes_to_pixels", "rle_encode", "rle_decode"]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Build the lz77_cext Python extension module.
|
|
3
|
+
|
|
4
|
+
Usage (run from the repo root):
|
|
5
|
+
python dipcompress/_c_ext/build.py
|
|
6
|
+
|
|
7
|
+
Output: dipcompress/lz77_cext.<platform-tag>.pyd (Windows)
|
|
8
|
+
dipcompress/lz77_cext.<platform-tag>.so (Linux/macOS)
|
|
9
|
+
|
|
10
|
+
The built module is placed in-place inside the dipcompress package so that
|
|
11
|
+
`from dipcompress import lz77_cext` works immediately without installation.
|
|
12
|
+
"""
|
|
13
|
+
import os
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
# Run from the repo root so setuptools resolves relative source paths correctly.
|
|
18
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
|
|
19
|
+
os.chdir(REPO_ROOT)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def build():
    """Compile the ``dipcompress.lz77_cext`` extension in place.

    Drives setuptools' ``build_ext`` command programmatically on a throwaway
    ``Distribution`` that contains only the LZ77 C extension, then prints a
    confirmation message. setuptools is imported inside the function.
    """
    from setuptools import Extension
    from setuptools.command.build_ext import build_ext
    from setuptools.dist import Distribution

    lz77_extension = Extension(
        'dipcompress.lz77_cext',
        sources=['dipcompress/_c_ext/compress.c'],
    )
    distribution = Distribution(attrs={'ext_modules': [lz77_extension]})

    builder = build_ext(distribution)
    # inplace puts the resulting .pyd/.so next to the package sources
    builder.inplace = True
    builder.ensure_finalized()
    builder.run()

    print('\nBuild complete — lz77_cext is ready.')


if __name__ == '__main__':
    build()
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
#define PY_SSIZE_T_CLEAN
|
|
2
|
+
#include <Python.h>
|
|
3
|
+
#include <stdint.h>
|
|
4
|
+
#include <stdlib.h>
|
|
5
|
+
|
|
6
|
+
#define WINDOW_SIZE 4096
|
|
7
|
+
#define LOOKAHEAD 18
|
|
8
|
+
|
|
9
|
+
/*
 * Core LZ77 encode (the original author notes it mirrors lz77.py).
 *
 * Output: raw token stream, 4 bytes per token:
 *   [distance_hi, distance_lo, length, next_byte]
 * Literals have distance=0 and length=0.
 *
 * Matches are searched in the previous WINDOW_SIZE bytes, are at most
 * LOOKAHEAD bytes long, and never overlap the current position.
 *
 * input / input_len   : bytes to encode.
 * output / output_max : destination buffer and its capacity in bytes.
 *
 * Returns the number of bytes written, or -1 if the next token would not
 * fit in output_max bytes (nothing is written past the buffer).
 */
static int
_lz77_encode(const uint8_t *input, int input_len,
             uint8_t *output, int output_max)
{
    int pos = 0, out_pos = 0;

    while (pos < input_len) {
        /* Every token is exactly 4 bytes; refuse to overrun the buffer. */
        if (out_pos > output_max - 4)
            return -1;

        int wstart = pos - WINDOW_SIZE;
        if (wstart < 0) wstart = 0;

        int lend = pos + LOOKAHEAD;
        if (lend > input_len) lend = input_len;
        int llen = lend - pos;

        int best_dist = 0, best_len = 0;

        for (int i = wstart; i < pos; i++) {
            int ml = 0;
            /* i + ml < pos keeps the match from running into the lookahead. */
            while (ml < llen && i + ml < pos && input[i + ml] == input[pos + ml])
                ml++;
            if (ml > best_len) {
                best_len = ml;
                best_dist = pos - i;
                /* Only a strictly longer match can replace this one, and
                 * none can exceed llen, so the search result is final. */
                if (best_len == llen)
                    break;
            }
        }

        if (best_len >= 3) {
            int np = pos + best_len;
            /* A match that reaches the end of input gives up its last byte
             * so that the token still has a real next_byte to carry. */
            if (np >= input_len) { best_len--; np--; }

            output[out_pos++] = (best_dist >> 8) & 0xFF;
            output[out_pos++] = best_dist & 0xFF;
            output[out_pos++] = (uint8_t)best_len;
            output[out_pos++] = input[np];
            pos += best_len + 1;
        } else {
            /* Literal token: distance 0, length 0, then the byte itself. */
            output[out_pos++] = 0;
            output[out_pos++] = 0;
            output[out_pos++] = 0;
            output[out_pos++] = input[pos];
            pos++;
        }
    }
    return out_pos;
}
|
|
61
|
+
|
|
62
|
+
/* ── Python wrapper ───────────────────────────────────────────────────── */
|
|
63
|
+
|
|
64
|
+
/*
 * lz77_encode(data: bytes) -> bytes
 *
 * Python-callable wrapper: parses one bytes-like argument, runs the C
 * encoder into a temporary heap buffer and returns the token stream as a
 * new bytes object.
 *
 * Raises OverflowError when the input is too large for the encoder's int
 * indices, MemoryError when the scratch buffer cannot be allocated.
 */
static PyObject *
py_lz77_encode(PyObject *self, PyObject *args)
{
    const uint8_t *input;
    Py_ssize_t input_len;

    if (!PyArg_ParseTuple(args, "y#", &input, &input_len))
        return NULL;

    /* The encoder works with int lengths and needs 4 output bytes per input
     * byte (+4 slack); reject sizes for which that computation would
     * overflow instead of silently truncating the cast below. */
    if (input_len > (Py_ssize_t)((INT_MAX - 4) / 4)) {
        PyErr_SetString(PyExc_OverflowError,
                        "lz77_encode: input too large for the C encoder");
        return NULL;
    }

    int output_max = (int)input_len * 4 + 4;
    uint8_t *output = (uint8_t *)malloc((size_t)output_max);
    if (!output)
        return PyErr_NoMemory();

    int n = _lz77_encode(input, (int)input_len, output, output_max);
    if (n < 0) {
        /* Defensive: treat a negative count as an encoder failure rather
         * than passing it on as a size. */
        free(output);
        PyErr_SetString(PyExc_RuntimeError,
                        "lz77_encode: output buffer too small");
        return NULL;
    }

    PyObject *result = PyBytes_FromStringAndSize((char *)output, n);
    free(output);
    return result;
}
|
|
83
|
+
|
|
84
|
+
static PyMethodDef lz77_methods[] = {
|
|
85
|
+
{"lz77_encode", py_lz77_encode, METH_VARARGS,
|
|
86
|
+
"lz77_encode(data: bytes) -> bytes — raw 4-byte-per-token stream"},
|
|
87
|
+
{NULL, NULL, 0, NULL}
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
static struct PyModuleDef lz77_module = {
|
|
91
|
+
PyModuleDef_HEAD_INIT, "lz77_cext", NULL, -1, lz77_methods
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
PyMODINIT_FUNC
|
|
95
|
+
PyInit_lz77_cext(void)
|
|
96
|
+
{
|
|
97
|
+
return PyModule_Create(&lz77_module);
|
|
98
|
+
}
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from . import __version__
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _cmd_compress(args):
    """Handle the ``compress`` sub-command.

    Exits with status 1 when ``args.input`` is not an existing file.
    Otherwise runs the encoder and, unless verbose output was requested,
    prints a one-line size summary.
    """
    from .encoder import compress

    source = args.input
    if not os.path.isfile(source):
        print(f"error: input file not found: {source}", file=sys.stderr)
        sys.exit(1)

    stats = compress(source, args.output, verbose=args.verbose)
    if args.verbose:
        # the encoder has already printed its own detailed report
        return

    savings = stats['savings_pct']
    # '-' marks a size reduction, '+' marks growth
    sign = '-' if savings >= 0 else '+'
    dimensions = f"{stats['width']}x{stats['height']} "
    sizes = f"{stats['original_size']:,} B -> {stats['compressed_size']:,} B "
    change = f"({sign}{abs(savings):.1f}%)"
    print(dimensions + sizes + change)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _cmd_decompress(args):
    """Handle the ``decompress`` sub-command.

    Exits with status 1 when ``args.input`` is not an existing file.
    Otherwise decodes it to ``args.output`` and, unless verbose output was
    requested, prints where the image was saved.
    """
    from .decoder import decompress

    source, target = args.input, args.output
    if not os.path.isfile(source):
        print(f"error: input file not found: {source}", file=sys.stderr)
        sys.exit(1)

    decompress(source, target, verbose=args.verbose)

    if args.verbose:
        return
    print(f"saved -> {target}")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _cmd_info(args):
    """Handle the ``info`` sub-command: print metadata stored in a .dip file.

    Reads the header and the per-row filter table (one byte per image row,
    directly after the header) and prints dimensions, sizes, compression
    ratio, filter usage and format version.

    Exits with status 1 when the file does not exist or its header cannot
    be parsed.
    """
    import struct
    from collections import Counter

    from .format import DipHeader, HEADER_SIZE, decode_filter_table

    if not os.path.isfile(args.file):
        print(f"error: file not found: {args.file}", file=sys.stderr)
        sys.exit(1)

    with open(args.file, 'rb') as f:
        raw = f.read()

    try:
        header = DipHeader.from_bytes(raw)
    except (ValueError, struct.error) as exc:
        # ValueError: wrong magic; struct.error: file shorter than a header.
        print(f"error: not a valid .dip file: {args.file} ({exc})", file=sys.stderr)
        sys.exit(1)

    filter_bytes = raw[HEADER_SIZE: HEADER_SIZE + header.height]
    filter_types = decode_filter_table(filter_bytes)
    filter_names = {0: 'None', 1: 'Sub', 2: 'Up', 3: 'Average', 4: 'Paeth'}
    filter_counts = {filter_names.get(k, k): v
                     for k, v in Counter(filter_types).most_common()}

    channels_label = {1: 'grayscale', 3: 'RGB', 4: 'RGBA'}.get(header.channels, str(header.channels))
    raw_size = header.width * header.height * header.channels

    print(f"File : {args.file}")
    print(f"Dimensions : {header.width} x {header.height} ({channels_label})")
    print(f"Raw size : {raw_size:,} bytes ({raw_size/1024:.1f} KB)")
    print(f"Stored size : {len(raw):,} bytes ({len(raw)/1024:.1f} KB)")
    if raw_size:
        ratio = len(raw) / raw_size
        print(f"Ratio : {ratio:.3f} ({(1-ratio)*100:.1f}% smaller)")
    else:
        # A header declaring zero pixels has no meaningful ratio.
        print("Ratio : n/a (header declares zero pixels)")
    print(f"Row filters : {filter_counts}")
    print(f"Format ver : {header.version}")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def build_parser():
    """Create the ``dipcompress`` argument parser.

    The parser has a ``--version`` flag and three mandatory sub-commands
    (``compress``/``c``, ``decompress``/``d``, ``info``/``i``). Each
    sub-command stores its handler function in ``func``.
    """
    parser = argparse.ArgumentParser(
        prog='dipcompress',
        description='DipCompress — lossless image compression.',
    )
    parser.add_argument('--version', action='version', version=f'dipcompress {__version__}')

    commands = parser.add_subparsers(dest='command', metavar='command', required=True)

    # compress
    compress_cmd = commands.add_parser(
        'compress',
        aliases=['c'],
        help='compress an image to .dip format',
    )
    compress_cmd.add_argument('input', help='input image (PNG, JPEG, …)')
    compress_cmd.add_argument('output', help='output .dip file')
    compress_cmd.add_argument('-v', '--verbose', action='store_true')
    compress_cmd.set_defaults(func=_cmd_compress)

    # decompress
    decompress_cmd = commands.add_parser(
        'decompress',
        aliases=['d'],
        help='decompress a .dip file back to an image',
    )
    decompress_cmd.add_argument('input', help='input .dip file')
    decompress_cmd.add_argument('output', help='output image (PNG recommended)')
    decompress_cmd.add_argument('-v', '--verbose', action='store_true')
    decompress_cmd.set_defaults(func=_cmd_decompress)

    # info
    info_cmd = commands.add_parser(
        'info',
        aliases=['i'],
        help='show metadata stored in a .dip file',
    )
    info_cmd.add_argument('file', help='.dip file to inspect')
    info_cmd.set_defaults(func=_cmd_info)

    return parser
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def main():
    """Parse the command line and dispatch to the selected sub-command."""
    namespace = build_parser().parse_args()
    handler = namespace.func
    handler(namespace)


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# dipcompress/decoder.py
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from .image_io import save_image, bytes_to_pixels
|
|
5
|
+
from .filters import remove_filters
|
|
6
|
+
from .lz77 import bytes_to_tokens, lz77_decode
|
|
7
|
+
from .huffman import huffman_decode
|
|
8
|
+
from .format import DipHeader, HEADER_SIZE, decode_filter_table, decode_huffman_table
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def decompress(input_path: str, output_path: str, verbose: bool = False) -> None:
    """
    Decode a .dip file and write the reconstructed image.

    The file is read as: header, filter table (one byte per row), Huffman
    table, Huffman-coded LZ77 token stream. The stages are undone in
    reverse order (Huffman -> LZ77 -> row filters).

    Args:
        input_path: path to the .dip file
        output_path: path to write the reconstructed image (PNG recommended)
        verbose: print progress information
    """
    with open(input_path, 'rb') as src:
        blob = src.read()

    # Header
    if verbose:
        print("Parsing header...")
    header = DipHeader.from_bytes(blob)
    width, height, channels = header.width, header.height, header.channels

    # Filter table sits directly after the header, one byte per row
    table_start = HEADER_SIZE
    table_end = table_start + height
    filter_types = decode_filter_table(blob[table_start:table_end])

    # Huffman table, followed by the coded bitstream
    if verbose:
        print("Reading Huffman table...")
    payload = blob[table_end:]
    codes, original_length, padding, table_size = decode_huffman_table(payload)
    bitstream = payload[table_size:]

    if verbose:
        print("Huffman decoding...")
    lz77_bytes = huffman_decode(
        bitstream,
        {
            'codes': codes,
            'original_length': original_length,
            'padding': padding,
        },
    )

    if verbose:
        print("LZ77 decoding...")
    filtered_data = lz77_decode(bytes_to_tokens(lz77_bytes))

    if verbose:
        print("Removing filters...")
    stride = width * channels
    tagged_rows = []
    for row_index, filter_type in enumerate(filter_types):
        start = row_index * stride
        tagged_rows.append((filter_type, filtered_data[start:start + stride]))
    pixels = remove_filters(tagged_rows, width, channels)

    # Unknown channel counts fall back to RGB
    mode = {1: 'L', 3: 'RGB', 4: 'RGBA'}.get(channels, 'RGB')
    save_image(pixels, output_path, mode=mode)

    if verbose:
        print(f"\n✓ Decompression complete!")
        print(f" Image: {width}x{height} {mode}")
        print(f" Saved to: {output_path}")
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# dipcompress/encoder.py
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from .image_io import load_image, pixels_to_bytes
|
|
5
|
+
from .filters import apply_filters
|
|
6
|
+
from .lz77_fast import lz77_encode, using_c as _lz77_using_c
|
|
7
|
+
from .lz77 import tokens_to_bytes
|
|
8
|
+
from .huffman import huffman_encode
|
|
9
|
+
from .format import DipHeader, COMPRESS_DEFLATE, encode_filter_table, encode_huffman_table
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def compress(input_path: str, output_path: str, verbose: bool = False) -> dict:
    """
    Compress an image file to .dip format.

    Pipeline: load pixels -> per-row filtering -> LZ77 -> Huffman. The
    output file is written as header, filter table (one byte per row),
    Huffman table, Huffman bitstream.

    Args:
        input_path: path to PNG/JPEG/any PIL-supported image
        output_path: path to write the .dip file
        verbose: print progress information

    Returns:
        dict with compression statistics: ``original_size``,
        ``compressed_size``, ``ratio``, ``savings_pct``, ``width``,
        ``height``, ``channels`` and ``filter_stats``.
    """
    if verbose:
        print(f"Loading {input_path}...")
    pixels, meta = load_image(input_path)
    width = meta['width']
    height = meta['height']
    channels = meta['channels']
    original_size = width * height * channels

    if verbose:
        print("Applying filters...")
    filtered_rows = apply_filters(pixels)
    filter_types = [ft for ft, _ in filtered_rows]
    filtered_data = b''.join(row for _, row in filtered_rows)

    if verbose:
        backend = "C" if _lz77_using_c else "Python"
        print(f"Running LZ77 ({backend})...")
    lz77_tokens = lz77_encode(filtered_data)
    lz77_bytes = tokens_to_bytes(lz77_tokens)

    if verbose:
        print("Running Huffman coding...")
    huffman_compressed, huffman_meta = huffman_encode(lz77_bytes)
    codes = huffman_meta['codes']
    original_length = huffman_meta['original_length']
    padding = huffman_meta['padding']

    if verbose:
        print("Assembling .dip file...")

    filter_table_bytes = encode_filter_table(filter_types)
    huffman_table_bytes = encode_huffman_table(codes, original_length, padding)

    payload = huffman_table_bytes + huffman_compressed

    header = DipHeader(
        width=width,
        height=height,
        channels=channels,
        compression_mode=COMPRESS_DEFLATE,
        compressed_length=len(payload)
    )
    # Serialize once, and before opening the output, so that a header that
    # cannot be packed does not leave a truncated file behind.
    header_bytes = header.to_bytes()

    with open(output_path, 'wb') as f:
        f.write(header_bytes)
        f.write(filter_table_bytes)
        f.write(payload)

    compressed_size = len(header_bytes) + len(filter_table_bytes) + len(payload)

    stats = {
        'original_size': original_size,
        'compressed_size': compressed_size,
        'ratio': compressed_size / original_size,
        'savings_pct': (1 - compressed_size / original_size) * 100,
        'width': width,
        'height': height,
        'channels': channels,
        'filter_stats': _count_filters(filter_types),
    }

    if verbose:
        print(f"\n✓ Compression complete!")
        print(f" Original: {original_size:,} bytes ({original_size/1024:.1f} KB)")
        print(f" Compressed: {compressed_size:,} bytes ({compressed_size/1024:.1f} KB)")
        print(f" Ratio: {stats['ratio']:.3f} ({stats['savings_pct']:.1f}% smaller)")

    return stats
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _count_filters(filter_types: list) -> dict:
|
|
92
|
+
from collections import Counter
|
|
93
|
+
filter_names = {0: 'None', 1: 'Sub', 2: 'Up', 3: 'Average', 4: 'Paeth'}
|
|
94
|
+
counts = Counter(filter_types)
|
|
95
|
+
return {filter_names.get(k, k): v for k, v in counts.items()}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def filter_sub(row: bytes) -> bytes:
    """Apply the Sub filter to one row.

    The first byte is kept as-is; every later byte is replaced by its
    difference to the byte immediately before it, modulo 256. An empty row
    yields an empty result.
    """
    values = list(row)
    if not values:
        return b''
    deltas = [(cur - left) % 256 for left, cur in zip(values, values[1:])]
    return bytes([values[0], *deltas])
|
|
10
|
+
|
|
11
|
+
def unfilter_sub(row: bytes) -> bytes:
    """Reverse the Sub filter for one row.

    The first byte is kept as-is; every later byte is the stored delta plus
    the previously reconstructed byte, modulo 256. An empty row yields an
    empty result.
    """
    values = list(row)
    if not values:
        return b''
    restored = [values[0]]
    for delta in values[1:]:
        # add the delta to the byte just reconstructed, not to the raw input
        restored.append((delta + restored[-1]) % 256)
    return bytes(restored)
|
|
18
|
+
|
|
19
|
+
def filter_up(row: bytes, prev_row: bytes) -> bytes:
    """Apply the Up filter: each byte minus the byte above it, modulo 256.

    Bytes are paired with ``zip``, so the result is as long as the shorter
    of the two rows.
    """
    return bytes((cur - above) % 256 for cur, above in zip(row, prev_row))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def unfilter_up(row: bytes, prev_row: bytes) -> bytes:
    """Reverse the Up filter: each stored byte plus the byte above it, modulo 256.

    Bytes are paired with ``zip``, so the result is as long as the shorter
    of the two rows.
    """
    return bytes((delta + above) % 256 for delta, above in zip(row, prev_row))
|
|
33
|
+
|
|
34
|
+
def paeth_predictor(a: int, b: int, c: int) -> int:
    """Return whichever of ``a``, ``b``, ``c`` is closest to ``a + b - c``.

    Ties are resolved in the order ``a``, then ``b``, then ``c``.
    """
    estimate = a + b - c
    dist_a = abs(estimate - a)
    dist_b = abs(estimate - b)
    dist_c = abs(estimate - c)

    if dist_a <= dist_b and dist_a <= dist_c:
        return a
    return b if dist_b <= dist_c else c
|
|
46
|
+
|
|
47
|
+
def filter_paeth(row: bytes, prev_row: bytes) -> bytes:
    """Apply the Paeth filter to one row.

    Each byte is replaced by its difference (modulo 256) to the Paeth
    prediction computed from the byte to its left, the byte above it and
    the byte above-left. For the first byte, left and above-left are 0.
    """
    encoded = bytearray()
    for idx, value in enumerate(row):
        if idx > 0:
            left, upper_left = row[idx - 1], prev_row[idx - 1]
        else:
            left = upper_left = 0
        above = prev_row[idx]
        prediction = paeth_predictor(left, above, upper_left)
        encoded.append((value - prediction) % 256)
    return bytes(encoded)
|
|
57
|
+
|
|
58
|
+
def unfilter_paeth(row: bytes, prev_row: bytes) -> bytes:
    """Reverse the Paeth filter for one row.

    Each stored byte is added (modulo 256) to the Paeth prediction computed
    from the already reconstructed byte to its left, the byte above it and
    the byte above-left. For the first byte, left and above-left are 0.
    """
    decoded = bytearray()
    for idx, delta in enumerate(row):
        if idx > 0:
            # "left" comes from the reconstructed output, not the stored row
            left, upper_left = decoded[idx - 1], prev_row[idx - 1]
        else:
            left = upper_left = 0
        above = prev_row[idx]
        prediction = paeth_predictor(left, above, upper_left)
        decoded.append((delta + prediction) % 256)
    return bytes(decoded)
|
|
68
|
+
|
|
69
|
+
def best_filter_for_row(row: bytes, prev_row: bytes) -> tuple[int, bytes]:
    """Choose the filter that gives the smallest residuals for one row.

    Candidates are None (0), Sub (1), Up (2) and Paeth (4); the Average
    filter (3) is not tried. Each candidate is scored by the sum of
    ``min(v, 256 - v)`` over its bytes, i.e. each byte's distance from zero
    when treated as a wrapped signed value. Ties go to the lowest-numbered
    filter.

    Args:
        row: raw bytes of the current row
        prev_row: raw bytes of the row above

    Returns:
        ``(filter_type, filtered_row)`` for the winning candidate.
    """
    candidates = {
        0: row,
        1: filter_sub(row),
        2: filter_up(row, prev_row),
        4: filter_paeth(row, prev_row),
    }

    def score(filtered: bytes) -> int:
        return sum(min(v, 256 - v) for v in filtered)

    # min() returns the first minimum, and the dict iterates in insertion
    # order, so equal scores resolve to the lowest filter id.
    best_type = min(candidates, key=lambda t: score(candidates[t]))
    return best_type, candidates[best_type]
|
|
82
|
+
|
|
83
|
+
def apply_filters(pixels: np.ndarray) -> list[tuple[int, bytes]]:
    """Filter every row of an image with its best-scoring filter.

    Args:
        pixels: image array; iterated row by row along its first axis.
            A 2-D array is used as-is, anything else has each row
            flattened to a byte string first.

    Returns:
        One ``(filter_type, filtered_bytes)`` tuple per row, in row order.
        An image with no rows yields an empty list.
    """
    if pixels.ndim == 2:
        rows = [bytes(row) for row in pixels]
    else:
        rows = [bytes(row.flatten()) for row in pixels]

    if not rows:
        return []

    result = []
    # The first row is filtered against an all-zero "previous" row.
    prev_row = bytes(len(rows[0]))

    for row in rows:
        filter_type, filtered = best_filter_for_row(row, prev_row)
        result.append((filter_type, filtered))
        # later rows are predicted from the raw (unfiltered) row above
        prev_row = row

    return result
|
|
100
|
+
|
|
101
|
+
def remove_filters(filtered_rows: list[tuple[int, bytes]], width: int, channels: int) -> np.ndarray:
    """Undo the per-row filters and rebuild the pixel array.

    Args:
        filtered_rows: ``(filter_type, row_bytes)`` per row, top to bottom
        width: image width in pixels
        channels: bytes per pixel

    Returns:
        uint8 array shaped ``(rows, width)`` when ``channels == 1``,
        otherwise ``(rows, width, channels)``.

    Raises:
        ValueError: if a row uses a filter type other than 0, 1, 2 or 4.
    """
    decoded_rows = []
    # the first row is reconstructed against an all-zero row
    above = bytes(width * channels)

    for kind, payload in filtered_rows:
        if kind == 0:
            decoded = payload
        elif kind == 1:
            decoded = unfilter_sub(payload)
        elif kind == 2:
            decoded = unfilter_up(payload, above)
        elif kind == 4:
            decoded = unfilter_paeth(payload, above)
        else:
            raise ValueError(f"Unknown filter type: {kind}")

        decoded_rows.append(decoded)
        above = decoded

    flat = np.frombuffer(b''.join(decoded_rows), dtype=np.uint8)
    row_count = len(decoded_rows)

    if channels == 1:
        return flat.reshape(row_count, width)
    return flat.reshape(row_count, width, channels)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import struct
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
MAGIC = b'DIP\x01'
|
|
6
|
+
VERSION = 1
|
|
7
|
+
|
|
8
|
+
COMPRESS_RLE = 0X01
|
|
9
|
+
COMPRESS_DEFLATE = 0X02
|
|
10
|
+
|
|
11
|
+
HEADER_FORMAT = '>4sHIIBBI'
|
|
12
|
+
|
|
13
|
+
HEADER_SIZE = struct.calcsize(HEADER_FORMAT)
|
|
14
|
+
|
|
15
|
+
@dataclass
class DipHeader:
    """Fixed-size header at the start of a .dip file.

    Serialized with ``HEADER_FORMAT`` as: magic, version, width, height,
    channels, compression mode, compressed length.
    """

    width: int
    height: int
    channels: int
    compression_mode: int
    compressed_length: int
    version: int = VERSION

    def to_bytes(self) -> bytes:
        """Pack the header, prefixed with ``MAGIC``, into ``HEADER_SIZE`` bytes."""
        fields = (
            MAGIC,
            self.version,
            self.width,
            self.height,
            self.channels,
            self.compression_mode,
            self.compressed_length,
        )
        return struct.pack(HEADER_FORMAT, *fields)

    @staticmethod
    def from_bytes(data: bytes) -> 'DipHeader':
        """Parse a header from the first ``HEADER_SIZE`` bytes of ``data``.

        Raises:
            ValueError: if the magic bytes do not match ``MAGIC``.
        """
        unpacked = struct.unpack(HEADER_FORMAT, data[:HEADER_SIZE])
        magic, version, width, height, channels, mode, comp_len = unpacked

        if magic != MAGIC:
            raise ValueError(f"Not a DipCompress file! Magic: {magic}")

        return DipHeader(
            width=width,
            height=height,
            channels=channels,
            compression_mode=mode,
            compressed_length=comp_len,
            version=version,
        )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def encode_filter_table(filter_types: list[int]) -> bytes:
    """Serialize the per-row filter ids as one byte per row."""
    table = bytes(filter_types)
    return table
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def decode_filter_table(data: bytes) -> list[int]:
    """Turn a filter table (one byte per row) back into a list of filter ids."""
    filter_types = list(data)
    return filter_types
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def encode_huffman_table(codes: dict, original_length: int, padding: int) -> bytes:
    """Serialize a Huffman code table.

    Layout: 4-byte big-endian ``original_length``, 1-byte ``padding``,
    2-byte big-endian entry count, then per symbol (in sorted order):
    1-byte symbol, 1-byte code length in bits, and the code bits
    left-aligned and zero-padded to whole bytes.

    Args:
        codes: mapping of symbol -> code as a string of '0'/'1' characters
        original_length: stored as-is in the table prefix
        padding: stored as-is in the table prefix
    """
    chunks = [
        struct.pack('>IB', original_length, padding),  # 4 bytes length + 1 byte padding
        struct.pack('>H', len(codes)),                 # number of entries
    ]

    for symbol in sorted(codes):
        bits = codes[symbol]
        bit_count = len(bits)
        byte_count = (bit_count + 7) // 8
        # pad on the right with zeros up to a whole number of bytes
        aligned = bits.ljust(byte_count * 8, '0')
        packed = bytes(
            int(aligned[start:start + 8], 2)
            for start in range(0, len(aligned), 8)
        )

        chunks.append(struct.pack('BB', symbol, bit_count))
        chunks.append(packed)

    return b''.join(chunks)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def decode_huffman_table(data: bytes) -> tuple[dict, int, int, int]:
    """Parse a Huffman code table from the start of ``data``.

    Expects a 4-byte big-endian length, 1-byte padding and 2-byte
    big-endian entry count, followed by one record per entry: 1-byte
    symbol, 1-byte code length in bits, then the code bits left-aligned in
    whole bytes.

    Returns:
        ``(codes, original_length, padding, bytes_consumed)`` where
        ``codes`` maps symbol -> code as a string of '0'/'1' characters and
        ``bytes_consumed`` is the offset just past the table.
    """
    original_length, padding = struct.unpack('>IB', data[0:5])
    (entry_count,) = struct.unpack('>H', data[5:7])
    cursor = 7

    codes = {}
    for _ in range(entry_count):
        symbol, bit_count = struct.unpack('BB', data[cursor:cursor + 2])
        cursor += 2

        byte_count = (bit_count + 7) // 8
        raw_code = data[cursor:cursor + byte_count]
        cursor += byte_count

        # expand to a bit string, then drop the right-hand zero padding
        bits = ''.join(format(byte, '08b') for byte in raw_code)
        codes[symbol] = bits[:bit_count]

    return codes, original_length, padding, cursor
|