mshzip 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mshzip-1.0.0/.gitignore +56 -0
- mshzip-1.0.0/LICENSE +21 -0
- mshzip-1.0.0/PKG-INFO +149 -0
- mshzip-1.0.0/README.md +131 -0
- mshzip-1.0.0/pyproject.toml +40 -0
- mshzip-1.0.0/src/mshzip/__init__.py +31 -0
- mshzip-1.0.0/src/mshzip/cli.py +315 -0
- mshzip-1.0.0/src/mshzip/constants.py +52 -0
- mshzip-1.0.0/src/mshzip/packer.py +203 -0
- mshzip-1.0.0/src/mshzip/parallel.py +115 -0
- mshzip-1.0.0/src/mshzip/stream.py +211 -0
- mshzip-1.0.0/src/mshzip/unpacker.py +117 -0
- mshzip-1.0.0/src/mshzip/varint.py +58 -0
- mshzip-1.0.0/tests/conftest.py +85 -0
- mshzip-1.0.0/tests/test_cli.py +128 -0
- mshzip-1.0.0/tests/test_compat.py +107 -0
- mshzip-1.0.0/tests/test_packer.py +132 -0
- mshzip-1.0.0/tests/test_parallel.py +160 -0
- mshzip-1.0.0/tests/test_roundtrip.py +123 -0
- mshzip-1.0.0/tests/test_stream.py +170 -0
- mshzip-1.0.0/tests/test_unpacker.py +101 -0
- mshzip-1.0.0/tests/test_varint.py +99 -0
mshzip-1.0.0/.gitignore
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# 의존성
|
|
2
|
+
node_modules/
|
|
3
|
+
.venv/
|
|
4
|
+
|
|
5
|
+
# 환경 변수
|
|
6
|
+
.env
|
|
7
|
+
.env.local
|
|
8
|
+
.env.*.local
|
|
9
|
+
|
|
10
|
+
# Claude Code 상태 파일 (세션별 임시)
|
|
11
|
+
.claude/current-command-node-*
|
|
12
|
+
.claude/planning-node-*
|
|
13
|
+
.claude/result-node-*
|
|
14
|
+
.claude/summary-written-*
|
|
15
|
+
.claude/cc-check-validated
|
|
16
|
+
.claude/cc-check-validated.bak
|
|
17
|
+
.claude/current-command-node.bak
|
|
18
|
+
.claude/session-summary.lock
|
|
19
|
+
.claude/server.log
|
|
20
|
+
.claude/settings.json.bak
|
|
21
|
+
.claude/logs/
|
|
22
|
+
|
|
23
|
+
# OS
|
|
24
|
+
Thumbs.db
|
|
25
|
+
Desktop.ini
|
|
26
|
+
.DS_Store
|
|
27
|
+
|
|
28
|
+
# IDE
|
|
29
|
+
.idea/
|
|
30
|
+
.vscode/
|
|
31
|
+
*.swp
|
|
32
|
+
*.swo
|
|
33
|
+
|
|
34
|
+
# Python
|
|
35
|
+
__pycache__/
|
|
36
|
+
*.pyc
|
|
37
|
+
.pytest_cache/
|
|
38
|
+
*.egg-info/
|
|
39
|
+
uv.lock
|
|
40
|
+
|
|
41
|
+
# Ralph
|
|
42
|
+
.ralph/
|
|
43
|
+
|
|
44
|
+
# 빌드
|
|
45
|
+
dist/
|
|
46
|
+
build/
|
|
47
|
+
|
|
48
|
+
# 테스트 임시 파일
|
|
49
|
+
demo-test-results.json
|
|
50
|
+
nodejs/test.txt
|
|
51
|
+
|
|
52
|
+
# npm lock (라이브러리 패키지)
|
|
53
|
+
nodejs/package-lock.json
|
|
54
|
+
|
|
55
|
+
# 내부 문서
|
|
56
|
+
배포.md
|
mshzip-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 brilante33
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
mshzip-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mshzip
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: 고정 청크 dedup + 엔트로피 압축 유틸 (MSH1 바이너리 포맷)
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: binary,compression,dedup,msh
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: System :: Archiving :: Compression
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# mshzip (Python)
|
|
20
|
+
|
|
21
|
+
고정 청크 dedup + 엔트로피 압축 유틸 — Python/UV 버전
|
|
22
|
+
|
|
23
|
+
MSH1 바이너리 포맷을 사용하며, Node.js 구현체와 100% 교차 호환됩니다.
|
|
24
|
+
표준 라이브러리만 사용 (외부 의존성 없음).
|
|
25
|
+
|
|
26
|
+
## 설치
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# uv (권장)
|
|
30
|
+
uv pip install mshzip
|
|
31
|
+
|
|
32
|
+
# pip
|
|
33
|
+
pip install mshzip
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## CLI 사용법
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# 압축
|
|
40
|
+
mshzip pack -i data.bin -o data.msh
|
|
41
|
+
mshzip pack -i data.bin -o data.msh --chunk 1024 --crc --verbose
|
|
42
|
+
|
|
43
|
+
# 해제
|
|
44
|
+
mshzip unpack -i data.msh -o data.bin
|
|
45
|
+
|
|
46
|
+
# 파일 정보
|
|
47
|
+
mshzip info -i data.msh
|
|
48
|
+
|
|
49
|
+
# 병렬 처리
|
|
50
|
+
mshzip multi pack file1.bin file2.bin file3.bin --out-dir ./compressed --workers 4
|
|
51
|
+
mshzip multi unpack compressed/*.msh --out-dir ./restored
|
|
52
|
+
|
|
53
|
+
# stdin/stdout 파이프
|
|
54
|
+
cat data.bin | mshzip pack -i - -o - > data.msh
|
|
55
|
+
mshzip unpack -i data.msh -o - | sha256sum
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Python API
|
|
59
|
+
|
|
60
|
+
### 간편 API
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
import mshzip
|
|
64
|
+
|
|
65
|
+
# 압축
|
|
66
|
+
compressed = mshzip.pack(b'hello world' * 100)
|
|
67
|
+
|
|
68
|
+
# 해제
|
|
69
|
+
original = mshzip.unpack(compressed)
|
|
70
|
+
|
|
71
|
+
# 옵션
|
|
72
|
+
compressed = mshzip.pack(data, chunk_size=1024, codec='gzip', crc=True)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Packer / Unpacker 클래스
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from mshzip import Packer, Unpacker
|
|
79
|
+
|
|
80
|
+
packer = Packer(chunk_size=256, codec='gzip', crc=True)
|
|
81
|
+
compressed = packer.pack(data)
|
|
82
|
+
|
|
83
|
+
unpacker = Unpacker()
|
|
84
|
+
restored = unpacker.unpack(compressed)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### 스트리밍 API
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
from mshzip import PackStream, UnpackStream, pack_stream, unpack_stream
|
|
91
|
+
|
|
92
|
+
# Generator 기반 스트리밍
|
|
93
|
+
ps = PackStream(chunk_size=128)
|
|
94
|
+
for frame in ps.feed(data):
|
|
95
|
+
    output.write(frame)
|
|
96
|
+
for frame in ps.flush():
|
|
97
|
+
    output.write(frame)
|
|
98
|
+
|
|
99
|
+
# 파일 I/O 편의 함수
|
|
100
|
+
with open('input.bin', 'rb') as inp, open('output.msh', 'wb') as out:
|
|
101
|
+
    stats = pack_stream(inp, out, chunk_size=256)
|
|
102
|
+
|
|
103
|
+
with open('output.msh', 'rb') as inp, open('restored.bin', 'wb') as out:
|
|
104
|
+
    stats = unpack_stream(inp, out)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### 병렬 처리
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from mshzip.parallel import WorkerPool, Task
|
|
111
|
+
|
|
112
|
+
pool = WorkerPool(4)
|
|
113
|
+
results = pool.run_all([
|
|
114
|
+
    Task(type='pack', input_path='a.bin', output_path='a.msh'),
|
|
115
|
+
    Task(type='pack', input_path='b.bin', output_path='b.msh'),
|
|
116
|
+
])
|
|
117
|
+
pool.shutdown()
|
|
118
|
+
|
|
119
|
+
for r in results:
|
|
120
|
+
    print(f'{r.success}: {r.input_size} -> {r.output_size} ({r.elapsed_ms}ms)')
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## CLI 옵션
|
|
124
|
+
|
|
125
|
+
| 옵션 | 기본값 | 설명 |
|
|
126
|
+
|------|--------|------|
|
|
127
|
+
| `--chunk <N>` | 128 | 청크 크기 (8 ~ 16,777,216B) |
|
|
128
|
+
| `--frame <N>` | 67108864 | 프레임당 최대 바이트 (64MB) |
|
|
129
|
+
| `--codec <종류>` | gzip | `gzip` 또는 `none` |
|
|
130
|
+
| `--crc` | off | CRC32 체크섬 추가 |
|
|
131
|
+
| `--verbose` | off | 상세 출력 |
|
|
132
|
+
| `--workers <N>` | CPU 코어 수 | 병렬 Worker 수 (multi 명령) |
|
|
133
|
+
|
|
134
|
+
## 테스트
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
# uv
|
|
138
|
+
uv run pytest
|
|
139
|
+
|
|
140
|
+
# pytest 직접
|
|
141
|
+
pytest tests/ -v
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
253개 테스트: varint(30) + packer(16) + unpacker(14) + roundtrip(131) + compat(32) + stream(16) + cli(7) + parallel(7)
|
|
145
|
+
|
|
146
|
+
## 요구 사항
|
|
147
|
+
|
|
148
|
+
- Python 3.10+
|
|
149
|
+
- 외부 의존성 없음 (표준 라이브러리만 사용)
|
mshzip-1.0.0/README.md
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# mshzip (Python)
|
|
2
|
+
|
|
3
|
+
고정 청크 dedup + 엔트로피 압축 유틸 — Python/UV 버전
|
|
4
|
+
|
|
5
|
+
MSH1 바이너리 포맷을 사용하며, Node.js 구현체와 100% 교차 호환됩니다.
|
|
6
|
+
표준 라이브러리만 사용 (외부 의존성 없음).
|
|
7
|
+
|
|
8
|
+
## 설치
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
# uv (권장)
|
|
12
|
+
uv pip install mshzip
|
|
13
|
+
|
|
14
|
+
# pip
|
|
15
|
+
pip install mshzip
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## CLI 사용법
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# 압축
|
|
22
|
+
mshzip pack -i data.bin -o data.msh
|
|
23
|
+
mshzip pack -i data.bin -o data.msh --chunk 1024 --crc --verbose
|
|
24
|
+
|
|
25
|
+
# 해제
|
|
26
|
+
mshzip unpack -i data.msh -o data.bin
|
|
27
|
+
|
|
28
|
+
# 파일 정보
|
|
29
|
+
mshzip info -i data.msh
|
|
30
|
+
|
|
31
|
+
# 병렬 처리
|
|
32
|
+
mshzip multi pack file1.bin file2.bin file3.bin --out-dir ./compressed --workers 4
|
|
33
|
+
mshzip multi unpack compressed/*.msh --out-dir ./restored
|
|
34
|
+
|
|
35
|
+
# stdin/stdout 파이프
|
|
36
|
+
cat data.bin | mshzip pack -i - -o - > data.msh
|
|
37
|
+
mshzip unpack -i data.msh -o - | sha256sum
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Python API
|
|
41
|
+
|
|
42
|
+
### 간편 API
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
import mshzip
|
|
46
|
+
|
|
47
|
+
# 압축
|
|
48
|
+
compressed = mshzip.pack(b'hello world' * 100)
|
|
49
|
+
|
|
50
|
+
# 해제
|
|
51
|
+
original = mshzip.unpack(compressed)
|
|
52
|
+
|
|
53
|
+
# 옵션
|
|
54
|
+
compressed = mshzip.pack(data, chunk_size=1024, codec='gzip', crc=True)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Packer / Unpacker 클래스
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from mshzip import Packer, Unpacker
|
|
61
|
+
|
|
62
|
+
packer = Packer(chunk_size=256, codec='gzip', crc=True)
|
|
63
|
+
compressed = packer.pack(data)
|
|
64
|
+
|
|
65
|
+
unpacker = Unpacker()
|
|
66
|
+
restored = unpacker.unpack(compressed)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### 스트리밍 API
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from mshzip import PackStream, UnpackStream, pack_stream, unpack_stream
|
|
73
|
+
|
|
74
|
+
# Generator 기반 스트리밍
|
|
75
|
+
ps = PackStream(chunk_size=128)
|
|
76
|
+
for frame in ps.feed(data):
|
|
77
|
+
    output.write(frame)
|
|
78
|
+
for frame in ps.flush():
|
|
79
|
+
    output.write(frame)
|
|
80
|
+
|
|
81
|
+
# 파일 I/O 편의 함수
|
|
82
|
+
with open('input.bin', 'rb') as inp, open('output.msh', 'wb') as out:
|
|
83
|
+
    stats = pack_stream(inp, out, chunk_size=256)
|
|
84
|
+
|
|
85
|
+
with open('output.msh', 'rb') as inp, open('restored.bin', 'wb') as out:
|
|
86
|
+
    stats = unpack_stream(inp, out)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### 병렬 처리
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
from mshzip.parallel import WorkerPool, Task
|
|
93
|
+
|
|
94
|
+
pool = WorkerPool(4)
|
|
95
|
+
results = pool.run_all([
|
|
96
|
+
    Task(type='pack', input_path='a.bin', output_path='a.msh'),
|
|
97
|
+
    Task(type='pack', input_path='b.bin', output_path='b.msh'),
|
|
98
|
+
])
|
|
99
|
+
pool.shutdown()
|
|
100
|
+
|
|
101
|
+
for r in results:
|
|
102
|
+
    print(f'{r.success}: {r.input_size} -> {r.output_size} ({r.elapsed_ms}ms)')
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## CLI 옵션
|
|
106
|
+
|
|
107
|
+
| 옵션 | 기본값 | 설명 |
|
|
108
|
+
|------|--------|------|
|
|
109
|
+
| `--chunk <N>` | 128 | 청크 크기 (8 ~ 16,777,216B) |
|
|
110
|
+
| `--frame <N>` | 67108864 | 프레임당 최대 바이트 (64MB) |
|
|
111
|
+
| `--codec <종류>` | gzip | `gzip` 또는 `none` |
|
|
112
|
+
| `--crc` | off | CRC32 체크섬 추가 |
|
|
113
|
+
| `--verbose` | off | 상세 출력 |
|
|
114
|
+
| `--workers <N>` | CPU 코어 수 | 병렬 Worker 수 (multi 명령) |
|
|
115
|
+
|
|
116
|
+
## 테스트
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# uv
|
|
120
|
+
uv run pytest
|
|
121
|
+
|
|
122
|
+
# pytest 직접
|
|
123
|
+
pytest tests/ -v
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
253개 테스트: varint(30) + packer(16) + unpacker(14) + roundtrip(131) + compat(32) + stream(16) + cli(7) + parallel(7)
|
|
127
|
+
|
|
128
|
+
## 요구 사항
|
|
129
|
+
|
|
130
|
+
- Python 3.10+
|
|
131
|
+
- 외부 의존성 없음 (표준 라이브러리만 사용)
|
mshzip-1.0.0/pyproject.toml
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mshzip"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "고정 청크 dedup + 엔트로피 압축 유틸 (MSH1 바이너리 포맷)"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = { text = "MIT" }
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
keywords = ["compression", "dedup", "binary", "msh"]
|
|
9
|
+
classifiers = [
|
|
10
|
+
"Development Status :: 4 - Beta",
|
|
11
|
+
"Intended Audience :: Developers",
|
|
12
|
+
"License :: OSI Approved :: MIT License",
|
|
13
|
+
"Programming Language :: Python :: 3.10",
|
|
14
|
+
"Programming Language :: Python :: 3.11",
|
|
15
|
+
"Programming Language :: Python :: 3.12",
|
|
16
|
+
"Programming Language :: Python :: 3.13",
|
|
17
|
+
"Topic :: System :: Archiving :: Compression",
|
|
18
|
+
]
|
|
19
|
+
dependencies = []
|
|
20
|
+
|
|
21
|
+
[project.scripts]
|
|
22
|
+
mshzip = "mshzip.cli:main"
|
|
23
|
+
|
|
24
|
+
[build-system]
|
|
25
|
+
requires = ["hatchling"]
|
|
26
|
+
build-backend = "hatchling.build"
|
|
27
|
+
|
|
28
|
+
[tool.hatch.build.targets.wheel]
|
|
29
|
+
packages = ["src/mshzip"]
|
|
30
|
+
|
|
31
|
+
[tool.pytest.ini_options]
|
|
32
|
+
testpaths = ["tests"]
|
|
33
|
+
pythonpath = ["src"]
|
|
34
|
+
addopts = "-v --tb=short"
|
|
35
|
+
|
|
36
|
+
[dependency-groups]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=8.0",
|
|
39
|
+
"pytest-cov>=5.0",
|
|
40
|
+
]
|
mshzip-1.0.0/src/mshzip/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
'mshzip - 고정 청크 dedup + 엔트로피 압축'
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from .packer import Packer
|
|
5
|
+
from .unpacker import Unpacker
|
|
6
|
+
from .stream import PackStream, UnpackStream, pack_stream, unpack_stream
|
|
7
|
+
from . import constants
|
|
8
|
+
from . import varint
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def pack(data: bytes | bytearray, **opts) -> bytes:
    """Compress ``data`` in a single call (convenience API).

    Args:
        data: The raw bytes to compress.
        **opts: Keyword options forwarded unchanged to ``Packer``.

    Returns:
        Whatever ``Packer(**opts).pack(data)`` returns.
    """
    # A fresh Packer is built per call; nothing is kept between calls here.
    return Packer(**opts).pack(data)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def unpack(data: bytes | bytearray) -> bytes:
    """Decompress ``data`` in a single call (convenience API).

    Args:
        data: The packed bytes to restore.

    Returns:
        Whatever ``Unpacker().unpack(data)`` returns.
    """
    # A fresh, default-constructed Unpacker is built per call.
    return Unpacker().unpack(data)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Names exported by ``from mshzip import *``; order kept as in the original.
__all__ = [
    # One-shot helpers defined in this module.
    'pack',
    'unpack',
    # Class-based API.
    'Packer',
    'Unpacker',
    # Streaming classes.
    'PackStream',
    'UnpackStream',
    # Streaming helper functions.
    'pack_stream',
    'unpack_stream',
    # Submodules imported above.
    'constants',
    'varint',
]

# Package version string.
__version__ = '1.0.0'
|