py-self-md5 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,220 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+ input.py
6
+ input.self_md5.py
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py.cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ # Pipfile.lock
98
+
99
+ # UV
100
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # uv.lock
104
+
105
+ # poetry
106
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
107
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
108
+ # commonly ignored for libraries.
109
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
110
+ # poetry.lock
111
+ # poetry.toml
112
+
113
+ # pdm
114
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
115
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
116
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
117
+ # pdm.lock
118
+ # pdm.toml
119
+ .pdm-python
120
+ .pdm-build/
121
+
122
+ # pixi
123
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
124
+ # pixi.lock
125
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
126
+ # in the .venv directory. It is recommended not to include this directory in version control.
127
+ .pixi
128
+
129
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
130
+ __pypackages__/
131
+
132
+ # Celery stuff
133
+ celerybeat-schedule
134
+ celerybeat.pid
135
+
136
+ # Redis
137
+ *.rdb
138
+ *.aof
139
+ *.pid
140
+
141
+ # RabbitMQ
142
+ mnesia/
143
+ rabbitmq/
144
+ rabbitmq-data/
145
+
146
+ # ActiveMQ
147
+ activemq-data/
148
+
149
+ # SageMath parsed files
150
+ *.sage.py
151
+
152
+ # Environments
153
+ .env
154
+ .envrc
155
+ .venv
156
+ env/
157
+ venv/
158
+ ENV/
159
+ env.bak/
160
+ venv.bak/
161
+
162
+ # Spyder project settings
163
+ .spyderproject
164
+ .spyproject
165
+
166
+ # Rope project settings
167
+ .ropeproject
168
+
169
+ # mkdocs documentation
170
+ /site
171
+
172
+ # mypy
173
+ .mypy_cache/
174
+ .dmypy.json
175
+ dmypy.json
176
+
177
+ # Pyre type checker
178
+ .pyre/
179
+
180
+ # pytype static type analyzer
181
+ .pytype/
182
+
183
+ # Cython debug symbols
184
+ cython_debug/
185
+
186
+ # PyCharm
187
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
188
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
189
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
190
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
191
+ # .idea/
192
+
193
+ # Abstra
194
+ # Abstra is an AI-powered process automation framework.
195
+ # Ignore directories containing user credentials, local state, and settings.
196
+ # Learn more at https://abstra.io/docs
197
+ .abstra/
198
+
199
+ # Visual Studio Code
200
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
201
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
202
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
203
+ # you could uncomment the following to ignore the entire vscode folder
204
+ # .vscode/
205
+ # Temporary file for partial code execution
206
+ tempCodeRunnerFile.py
207
+
208
+ # Ruff stuff:
209
+ .ruff_cache/
210
+
211
+ # PyPI configuration file
212
+ .pypirc
213
+
214
+ # Marimo
215
+ marimo/_static/
216
+ marimo/_lsp/
217
+ __marimo__/
218
+
219
+ # Streamlit
220
+ .streamlit/secrets.toml
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 GGN_2015
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: py-self-md5
3
+ Version: 0.1.0
4
+ Summary: Wrap Python scripts so they print their own MD5 without reading their source at runtime.
5
+ Author: GGN_2015
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: md5,python,quine,self-hash
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Software Development :: Build Tools
17
+ Classifier: Topic :: Utilities
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+
21
+ # py-self-md5
22
+
23
+ `py-self-md5` wraps a Python script so the generated script prints
24
+ `MD5: <DIGEST>` before running the original program.
25
+
26
+ The generated script does **not** read its own source file at runtime. Instead,
27
+ the inserted prefix stores a quine-style byte template and computes the digest
28
+ from the reconstructed bytes in memory.
29
+
30
+ This project is inspired by
31
+ [`zhuzilin/pdf-with-its-own-md5`](https://github.com/zhuzilin/pdf-with-its-own-md5),
32
+ where selectable MD5-collision carriers are used to display the final digest in
33
+ a PDF. This tool targets executable Python programs and does not require
34
+ HashClash or precomputed collision blocks.
35
+
36
+ ## Install
37
+
38
+ Requires Python 3.11 or newer.
39
+
40
+ ```bash
41
+ python -m pip install py-self-md5
42
+ ```
43
+
44
+ After installation, run the built-in check:
45
+
46
+ ```bash
47
+ py-self-md5 --self-test
48
+ ```
49
+
50
+ You can also run it from the project root without installing:
51
+
52
+ ```bash
53
+ python -m py_self_md5 --self-test
54
+ ```
55
+
56
+ ## Usage
57
+
58
+ Create `input.self_md5.py`:
59
+
60
+ ```bash
61
+ py-self-md5 input.py
62
+ ```
63
+
64
+ Run the generated script:
65
+
66
+ ```bash
67
+ python input.self_md5.py
68
+ ```
69
+
70
+ The first output line is `MD5: <DIGEST>`, where `<DIGEST>` is the uppercase MD5
71
+ digest of `input.self_md5.py`; then the original program continues.
72
+
73
+ Write to a specific path:
74
+
75
+ ```bash
76
+ py-self-md5 input.py -o output.py
77
+ ```
78
+
79
+ Rewrite the input file in place:
80
+
81
+ ```bash
82
+ py-self-md5 input.py --in-place
83
+ ```
84
+
85
+ Replace an existing `py-self-md5` prefix in place:
86
+
87
+ ```bash
88
+ py-self-md5 input.py --in-place --force
89
+ ```
90
+
91
+ Overwrite an existing output file:
92
+
93
+ ```bash
94
+ py-self-md5 input.py -o output.py --force
95
+ ```
96
+
97
+ Generate and verify in one step:
98
+
99
+ ```bash
100
+ py-self-md5 input.py --check
101
+ ```
102
+
103
+ ## What the wrapper preserves
104
+
105
+ - The tool preserves shebang lines, encoding cookies, module docstrings, and
106
+ `from __future__ import ...` placement.
107
+ - By default, the input file is not modified.
108
+ - Existing `py-self-md5` prefixes are detected. Use `--force` to replace one.
109
+
110
+ ## Limitations
111
+
112
+ - This is an executable self-hash wrapper, not a general MD5 collision
113
+ generator.
114
+ - The generated prefix can be large because it embeds enough bytes to
115
+ reconstruct the generated script in memory.
116
+ - `--check` executes the generated script, so only use it with programs you are
117
+ willing to run.
@@ -0,0 +1,97 @@
1
+ # py-self-md5
2
+
3
+ `py-self-md5` wraps a Python script so the generated script prints
4
+ `MD5: <DIGEST>` before running the original program.
5
+
6
+ The generated script does **not** read its own source file at runtime. Instead,
7
+ the inserted prefix stores a quine-style byte template and computes the digest
8
+ from the reconstructed bytes in memory.
9
+
10
+ This project is inspired by
11
+ [`zhuzilin/pdf-with-its-own-md5`](https://github.com/zhuzilin/pdf-with-its-own-md5),
12
+ where selectable MD5-collision carriers are used to display the final digest in
13
+ a PDF. This tool targets executable Python programs and does not require
14
+ HashClash or precomputed collision blocks.
15
+
16
+ ## Install
17
+
18
+ Requires Python 3.11 or newer.
19
+
20
+ ```bash
21
+ python -m pip install py-self-md5
22
+ ```
23
+
24
+ After installation, run the built-in check:
25
+
26
+ ```bash
27
+ py-self-md5 --self-test
28
+ ```
29
+
30
+ You can also run it from the project root without installing:
31
+
32
+ ```bash
33
+ python -m py_self_md5 --self-test
34
+ ```
35
+
36
+ ## Usage
37
+
38
+ Create `input.self_md5.py`:
39
+
40
+ ```bash
41
+ py-self-md5 input.py
42
+ ```
43
+
44
+ Run the generated script:
45
+
46
+ ```bash
47
+ python input.self_md5.py
48
+ ```
49
+
50
+ The first output line is `MD5: <DIGEST>`, where `<DIGEST>` is the uppercase MD5
51
+ digest of `input.self_md5.py`; then the original program continues.
52
+
53
+ Write to a specific path:
54
+
55
+ ```bash
56
+ py-self-md5 input.py -o output.py
57
+ ```
58
+
59
+ Rewrite the input file in place:
60
+
61
+ ```bash
62
+ py-self-md5 input.py --in-place
63
+ ```
64
+
65
+ Replace an existing `py-self-md5` prefix in place:
66
+
67
+ ```bash
68
+ py-self-md5 input.py --in-place --force
69
+ ```
70
+
71
+ Overwrite an existing output file:
72
+
73
+ ```bash
74
+ py-self-md5 input.py -o output.py --force
75
+ ```
76
+
77
+ Generate and verify in one step:
78
+
79
+ ```bash
80
+ py-self-md5 input.py --check
81
+ ```
82
+
83
+ ## What the wrapper preserves
84
+
85
+ - The tool preserves shebang lines, encoding cookies, module docstrings, and
86
+ `from __future__ import ...` placement.
87
+ - By default, the input file is not modified.
88
+ - Existing `py-self-md5` prefixes are detected. Use `--force` to replace one.
89
+
90
+ ## Limitations
91
+
92
+ - This is an executable self-hash wrapper, not a general MD5 collision
93
+ generator.
94
+ - The generated prefix can be large because it embeds enough bytes to
95
+ reconstruct the generated script in memory.
96
+ - `--check` executes the generated script, so only use it with programs you are
97
+ willing to run.
@@ -0,0 +1,5 @@
1
+ """Tools for wrapping Python scripts that print their own MD5."""
2
+
3
+ from .cli import WrapResult, build_wrapped_source, file_md5, wrap_file
4
+
5
+ __all__ = ["WrapResult", "build_wrapped_source", "file_md5", "wrap_file"]
@@ -0,0 +1,5 @@
1
+ from .cli import main
2
+
3
+
4
+ if __name__ == "__main__":
5
+ raise SystemExit(main())
@@ -0,0 +1,390 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Create a Python script that prints its own file MD5 before running user code,
4
+ without reading its own source file at runtime.
5
+
6
+ The referenced PDF construction uses MD5 collision choices to make static
7
+ content display the digest of the final file. For executable Python, this
8
+ tool uses a quine-style carrier: the prepended shim stores a byte template for
9
+ the generated script and hashes the reconstructed bytes instead of opening
10
+ __file__.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import ast
17
+ import hashlib
18
+ import io
19
+ import re
20
+ import subprocess
21
+ import sys
22
+ import tempfile
23
+ import textwrap
24
+ import tokenize
25
+ from dataclasses import dataclass
26
+ from pathlib import Path
27
+
28
+
29
+ BEGIN_MARKER = "# <py-self-md5:begin>"
30
+ END_MARKER = "# <py-self-md5:end>"
31
+ CODING_RE = re.compile(br"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)")
32
+ UTF8_BOM = b"\xef\xbb\xbf"
33
+
34
+
35
@dataclass(frozen=True)
class WrapResult:
    """Immutable summary of a single wrap operation."""

    # Script that was read as input.
    input_path: Path
    # File the result refers to; ``md5`` is computed from this path.
    output_path: Path
    # Uppercase hexadecimal MD5 digest of ``output_path``.
    md5: str
    # 1-based line number at which the shim was inserted.
    insertion_line: int
    # True when the input already contained a shim and was not re-wrapped.
    already_wrapped: bool = False
42
+
43
+
44
def file_md5(path: Path) -> str:
    """Return the uppercase hexadecimal MD5 digest of the file at *path*.

    The file is read in 1 MiB pieces, so it is never loaded into memory
    as a whole.
    """
    block_size = 1024 * 1024
    hasher = hashlib.md5()
    with path.open("rb") as stream:
        while block := stream.read(block_size):
            hasher.update(block)
    return hasher.hexdigest().upper()
50
+
51
+
52
def detect_newline(data: bytes) -> bytes:
    """Return the line terminator of *data*, judged from its first line feed.

    CRLF is reported only when the first LF is directly preceded by a CR.
    In every other case, including data without any LF, LF is returned.
    """
    lf_index = data.find(b"\n")
    preceded_by_cr = lf_index >= 1 and data[lf_index - 1] == 0x0D
    return b"\r\n" if preceded_by_cr else b"\n"
57
+
58
+
59
def strip_eol(line: bytes) -> bytes:
    """Return *line* without any trailing carriage-return or line-feed bytes."""
    eol_bytes = b"\r\n"
    stripped = line.rstrip(eol_bytes)
    return stripped
61
+
62
+
63
def find_existing_shim_span(data: bytes) -> tuple[int, int] | None:
    """Locate an existing shim in *data*.

    Returns:
        ``(begin, end)`` byte offsets, where ``begin`` is the start of the
        first line equal to ``BEGIN_MARKER`` and ``end`` lies after the
        first later line equal to ``END_MARKER`` plus any CR/LF bytes that
        directly follow it.  ``None`` when no complete marker pair exists.
    """
    opening = BEGIN_MARKER.encode("ascii")
    closing = END_MARKER.encode("ascii")
    start: int | None = None
    position = 0

    for raw_line in data.splitlines(keepends=True):
        line_end = position + len(raw_line)
        content = strip_eol(raw_line)
        if start is None:
            if content == opening:
                start = position
        elif content == closing:
            # Also swallow every line-ending byte that follows the end marker.
            stop = line_end
            total = len(data)
            while stop < total and data[stop] in b"\r\n":
                stop += 1
            return start, stop
        position = line_end

    return None
81
+
82
+
83
def has_coding_cookie(line: bytes) -> bool:
    """Tell whether ``CODING_RE`` (an encoding-declaration comment) matches *line*."""
    return CODING_RE.match(line) is not None
85
+
86
+
87
def protected_header_line_count(lines: list[bytes]) -> int:
    """Return how many leading lines must stay ahead of the inserted shim.

    A ``#!`` line only works on line 1 and an encoding declaration is only
    honoured on line 1 or 2 (PEP 263), so neither may be pushed further
    down by the shim.

    Args:
        lines: The file split into lines; the first line may start with a
            UTF-8 BOM.

    Returns:
        0, 1 or 2: the number of leading lines to keep in place.
    """
    if not lines:
        return 0

    # Ignore a BOM so it cannot hide a shebang or cookie on line 1.
    first = lines[0].removeprefix(UTF8_BOM)

    count = 0
    # A cookie on line 1 needs the same protection as a shebang: a shim
    # inserted above it would move it past line 2, where it is ignored.
    if first.startswith(b"#!") or has_coding_cookie(first):
        count = 1
    if len(lines) > 1 and has_coding_cookie(lines[1]):
        count = 2
    return count
98
+
99
+
100
def byte_offset_for_line(lines: list[bytes], line_no: int) -> int:
    """Return the byte offset at which 1-based line *line_no* begins.

    Line 1 (or anything lower) maps to the position just after a leading
    UTF-8 BOM, or to 0 when there is none.  A line number beyond the last
    line maps to the combined length of all lines.
    """
    if line_no > 1:
        # Slicing stops at the end of the list, so a line number past the
        # last line simply sums every line.
        preceding = lines[: line_no - 1]
        return sum(map(len, preceding))
    starts_with_bom = bool(lines) and lines[0].startswith(UTF8_BOM)
    return len(UTF8_BOM) if starts_with_bom else 0
109
+
110
+
111
def decode_python_source(data: bytes) -> str:
    """Decode *data* to text with the encoding ``tokenize`` detects for it."""
    reader = io.BytesIO(data)
    detected = tokenize.detect_encoding(reader.readline)
    codec = detected[0]
    return data.decode(codec)
114
+
115
+
116
def first_insert_line_from_ast(source: str, minimum_line: int) -> int:
    """Find the earliest legal line for executable shim code.

    The shim has to come after a module docstring and after every leading
    ``from __future__ import ...`` statement, and it must never be placed
    in the middle of a multi-line statement.

    Args:
        source: Decoded text of the module to wrap.
        minimum_line: Lowest acceptable 1-based line number.

    Returns:
        The 1-based line number in front of which the shim can be inserted.

    Raises:
        ValueError: If *source* is not syntactically valid Python.
    """
    try:
        module = ast.parse(source)
    except SyntaxError as exc:
        raise ValueError(f"input is not valid Python: {exc}") from exc

    insert_line = minimum_line
    body = module.body
    index = 0

    # Skip a module docstring (a leading bare string expression).
    if body and isinstance(body[0], ast.Expr):
        value = body[0].value
        if isinstance(value, ast.Constant) and isinstance(value.value, str):
            insert_line = max(insert_line, (body[0].end_lineno or body[0].lineno) + 1)
            index = 1

    # Skip the run of __future__ imports that follows.
    while index < len(body):
        node = body[index]
        if not (
            isinstance(node, ast.ImportFrom)
            and node.module == "__future__"
            and node.level == 0
        ):
            break
        insert_line = max(insert_line, (node.end_lineno or node.lineno) + 1)
        index += 1

    # A statement may start on an earlier line (for example after ";" on
    # the docstring's line) and continue onto the chosen line.  Inserting
    # there would split it, so move past any statement that straddles the
    # insertion line.  Statements are in source order, so one pass suffices.
    for node in body:
        end_line = node.end_lineno or node.lineno
        if node.lineno < insert_line <= end_line:
            insert_line = end_line + 1

    return insert_line
146
+
147
+
148
def make_quine_template(
    head: bytes,
    *,
    function_name: str,
    template_marker: bytes,
    tail_marker: bytes,
    newline: bytes,
) -> bytes:
    """Return *head* followed by the shim source with unfilled markers.

    The shim lines are ASCII-encoded and joined with *newline*.  The bare
    marker names sit where the template and tail byte literals will later
    be substituted; the shim's own ``.replace`` calls name the same markers
    so it can repeat that substitution at run time.
    """
    template_name = template_marker.decode("ascii")
    tail_name = tail_marker.decode("ascii")

    shim_lines = (
        BEGIN_MARKER,
        f"def {function_name}():",
        "    import hashlib as __py_self_md5_hashlib",
        "    import sys as __py_self_md5_sys",
        f"    __py_self_md5_template = {template_name}",
        f"    __py_self_md5_tail = {tail_name}",
        "    __py_self_md5_source = (",
        "        __py_self_md5_template",
        f"        .replace({tail_marker!r}, repr(__py_self_md5_tail).encode('ascii'), 1)",
        f"        .replace({template_marker!r}, repr(__py_self_md5_template).encode('ascii'), 1)",
        "        + __py_self_md5_tail",
        "    )",
        "    __py_self_md5_sys.stdout.write(",
        "        'MD5: ' + __py_self_md5_hashlib.md5(__py_self_md5_source).hexdigest().upper() + '\\n'",
        "    )",
        f"{function_name}()",
        f"del {function_name}",
        END_MARKER,
        # The empty final entry makes the joined text end with a newline.
        "",
    )
    encoded = [text.encode("ascii") for text in shim_lines]
    return head + newline.join(encoded)
178
+
179
+
180
def choose_markers(data: bytes, seed: str) -> tuple[bytes, bytes]:
    """Pick a (template, tail) pair of marker names that do not occur in *data*.

    Candidates are derived from *seed* and a counter running from 0 to 999;
    the first pair with neither name present in *data* is returned.

    Raises:
        RuntimeError: If all 1000 candidate pairs collide with *data*.
    """
    attempt = 0
    while attempt < 1000:
        unique_part = f"{seed}_{attempt}".encode("ascii")
        candidates = (
            b"__PY_SELF_MD5_TEMPLATE_" + unique_part + b"__",
            b"__PY_SELF_MD5_TAIL_" + unique_part + b"__",
        )
        if not any(marker in data for marker in candidates):
            return candidates
        attempt += 1
    raise RuntimeError("could not find marker names absent from input")
188
+
189
+
190
def render_quine(template: bytes, tail: bytes, template_marker: bytes, tail_marker: bytes) -> bytes:
    """Fill *template* with its own literal and the tail literal, then append *tail*.

    The first occurrence of *tail_marker* is replaced by ``repr(tail)`` and
    afterwards the first occurrence of *template_marker* by
    ``repr(template)``, mirroring what the generated shim does at run time.
    """
    tail_literal = repr(tail).encode("ascii")
    template_literal = repr(template).encode("ascii")
    with_tail = template.replace(tail_marker, tail_literal, 1)
    prefix = with_tail.replace(template_marker, template_literal, 1)
    return prefix + tail
197
+
198
+
199
def build_wrapped_source(data: bytes, *, force: bool = False) -> tuple[bytes, int, bool]:
    """Insert the self-MD5 shim into the script bytes *data*.

    Args:
        data: Raw bytes of the Python script.
        force: Replace an existing shim instead of leaving the input as is.

    Returns:
        ``(wrapped, insertion_line, already_wrapped)``.  When a shim is
        already present and *force* is false, the input is returned
        unchanged as ``(data, 1, True)``.
    """
    if find_existing_shim_span(data) is not None:
        if not force:
            return data, 1, True
        data = remove_existing_shim(data)

    # Work out where the shim may legally go.
    source_lines = data.splitlines(keepends=True)
    earliest_line = protected_header_line_count(source_lines) + 1
    insertion_line = first_insert_line_from_ast(decode_python_source(data), earliest_line)
    split_at = byte_offset_for_line(source_lines, insertion_line)

    seed = hashlib.sha1(data).hexdigest()[:16]
    newline = detect_newline(data)

    head, tail = data[:split_at], data[split_at:]
    # The shim must start on a fresh line.
    if head and head[-1:] not in (b"\n", b"\r"):
        head += newline

    template_marker, tail_marker = choose_markers(data, seed)
    template = make_quine_template(
        head,
        function_name=f"__py_self_md5_{seed}",
        template_marker=template_marker,
        tail_marker=tail_marker,
        newline=newline,
    )
    return render_quine(template, tail, template_marker, tail_marker), insertion_line, False
232
+
233
+
234
def remove_existing_shim(data: bytes) -> bytes:
    """Return *data* with the span found by ``find_existing_shim_span`` cut out.

    The input is returned unchanged when no shim is found.
    """
    span = find_existing_shim_span(data)
    if span is not None:
        start, stop = span
        return data[:start] + data[stop:]
    return data
240
+
241
+
242
def default_output_path(input_path: Path) -> Path:
    """Return the sibling path ``<stem>.self_md5<suffix>`` for *input_path*.

    ``.py`` is used as the suffix when *input_path* has none.
    """
    extension = input_path.suffix if input_path.suffix else ".py"
    new_name = input_path.stem + ".self_md5" + extension
    return input_path.with_name(new_name)
244
+
245
+
246
def wrap_file(input_path: Path, output_path: Path, *, force: bool = False) -> WrapResult:
    """Wrap the script at *input_path* and store the result at *output_path*.

    Args:
        input_path: Script to read.
        output_path: Destination file; missing parent directories are created.
        force: Passed on to ``build_wrapped_source``.

    Returns:
        A ``WrapResult`` whose ``md5`` is the digest of *output_path*.
    """
    source_bytes = input_path.read_bytes()
    wrapped, insertion_line, already_wrapped = build_wrapped_source(source_bytes, force=force)

    # Nothing is written only when the input is already wrapped and the
    # output path resolves to the input itself.
    needs_write = not already_wrapped or input_path.resolve() != output_path.resolve()
    if needs_write:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_bytes(wrapped)

    return WrapResult(
        input_path=input_path,
        output_path=output_path,
        md5=file_md5(output_path),
        insertion_line=insertion_line,
        already_wrapped=already_wrapped,
    )
262
+
263
+
264
def run_output_check(path: Path, timeout: float) -> tuple[bool, str, str]:
    """Run *path* with the current interpreter and verify its first stdout line.

    Args:
        path: Generated script to execute.
        timeout: Maximum run time in seconds.

    Returns:
        ``(ok, detail, stderr)``.  ``ok`` is true when the first stdout line
        equals ``MD5: <digest of path>``, ``detail`` is a human-readable
        comparison and ``stderr`` is the captured error output.  A run that
        exceeds *timeout* is reported as a failed check rather than raising.
    """
    expected_line = f"MD5: {file_md5(path)}"
    try:
        proc = subprocess.run(
            [sys.executable, str(path)],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            # The checked script may print arbitrary bytes; undecodable
            # output must not abort the check.
            errors="replace",
            timeout=timeout,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # Output attached to the exception may be bytes even with text=True.
        captured = exc.stderr or ""
        if isinstance(captured, bytes):
            captured = captured.decode(errors="replace")
        detail = (
            f"expected first line {expected_line}, "
            f"but the process timed out after {timeout} seconds"
        )
        return False, detail, captured

    stdout_lines = proc.stdout.splitlines()
    first_line = stdout_lines[0] if stdout_lines else ""
    ok = first_line == expected_line
    detail = f"expected first line {expected_line}, got {first_line or '<no stdout>'}"
    if proc.returncode != 0:
        detail += f"; process exited with {proc.returncode}"
    return ok, detail, proc.stderr
281
+
282
+
283
def self_test() -> None:
    """Wrap a built-in sample script in a temporary directory and verify it.

    The sample has a shebang, an encoding cookie, a module docstring and a
    ``__future__`` import.  The test checks that the shim markers exist,
    that the shim contains neither ``open(`` nor ``__file__``, and that
    running the output prints the expected MD5 line.

    Raises:
        SystemExit: With a ``self-test failed: ...`` message on any failure.
    """
    sample_source = textwrap.dedent(
        '''\
            #!/usr/bin/env python3
            # coding: utf-8
            """sample module docstring"""
            from __future__ import annotations

            print("payload ran")
        '''
    ).encode("utf-8")

    with tempfile.TemporaryDirectory() as workdir:
        workdir_path = Path(workdir)
        source_file = workdir_path / "sample.py"
        wrapped_file = workdir_path / "sample.self_md5.py"
        source_file.write_bytes(sample_source)
        result = wrap_file(source_file, wrapped_file, force=False)

        wrapped_bytes = wrapped_file.read_bytes()
        span = find_existing_shim_span(wrapped_bytes)
        if span is None:
            raise SystemExit("self-test failed: generated shim markers are missing")

        start, stop = span
        shim = wrapped_bytes[start:stop]
        forbidden = (b"open(", b"__file__")
        if any(token in shim for token in forbidden):
            raise SystemExit("self-test failed: generated shim reads or references its source file")

        ok, detail, stderr = run_output_check(result.output_path, timeout=10)
        if not ok:
            raise SystemExit(f"self-test failed: {detail}\n{stderr}")
        print(f"self-test ok: {result.output_path.name} prints MD5: {result.md5}")
311
+
312
+
313
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command-line arguments in *argv* (program name excluded)."""
    parser = argparse.ArgumentParser(
        description="Prepend a startup shim so a .py file prints its own MD5 before user code runs."
    )
    add = parser.add_argument

    # Input and output locations.
    add("input", nargs="?", type=Path, help="input Python file")
    add("-o", "--output", type=Path, help="output path")
    add(
        "--in-place",
        action="store_true",
        help="rewrite the input file instead of creating *.self_md5.py",
    )
    add(
        "--force",
        action="store_true",
        help="overwrite output and replace an existing py-self-md5 shim if present",
    )

    # Verification options.
    add(
        "--check",
        action="store_true",
        help="execute the generated file and verify that its first stdout line is MD5: <digest>",
    )
    add("--check-timeout", type=float, default=10.0, help="timeout in seconds for --check")
    add("--self-test", action="store_true", help="run an internal safe sample test")

    return parser.parse_args(argv)
346
+
347
+
348
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Arguments without the program name; ``sys.argv[1:]`` is used
            when omitted.

    Returns:
        0 on success, 1 when ``--check`` was requested and failed.

    Raises:
        SystemExit: With an ``error: ...`` message for usage problems and
            for inputs that cannot be read, decoded or parsed.
    """
    args = parse_args(sys.argv[1:] if argv is None else argv)

    if args.self_test:
        self_test()
        return 0

    if args.input is None:
        raise SystemExit("error: input file is required unless --self-test is used")

    input_path = args.input.resolve()
    if not input_path.exists():
        raise SystemExit(f"error: input file does not exist: {input_path}")
    if not input_path.is_file():
        raise SystemExit(f"error: input path is not a file: {input_path}")

    if args.in_place and args.output:
        raise SystemExit("error: --in-place and --output cannot be used together")

    output_path = input_path if args.in_place else (args.output or default_output_path(input_path)).resolve()
    if output_path.exists() and output_path.resolve() != input_path and not args.force:
        raise SystemExit(f"error: output exists, use --force to overwrite: {output_path}")

    try:
        result = wrap_file(input_path, output_path, force=args.force)
    except (OSError, SyntaxError, ValueError) as exc:
        # Invalid Python, undecodable source, a bad encoding cookie or an
        # I/O failure: report it like every other error, not as a traceback.
        raise SystemExit(f"error: {exc}") from exc

    if result.already_wrapped:
        print(f"already wrapped: {result.output_path}")
    else:
        print(f"wrote: {result.output_path}")
        print(f"inserted shim at line: {result.insertion_line}")
    print(f"file MD5: {result.md5}")

    if args.check:
        ok, detail, stderr = run_output_check(result.output_path, timeout=args.check_timeout)
        print(f"check: {'ok' if ok else 'failed'} ({detail})")
        if stderr:
            print(stderr, file=sys.stderr, end="")
        return 0 if ok else 1

    return 0
387
+
388
+
389
+ if __name__ == "__main__":
390
+ raise SystemExit(main())
@@ -0,0 +1,33 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.26"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "py-self-md5"
7
+ version = "0.1.0"
8
+ description = "Wrap Python scripts so they print their own MD5 without reading their source at runtime."
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = "MIT"
12
+ authors = [
13
+ { name = "GGN_2015" }
14
+ ]
15
+ keywords = ["md5", "quine", "python", "self-hash"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Environment :: Console",
19
+ "Intended Audience :: Developers",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Programming Language :: Python :: 3.13",
24
+ "Topic :: Software Development :: Build Tools",
25
+ "Topic :: Utilities",
26
+ ]
27
+ dependencies = []
28
+
29
+ [project.scripts]
30
+ py-self-md5 = "py_self_md5.cli:main"
31
+
32
+ [tool.hatch.build.targets.wheel]
33
+ packages = ["py_self_md5"]