py-self-md5 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_self_md5-0.1.0/.gitignore +220 -0
- py_self_md5-0.1.0/LICENSE +21 -0
- py_self_md5-0.1.0/PKG-INFO +117 -0
- py_self_md5-0.1.0/README.md +97 -0
- py_self_md5-0.1.0/py_self_md5/__init__.py +5 -0
- py_self_md5-0.1.0/py_self_md5/__main__.py +5 -0
- py_self_md5-0.1.0/py_self_md5/cli.py +390 -0
- py_self_md5-0.1.0/pyproject.toml +33 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
input.py
|
|
6
|
+
input.self_md5.py
|
|
7
|
+
|
|
8
|
+
# C extensions
|
|
9
|
+
*.so
|
|
10
|
+
|
|
11
|
+
# Distribution / packaging
|
|
12
|
+
.Python
|
|
13
|
+
build/
|
|
14
|
+
develop-eggs/
|
|
15
|
+
dist/
|
|
16
|
+
downloads/
|
|
17
|
+
eggs/
|
|
18
|
+
.eggs/
|
|
19
|
+
lib/
|
|
20
|
+
lib64/
|
|
21
|
+
parts/
|
|
22
|
+
sdist/
|
|
23
|
+
var/
|
|
24
|
+
wheels/
|
|
25
|
+
share/python-wheels/
|
|
26
|
+
*.egg-info/
|
|
27
|
+
.installed.cfg
|
|
28
|
+
*.egg
|
|
29
|
+
MANIFEST
|
|
30
|
+
|
|
31
|
+
# PyInstaller
|
|
32
|
+
# Usually these files are written by a python script from a template
|
|
33
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
34
|
+
*.manifest
|
|
35
|
+
*.spec
|
|
36
|
+
|
|
37
|
+
# Installer logs
|
|
38
|
+
pip-log.txt
|
|
39
|
+
pip-delete-this-directory.txt
|
|
40
|
+
|
|
41
|
+
# Unit test / coverage reports
|
|
42
|
+
htmlcov/
|
|
43
|
+
.tox/
|
|
44
|
+
.nox/
|
|
45
|
+
.coverage
|
|
46
|
+
.coverage.*
|
|
47
|
+
.cache
|
|
48
|
+
nosetests.xml
|
|
49
|
+
coverage.xml
|
|
50
|
+
*.cover
|
|
51
|
+
*.py.cover
|
|
52
|
+
.hypothesis/
|
|
53
|
+
.pytest_cache/
|
|
54
|
+
cover/
|
|
55
|
+
|
|
56
|
+
# Translations
|
|
57
|
+
*.mo
|
|
58
|
+
*.pot
|
|
59
|
+
|
|
60
|
+
# Django stuff:
|
|
61
|
+
*.log
|
|
62
|
+
local_settings.py
|
|
63
|
+
db.sqlite3
|
|
64
|
+
db.sqlite3-journal
|
|
65
|
+
|
|
66
|
+
# Flask stuff:
|
|
67
|
+
instance/
|
|
68
|
+
.webassets-cache
|
|
69
|
+
|
|
70
|
+
# Scrapy stuff:
|
|
71
|
+
.scrapy
|
|
72
|
+
|
|
73
|
+
# Sphinx documentation
|
|
74
|
+
docs/_build/
|
|
75
|
+
|
|
76
|
+
# PyBuilder
|
|
77
|
+
.pybuilder/
|
|
78
|
+
target/
|
|
79
|
+
|
|
80
|
+
# Jupyter Notebook
|
|
81
|
+
.ipynb_checkpoints
|
|
82
|
+
|
|
83
|
+
# IPython
|
|
84
|
+
profile_default/
|
|
85
|
+
ipython_config.py
|
|
86
|
+
|
|
87
|
+
# pyenv
|
|
88
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
89
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
90
|
+
# .python-version
|
|
91
|
+
|
|
92
|
+
# pipenv
|
|
93
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
94
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
95
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
96
|
+
# install all needed dependencies.
|
|
97
|
+
# Pipfile.lock
|
|
98
|
+
|
|
99
|
+
# UV
|
|
100
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
101
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
102
|
+
# commonly ignored for libraries.
|
|
103
|
+
# uv.lock
|
|
104
|
+
|
|
105
|
+
# poetry
|
|
106
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
107
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
108
|
+
# commonly ignored for libraries.
|
|
109
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
110
|
+
# poetry.lock
|
|
111
|
+
# poetry.toml
|
|
112
|
+
|
|
113
|
+
# pdm
|
|
114
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
115
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
116
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
117
|
+
# pdm.lock
|
|
118
|
+
# pdm.toml
|
|
119
|
+
.pdm-python
|
|
120
|
+
.pdm-build/
|
|
121
|
+
|
|
122
|
+
# pixi
|
|
123
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
124
|
+
# pixi.lock
|
|
125
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
126
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
127
|
+
.pixi
|
|
128
|
+
|
|
129
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
130
|
+
__pypackages__/
|
|
131
|
+
|
|
132
|
+
# Celery stuff
|
|
133
|
+
celerybeat-schedule
|
|
134
|
+
celerybeat.pid
|
|
135
|
+
|
|
136
|
+
# Redis
|
|
137
|
+
*.rdb
|
|
138
|
+
*.aof
|
|
139
|
+
*.pid
|
|
140
|
+
|
|
141
|
+
# RabbitMQ
|
|
142
|
+
mnesia/
|
|
143
|
+
rabbitmq/
|
|
144
|
+
rabbitmq-data/
|
|
145
|
+
|
|
146
|
+
# ActiveMQ
|
|
147
|
+
activemq-data/
|
|
148
|
+
|
|
149
|
+
# SageMath parsed files
|
|
150
|
+
*.sage.py
|
|
151
|
+
|
|
152
|
+
# Environments
|
|
153
|
+
.env
|
|
154
|
+
.envrc
|
|
155
|
+
.venv
|
|
156
|
+
env/
|
|
157
|
+
venv/
|
|
158
|
+
ENV/
|
|
159
|
+
env.bak/
|
|
160
|
+
venv.bak/
|
|
161
|
+
|
|
162
|
+
# Spyder project settings
|
|
163
|
+
.spyderproject
|
|
164
|
+
.spyproject
|
|
165
|
+
|
|
166
|
+
# Rope project settings
|
|
167
|
+
.ropeproject
|
|
168
|
+
|
|
169
|
+
# mkdocs documentation
|
|
170
|
+
/site
|
|
171
|
+
|
|
172
|
+
# mypy
|
|
173
|
+
.mypy_cache/
|
|
174
|
+
.dmypy.json
|
|
175
|
+
dmypy.json
|
|
176
|
+
|
|
177
|
+
# Pyre type checker
|
|
178
|
+
.pyre/
|
|
179
|
+
|
|
180
|
+
# pytype static type analyzer
|
|
181
|
+
.pytype/
|
|
182
|
+
|
|
183
|
+
# Cython debug symbols
|
|
184
|
+
cython_debug/
|
|
185
|
+
|
|
186
|
+
# PyCharm
|
|
187
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
188
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
189
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
190
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
191
|
+
# .idea/
|
|
192
|
+
|
|
193
|
+
# Abstra
|
|
194
|
+
# Abstra is an AI-powered process automation framework.
|
|
195
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
196
|
+
# Learn more at https://abstra.io/docs
|
|
197
|
+
.abstra/
|
|
198
|
+
|
|
199
|
+
# Visual Studio Code
|
|
200
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
201
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
202
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
203
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
204
|
+
# .vscode/
|
|
205
|
+
# Temporary file for partial code execution
|
|
206
|
+
tempCodeRunnerFile.py
|
|
207
|
+
|
|
208
|
+
# Ruff stuff:
|
|
209
|
+
.ruff_cache/
|
|
210
|
+
|
|
211
|
+
# PyPI configuration file
|
|
212
|
+
.pypirc
|
|
213
|
+
|
|
214
|
+
# Marimo
|
|
215
|
+
marimo/_static/
|
|
216
|
+
marimo/_lsp/
|
|
217
|
+
__marimo__/
|
|
218
|
+
|
|
219
|
+
# Streamlit
|
|
220
|
+
.streamlit/secrets.toml
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 GGN_2015
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: py-self-md5
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Wrap Python scripts so they print their own MD5 without reading their source at runtime.
|
|
5
|
+
Author: GGN_2015
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: md5,python,quine,self-hash
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Software Development :: Build Tools
|
|
17
|
+
Classifier: Topic :: Utilities
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# py-self-md5
|
|
22
|
+
|
|
23
|
+
`py-self-md5` wraps a Python script so the generated script prints
|
|
24
|
+
`MD5: <DIGEST>` before running the original program.
|
|
25
|
+
|
|
26
|
+
The generated script does **not** read its own source file at runtime. Instead,
|
|
27
|
+
the inserted prefix stores a quine-style byte template and computes the digest
|
|
28
|
+
from the reconstructed bytes in memory.
|
|
29
|
+
|
|
30
|
+
This project is inspired by
|
|
31
|
+
[`zhuzilin/pdf-with-its-own-md5`](https://github.com/zhuzilin/pdf-with-its-own-md5),
|
|
32
|
+
where selectable MD5-collision carriers are used to display the final digest in
|
|
33
|
+
a PDF. This tool targets executable Python programs and does not require
|
|
34
|
+
HashClash or precomputed collision blocks.
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
Requires Python 3.11 or newer.
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
python -m pip install py-self-md5
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
After installation, run the built-in check:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
py-self-md5 --self-test
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
You can also run it from the project root without installing:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
python -m py_self_md5 --self-test
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Usage
|
|
57
|
+
|
|
58
|
+
Create `input.self_md5.py`:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
py-self-md5 input.py
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Run the generated script:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
python input.self_md5.py
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
The first output line is `MD5: <DIGEST>`, where `<DIGEST>` is the uppercase MD5
|
|
71
|
+
digest of `input.self_md5.py`; then the original program continues.
|
|
72
|
+
|
|
73
|
+
Write to a specific path:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
py-self-md5 input.py -o output.py
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Rewrite the input file in place:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
py-self-md5 input.py --in-place
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Replace an existing `py-self-md5` prefix in place:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
py-self-md5 input.py --in-place --force
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Overwrite an existing output file:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
py-self-md5 input.py -o output.py --force
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Generate and verify in one step:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
py-self-md5 input.py --check
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## What the wrapper preserves
|
|
104
|
+
|
|
105
|
+
- The tool preserves shebang lines, encoding cookies, module docstrings, and
|
|
106
|
+
`from __future__ import ...` placement.
|
|
107
|
+
- By default, the input file is not modified.
|
|
108
|
+
- Existing `py-self-md5` prefixes are detected. Use `--force` to replace one.
|
|
109
|
+
|
|
110
|
+
## Limitations
|
|
111
|
+
|
|
112
|
+
- This is an executable self-hash wrapper, not a general MD5 collision
|
|
113
|
+
generator.
|
|
114
|
+
- The generated prefix can be large because it embeds enough bytes to
|
|
115
|
+
reconstruct the generated script in memory.
|
|
116
|
+
- `--check` executes the generated script, so only use it with programs you are
|
|
117
|
+
willing to run.
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# py-self-md5
|
|
2
|
+
|
|
3
|
+
`py-self-md5` wraps a Python script so the generated script prints
|
|
4
|
+
`MD5: <DIGEST>` before running the original program.
|
|
5
|
+
|
|
6
|
+
The generated script does **not** read its own source file at runtime. Instead,
|
|
7
|
+
the inserted prefix stores a quine-style byte template and computes the digest
|
|
8
|
+
from the reconstructed bytes in memory.
|
|
9
|
+
|
|
10
|
+
This project is inspired by
|
|
11
|
+
[`zhuzilin/pdf-with-its-own-md5`](https://github.com/zhuzilin/pdf-with-its-own-md5),
|
|
12
|
+
where selectable MD5-collision carriers are used to display the final digest in
|
|
13
|
+
a PDF. This tool targets executable Python programs and does not require
|
|
14
|
+
HashClash or precomputed collision blocks.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
Requires Python 3.11 or newer.
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
python -m pip install py-self-md5
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
After installation, run the built-in check:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
py-self-md5 --self-test
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
You can also run it from the project root without installing:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
python -m py_self_md5 --self-test
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Usage
|
|
37
|
+
|
|
38
|
+
Create `input.self_md5.py`:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
py-self-md5 input.py
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Run the generated script:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
python input.self_md5.py
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
The first output line is `MD5: <DIGEST>`, where `<DIGEST>` is the uppercase MD5
|
|
51
|
+
digest of `input.self_md5.py`; then the original program continues.
|
|
52
|
+
|
|
53
|
+
Write to a specific path:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
py-self-md5 input.py -o output.py
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Rewrite the input file in place:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
py-self-md5 input.py --in-place
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Replace an existing `py-self-md5` prefix in place:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
py-self-md5 input.py --in-place --force
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Overwrite an existing output file:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
py-self-md5 input.py -o output.py --force
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Generate and verify in one step:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
py-self-md5 input.py --check
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## What the wrapper preserves
|
|
84
|
+
|
|
85
|
+
- The tool preserves shebang lines, encoding cookies, module docstrings, and
|
|
86
|
+
`from __future__ import ...` placement.
|
|
87
|
+
- By default, the input file is not modified.
|
|
88
|
+
- Existing `py-self-md5` prefixes are detected. Use `--force` to replace one.
|
|
89
|
+
|
|
90
|
+
## Limitations
|
|
91
|
+
|
|
92
|
+
- This is an executable self-hash wrapper, not a general MD5 collision
|
|
93
|
+
generator.
|
|
94
|
+
- The generated prefix can be large because it embeds enough bytes to
|
|
95
|
+
reconstruct the generated script in memory.
|
|
96
|
+
- `--check` executes the generated script, so only use it with programs you are
|
|
97
|
+
willing to run.
|
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Create a Python script that prints its own file MD5 before running user code,
|
|
4
|
+
without reading its own source file at runtime.
|
|
5
|
+
|
|
6
|
+
The referenced PDF construction uses MD5 collision choices to make static
|
|
7
|
+
content display the digest of the final file. For executable Python, this
|
|
8
|
+
tool uses a quine-style carrier: the prepended shim stores a byte template for
|
|
9
|
+
the generated script and hashes the reconstructed bytes instead of opening
|
|
10
|
+
__file__.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import ast
|
|
17
|
+
import hashlib
|
|
18
|
+
import io
|
|
19
|
+
import re
|
|
20
|
+
import subprocess
|
|
21
|
+
import sys
|
|
22
|
+
import tempfile
|
|
23
|
+
import textwrap
|
|
24
|
+
import tokenize
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
BEGIN_MARKER = "# <py-self-md5:begin>"
|
|
30
|
+
END_MARKER = "# <py-self-md5:end>"
|
|
31
|
+
CODING_RE = re.compile(br"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)")
|
|
32
|
+
UTF8_BOM = b"\xef\xbb\xbf"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(frozen=True)
class WrapResult:
    """Immutable summary of one wrap operation, as produced by ``wrap_file``."""

    # Path of the script that was read.
    input_path: Path
    # Path the result refers to (where the wrapped script lives).
    output_path: Path
    # Uppercase hexadecimal MD5 digest of the file at ``output_path``.
    md5: str
    # 1-based line number at which the shim was inserted.
    insertion_line: int
    # True when the input already carried a shim and was not re-wrapped.
    already_wrapped: bool = False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def file_md5(path: Path) -> str:
    """Return the uppercase hexadecimal MD5 digest of the file at *path*.

    The file is consumed in 1 MiB chunks so it is never held in memory whole.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.md5()
    with path.open("rb") as stream:
        while block := stream.read(chunk_size):
            hasher.update(block)
    return hasher.hexdigest().upper()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def detect_newline(data: bytes) -> bytes:
    """Return the line ending used by *data*, judged by its first LF.

    The result is CRLF when the first LF is directly preceded by a CR, and a
    bare LF otherwise (including when *data* contains no LF at all).
    """
    lf_index = data.find(b"\n")
    # Index 0 cannot have a preceding byte; -1 means no LF was found.
    uses_crlf = lf_index >= 1 and data[lf_index - 1:lf_index] == b"\r"
    return b"\r\n" if uses_crlf else b"\n"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def strip_eol(line: bytes) -> bytes:
    """Return *line* with every trailing CR and LF byte removed."""
    eol_bytes = b"\r\n"
    return line.rstrip(eol_bytes)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def find_existing_shim_span(data: bytes) -> tuple[int, int] | None:
    """Locate an existing shim in *data*.

    Returns:
        ``(begin, end)`` byte offsets, where ``begin`` is the start of the
        first line equal to ``BEGIN_MARKER`` and ``end`` lies past the first
        later line equal to ``END_MARKER`` plus any CR/LF bytes directly
        following it. ``None`` when no complete marker pair is found.
    """
    opening = BEGIN_MARKER.encode("ascii")
    closing = END_MARKER.encode("ascii")
    total = len(data)
    start: int | None = None
    cursor = 0

    for raw in data.splitlines(keepends=True):
        next_cursor = cursor + len(raw)
        bare = strip_eol(raw)
        if start is None:
            if bare == opening:
                start = cursor
        elif bare == closing:
            stop = next_cursor
            # Swallow the blank line(s) that directly follow the end marker.
            while stop < total and data[stop:stop + 1] in (b"\r", b"\n"):
                stop += 1
            return start, stop
        cursor = next_cursor

    return None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def has_coding_cookie(line: bytes) -> bool:
    """Report whether *line* matches ``CODING_RE`` (an encoding declaration comment)."""
    return CODING_RE.match(line) is not None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def protected_header_line_count(lines: list[bytes]) -> int:
    """Return how many leading lines must stay ahead of any inserted code.

    The protected header is an optional ``#!`` line plus an optional encoding
    cookie. Following PEP 263 (and ``tokenize.detect_encoding``), a cookie is
    honoured on line 1, or on line 2 only when line 1 is blank or a comment.

    Args:
        lines: The file split into lines with line endings kept. A UTF-8 BOM
            at the start of the first line is ignored when matching.

    Returns:
        0, 1 or 2.
    """
    if not lines:
        return 0

    # Match against the first line without its BOM; otherwise the BOM bytes
    # would hide a cookie placed on line 1.
    first = lines[0].removeprefix(UTF8_BOM)

    if has_coding_cookie(first):
        # Only the first cookie counts, so line 2 needs no inspection.
        return 1

    # A line-2 cookie is only a real declaration when line 1 holds nothing
    # but whitespace or a comment (a shebang is a comment too).
    first_body = first.lstrip(b" \t\f")
    first_is_blank_or_comment = first_body[:1] in (b"", b"#", b"\r", b"\n")
    if first_is_blank_or_comment and len(lines) > 1 and has_coding_cookie(lines[1]):
        return 2

    return 1 if first.startswith(b"#!") else 0
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def byte_offset_for_line(lines: list[bytes], line_no: int) -> int:
    """Map a 1-based line number to the byte offset at which that line starts.

    Line numbers at or below 1 map to the start of the file, just past a
    UTF-8 BOM when the first line begins with one. Numbers beyond the last
    line map to the combined length of all lines.
    """
    if line_no > 1:
        # Slicing clamps at the end of the list, so an out-of-range line
        # number sums every line.
        return sum(map(len, lines[: line_no - 1]))
    starts_with_bom = bool(lines) and lines[0].startswith(UTF8_BOM)
    return len(UTF8_BOM) if starts_with_bom else 0
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def decode_python_source(data: bytes) -> str:
    """Decode Python source bytes with the encoding ``tokenize.detect_encoding`` reports."""
    reader = io.BytesIO(data)
    detected = tokenize.detect_encoding(reader.readline)
    return data.decode(detected[0])
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def first_insert_line_from_ast(source: str, minimum_line: int) -> int:
    """Return the first 1-based line at which shim code may legally be placed.

    The result is never below *minimum_line*, and it lies after a leading
    module docstring and after the run of ``from __future__ import ...``
    statements that follows it, when those are present.

    Raises:
        ValueError: if *source* does not parse as Python.
    """
    try:
        tree = ast.parse(source)
    except SyntaxError as exc:
        raise ValueError(f"input is not valid Python: {exc}") from exc

    def line_after(stmt: ast.stmt) -> int:
        # end_lineno may be None; fall back to the statement's first line.
        return (stmt.end_lineno or stmt.lineno) + 1

    statements = tree.body
    earliest = minimum_line

    leading = statements[0] if statements else None
    has_docstring = (
        isinstance(leading, ast.Expr)
        and isinstance(leading.value, ast.Constant)
        and isinstance(leading.value.value, str)
    )
    if has_docstring:
        earliest = max(earliest, line_after(leading))
        statements = statements[1:]

    for stmt in statements:
        is_future_import = (
            isinstance(stmt, ast.ImportFrom)
            and stmt.module == "__future__"
            and stmt.level == 0
        )
        if not is_future_import:
            break
        earliest = max(earliest, line_after(stmt))

    return earliest
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def make_quine_template(
    head: bytes,
    *,
    function_name: str,
    template_marker: bytes,
    tail_marker: bytes,
    newline: bytes,
) -> bytes:
    """Return *head* followed by the shim source, with placeholders unfilled.

    The two marker names appear as bare placeholders on the assignment lines
    (and as bytes literals inside the ``.replace`` calls); ``render_quine``
    substitutes the first occurrence of each. Shim lines are joined with
    *newline*, and the trailing empty entry makes the shim end with one.
    """
    template_name = template_marker.decode("ascii")
    tail_name = tail_marker.decode("ascii")
    shim_lines = (
        BEGIN_MARKER,
        f"def {function_name}():",
        " import hashlib as __py_self_md5_hashlib",
        " import sys as __py_self_md5_sys",
        f" __py_self_md5_template = {template_name}",
        f" __py_self_md5_tail = {tail_name}",
        " __py_self_md5_source = (",
        " __py_self_md5_template",
        f" .replace({tail_marker!r}, repr(__py_self_md5_tail).encode('ascii'), 1)",
        f" .replace({template_marker!r}, repr(__py_self_md5_template).encode('ascii'), 1)",
        " + __py_self_md5_tail",
        " )",
        " __py_self_md5_sys.stdout.write(",
        " 'MD5: ' + __py_self_md5_hashlib.md5(__py_self_md5_source).hexdigest().upper() + '\\n'",
        " )",
        f"{function_name}()",
        f"del {function_name}",
        END_MARKER,
        "",
    )
    encoded = [text.encode("ascii") for text in shim_lines]
    return head + newline.join(encoded)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def choose_markers(data: bytes, seed: str) -> tuple[bytes, bytes]:
    """Pick a ``(template_marker, tail_marker)`` pair that does not occur in *data*.

    Candidates are built from *seed* and a counter running from 0 to 999; the
    first pair with neither member present in *data* is returned.

    Raises:
        RuntimeError: if all 1000 candidate pairs collide with *data*.
    """
    for attempt in range(1000):
        tag = f"{seed}_{attempt}".encode("ascii")
        candidates = (
            b"__PY_SELF_MD5_TEMPLATE_" + tag + b"__",
            b"__PY_SELF_MD5_TAIL_" + tag + b"__",
        )
        if not any(marker in data for marker in candidates):
            return candidates
    raise RuntimeError("could not find marker names absent from input")
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def render_quine(template: bytes, tail: bytes, template_marker: bytes, tail_marker: bytes) -> bytes:
    """Fill the template's placeholders and append *tail*.

    The first occurrence of *tail_marker* becomes ``repr(tail)``, then the
    first occurrence of *template_marker* becomes ``repr(template)`` (the
    unfilled template), and finally *tail* itself is appended.
    """
    tail_literal = repr(tail).encode("ascii")
    template_literal = repr(template).encode("ascii")
    with_tail = template.replace(tail_marker, tail_literal, 1)
    with_both = with_tail.replace(template_marker, template_literal, 1)
    return with_both + tail
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def build_wrapped_source(data: bytes, *, force: bool = False) -> tuple[bytes, int, bool]:
    """Insert the self-hashing shim into the Python source *data*.

    Args:
        data: Raw bytes of the input script.
        force: When the input already contains a shim, strip it and wrap
            again instead of returning the input untouched.

    Returns:
        ``(wrapped, insertion_line, already_wrapped)``. For an input that is
        already wrapped and not forced this is ``(data, 1, True)``.

    Raises:
        ValueError: if the (shim-free) input does not parse as Python.
    """
    span = find_existing_shim_span(data)
    if span is not None and not force:
        return data, 1, True
    if span is not None:
        # Drop the old shim so the new one is built from the bare program.
        shim_start, shim_end = span
        data = data[:shim_start] + data[shim_end:]

    source_lines = data.splitlines(keepends=True)
    first_allowed = protected_header_line_count(source_lines) + 1
    insertion_line = first_insert_line_from_ast(decode_python_source(data), first_allowed)
    split_at = byte_offset_for_line(source_lines, insertion_line)

    # The seed ties the helper name and marker names to this exact input.
    seed = hashlib.sha1(data).hexdigest()[:16]
    eol = detect_newline(data)

    head, tail = data[:split_at], data[split_at:]
    if head and not head.endswith((b"\n", b"\r")):
        # The shim must start on a line of its own.
        head += eol

    template_marker, tail_marker = choose_markers(data, seed)
    template = make_quine_template(
        head,
        function_name=f"__py_self_md5_{seed}",
        template_marker=template_marker,
        tail_marker=tail_marker,
        newline=eol,
    )
    return render_quine(template, tail, template_marker, tail_marker), insertion_line, False
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def remove_existing_shim(data: bytes) -> bytes:
    """Return *data* with the span found by ``find_existing_shim_span`` cut out.

    *data* is returned unchanged when no shim is found.
    """
    located = find_existing_shim_span(data)
    if located is not None:
        start, stop = located
        return data[:start] + data[stop:]
    return data
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def default_output_path(input_path: Path) -> Path:
    """Return the sibling path ``<stem>.self_md5<suffix>`` for *input_path*.

    A missing suffix is replaced by ``.py``.
    """
    extension = input_path.suffix if input_path.suffix else ".py"
    return input_path.with_name(f"{input_path.stem}.self_md5{extension}")
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def wrap_file(input_path: Path, output_path: Path, *, force: bool = False) -> WrapResult:
    """Wrap the script at *input_path* and write the result to *output_path*.

    Nothing is written only when the input is already wrapped and the output
    resolves to the same file as the input. Missing parent directories of
    *output_path* are created before writing.

    Returns:
        A ``WrapResult`` whose ``md5`` is the digest of the file at
        *output_path* after the operation.
    """
    source_bytes = input_path.read_bytes()
    wrapped, insertion_line, already_wrapped = build_wrapped_source(source_bytes, force=force)

    # An already-wrapped input is still copied when the destination differs.
    needs_write = not already_wrapped or input_path.resolve() != output_path.resolve()
    if needs_write:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_bytes(wrapped)

    digest = file_md5(output_path)
    return WrapResult(
        input_path=input_path,
        output_path=output_path,
        md5=digest,
        insertion_line=insertion_line,
        already_wrapped=already_wrapped,
    )
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def run_output_check(path: Path, timeout: float) -> tuple[bool, str, str]:
    """Run the script at *path* and check that it announces its own MD5.

    Args:
        path: Generated script to execute with the current interpreter.
        timeout: Seconds to wait for the script before giving up.

    Returns:
        ``(ok, detail, stderr)``. ``ok`` is True when the first stdout line
        equals ``MD5: <digest of the file>``; ``detail`` is a human-readable
        description; ``stderr`` is the script's captured standard error. A
        script that exceeds *timeout* is reported as a failed check instead
        of raising ``subprocess.TimeoutExpired``.
    """
    expected_line = f"MD5: {file_md5(path)}"
    try:
        proc = subprocess.run(
            [sys.executable, str(path)],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            # The payload may print bytes the locale cannot decode; that must
            # not abort the check, and the ASCII MD5 line is unaffected.
            errors="replace",
            timeout=timeout,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # Output attached to TimeoutExpired may be bytes or None even in
        # text mode, so normalise it before returning.
        captured = exc.stderr or ""
        if isinstance(captured, bytes):
            captured = captured.decode(errors="replace")
        detail = f"expected first line {expected_line}, timed out after {timeout} seconds"
        return False, detail, captured

    stdout_lines = proc.stdout.splitlines()
    first_line = stdout_lines[0] if stdout_lines else ""
    ok = first_line == expected_line
    detail = f"expected first line {expected_line}, got {first_line or '<no stdout>'}"
    if proc.returncode != 0:
        detail += f"; process exited with {proc.returncode}"
    return ok, detail, proc.stderr
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def self_test() -> None:
    """Wrap a small built-in sample in a temporary directory and verify it.

    The sample carries a shebang, an encoding cookie, a module docstring and
    a ``__future__`` import. The generated shim must contain neither
    ``open(`` nor ``__file__``, and running the output must print the
    file's own MD5 first.

    Raises:
        SystemExit: with a ``self-test failed: ...`` message on any failure.
    """
    sample_text = textwrap.dedent(
        '''\
        #!/usr/bin/env python3
        # coding: utf-8
        """sample module docstring"""
        from __future__ import annotations

        print("payload ran")
        '''
    )
    sample = sample_text.encode("utf-8")

    with tempfile.TemporaryDirectory() as tmp_dir_name:
        workdir = Path(tmp_dir_name)
        source = workdir / "sample.py"
        output = workdir / "sample.self_md5.py"
        source.write_bytes(sample)

        result = wrap_file(source, output, force=False)
        generated = output.read_bytes()

        span = find_existing_shim_span(generated)
        if span is None:
            raise SystemExit("self-test failed: generated shim markers are missing")

        shim_start, shim_end = span
        shim = generated[shim_start:shim_end]
        # The shim must rebuild its bytes in memory, never read them from disk.
        if any(forbidden in shim for forbidden in (b"open(", b"__file__")):
            raise SystemExit("self-test failed: generated shim reads or references its source file")

        ok, detail, stderr = run_output_check(result.output_path, timeout=10)
        if not ok:
            raise SystemExit(f"self-test failed: {detail}\n{stderr}")
        print(f"self-test ok: {result.output_path.name} prints MD5: {result.md5}")
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the command-line arguments in *argv* (program name excluded)."""
    cli = argparse.ArgumentParser(
        description="Prepend a startup shim so a .py file prints its own MD5 before user code runs."
    )

    def add_switch(flag: str, help_text: str) -> None:
        # Every boolean option is a plain store_true flag.
        cli.add_argument(flag, action="store_true", help=help_text)

    cli.add_argument("input", nargs="?", type=Path, help="input Python file")
    cli.add_argument("-o", "--output", type=Path, help="output path")
    add_switch("--in-place", "rewrite the input file instead of creating *.self_md5.py")
    add_switch("--force", "overwrite output and replace an existing py-self-md5 shim if present")
    add_switch("--check", "execute the generated file and verify that its first stdout line is MD5: <digest>")
    cli.add_argument("--check-timeout", type=float, default=10.0, help="timeout in seconds for --check")
    add_switch("--self-test", "run an internal safe sample test")

    return cli.parse_args(argv)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point.

    Args:
        argv: Arguments to parse; ``sys.argv[1:]`` is used when ``None``.

    Returns:
        0 on success, 1 when ``--check`` was requested and the check failed.

    Raises:
        SystemExit: with an ``error: ...`` message for conflicting options, a
            missing input, an existing output without ``--force``, or an
            input that cannot be read, decoded or parsed.
    """
    args = parse_args(sys.argv[1:] if argv is None else argv)

    if args.self_test:
        self_test()
        return 0

    if args.input is None:
        raise SystemExit("error: input file is required unless --self-test is used")

    input_path = args.input.resolve()
    if not input_path.exists():
        raise SystemExit(f"error: input file does not exist: {input_path}")
    if not input_path.is_file():
        raise SystemExit(f"error: input path is not a file: {input_path}")

    if args.in_place and args.output:
        raise SystemExit("error: --in-place and --output cannot be used together")

    output_path = input_path if args.in_place else (args.output or default_output_path(input_path)).resolve()
    if output_path.exists() and output_path.resolve() != input_path and not args.force:
        raise SystemExit(f"error: output exists, use --force to overwrite: {output_path}")

    try:
        result = wrap_file(input_path, output_path, force=args.force)
    except (OSError, SyntaxError, ValueError) as exc:
        # Unparsable or undecodable input and I/O failures are user errors;
        # report them like every other error path instead of a traceback.
        raise SystemExit(f"error: {exc}") from exc

    if result.already_wrapped:
        print(f"already wrapped: {result.output_path}")
    else:
        print(f"wrote: {result.output_path}")
        print(f"inserted shim at line: {result.insertion_line}")
    print(f"file MD5: {result.md5}")

    if args.check:
        ok, detail, stderr = run_output_check(result.output_path, timeout=args.check_timeout)
        print(f"check: {'ok' if ok else 'failed'} ({detail})")
        if stderr:
            print(stderr, file=sys.stderr, end="")
        return 0 if ok else 1

    return 0
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.26"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "py-self-md5"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Wrap Python scripts so they print their own MD5 without reading their source at runtime."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "GGN_2015" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["md5", "quine", "python", "self-hash"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Environment :: Console",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
"Topic :: Software Development :: Build Tools",
|
|
25
|
+
"Topic :: Utilities",
|
|
26
|
+
]
|
|
27
|
+
dependencies = []
|
|
28
|
+
|
|
29
|
+
[project.scripts]
|
|
30
|
+
py-self-md5 = "py_self_md5.cli:main"
|
|
31
|
+
|
|
32
|
+
[tool.hatch.build.targets.wheel]
|
|
33
|
+
packages = ["py_self_md5"]
|