reproducibly 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ Metadata-Version: 2.4
2
+ Name: reproducibly
3
+ Version: 0.0.20
4
+ Summary: Reproducibly build Python packages.
5
+ Author-email: Keith Maxwell <keith.maxwell@gmail.com>
6
+ Requires-Python: >=3.13
7
+ Description-Content-Type: text/markdown
8
+ License-Expression: MPL-2.0
9
+ Classifier: Programming Language :: Python :: 3
10
+ Requires-Dist: build==1.4.0
11
+ Requires-Dist: cibuildwheel==3.3.1
12
+ Requires-Dist: packaging==26.0
13
+ Requires-Dist: pyproject-hooks==1.2.0
14
+ Project-URL: Homepage, https://github.com/maxwell-k/reproducibly/
15
+ Project-URL: Issues, https://github.com/maxwell-k/reproducibly/issues
16
+
17
+ # reproducibly.py
18
+
19
+ ## Introduction / Reproducibly build Python packages.
20
+
21
+ This project is a convenient wrapper around [build] and [cibuildwheel] that sets
22
+ metadata like file modification times, user and group IDs and names, and file
23
+ permissions predictably. The code can be used from PyPI or as a single [file]
24
+ with [inline script metadata].
25
+
26
+ ## Usage
27
+
28
+ Command to run from PyPI and view help:
29
+
30
+ pipx run reproducibly --help
31
+
32
+ Command to run from a local file and view help:
33
+
34
+ pipx run ./reproducibly.py --help
35
+
36
+ Output:
37
+
38
+ <!--[[[cog
39
+ from subprocess import run
40
+
41
+ import cog
42
+
43
+ CMD = ".venv/bin/python ./reproducibly.py --help"
44
+
45
+ cog.out("\n")
46
+ cog.out("```\n")
47
+ cog.out(run(CMD.split(), text=True, check=True, capture_output=True).stdout)
48
+ cog.out("```\n")
49
+ cog.out("\n")
50
+ ]]]-->
51
+
52
+ ```
53
+ usage: reproducibly.py [-h] [--version] input [input ...] output
54
+
55
+ Reproducibly build Python packages.
56
+
57
+ features:
58
+
59
+ - Builds a source distribution (sdist) from a git repository
60
+ - Builds a wheel from a sdist
61
+ - Resets metadata like user and group names and ids to predictable values
62
+ - Uses no compression for predictable file hashes across Linux distributions
63
+ - By default uses the last commit date and time from git
64
+ - Respects SOURCE_DATE_EPOCH when building a sdist
65
+ - Single file script with inline script metadata or PyPI package
66
+
67
+ positional arguments:
68
+ input Input git repository or source distribution
69
+ output Output directory
70
+
71
+ options:
72
+ -h, --help show this help message and exit
73
+ --version show program's version number and exit
74
+ ```
75
+
76
+ <!--[[[end]]]-->
77
+
78
+ ## Development
79
+
80
+ This project uses [Nox](https://nox.thea.codes/en/stable/).
81
+
82
+ Builds are run every day to check for reproducibility: <br />
83
+ [![status](https://github.com/maxwell-k/reproducibly/actions/workflows/nox.yaml/badge.svg?event=schedule)](https://github.com/maxwell-k/reproducibly/actions?query=event:schedule)
84
+
85
+ To set up a development environment use:
86
+
87
+ nox --session=dev
88
+
89
+ To run unit tests and integration tests:
90
+
91
+ nox
92
+
93
+ [build]: https://pypi.org/project/build/
94
+ [cibuildwheel]: https://pypi.org/project/cibuildwheel/
95
+ [file]: https://github.com/maxwell-k/reproducibly/blob/main/reproducibly.py
96
+ [inline script metadata]: https://packaging.python.org/en/latest/specifications/inline-script-metadata/
97
+
98
+ <!--
99
+ README.md
100
+ Copyright 2023 Keith Maxwell
101
+ SPDX-License-Identifier: CC-BY-SA-4.0
102
+
103
+ vim: set filetype=markdown.dprint.cog.htmlCommentNoSpell :
104
+ -->
105
+
@@ -0,0 +1,5 @@
1
+ reproducibly.py,sha256=sai8QpR75lxIE7e1ALoXoakB9jXhG2m-QC_tED3XGkM,13926
2
+ reproducibly-0.0.20.dist-info/METADATA,sha256=G1LB7KEf1wazKt1pYwdBTGSVgvRZoslUcJl1TYvTL9o,2975
3
+ reproducibly-0.0.20.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
4
+ reproducibly-0.0.20.dist-info/entry_points.txt,sha256=J4fRzmY7XffHnoo9etzvgeph8np598MPpIy3jpnGlfk,50
5
+ reproducibly-0.0.20.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: flit 3.12.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ reproducibly=reproducibly:main
3
+
reproducibly.py ADDED
@@ -0,0 +1,409 @@
1
+ """Reproducibly build Python packages.
2
+
3
+ features:
4
+
5
+ - Builds a source distribution (sdist) from a git repository
6
+ - Builds a wheel from a sdist
7
+ - Resets metadata like user and group names and ids to predictable values
8
+ - Uses no compression for predictable file hashes across Linux distributions
9
+ - By default uses the last commit date and time from git
10
+ - Respects SOURCE_DATE_EPOCH when building a sdist
11
+ - Single file script with inline script metadata or PyPI package
12
+ """
13
+
14
+ # reproducibly.py
15
+ # Copyright 2024 Keith Maxwell
16
+ # SPDX-License-Identifier: MPL-2.0
17
+ import gzip
18
+ from argparse import ArgumentParser, RawDescriptionHelpFormatter
19
+ from contextlib import chdir
20
+ from datetime import datetime, UTC
21
+ from enum import auto, Enum
22
+ from os import environ, utime
23
+ from pathlib import Path
24
+ from shutil import copyfileobj, move
25
+ from stat import S_IWGRP, S_IWOTH
26
+ from subprocess import CalledProcessError, run
27
+ from sys import version_info
28
+ from tarfile import TarFile, TarInfo
29
+ from tempfile import TemporaryDirectory
30
+ from types import TracebackType
31
+ from typing import cast, Literal, TypedDict
32
+ from zipfile import ZIP_DEFLATED, ZipFile, ZipInfo
33
+
34
+ from build import ProjectBuilder
35
+ from build.env import DefaultIsolatedEnv
36
+ from cibuildwheel.__main__ import build_in_directory
37
+ from cibuildwheel.options import CommandLineArguments
38
+ from pyproject_hooks import default_subprocess_runner
39
+
40
+ # /// script
41
+ # requires-python = ">=3.13"
42
+ # dependencies = [
43
+ # "build==1.4.0",
44
+ # "cibuildwheel==3.3.1",
45
+ # "packaging==26.0",
46
+ # "pyproject-hooks==1.2.0",
47
+ # ]
48
+ # ///
49
+
50
+
51
# Why 1980-01-01 is used as the earliest timestamp:
#
# - Built distributions are created from source distributions
# - Source distributions are typically gzipped tar files
# - Built distributions are typically zip files
# - The default date for this script is the earliest date supported by both
#   archive formats
# - The minimum date value supported by zip files is documented in
#   <https://github.com/python/cpython/blob/3.13/Lib/zipfile.py>.
EARLIEST = datetime(1980, 1, 1, 0, 0, 0, tzinfo=UTC).timestamp()  # 315532800.0


# Version of this script / package.
__version__ = "0.0.20"
61
+
62
+
63
def _build(
    srcdir: Path,
    output: Path,
    distribution: Literal["wheel", "sdist"],
) -> Path:
    """Build one distribution through the build API in an isolated environment.

    ``srcdir`` is the project to build, ``output`` is the directory handed to
    the builder and ``distribution`` selects between a wheel and a sdist.

    Returns the path to the built distribution.
    """
    with DefaultIsolatedEnv() as isolated:
        project = ProjectBuilder.from_isolated_env(
            isolated,
            srcdir,
            runner=default_subprocess_runner,
        )
        # The build system requirements are installed before the builder is
        # asked for the extra requirements of this distribution type.
        isolated.install(project.build_system_requires)
        extra = project.get_requires_for_build(distribution)
        isolated.install(extra)
        artefact = project.build(distribution, output)
    return output / artefact
82
+
83
+
84
+ def _extract_to_empty_directory(sdist: Path, directory: str) -> Path:
85
+ with TarFile.open(sdist) as t:
86
+ t.extractall(directory, filter="data")
87
+ return next(Path(directory).iterdir())
88
+
89
+
90
def _cibuildwheel(sdist: Path, output: Path) -> Path:
    """Build a wheel from ``sdist`` through the cibuildwheel API.

    The sdist is unpacked into a temporary directory that is also used as the
    cibuildwheel output directory. The resulting wheel is then moved into
    ``output``, replacing any existing wheel of the same name.

    Returns the path to the built distribution
    """
    overrides = ModifiedEnvironment(
        CIBW_BUILD_FRONTEND="build",
        CIBW_CONTAINER_ENGINE="podman",
        CIBW_ENVIRONMENT_PASS_LINUX="SOURCE_DATE_EPOCH",  # noqa: S106 …_PASS_… is not a password
        CIBW_ENVIRONMENT="PIP_TIMEOUT=150",
    )
    with overrides, TemporaryDirectory() as scratch:
        major, minor = version_info[0], version_info[1]
        options = CommandLineArguments.defaults()
        options.package_dir = _extract_to_empty_directory(sdist, scratch)  # input
        options.only = f"cp{major}{minor}-manylinux_x86_64"
        options.output_dir = Path(scratch).resolve()
        options.platform = None
        with chdir(scratch):  # output may be a relative path
            build_in_directory(options)
        built = next(options.output_dir.glob("*.whl"))
        (output / built.name).unlink(missing_ok=True)
        return Path(move(built, output))
114
+
115
+
116
class Arguments(TypedDict):
    """Parsed command line input for reproducibly.py."""

    # Inputs that are git repositories
    repositories: list[Path]
    # Inputs that are source distributions
    sdists: list[Path]
    # Output directory
    output: Path
122
+
123
+
124
+ class ModifiedEnvironment:
125
+ """A context manager to temporarily change environment variables."""
126
+
127
+ def __init__(self, **kwargs: str | None) -> None:
128
+ """Initialise with all arguments as environment variables."""
129
+ self.during: dict[str, str | None] = kwargs
130
+
131
+ def __enter__(self) -> None:
132
+ """Set the environment variables."""
133
+ self.before = {key: environ.get(key) for key in self.during}
134
+ self._update(self.during)
135
+
136
+ def __exit__(
137
+ self,
138
+ exc_type: type[BaseException] | None,
139
+ exc_value: BaseException | None,
140
+ exc_traceback: TracebackType | None,
141
+ ) -> None:
142
+ """Reset environment."""
143
+ self._update(self.before)
144
+
145
+ def _update(self, other: dict[str, str | None]) -> None:
146
+ for key, value in other.items():
147
+ if value is None:
148
+ if key in environ:
149
+ del environ[key]
150
+ else:
151
+ environ[key] = value
152
+
153
+
154
class Builder(Enum):
    """Enum to identify the package used to build a wheel."""

    cibuildwheel = auto()
    build = auto()

    @staticmethod
    def which(archive: Path) -> "Builder":
        """Return Builder.cibuildwheel if .c files are present otherwise Builder.build."""
        with TarFile.open(archive, "r:gz") as tar:
            names = tar.getnames()
        for name in names:
            if name.endswith(".c"):
                return Builder.cibuildwheel
        return Builder.build
166
+
167
+
168
+ def cleanse_sdist(path_: Path, mtime: float) -> int:
169
+ """Cleanse a single source distribution.
170
+
171
+ - Set all uids and gids to zero
172
+ - Set all unames and gnames to root
173
+ - Set access and modified time for .tar.gz
174
+ - Set modified time for .tar inside .gz
175
+ - Set modified time for files inside the .tar
176
+ - Remove group and other write permissions for files inside the .tar
177
+ - Set the compression level to zero i.e. no compression
178
+ """
179
+ filename = path_.absolute()
180
+
181
+ mtime = max(mtime, EARLIEST)
182
+
183
+ with TemporaryDirectory() as path:
184
+ with TarFile.open(filename) as tarfile:
185
+ tarfile.extractall(path=path, filter="data")
186
+
187
+ filename.unlink(missing_ok=True)
188
+ (bare,) = Path(path).iterdir()
189
+
190
+ prefix = path.removeprefix("/") + "/"
191
+
192
+ def filter_(tarinfo: TarInfo) -> TarInfo:
193
+ tarinfo.mtime = int(mtime)
194
+ tarinfo.uid = tarinfo.gid = 0
195
+ tarinfo.uname = tarinfo.gname = "root"
196
+ tarinfo.mode = tarinfo.mode & ~S_IWGRP & ~S_IWOTH
197
+ tarinfo.name = tarinfo.name.removeprefix(prefix)
198
+ return tarinfo
199
+
200
+ tar = f"{bare}.tar"
201
+ with TarFile.open(tar, "w") as tarfile:
202
+ tarfile.add(bare, filter=filter_)
203
+
204
+ with (
205
+ Path(tar).open("rb") as fsrc,
206
+ gzip.GzipFile(
207
+ filename=filename,
208
+ mode="wb",
209
+ mtime=mtime,
210
+ compresslevel=0,
211
+ ) as fdst,
212
+ ):
213
+ copyfileobj(fsrc, fdst)
214
+ utime(filename, (mtime, mtime))
215
+ return 0
216
+
217
+
218
def latest_modification_time(archive: Path) -> str:
    """Return the newest member mtime of a gzipped tarfile as a string.

    The value is formatted without any fractional digits.
    """
    with TarFile.open(archive, "r:gz") as tar:
        stamps = [member.mtime for member in tar.getmembers()]
    return format(max(stamps), ".0f")
223
+
224
+
225
def latest_commit_time(repository: Path) -> float:
    """Return the committer time of the most recent commit in a repository.

    The value is a UNIX timestamp: the number of seconds, excluding leap
    seconds, since 01 Jan 1970 00:00:00 UTC. It is read from the output of
    ``git log -1 --pretty=%ct`` and a failing git command raises
    ``CalledProcessError``.
    """
    completed = run(
        ("git", "-C", repository, "log", "-1", "--pretty=%ct"),
        check=True,
        capture_output=True,
        text=True,
    )
    return float(completed.stdout.rstrip("\n"))
234
+
235
+
236
+ def breadth_first_key(path: str) -> list[str | list]:
237
+ """Key for sorting breadth first.
238
+
239
+ An example of breadth first sorted strings:
240
+
241
+ 1. z
242
+ 2. a/y
243
+ 3. a/b/x
244
+
245
+ """
246
+ start, sep, end = path.partition("/")
247
+ return [sep, start, breadth_first_key(end)] if end else [sep, start]
248
+
249
+
250
def key(input_: bytes | ZipInfo) -> tuple[int, list[str | list]]:
    """Key for reproducibly sorting ZipFiles.

    Accepts either a zip member or one line of a RECORD file as bytes. The
    first element of the key is a group: 3 when the path contains "/RECORD",
    2 when it contains "dist-info" and 1 otherwise. The second element is the
    breadth first key of the member name or of the whole decoded line.
    """
    if not hasattr(input_, "filename"):
        # A RECORD line: the path is the first comma separated field.
        item = cast("bytes", input_).decode()
        path = item.split(",")[0]
    else:
        path = item = cast("ZipInfo", input_).filename

    ordering = breadth_first_key(item)
    if "/RECORD" in path:
        return (3, ordering)
    if "dist-info" in path:
        return (2, ordering)
    return (1, ordering)
265
+
266
+
267
+ def fix_zip_members(path: Path, umask: int = 0o022) -> Path:
268
+ """Apply fixes to members in a zip file.
269
+
270
+ Processes the zip file in place. Path is both the source and destination, a
271
+ temporary working copy is made.
272
+
273
+ - Apply a umask to each member
274
+ - Change to compression level zero
275
+
276
+ When using the default deflate compression and comparing wheels created on
277
+ Ubuntu 24.04 and Fedora 40, minor differences in the size of the compressed
278
+ wheel were observed. For example:
279
+
280
+ │ -112 files, 909030 bytes uncompressed, 272160 bytes compressed: 70.1%
281
+ │ +112 files, 909030 bytes uncompressed, 271653 bytes compressed: 70.1%
282
+
283
+ As a solution this function uses compression level zero i.e. no compression.
284
+ """
285
+ operand = ~(umask << 16)
286
+
287
+ with TemporaryDirectory() as directory:
288
+ copy = Path(directory) / path.name
289
+ with ZipFile(path, "r") as original, ZipFile(copy, "w") as destination:
290
+ for member in original.infolist():
291
+ data = original.read(member)
292
+ member.external_attr = member.external_attr & operand
293
+ member.compress_type = ZIP_DEFLATED
294
+ member.compress_level = 0
295
+ destination.writestr(member, data)
296
+ path.unlink()
297
+ move(copy, path) # can't rename as /tmp may be a different device
298
+
299
+ return path
300
+
301
+
302
+ def _is_git_repository(path: Path) -> bool:
303
+ if not path.is_dir():
304
+ return False
305
+
306
+ try:
307
+ process = run(
308
+ ("git", "rev-parse", "--show-toplevel"),
309
+ cwd=path,
310
+ check=True,
311
+ capture_output=True,
312
+ text=True,
313
+ )
314
+ except (FileNotFoundError, CalledProcessError):
315
+ return False
316
+
317
+ actual = process.stdout.rstrip("\n")
318
+ expected = str(path.absolute())
319
+ return actual == expected
320
+
321
+
322
def parse_args(args: list[str] | None) -> Arguments:
    """Parse command line arguments.

    The output directory is created when it does not exist. Every input is
    classified as a source distribution (a file whose name ends in .tar.gz)
    or as a git repository; anything else is reported through the parser's
    error handling.
    """
    parser = ArgumentParser(
        prog="reproducibly.py",
        formatter_class=RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument("--version", action="version", version=__version__)
    parser.add_argument(
        "input",
        type=Path,
        nargs="+",
        help="Input git repository or source distribution",
    )
    parser.add_argument("output", type=Path, help="Output directory")
    namespace = parser.parse_args(args)

    destination: Path = namespace.output
    if not destination.exists():
        destination.mkdir(parents=True)
    if not destination.is_dir():
        parser.error(f"{destination} is not a directory")

    repositories: list[Path] = []
    sdists: list[Path] = []
    for candidate in namespace.input:
        if candidate.is_file() and candidate.name.endswith(".tar.gz"):
            sdists.append(candidate)
        elif _is_git_repository(candidate):
            repositories.append(candidate)
        else:
            parser.error(f"{candidate} is not a git repository or source distribution")
    return Arguments(repositories=repositories, sdists=sdists, output=destination)
347
+
348
+
349
def _sortwheel(wheel: Path) -> Path:
    """Sort the lines in *.dist-info/RECORD and files in a wheel.

    pypa/wheel has had reproducible builds since 0.27.0 (2016-02-05); this
    script post processes a wheel file to match the ordering that pypa/wheel
    implements. Specifically it will:

    1. Order the lines inside *.dist-info/RECORD
    2. Order the files inside the zip file

    The ordering will be:

    1. Files and directories sorted breadth first
    2. Files with dist-info in their path sorted alphabetically
    3. Files with /RECORD in their path sorted alphabetically

    From observation of pypa/wheel output desired order is below. This can be
    called breadth first. It is easily created recursively. For a directory,
    list all the files in order then repeat for all of the subdirectories in
    order.

    The wheel is rewritten in place via a temporary copy and its path is
    returned.
    """
    with TemporaryDirectory() as directory:
        intermediate = Path(directory) / wheel.name
        with ZipFile(wheel, "r") as original, ZipFile(intermediate, "w") as destination:
            members = sorted(original.infolist(), key=key)
            for member in members:
                data = original.read(member)
                # Only the metadata file <name>.dist-info/RECORD is a list of
                # lines that may be reordered; any other member whose name
                # happens to end in "RECORD" is package data and is copied
                # unchanged.
                top, _, rest = member.filename.partition("/")
                if top.endswith(".dist-info") and rest == "RECORD":
                    sorted_ = sorted(data.splitlines(keepends=True), key=key)
                    data = b"".join(sorted_)
                destination.writestr(member, data)
        wheel.unlink()
        move(intermediate, wheel)  # can't rename as /tmp may be a different device

    return wheel
384
+
385
+
386
def main(arguments: list[str] | None = None) -> int:
    """Reproducibly build Python packages.

    For every git repository given as input a sdist is built and cleansed,
    dated with SOURCE_DATE_EPOCH when that is set and with the latest commit
    time otherwise. For every sdist given as input a wheel is built with
    SOURCE_DATE_EPOCH set to the latest modification time in the sdist; the
    wheel is then sorted and its members fixed. Returns 0.
    """
    parsed = parse_args(arguments)
    output = parsed["output"]

    for repository in parsed["repositories"]:
        sdist = _build(repository, output, "sdist")
        epoch = environ.get("SOURCE_DATE_EPOCH")
        date = latest_commit_time(repository) if epoch is None else float(epoch)
        cleanse_sdist(sdist, date)

    for sdist in parsed["sdists"]:
        with ModifiedEnvironment(SOURCE_DATE_EPOCH=latest_modification_time(sdist)):
            if Builder.which(sdist) is not Builder.cibuildwheel:
                with TemporaryDirectory() as directory:
                    srcdir = _extract_to_empty_directory(sdist, directory)
                    built = _build(srcdir, output, "wheel")
            else:
                built = _cibuildwheel(sdist, output)
        fix_zip_members(_sortwheel(built))
    return 0
406
+
407
+
408
# Run the command line interface when executed as a script; the return value
# of main() is passed to SystemExit.
if __name__ == "__main__":
    raise SystemExit(main())