reproducibly 0.0.12__tar.gz → 0.0.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,15 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: reproducibly
3
- Version: 0.0.12
4
- Summary: Reproducibly build Python packages
3
+ Version: 0.0.16
4
+ Summary: Reproducibly build Python packages.
5
5
  Author-email: Keith Maxwell <keith.maxwell@gmail.com>
6
- Requires-Python: >=3.11
6
+ Requires-Python: >=3.13
7
7
  Description-Content-Type: text/markdown
8
8
  Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
10
- Requires-Dist: build==1.2.1
11
- Requires-Dist: cibuildwheel==2.20.0
12
- Requires-Dist: packaging==24.1
13
- Requires-Dist: pyproject_hooks==1.1.0
9
+ Requires-Dist: build==1.2.2.post1
10
+ Requires-Dist: cibuildwheel==3.0.1
11
+ Requires-Dist: packaging==25.0
12
+ Requires-Dist: pyproject-hooks==1.2.0
14
13
  Project-URL: Homepage, https://github.com/maxwell-k/reproducibly/
15
14
  Project-URL: Issues, https://github.com/maxwell-k/reproducibly/issues
16
15
 
@@ -47,13 +46,14 @@ cog.out("\n```\n" + RESULT.stdout + "```\n\n")
47
46
  ```
48
47
  usage: reproducibly.py [-h] [--version] input [input ...] output
49
48
 
50
- Reproducibly build Python packages
49
+ Reproducibly build Python packages.
51
50
 
52
51
  features:
53
52
 
54
53
  - Builds a source distribution (sdist) from a git repository
55
54
  - Builds a wheel from a sdist
56
55
  - Resets metadata like user and group names and ids to predictable values
56
+ - Uses no compression for predictable file hashes across Linux distributions
57
57
  - By default uses the last commit date and time from git
58
58
  - Respects SOURCE_DATE_EPOCH when building a sdist
59
59
  - Single file script with inline script metadata or PyPI package
@@ -93,5 +93,7 @@ To run unit tests and integration tests:
93
93
  README.md
94
94
  Copyright 2023 Keith Maxwell
95
95
  SPDX-License-Identifier: CC-BY-SA-4.0
96
+
97
+ vim: set filetype=markdown.dprint.cog.htmlCommentNoSpell :
96
98
  -->
97
99
 
@@ -31,13 +31,14 @@ cog.out("\n```\n" + RESULT.stdout + "```\n\n")
31
31
  ```
32
32
  usage: reproducibly.py [-h] [--version] input [input ...] output
33
33
 
34
- Reproducibly build Python packages
34
+ Reproducibly build Python packages.
35
35
 
36
36
  features:
37
37
 
38
38
  - Builds a source distribution (sdist) from a git repository
39
39
  - Builds a wheel from a sdist
40
40
  - Resets metadata like user and group names and ids to predictable values
41
+ - Uses no compression for predictable file hashes across Linux distributions
41
42
  - By default uses the last commit date and time from git
42
43
  - Respects SOURCE_DATE_EPOCH when building a sdist
43
44
  - Single file script with inline script metadata or PyPI package
@@ -77,4 +78,6 @@ To run unit tests and integration tests:
77
78
  README.md
78
79
  Copyright 2023 Keith Maxwell
79
80
  SPDX-License-Identifier: CC-BY-SA-4.0
81
+
82
+ vim: set filetype=markdown.dprint.cog.htmlCommentNoSpell :
80
83
  -->
@@ -1,9 +1,8 @@
1
1
  # SPDX-FileCopyrightText: 2024 Keith Maxwell <keith.maxwell@gmail.com>
2
- #
3
2
  # SPDX-License-Identifier: CC0-1.0
4
3
 
5
4
  [build-system]
6
- requires = ["flit_core >=3.2,<4"]
5
+ requires = ["flit_core ==3.12.0"]
7
6
  build-backend = "flit_core.buildapi"
8
7
 
9
8
  [project]
@@ -13,17 +12,17 @@ authors = [
13
12
  { name = "Keith Maxwell", email = "keith.maxwell@gmail.com" },
14
13
  ]
15
14
  readme = "README.md"
16
- requires-python = ">=3.11"
15
+ requires-python = ">=3.13"
17
16
  classifiers = [
18
17
  "Programming Language :: Python :: 3",
19
- "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
20
18
  ]
21
19
  dependencies = [
22
- "build==1.2.1",
23
- "cibuildwheel==2.20.0",
24
- "packaging==24.1",
25
- "pyproject_hooks==1.1.0",
20
+ "build==1.2.2.post1",
21
+ "cibuildwheel==3.0.1",
22
+ "packaging==25.0",
23
+ "pyproject-hooks==1.2.0",
26
24
  ]
25
+ licence = "MPL-2.0"
27
26
 
28
27
  [project.urls]
29
28
  Homepage = "https://github.com/maxwell-k/reproducibly/"
@@ -34,3 +33,16 @@ reproducibly = "reproducibly:main"
34
33
 
35
34
  [tool.codespell]
36
35
  skip = './htmlcov'
36
+
37
+ [tool.ruff.lint]
38
+ select = ["ALL"]
39
+ ignore = [
40
+ "D203", # incompatible with D211
41
+ "D213", # incompatible with D212
42
+ "I", # prefer usort to ruff isort implementation
43
+ "PT", # prefer unittest style
44
+ "S310", # the rule errors on the "use instead" code from `ruff rule S310`
45
+ "S602", # assume arguments to subprocess.run are validated
46
+ "S603", # assume trusted input to subprocess.run
47
+ "T201", # print is used for output in command line scripts
48
+ ]
@@ -1,10 +1,11 @@
1
- """Reproducibly build Python packages
1
+ """Reproducibly build Python packages.
2
2
 
3
3
  features:
4
4
 
5
5
  - Builds a source distribution (sdist) from a git repository
6
6
  - Builds a wheel from a sdist
7
7
  - Resets metadata like user and group names and ids to predictable values
8
+ - Uses no compression for predictable file hashes across Linux distributions
8
9
  - By default uses the last commit date and time from git
9
10
  - Respects SOURCE_DATE_EPOCH when building a sdist
10
11
  - Single file script with inline script metadata or PyPI package
@@ -14,20 +15,21 @@ features:
14
15
  # Copyright 2024 Keith Maxwell
15
16
  # SPDX-License-Identifier: MPL-2.0
16
17
  import gzip
17
- import tarfile
18
18
  from argparse import ArgumentParser, RawDescriptionHelpFormatter
19
19
  from contextlib import chdir
20
- from datetime import datetime
21
- from enum import auto, Enum, nonmember
20
+ from datetime import datetime, UTC
21
+ from enum import auto, Enum
22
22
  from os import environ, utime
23
23
  from pathlib import Path
24
24
  from shutil import copyfileobj, move
25
25
  from stat import S_IWGRP, S_IWOTH
26
26
  from subprocess import CalledProcessError, run
27
27
  from sys import version_info
28
+ from tarfile import TarFile, TarInfo
28
29
  from tempfile import TemporaryDirectory
30
+ from types import TracebackType
29
31
  from typing import cast, Literal, TypedDict
30
- from zipfile import ZipFile, ZipInfo
32
+ from zipfile import ZIP_DEFLATED, ZipFile, ZipInfo
31
33
 
32
34
  from build import ProjectBuilder
33
35
  from build.env import DefaultIsolatedEnv
@@ -35,31 +37,15 @@ from cibuildwheel.__main__ import build_in_directory
35
37
  from cibuildwheel.options import CommandLineArguments
36
38
  from pyproject_hooks import default_subprocess_runner
37
39
 
38
- # [[[cog import cog ; from pathlib import Path ]]]
39
- # [[[end]]]
40
-
41
- # [[[cog
42
- # import tomllib
43
- # with open("pyproject.toml", "rb") as f:
44
- # pyproject = tomllib.load(f)
45
- # cog.outl("# /// script")
46
- # cog.outl(f'# requires-python = "{pyproject["project"]["requires-python"]}"')
47
- # cog.outl("# dependencies = [")
48
- # for dependency in pyproject["project"]["dependencies"]:
49
- # cog.outl(f"# \"{dependency}\",")
50
- # cog.outl("# ]")
51
- # cog.outl("# ///")
52
- # ]]]
53
40
  # /// script
54
- # requires-python = ">=3.11"
41
+ # requires-python = ">=3.13"
55
42
  # dependencies = [
56
- # "build==1.2.1",
57
- # "cibuildwheel==2.20.0",
58
- # "packaging==24.1",
59
- # "pyproject_hooks==1.1.0",
43
+ # "build==1.2.2.post1",
44
+ # "cibuildwheel==3.0.1",
45
+ # "packaging==25.0",
46
+ # "pyproject-hooks==1.2.0",
60
47
  # ]
61
48
  # ///
62
- # [[[end]]]
63
49
 
64
50
 
65
51
  # - Built distributions are created from source distributions
@@ -67,19 +53,22 @@ from pyproject_hooks import default_subprocess_runner
67
53
  # - Built distributions are typically zip files
68
54
  # - The default date for this script is the earliest date supported by both
69
55
  # - The minimum date value supported by zip files is documented in
70
- # <https://github.com/python/cpython/blob/3.11/Lib/zipfile.py>.
71
- EARLIEST = datetime(1980, 1, 1, 0, 0, 0).timestamp() # 315532800.0
56
+ # <https://github.com/python/cpython/blob/3.13/Lib/zipfile.py>.
57
+ EARLIEST = datetime(1980, 1, 1, 0, 0, 0, tzinfo=UTC).timestamp() # 315532800.0
72
58
 
73
59
 
74
- __version__ = "0.0.12"
60
+ __version__ = "0.0.16"
75
61
 
76
62
 
77
63
  def _build(
78
- srcdir: Path, output: Path, distribution: Literal["wheel"] | Literal["sdist"]
64
+ srcdir: Path,
65
+ output: Path,
66
+ distribution: Literal["wheel", "sdist"],
79
67
  ) -> Path:
80
- """Call the build API
68
+ """Call the build API.
81
69
 
82
- Returns the path to the built distribution"""
70
+ Returns the path to the built distribution
71
+ """
83
72
  with DefaultIsolatedEnv() as env:
84
73
  builder = ProjectBuilder.from_isolated_env(
85
74
  env,
@@ -93,20 +82,21 @@ def _build(
93
82
 
94
83
 
95
84
  def _extract_to_empty_directory(sdist: Path, directory: str) -> Path:
96
- with tarfile.open(sdist) as t:
97
- t.extractall(directory)
85
+ with TarFile.open(sdist) as t:
86
+ t.extractall(directory, filter="data")
98
87
  return next(Path(directory).iterdir())
99
88
 
100
89
 
101
90
  def _cibuildwheel(sdist: Path, output: Path) -> Path:
102
- """Call the cibuildwheel API
91
+ """Call the cibuildwheel API.
103
92
 
104
- Returns the path to the built distribution"""
93
+ Returns the path to the built distribution
94
+ """
105
95
  with (
106
96
  ModifiedEnvironment(
107
97
  CIBW_BUILD_FRONTEND="build",
108
98
  CIBW_CONTAINER_ENGINE="podman",
109
- CIBW_ENVIRONMENT_PASS_LINUX="SOURCE_DATE_EPOCH",
99
+ CIBW_ENVIRONMENT_PASS_LINUX="SOURCE_DATE_EPOCH", # noqa: S106 …_PASS_… is not a password
110
100
  CIBW_ENVIRONMENT="PIP_TIMEOUT=150",
111
101
  ),
112
102
  TemporaryDirectory() as directory,
@@ -120,30 +110,39 @@ def _cibuildwheel(sdist: Path, output: Path) -> Path:
120
110
  build_in_directory(args)
121
111
  wheel = next(args.output_dir.glob("*.whl"))
122
112
  output.joinpath(wheel.name).unlink(missing_ok=True)
123
- path = Path(move(wheel, output))
124
- return path
113
+ return Path(move(wheel, output))
125
114
 
126
115
 
127
116
  class Arguments(TypedDict):
117
+ """Input arguments to reproducibly.py."""
118
+
128
119
  repositories: list[Path]
129
120
  sdists: list[Path]
130
121
  output: Path
131
122
 
132
123
 
133
124
  class ModifiedEnvironment:
134
- """A context manager to temporarily change environment variables"""
125
+ """A context manager to temporarily change environment variables."""
135
126
 
136
- def __init__(self, **kwargs: str | None):
127
+ def __init__(self, **kwargs: str | None) -> None:
128
+ """Initialise with all arguments as environment variables."""
137
129
  self.during: dict[str, str | None] = kwargs
138
130
 
139
- def __enter__(self):
140
- self.before = {key: environ.get(key) for key in self.during.keys()}
131
+ def __enter__(self) -> None:
132
+ """Set the environment variables."""
133
+ self.before = {key: environ.get(key) for key in self.during}
141
134
  self._update(self.during)
142
135
 
143
- def __exit__(self, exc_type, exc_value, exc_traceback):
136
+ def __exit__(
137
+ self,
138
+ exc_type: type[BaseException] | None,
139
+ exc_value: BaseException | None,
140
+ exc_traceback: TracebackType | None,
141
+ ) -> None:
142
+ """Reset environment."""
144
143
  self._update(self.before)
145
144
 
146
- def _update(self, other):
145
+ def _update(self, other: dict[str, str | None]) -> None:
147
146
  for key, value in other.items():
148
147
  if value is None:
149
148
  if key in environ:
@@ -153,19 +152,21 @@ class ModifiedEnvironment:
153
152
 
154
153
 
155
154
  class Builder(Enum):
155
+ """Enum to identify the build package."""
156
+
156
157
  cibuildwheel = auto()
157
158
  build = auto()
158
159
 
159
- @nonmember
160
160
  @staticmethod
161
161
  def which(archive: Path) -> "Builder":
162
- with tarfile.open(archive, "r:gz") as tar:
162
+ """Return Builder.cibuildwheel if .c files are present otherwise Builder.build."""
163
+ with TarFile.open(archive, "r:gz") as tar:
163
164
  c = any(i.name.endswith(".c") for i in tar.getmembers())
164
165
  return Builder.cibuildwheel if c else Builder.build
165
166
 
166
167
 
167
- def cleanse_metadata(path_: Path, mtime: float) -> int:
168
- """Cleanse metadata from a single source distribution
168
+ def cleanse_sdist(path_: Path, mtime: float) -> int:
169
+ """Cleanse a single source distribution.
169
170
 
170
171
  - Set all uids and gids to zero
171
172
  - Set all unames and gnames to root
@@ -173,22 +174,22 @@ def cleanse_metadata(path_: Path, mtime: float) -> int:
173
174
  - Set modified time for .tar inside .gz
174
175
  - Set modified time for files inside the .tar
175
176
  - Remove group and other write permissions for files inside the .tar
177
+ - Set the compression level to zero i.e. no compression
176
178
  """
177
- path = path_.absolute()
179
+ filename = path_.absolute()
178
180
 
179
181
  mtime = max(mtime, EARLIEST)
180
182
 
181
- with TemporaryDirectory() as directory:
182
- with tarfile.open(path) as tar:
183
- tar.extractall(path=directory)
183
+ with TemporaryDirectory() as path:
184
+ with TarFile.open(filename) as tarfile:
185
+ tarfile.extractall(path=path, filter="data")
184
186
 
185
- path.unlink(missing_ok=True)
186
- (extracted,) = Path(directory).iterdir()
187
- uncompressed = f"{extracted}.tar"
187
+ filename.unlink(missing_ok=True)
188
+ (bare,) = Path(path).iterdir()
188
189
 
189
- prefix = directory.removeprefix("/") + "/"
190
+ prefix = path.removeprefix("/") + "/"
190
191
 
191
- def filter_(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo:
192
+ def filter_(tarinfo: TarInfo) -> TarInfo:
192
193
  tarinfo.mtime = int(mtime)
193
194
  tarinfo.uid = tarinfo.gid = 0
194
195
  tarinfo.uname = tarinfo.gname = "root"
@@ -196,44 +197,63 @@ def cleanse_metadata(path_: Path, mtime: float) -> int:
196
197
  tarinfo.path = tarinfo.path.removeprefix(prefix)
197
198
  return tarinfo
198
199
 
199
- with tarfile.open(uncompressed, "w") as tar:
200
- tar.add(extracted, filter=filter_)
201
-
202
- with gzip.GzipFile(filename=path, mode="wb", mtime=mtime) as file:
203
- with open(uncompressed, "rb") as tar:
204
- copyfileobj(tar, file)
205
- utime(path, (mtime, mtime))
200
+ tar = f"{bare}.tar"
201
+ with TarFile.open(tar, "w") as tarfile:
202
+ tarfile.add(bare, filter=filter_)
203
+
204
+ with (
205
+ Path(tar).open("rb") as fsrc,
206
+ gzip.GzipFile(
207
+ filename=filename,
208
+ mode="wb",
209
+ mtime=mtime,
210
+ compresslevel=0,
211
+ ) as fdst,
212
+ ):
213
+ copyfileobj(fsrc, fdst)
214
+ utime(filename, (mtime, mtime))
206
215
  return 0
207
216
 
208
217
 
209
218
  def latest_modification_time(archive: Path) -> str:
210
- """Latest modification time for a gzipped tarfile as a string"""
211
- with tarfile.open(archive, "r:gz") as tar:
219
+ """Latest modification time for a gzipped tarfile as a string."""
220
+ with TarFile.open(archive, "r:gz") as tar:
212
221
  latest = max(member.mtime for member in tar.getmembers())
213
- return "{:.0f}".format(latest)
222
+ return f"{latest:.0f}"
214
223
 
215
224
 
216
225
  def latest_commit_time(repository: Path) -> float:
217
- """Return the time of the last commit to a repository
226
+ """Return the time of the last commit to a repository.
218
227
 
219
228
  As a UNIX timestamp, defined as the number of seconds, excluding leap
220
- seconds, since 01 Jan 1970 00:00:00 UTC."""
229
+ seconds, since 01 Jan 1970 00:00:00 UTC.
230
+ """
221
231
  cmd = ("git", "-C", repository, "log", "-1", "--pretty=%ct")
222
232
  output = run(cmd, check=True, capture_output=True, text=True).stdout
223
233
  return float(output.rstrip("\n"))
224
234
 
225
235
 
226
236
  def breadth_first_key(path: str) -> list[str | list]:
237
+ """Key for sorting breadth first.
238
+
239
+ An example of breadth first sorted strings:
240
+
241
+ 1. z
242
+ 2. a/y
243
+ 3. a/b/x
244
+
245
+ """
227
246
  start, sep, end = path.partition("/")
228
247
  return [sep, start, breadth_first_key(end)] if end else [sep, start]
229
248
 
230
249
 
231
250
  def key(input_: bytes | ZipInfo) -> tuple[int, list[str | list]]:
251
+ """Key for reproducibly sorting ZipFiles."""
232
252
  if hasattr(input_, "filename"):
233
- item = cast(ZipInfo, input_).filename
253
+ item = cast("ZipInfo", input_).filename
234
254
  path = item
235
255
  else:
236
- item = cast(bytes, input_).decode()
256
+ item = cast("bytes", input_).decode()
237
257
  path = item.split(",")[0]
238
258
  if "/RECORD" in path:
239
259
  group = 3
@@ -244,11 +264,24 @@ def key(input_: bytes | ZipInfo) -> tuple[int, list[str | list]]:
244
264
  return (group, breadth_first_key(item))
245
265
 
246
266
 
247
- def zipumask(path: Path, umask: int = 0o022) -> Path:
248
- """Apply a umask to a zip file at path
267
+ def fix_zip_members(path: Path, umask: int = 0o022) -> Path:
268
+ """Apply fixes to members in a zip file.
269
+
270
+ Processes the zip file in place. Path is both the source and destination, a
271
+ temporary working copy is made.
272
+
273
+ - Apply a umask to each member
274
+ - Change to compression level zero
249
275
 
250
- Path is both the source and destination, a temporary working copy is
251
- made."""
276
+ When using the default deflate compression and comparing wheels created on
277
+ Ubuntu 24.04 and Fedora 40, minor differences in the size of the compressed
278
+ wheel were observed. For example:
279
+
280
+ │ -112 files, 909030 bytes uncompressed, 272160 bytes compressed: 70.1%
281
+ │ +112 files, 909030 bytes uncompressed, 271653 bytes compressed: 70.1%
282
+
283
+ As a solution this function uses compression level zero i.e. no compression.
284
+ """
252
285
  operand = ~(umask << 16)
253
286
 
254
287
  with TemporaryDirectory() as directory:
@@ -257,6 +290,8 @@ def zipumask(path: Path, umask: int = 0o022) -> Path:
257
290
  for member in original.infolist():
258
291
  data = original.read(member)
259
292
  member.external_attr = member.external_attr & operand
293
+ member.compress_type = ZIP_DEFLATED
294
+ member.compress_level = 0
260
295
  destination.writestr(member, data)
261
296
  path.unlink()
262
297
  move(copy, path) # can't rename as /tmp may be a different device
@@ -270,7 +305,7 @@ def _is_git_repository(path: Path) -> bool:
270
305
 
271
306
  try:
272
307
  process = run(
273
- ["git", "rev-parse", "--show-toplevel"],
308
+ ("git", "rev-parse", "--show-toplevel"),
274
309
  cwd=path,
275
310
  check=True,
276
311
  capture_output=True,
@@ -285,6 +320,7 @@ def _is_git_repository(path: Path) -> bool:
285
320
 
286
321
 
287
322
  def parse_args(args: list[str] | None) -> Arguments:
323
+ """Parse command line arguments."""
288
324
  parser = ArgumentParser(
289
325
  prog="reproducibly.py",
290
326
  formatter_class=RawDescriptionHelpFormatter,
@@ -311,7 +347,7 @@ def parse_args(args: list[str] | None) -> Arguments:
311
347
 
312
348
 
313
349
  def _sortwheel(wheel: Path) -> Path:
314
- """Sort the lines in */RECORD and files in a wheel
350
+ """Sort the lines in */RECORD and files in a wheel.
315
351
 
316
352
  pypa/wheel has had reproducible builds since 0.27.0 (2016-02-05); this
317
353
  script post processes a wheel file to match the ordering that pypa/wheel
@@ -329,7 +365,8 @@ def _sortwheel(wheel: Path) -> Path:
329
365
  From observation of pypa/wheel output desired order is below. This can be
330
366
  called breadth first. It is easily created recursively. For a directory,
331
367
  list all the files in order then repeat for all of the subdirectories in
332
- order."""
368
+ order.
369
+ """
333
370
  with TemporaryDirectory() as directory:
334
371
  intermediate = Path(directory) / wheel.name
335
372
  with ZipFile(wheel, "r") as original, ZipFile(intermediate, "w") as destination:
@@ -347,6 +384,7 @@ def _sortwheel(wheel: Path) -> Path:
347
384
 
348
385
 
349
386
  def main(arguments: list[str] | None = None) -> int:
387
+ """Reproducibly build Python packages."""
350
388
  parsed = parse_args(arguments)
351
389
  for repository in parsed["repositories"]:
352
390
  sdist = _build(repository, parsed["output"], "sdist")
@@ -354,7 +392,7 @@ def main(arguments: list[str] | None = None) -> int:
354
392
  date = float(environ["SOURCE_DATE_EPOCH"])
355
393
  else:
356
394
  date = latest_commit_time(repository)
357
- cleanse_metadata(sdist, date)
395
+ cleanse_sdist(sdist, date)
358
396
  for sdist in parsed["sdists"]:
359
397
  with ModifiedEnvironment(SOURCE_DATE_EPOCH=latest_modification_time(sdist)):
360
398
  if Builder.which(sdist) == Builder.cibuildwheel:
@@ -363,7 +401,7 @@ def main(arguments: list[str] | None = None) -> int:
363
401
  with TemporaryDirectory() as directory:
364
402
  srcdir = _extract_to_empty_directory(sdist, directory)
365
403
  built = _build(srcdir, parsed["output"], "wheel")
366
- _sortwheel(zipumask(built))
404
+ fix_zip_members(_sortwheel(built))
367
405
  return 0
368
406
 
369
407