pytecode 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytecode/jar.py ADDED
@@ -0,0 +1,271 @@
1
+ """Read, modify, and rewrite JAR archives with optional bytecode transformation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import os
7
+ import tempfile
8
+ import zipfile
9
+ from dataclasses import dataclass
10
+ from pathlib import Path, PurePosixPath
11
+
12
+ from .class_reader import ClassReader
13
+ from .debug_info import DebugInfoPolicy, normalize_debug_info_policy
14
+ from .hierarchy import ClassResolver
15
+ from .model import ClassModel
16
+ from .transforms import ClassTransform
17
+
18
+ __all__ = ["JarFile", "JarInfo"]
19
+
20
+
21
@dataclass
class JarInfo:
    """One archive entry: its name, its ZIP header, and its payload.

    Attributes:
        filename: Entry path relative to the archive root, normalized to
            the separator of the host operating system.
        zipinfo: The ``zipfile.ZipInfo`` header associated with the entry.
        bytes: The entry's content as raw, uninterpreted bytes.
    """

    # Normalized, OS-native relative path used as the lookup key.
    filename: str
    # ZIP header carried along so metadata can be reused on rewrite.
    zipinfo: zipfile.ZipInfo
    # Raw payload; no decoding or parsing is applied here.
    bytes: bytes
34
+
35
+
36
def _normalize_filename(
    filename: str | os.PathLike[str],
    *,
    is_dir: bool | None = None,
) -> str:
    """Validate a JAR entry name and convert it to an OS-native relative path.

    Forward slashes and backslashes are both treated as separators. The
    components are re-joined with ``pathlib.Path``, so the result uses the
    host platform's separator.

    Args:
        filename: Entry name as a string or path-like object.
        is_dir: Whether the entry is a directory. When ``None`` this is
            inferred from a trailing slash or backslash on *filename*.

    Returns:
        The normalized relative path. Directory entries always end with
        ``os.sep``; file entries never do.

    Raises:
        ValueError: If the name is empty, is absolute, contains a ``..``
            component, or collapses to nothing after normalization.
    """
    text = os.fspath(filename)
    if not text:
        raise ValueError("JAR entry filename must not be empty")

    # A leading separator is rejected on every platform, in addition to
    # whatever the host's own notion of "absolute" is.
    rooted = text.startswith(("/", "\\")) or Path(text).is_absolute()
    if rooted:
        raise ValueError(f"JAR entry filename must be relative: {text!r}")

    segments = PurePosixPath(text.replace("\\", "/")).parts
    if any(segment == ".." for segment in segments):
        raise ValueError(f"JAR entry filename must not contain parent directory references: {text!r}")

    native = str(Path(*segments))
    if native in ("", "."):
        raise ValueError("JAR entry filename must not be empty")

    treat_as_dir = text.endswith(("/", "\\")) if is_dir is None else is_dir
    return native.rstrip("\\/") + os.sep if treat_as_dir else native
61
+
62
+
63
def _archive_name(filename: str) -> str:
    """Convert a normalized OS-native entry name to its in-archive form.

    Args:
        filename: Entry name using the host separator; a trailing
            ``os.sep`` marks a directory entry.

    Returns:
        The name joined with forward slashes, with a trailing ``/`` when
        *filename* ended with ``os.sep``.
    """
    # Decide on the directory marker before the trailing separator is removed.
    trailing = "/" if filename.endswith(os.sep) else ""
    native = Path(filename.rstrip("\\/"))
    return PurePosixPath(*native.parts).as_posix() + trailing
70
+
71
+
72
def _clone_zipinfo(zipinfo: zipfile.ZipInfo, *, filename: str) -> zipfile.ZipInfo:
    """Return a shallow copy of *zipinfo* renamed for *filename*.

    Args:
        zipinfo: Header to duplicate; the object passed in is not modified.
        filename: Normalized OS-native entry name; it is converted to the
            in-archive form before being stored on the copy.

    Returns:
        A new ``zipfile.ZipInfo`` whose ``filename`` is the archive name.
    """
    duplicate = copy.copy(zipinfo)
    duplicate.filename = _archive_name(filename)
    return duplicate
76
+
77
+
78
def _is_class_filename(filename: str) -> bool:
    """Tell whether a normalized entry name refers to a ``.class`` file.

    Args:
        filename: Normalized OS-native entry name.

    Returns:
        ``True`` when the name ends with ``.class`` and is not a
        directory entry (does not end with ``os.sep``).
    """
    if filename.endswith(os.sep):
        return False
    return filename.endswith(".class")
80
+
81
+
82
def _read_archive_state(filename: str | os.PathLike[str]) -> tuple[list[zipfile.ZipInfo], dict[str, JarInfo]]:
    """Load every entry of the ZIP archive at *filename* into memory.

    Args:
        filename: Path of the archive to open for reading.

    Returns:
        A pair ``(infolist, files)``: the archive's header list as reported
        by ``ZipFile.infolist()``, and a mapping from normalized entry name
        to ``JarInfo``. Directory entries are stored with empty content.
        When several headers normalize to the same name, the mapping keeps
        the one encountered last.
    """
    entries: dict[str, JarInfo] = {}
    with zipfile.ZipFile(filename, "r") as archive:
        headers = archive.infolist()
        for header in headers:
            directory = header.is_dir()
            key = _normalize_filename(header.filename, is_dir=directory)
            # Directories carry no payload, so nothing is read for them.
            payload = b"" if directory else archive.read(header.filename)
            entries[key] = JarInfo(key, header, payload)
    return headers, entries
91
+
92
+
93
class JarFile:
    """In-memory representation of a JAR (ZIP) archive.

    On construction the archive is read into memory so entries can be
    inspected, added, removed, and optionally transformed before being
    written back to disk via ``rewrite``.

    Signed-JAR artifacts (``META-INF/*.SF``, ``*.RSA``, etc.) are kept as
    ordinary resources and are **not** re-signed when the archive is
    rewritten.

    Attributes:
        filename: Path of the archive backing this object. ``rewrite``
            updates it to the path that was last written.
        infolist: ZIP headers of the entries. After ``read``/``rewrite`` it
            is the list reported by the archive; after ``add_file`` or
            ``remove_file`` it is rebuilt from ``files``.
        files: Mapping of normalized entry name to ``JarInfo``.
    """

    def __init__(self, filename: str | os.PathLike[str]) -> None:
        """Open and read a JAR archive into memory.

        Args:
            filename: Path to an existing JAR file on disk.
        """
        self.filename = os.fspath(filename)
        self.infolist: list[zipfile.ZipInfo] = []
        self.files: dict[str, JarInfo] = {}
        self.read()

    def read(self) -> None:
        """Re-read the archive from disk, replacing all in-memory state."""
        self.infolist, self.files = _read_archive_state(self.filename)

    def add_file(
        self,
        filename: str | os.PathLike[str],
        data: bytes | bytearray,
        *,
        zipinfo: zipfile.ZipInfo | None = None,
    ) -> JarInfo:
        """Add or replace an entry in the archive.

        If *filename* already exists its ZIP metadata is reused unless an
        explicit *zipinfo* is supplied. A brand-new entry without *zipinfo*
        gets a default-constructed ``zipfile.ZipInfo``. In every case the
        header is copied, so the caller's object is never mutated.

        Args:
            filename: Relative path for the entry inside the JAR.
            data: Raw bytes to store for this entry.
            zipinfo: Optional ZIP header to use instead of the default.

        Returns:
            The ``JarInfo`` for the newly added entry.

        Raises:
            ValueError: If *filename* is empty, absolute, or contains a
                parent-directory reference.
        """
        normalized = _normalize_filename(filename)
        if zipinfo is not None:
            entry_zipinfo = copy.copy(zipinfo)
        elif normalized in self.files:
            entry_zipinfo = copy.copy(self.files[normalized].zipinfo)
        else:
            entry_zipinfo = zipfile.ZipInfo()
        entry_zipinfo.filename = _archive_name(normalized)
        jar_info = JarInfo(normalized, entry_zipinfo, bytes(data))
        self.files[normalized] = jar_info
        # Keep the header list in step with the entry mapping.
        self.infolist = [item.zipinfo for item in self.files.values()]
        return jar_info

    def remove_file(self, filename: str | os.PathLike[str]) -> JarInfo:
        """Remove an entry from the archive.

        Args:
            filename: Relative path of the entry to remove.

        Returns:
            The ``JarInfo`` that was removed.

        Raises:
            KeyError: If the entry does not exist.
            ValueError: If *filename* is empty, absolute, or contains a
                parent-directory reference.
        """
        normalized = _normalize_filename(filename)
        try:
            jar_info = self.files.pop(normalized)
        except KeyError as exc:
            # Report the normalized name, which is the key actually looked up.
            raise KeyError(normalized) from exc
        self.infolist = [item.zipinfo for item in self.files.values()]
        return jar_info

    def parse_classes(self) -> tuple[list[tuple[JarInfo, ClassReader]], list[JarInfo]]:
        """Parse all ``.class`` entries and separate them from other resources.

        Returns:
            A two-element tuple of (class entries, non-class entries).
            Each class entry is a ``(JarInfo, ClassReader)`` pair.
        """
        classes: list[tuple[JarInfo, ClassReader]] = []
        other_files: list[JarInfo] = []
        for jar_info in self.files.values():
            if _is_class_filename(jar_info.filename):
                classes.append((jar_info, ClassReader.from_bytes(jar_info.bytes)))
            else:
                other_files.append(jar_info)
        return classes, other_files

    def rewrite(
        self,
        output_path: str | os.PathLike[str] | None = None,
        *,
        transform: ClassTransform | None = None,
        recompute_frames: bool = False,
        resolver: ClassResolver | None = None,
        debug_info: DebugInfoPolicy | str = DebugInfoPolicy.PRESERVE,
        skip_debug: bool = False,
    ) -> Path:
        """Write the current archive state back to disk.

        By default the archive is rewritten in place. ``.class`` entries are
        copied verbatim unless *transform* or non-default lowering options
        require re-lowering through ``ClassModel``.

        The archive is first written to a temporary file in the destination
        directory and then moved over the destination. When the destination
        already exists its permission bits are re-applied afterwards, because
        the temporary file is created owner-only. On success the in-memory
        state is reloaded from the written file and ``self.filename`` is
        updated to the destination.

        Signed-JAR artifacts under ``META-INF`` are preserved as ordinary
        resources and are **not** re-signed; if class bytes change the
        resulting archive may no longer verify.

        Args:
            output_path: Destination path. When ``None`` the original file
                is overwritten.
            transform: Optional callable applied to each ``ClassModel``
                in place. Must return ``None``.
            recompute_frames: Whether to recompute ``StackMapTable`` frames
                when lowering classes.
            resolver: Class hierarchy resolver used during frame computation.
            debug_info: Policy controlling how debug attributes are emitted.
            skip_debug: If ``True``, discard debug attributes when lifting
                class bytes into ``ClassModel``.

        Returns:
            The destination ``Path`` (built from the given path; it is not
            passed through ``Path.resolve()``).

        Raises:
            TypeError: If *transform* returns a non-``None`` value.
        """

        debug_policy = normalize_debug_info_policy(debug_info)
        should_rewrite_classes = (
            transform is not None
            or recompute_frames
            or resolver is not None
            or debug_policy is not DebugInfoPolicy.PRESERVE
            or skip_debug
        )

        destination = Path(self.filename if output_path is None else output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)

        # mkstemp() creates its file readable/writable by the owner only.
        # Remember the permission bits of a file that is about to be
        # replaced so an in-place rewrite does not silently tighten them.
        try:
            previous_mode = destination.stat().st_mode & 0o7777
        except OSError:
            # Nothing to preserve (typically: the destination does not exist yet).
            previous_mode = None

        fd, temp_name = tempfile.mkstemp(
            prefix=f"{destination.name}-",
            suffix=".tmp",
            dir=destination.parent,
        )
        os.close(fd)
        temp_path = Path(temp_name)

        try:
            with zipfile.ZipFile(temp_path, "w") as jar:
                for jar_info in self.files.values():
                    data = jar_info.bytes
                    if should_rewrite_classes and _is_class_filename(jar_info.filename):
                        model = ClassModel.from_bytes(data, skip_debug=skip_debug)
                        if transform is not None:
                            result = transform(model)
                            if result is not None:
                                raise TypeError(
                                    "JarFile.rewrite() transforms must mutate ClassModel in place and return None"
                                )
                        data = model.to_bytes(
                            recompute_frames=recompute_frames,
                            resolver=resolver,
                            debug_info=debug_policy,
                        )
                    jar.writestr(_clone_zipinfo(jar_info.zipinfo, filename=jar_info.filename), data)

            temp_path.replace(destination)
            if previous_mode is not None:
                # Applied after the move so a failed replace still leaves a
                # temporary file that the cleanup below can remove.
                os.chmod(destination, previous_mode)
            new_infolist, new_files = _read_archive_state(destination)
            self.filename = os.fspath(destination)
            self.infolist = new_infolist
            self.files = new_files
            return destination
        finally:
            # After a successful replace the temporary name no longer exists;
            # on failure this removes the partially written file.
            try:
                temp_path.unlink()
            except FileNotFoundError:
                pass