python-hwpx 2.7__py3-none-any.whl → 2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,21 +4,25 @@ import argparse
4
4
  import io
5
5
  import xml.etree.ElementTree as ET
6
6
  from dataclasses import dataclass
7
- from pathlib import Path
8
- from typing import BinaryIO, Sequence
7
+ from pathlib import Path, PurePosixPath
8
+ from typing import BinaryIO, Literal, Sequence
9
9
  from zipfile import ZIP_STORED, BadZipFile, ZipFile
10
10
 
11
+ from ..opc.relationships import (
12
+ MAIN_ROOTFILE_MEDIA_TYPE,
13
+ is_section_part_name,
14
+ parse_container_rootfiles,
15
+ parse_manifest_relationships,
16
+ select_main_rootfile,
17
+ )
18
+
11
19
  EXPECTED_MIMETYPE = "application/hwp+zip"
20
+ MIMETYPE_PATH = "mimetype"
12
21
  CONTAINER_PATH = "META-INF/container.xml"
13
- MANIFEST_PATH = "Contents/content.hpf"
14
22
  HEADER_PATH = "Contents/header.xml"
15
23
  VERSION_PATH = "version.xml"
16
- REQUIRED_CORE_FILES = ("mimetype", CONTAINER_PATH, MANIFEST_PATH, HEADER_PATH, VERSION_PATH)
17
- OPF_NS = {"opf": "http://www.idpf.org/2007/opf/"}
18
- CONTAINER_NS = {
19
- "ct": "urn:oasis:names:tc:opendocument:xmlns:container",
20
- "ocf": "urn:oasis:names:tc:opendocument:xmlns:container",
21
- }
24
+
25
+ IssueLevel = Literal["error", "warning"]
22
26
 
23
27
  __all__ = [
24
28
  "PackageValidationIssue",
@@ -32,6 +36,11 @@ __all__ = [
32
36
  class PackageValidationIssue:
33
37
  part_name: str
34
38
  message: str
39
+ level: IssueLevel = "error"
40
+
41
+ @property
42
+ def is_error(self) -> bool:
43
+ return self.level == "error"
35
44
 
36
45
  def __str__(self) -> str: # pragma: no cover - human readable helper
37
46
  return f"{self.part_name}: {self.message}"
@@ -42,9 +51,17 @@ class PackageValidationReport:
42
51
  checked_parts: tuple[str, ...]
43
52
  issues: tuple[PackageValidationIssue, ...]
44
53
 
54
+ @property
55
+ def errors(self) -> tuple[PackageValidationIssue, ...]:
56
+ return tuple(issue for issue in self.issues if issue.is_error)
57
+
58
+ @property
59
+ def warnings(self) -> tuple[PackageValidationIssue, ...]:
60
+ return tuple(issue for issue in self.issues if not issue.is_error)
61
+
45
62
  @property
46
63
  def ok(self) -> bool:
47
- return not self.issues
64
+ return not self.errors
48
65
 
49
66
  def __bool__(self) -> bool: # pragma: no cover - convenience alias
50
67
  return self.ok
@@ -65,43 +82,31 @@ def _parse_xml(payload: bytes) -> ET.Element:
65
82
  raise ValueError(f"malformed XML: {exc}") from exc
66
83
 
67
84
 
68
- def _container_rootfiles(container_root: ET.Element) -> list[str]:
69
- paths: list[str] = []
70
- for namespace in CONTAINER_NS.values():
71
- for elem in container_root.findall(f".//{{{namespace}}}rootfile"):
72
- path = (
73
- elem.get("full-path")
74
- or elem.get("fullPath")
75
- or elem.get("full_path")
76
- )
77
- if path:
78
- paths.append(path)
79
- return paths
85
+ def _error(issues: list[PackageValidationIssue], part_name: str, message: str) -> None:
86
+ issues.append(PackageValidationIssue(part_name, message, "error"))
80
87
 
81
88
 
82
- def _manifest_hrefs(manifest_root: ET.Element) -> set[str]:
83
- hrefs: set[str] = set()
84
- for item in manifest_root.findall(".//opf:item", OPF_NS):
85
- href = item.get("href")
86
- if href:
87
- hrefs.add(href)
88
- return hrefs
89
+ def _warning(issues: list[PackageValidationIssue], part_name: str, message: str) -> None:
90
+ issues.append(PackageValidationIssue(part_name, message, "warning"))
89
91
 
90
92
 
91
- def _spine_hrefs(manifest_root: ET.Element) -> list[str]:
92
- hrefs: list[str] = []
93
- id_to_href: dict[str, str] = {}
94
- for item in manifest_root.findall(".//opf:item", OPF_NS):
95
- item_id = item.get("id")
96
- href = item.get("href")
97
- if item_id and href:
98
- id_to_href[item_id] = href
93
+ def _safe_read(zf: ZipFile, part_name: str) -> bytes | None:
94
+ try:
95
+ return zf.read(part_name)
96
+ except (BadZipFile, KeyError, OSError):
97
+ return None
98
+
99
99
 
100
- for itemref in manifest_root.findall(".//opf:itemref", OPF_NS):
101
- idref = itemref.get("idref")
102
- if idref and idref in id_to_href:
103
- hrefs.append(id_to_href[idref])
104
- return hrefs
100
+ def _fallback_named_parts(names: set[str], *, token: str, extra_token: str | None = None) -> list[str]:
101
+ matches: list[str] = []
102
+ for name in sorted(names):
103
+ part_name = PurePosixPath(name).name.lower()
104
+ if token not in part_name:
105
+ continue
106
+ if extra_token is not None and extra_token not in part_name:
107
+ continue
108
+ matches.append(name)
109
+ return matches
105
110
 
106
111
 
107
112
  def validate_package(source: str | Path | bytes | BinaryIO) -> PackageValidationReport:
@@ -117,101 +122,229 @@ def validate_package(source: str | Path | bytes | BinaryIO) -> PackageValidation
117
122
  )
118
123
 
119
124
  with archive as zf:
120
- names = zf.namelist()
125
+ infos = [info for info in zf.infolist() if not info.is_dir()]
126
+ names = [info.filename for info in infos]
127
+ name_set = set(names)
121
128
  checked_parts.extend(names)
122
129
 
123
- for required in REQUIRED_CORE_FILES:
124
- if required not in names:
125
- issues.append(PackageValidationIssue(required, "missing required file"))
126
-
127
- if not names:
128
- issues.append(PackageValidationIssue("archive", "empty archive"))
130
+ if not infos:
131
+ _error(issues, "archive", "empty archive")
129
132
  return PackageValidationReport(tuple(checked_parts), tuple(issues))
130
133
 
131
- if "mimetype" in names:
132
- try:
133
- mimetype = zf.read("mimetype").decode("utf-8").strip()
134
- except UnicodeDecodeError:
135
- mimetype = "<binary>"
136
- if mimetype != EXPECTED_MIMETYPE:
137
- issues.append(
138
- PackageValidationIssue(
139
- "mimetype",
134
+ bad_entry = zf.testzip()
135
+ if bad_entry is not None:
136
+ _error(issues, bad_entry, "ZIP CRC/integrity check failed")
137
+
138
+ if MIMETYPE_PATH not in name_set:
139
+ _error(issues, MIMETYPE_PATH, "missing required file")
140
+ else:
141
+ mimetype_bytes = _safe_read(zf, MIMETYPE_PATH)
142
+ if mimetype_bytes is None:
143
+ _error(issues, MIMETYPE_PATH, "unable to read entry for integrity validation")
144
+ else:
145
+ try:
146
+ mimetype = mimetype_bytes.decode("utf-8").strip()
147
+ except UnicodeDecodeError:
148
+ mimetype = "<binary>"
149
+ if mimetype != EXPECTED_MIMETYPE:
150
+ _error(
151
+ issues,
152
+ MIMETYPE_PATH,
140
153
  f"expected {EXPECTED_MIMETYPE!r}, got {mimetype!r}",
141
154
  )
142
- )
143
- if names[0] != "mimetype":
144
- issues.append(PackageValidationIssue("mimetype", "must be the first ZIP entry"))
145
- if zf.getinfo("mimetype").compress_type != ZIP_STORED:
146
- issues.append(PackageValidationIssue("mimetype", "must use ZIP_STORED"))
155
+ if infos[0].filename != MIMETYPE_PATH:
156
+ _error(issues, MIMETYPE_PATH, "must be the first ZIP entry")
157
+ if zf.getinfo(MIMETYPE_PATH).compress_type != ZIP_STORED:
158
+ _error(issues, MIMETYPE_PATH, "must use ZIP_STORED")
159
+
160
+ if CONTAINER_PATH not in name_set:
161
+ _error(issues, CONTAINER_PATH, "missing required file")
162
+ if VERSION_PATH not in name_set:
163
+ _error(issues, VERSION_PATH, "missing required file under current engine semantics")
147
164
 
148
165
  xml_roots: dict[str, ET.Element] = {}
149
166
  for name in names:
150
167
  if not (name.endswith(".xml") or name.endswith(".hpf")):
151
168
  continue
169
+ payload = _safe_read(zf, name)
170
+ if payload is None:
171
+ _error(issues, name, "unable to read entry for XML parsing")
172
+ continue
152
173
  try:
153
- xml_roots[name] = _parse_xml(zf.read(name))
174
+ xml_roots[name] = _parse_xml(payload)
154
175
  except ValueError as exc:
155
- issues.append(PackageValidationIssue(name, str(exc)))
176
+ _error(issues, name, str(exc))
156
177
 
157
178
  container_root = xml_roots.get(CONTAINER_PATH)
158
- if container_root is not None:
159
- rootfiles = _container_rootfiles(container_root)
160
- if not rootfiles:
161
- issues.append(PackageValidationIssue(CONTAINER_PATH, "declares no rootfile entries"))
162
- for rootfile in rootfiles:
163
- if rootfile not in names:
164
- issues.append(
165
- PackageValidationIssue(
166
- CONTAINER_PATH,
167
- f"rootfile points to missing part {rootfile!r}",
168
- )
169
- )
179
+ if container_root is None:
180
+ return PackageValidationReport(tuple(checked_parts), tuple(issues))
170
181
 
171
- manifest_root = xml_roots.get(MANIFEST_PATH)
172
- if manifest_root is not None:
173
- hrefs = _manifest_hrefs(manifest_root)
174
- for href in sorted(hrefs):
175
- if href not in names:
176
- issues.append(
177
- PackageValidationIssue(
178
- MANIFEST_PATH,
179
- f"manifest href missing from archive: {href}",
180
- )
181
- )
182
+ rootfiles = parse_container_rootfiles(container_root)
183
+ if not rootfiles:
184
+ _error(issues, CONTAINER_PATH, "declares no rootfile entries")
185
+ return PackageValidationReport(tuple(checked_parts), tuple(issues))
186
+
187
+ for rootfile in rootfiles:
188
+ if rootfile.full_path not in name_set:
189
+ _error(
190
+ issues,
191
+ CONTAINER_PATH,
192
+ f"rootfile points to missing part {rootfile.full_path!r}",
193
+ )
194
+
195
+ selected_rootfile, used_rootfile_fallback = select_main_rootfile(rootfiles)
196
+ if selected_rootfile is None:
197
+ return PackageValidationReport(tuple(checked_parts), tuple(issues))
198
+ if used_rootfile_fallback:
199
+ _warning(
200
+ issues,
201
+ CONTAINER_PATH,
202
+ "no rootfile is marked as "
203
+ f"{MAIN_ROOTFILE_MEDIA_TYPE!r}; engine will use the first declaration "
204
+ f"{selected_rootfile.full_path!r}",
205
+ )
206
+
207
+ manifest_root = xml_roots.get(selected_rootfile.full_path)
208
+ if manifest_root is None:
209
+ _error(
210
+ issues,
211
+ selected_rootfile.full_path,
212
+ "selected main rootfile is missing or not well-formed XML",
213
+ )
214
+ return PackageValidationReport(tuple(checked_parts), tuple(issues))
215
+
216
+ relationships = parse_manifest_relationships(
217
+ manifest_root,
218
+ selected_rootfile.full_path,
219
+ known_parts=name_set,
220
+ )
182
221
 
183
- spine_hrefs = _spine_hrefs(manifest_root)
184
- if not spine_hrefs:
185
- issues.append(PackageValidationIssue(MANIFEST_PATH, "spine declares no section parts"))
186
- for href in spine_hrefs:
187
- if href not in names:
188
- issues.append(
189
- PackageValidationIssue(
190
- MANIFEST_PATH,
191
- f"spine item missing from archive: {href}",
192
- )
222
+ for item in relationships.items:
223
+ if item.resolved_path not in name_set:
224
+ _error(
225
+ issues,
226
+ selected_rootfile.full_path,
227
+ f"manifest href missing from archive: {item.href!r} -> {item.resolved_path!r}",
228
+ )
229
+
230
+ for idref in relationships.dangling_idrefs:
231
+ _warning(
232
+ issues,
233
+ selected_rootfile.full_path,
234
+ f"spine itemref references missing manifest id {idref!r}",
235
+ )
236
+
237
+ section_paths = [path for path in relationships.spine_paths if is_section_part_name(path)]
238
+ if section_paths:
239
+ for path in section_paths:
240
+ if path not in name_set:
241
+ _error(
242
+ issues,
243
+ selected_rootfile.full_path,
244
+ f"spine section part missing from archive: {path!r}",
193
245
  )
246
+ else:
247
+ fallback_sections = [name for name in sorted(name_set) if is_section_part_name(name)]
248
+ if fallback_sections:
249
+ _warning(
250
+ issues,
251
+ selected_rootfile.full_path,
252
+ "manifest spine does not resolve any section parts; engine will fall back "
253
+ "to filename-based section discovery",
254
+ )
255
+ else:
256
+ _error(
257
+ issues,
258
+ selected_rootfile.full_path,
259
+ "no section parts found in manifest spine or archive fallback",
260
+ )
194
261
 
195
- if HEADER_PATH in names and HEADER_PATH not in hrefs:
196
- issues.append(
197
- PackageValidationIssue(MANIFEST_PATH, "header.xml is not referenced in manifest")
262
+ if not relationships.header_paths and HEADER_PATH in name_set:
263
+ _warning(
264
+ issues,
265
+ selected_rootfile.full_path,
266
+ "manifest spine does not resolve a header part; engine will fall back to "
267
+ f"{HEADER_PATH!r}",
268
+ )
269
+
270
+ for path in relationships.header_paths:
271
+ if path not in name_set:
272
+ _error(
273
+ issues,
274
+ selected_rootfile.full_path,
275
+ f"header part missing from archive: {path!r}",
276
+ )
277
+
278
+ if not relationships.master_page_paths:
279
+ fallback_master_pages = _fallback_named_parts(name_set, token="master", extra_token="page")
280
+ if fallback_master_pages:
281
+ _warning(
282
+ issues,
283
+ selected_rootfile.full_path,
284
+ "manifest does not reference masterPage parts; engine will fall back to "
285
+ "filename-based discovery",
286
+ )
287
+ for path in relationships.master_page_paths:
288
+ if path not in name_set:
289
+ _error(
290
+ issues,
291
+ selected_rootfile.full_path,
292
+ f"masterPage part missing from archive: {path!r}",
293
+ )
294
+
295
+ if not relationships.history_paths:
296
+ fallback_histories = _fallback_named_parts(name_set, token="history")
297
+ if fallback_histories:
298
+ _warning(
299
+ issues,
300
+ selected_rootfile.full_path,
301
+ "manifest does not reference history parts; engine will fall back to "
302
+ "filename-based discovery",
303
+ )
304
+ for path in relationships.history_paths:
305
+ if path not in name_set:
306
+ _error(
307
+ issues,
308
+ selected_rootfile.full_path,
309
+ f"history part missing from archive: {path!r}",
198
310
  )
199
311
 
312
+ if relationships.version_path is None and VERSION_PATH in name_set:
313
+ _warning(
314
+ issues,
315
+ selected_rootfile.full_path,
316
+ "manifest does not reference a version part; engine will fall back to "
317
+ f"{VERSION_PATH!r}",
318
+ )
319
+ elif relationships.version_path is not None and relationships.version_path not in name_set:
320
+ _error(
321
+ issues,
322
+ selected_rootfile.full_path,
323
+ f"manifest version part missing from archive: {relationships.version_path!r}",
324
+ )
325
+
200
326
  return PackageValidationReport(tuple(checked_parts), tuple(issues))
201
327
 
202
328
 
203
329
  def main(argv: Sequence[str] | None = None) -> int:
204
- parser = argparse.ArgumentParser(description="Validate HWPX package structure")
330
+ parser = argparse.ArgumentParser(
331
+ description="Validate HWPX package structure using engine-aligned ZIP/container/manifest checks"
332
+ )
205
333
  parser.add_argument("source", help="Path to the HWPX file")
206
334
  args = parser.parse_args(argv)
207
335
 
208
336
  report = validate_package(args.source)
209
- if report.issues:
210
- for issue in report.issues:
211
- print(f"ERROR: {issue}")
337
+ for issue in report.issues:
338
+ prefix = "ERROR" if issue.is_error else "WARN"
339
+ print(f"{prefix}: {issue}")
340
+
341
+ if report.errors:
212
342
  return 1
213
343
 
214
- print("All package validations passed.")
344
+ if report.warnings:
345
+ print("Package validation passed with warnings.")
346
+ else:
347
+ print("All package validations passed.")
215
348
  return 0
216
349
 
217
350
 
hwpx/tools/page_guard.py CHANGED
@@ -7,14 +7,14 @@ textual metrics that often correlate with page-layout drift.
7
7
  from __future__ import annotations
8
8
 
9
9
  import argparse
10
- import io
11
10
  import json
12
11
  from dataclasses import asdict, dataclass
13
12
  from pathlib import Path
14
13
  from typing import BinaryIO, Iterable, Sequence
15
- from zipfile import ZipFile
16
14
 
17
- from lxml import etree
15
+ from lxml import etree # type: ignore[reportAttributeAccessIssue]
16
+
17
+ from ..opc.package import HwpxPackage
18
18
 
19
19
  NS = {
20
20
  "hp": "http://www.hancom.co.kr/hwpml/2011/paragraph",
@@ -63,31 +63,6 @@ class DocumentMetrics:
63
63
  paragraph_text_lengths: list[int]
64
64
 
65
65
 
66
- def _section_files(zf: ZipFile) -> list[str]:
67
- try:
68
- root = etree.fromstring(zf.read("Contents/content.hpf"))
69
- except KeyError:
70
- return [
71
- name
72
- for name in zf.namelist()
73
- if name.startswith("Contents/section") and name.endswith(".xml")
74
- ]
75
-
76
- id_to_href: dict[str, str] = {}
77
- for item in root.findall(".//opf:item", namespaces=NS):
78
- item_id = item.get("id")
79
- href = item.get("href")
80
- if item_id and href:
81
- id_to_href[item_id] = href
82
-
83
- files: list[str] = []
84
- for itemref in root.findall(".//opf:itemref", namespaces=NS):
85
- idref = itemref.get("idref")
86
- if idref and idref in id_to_href:
87
- files.append(id_to_href[idref])
88
- return files
89
-
90
-
91
66
  def _text_of_t_node(node: etree._Element) -> str:
92
67
  return "".join(node.itertext())
93
68
 
@@ -99,16 +74,9 @@ def _local_name(tag: str) -> str:
99
74
 
100
75
 
101
76
  def _iter_section_roots(source: str | Path | bytes | BinaryIO) -> Iterable[etree._Element]:
102
- if isinstance(source, bytes):
103
- archive = ZipFile(io.BytesIO(source), "r")
104
- else:
105
- archive = ZipFile(source, "r")
106
-
107
- try:
108
- for name in _section_files(archive):
109
- yield etree.fromstring(archive.read(name))
110
- finally:
111
- archive.close()
77
+ package = HwpxPackage.open(source)
78
+ for name in package.section_paths():
79
+ yield package.get_xml(name)
112
80
 
113
81
 
114
82
  def collect_metrics(source: str | Path | bytes | BinaryIO) -> DocumentMetrics:
@@ -273,8 +241,12 @@ def main(argv: Sequence[str] | None = None) -> int:
273
241
  parser.add_argument("--json", action="store_true", help="Print collected metrics as JSON")
274
242
  args = parser.parse_args(argv)
275
243
 
276
- reference = collect_metrics(args.reference)
277
- output = collect_metrics(args.output)
244
+ try:
245
+ reference = collect_metrics(args.reference)
246
+ output = collect_metrics(args.output)
247
+ except Exception as exc:
248
+ print(f"ERROR: {exc}")
249
+ return 1
278
250
 
279
251
  if args.json:
280
252
  print(
@@ -3,11 +3,13 @@ from __future__ import annotations
3
3
  import argparse
4
4
  import json
5
5
  from dataclasses import asdict, dataclass
6
- from pathlib import Path
6
+ from pathlib import Path, PurePosixPath
7
7
  from typing import Sequence
8
8
  from xml.etree import ElementTree as ET
9
9
 
10
10
  from ..opc.package import HwpxPackage
11
+ from ..opc.relationships import parse_manifest_relationships
12
+ from .archive_cli import unpack_hwpx
11
13
  from .page_guard import DocumentMetrics, collect_metrics
12
14
 
13
15
  _HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
@@ -36,8 +38,12 @@ class TemplateAnalysis:
36
38
  part_names: tuple[str, ...]
37
39
  rootfiles: tuple[str, ...]
38
40
  manifest_path: str
41
+ manifest_item_paths: tuple[str, ...]
39
42
  header_paths: tuple[str, ...]
40
43
  section_paths: tuple[str, ...]
44
+ master_page_paths: tuple[str, ...]
45
+ history_paths: tuple[str, ...]
46
+ bin_data_paths: tuple[str, ...]
41
47
  version_path: str | None
42
48
  header_summary: HeaderSummary
43
49
  proxy_metrics: DocumentMetrics
@@ -59,23 +65,36 @@ def _summarize_header(element: ET.Element | None) -> HeaderSummary:
59
65
  )
60
66
 
61
67
 
68
+ def _is_bindata_path(path: str) -> bool:
69
+ return any(part.lower() == "bindata" for part in PurePosixPath(path).parts)
70
+
71
+
62
72
  def analyze_template(source: str | Path) -> TemplateAnalysis:
63
73
  source_path = Path(source)
64
74
  package = HwpxPackage.open(source_path)
75
+ relationships = parse_manifest_relationships(
76
+ package.manifest_tree(),
77
+ package.main_content.full_path,
78
+ known_parts=package.part_names(),
79
+ )
65
80
 
66
81
  header_paths = tuple(package.header_paths())
67
82
  header_xml = package.get_xml(header_paths[0]) if header_paths else None
68
- manifest_path = package.main_content.full_path
69
- version_path = package.version_path()
70
83
 
71
84
  return TemplateAnalysis(
72
85
  source_name=source_path.name,
73
86
  part_names=tuple(package.part_names()),
74
87
  rootfiles=tuple(rootfile.full_path for rootfile in package.iter_rootfiles()),
75
- manifest_path=manifest_path,
88
+ manifest_path=package.main_content.full_path,
89
+ manifest_item_paths=tuple(item.resolved_path for item in relationships.items),
76
90
  header_paths=header_paths,
77
91
  section_paths=tuple(package.section_paths()),
78
- version_path=version_path,
92
+ master_page_paths=tuple(package.master_page_paths()),
93
+ history_paths=tuple(package.history_paths()),
94
+ bin_data_paths=tuple(
95
+ item.resolved_path for item in relationships.items if _is_bindata_path(item.resolved_path)
96
+ ),
97
+ version_path=package.version_path(),
79
98
  header_summary=_summarize_header(header_xml),
80
99
  proxy_metrics=collect_metrics(source_path),
81
100
  )
@@ -100,18 +119,9 @@ def extract_template_parts(
100
119
  written: list[Path] = []
101
120
 
102
121
  if extract_dir is not None:
103
- root = Path(extract_dir)
104
- root.mkdir(parents=True, exist_ok=True)
105
- written.append(_write_part(package, package.main_content.full_path, root / package.main_content.full_path))
106
- for part_name in package.header_paths():
107
- written.append(_write_part(package, part_name, root / part_name))
108
- for part_name in package.section_paths():
109
- written.append(_write_part(package, part_name, root / part_name))
110
- version_path = package.version_path()
111
- if version_path and package.has_part(version_path):
112
- written.append(_write_part(package, version_path, root / version_path))
113
- if package.has_part(package.CONTAINER_PATH):
114
- written.append(_write_part(package, package.CONTAINER_PATH, root / package.CONTAINER_PATH))
122
+ result = unpack_hwpx(source_path, extract_dir, pretty_xml=False)
123
+ written.extend(result.output_dir / entry.path for entry in result.entries)
124
+ written.append(result.metadata_path)
115
125
 
116
126
  if extract_header is not None:
117
127
  header_paths = package.header_paths()
@@ -141,6 +151,9 @@ def _print_summary(analysis: TemplateAnalysis) -> None:
141
151
  print(f"rootfiles: {', '.join(analysis.rootfiles) or '(none)'}")
142
152
  print(f"headers: {', '.join(analysis.header_paths) or '(none)'}")
143
153
  print(f"sections: {', '.join(analysis.section_paths) or '(none)'}")
154
+ print(f"masterPages: {', '.join(analysis.master_page_paths) or '(none)'}")
155
+ print(f"histories: {', '.join(analysis.history_paths) or '(none)'}")
156
+ print(f"BinData: {', '.join(analysis.bin_data_paths) or '(none)'}")
144
157
  if analysis.version_path:
145
158
  print(f"version part: {analysis.version_path}")
146
159
  print(
@@ -163,14 +176,17 @@ def _print_summary(analysis: TemplateAnalysis) -> None:
163
176
 
164
177
  def main(argv: Sequence[str] | None = None) -> int:
165
178
  parser = argparse.ArgumentParser(
166
- description="Analyze a reference HWPX template for template-preserving workflows"
179
+ description="Analyze a reference HWPX template for pack-ready, template-preserving workflows"
167
180
  )
168
181
  parser.add_argument("input", help="Input HWPX path")
169
182
  parser.add_argument("--json", action="store_true", help="Print machine-readable JSON summary")
170
183
  parser.add_argument("--output-json", help="Write the JSON summary to a file")
171
184
  parser.add_argument(
172
185
  "--extract-dir",
173
- help="Copy manifest, header, sections, version, and container.xml into a directory",
186
+ help=(
187
+ "Create a pack-ready extracted workspace that preserves archive-relative paths "
188
+ "and hwpx-pack metadata"
189
+ ),
174
190
  )
175
191
  parser.add_argument("--extract-header", help="Copy the first header.xml part to a path")
176
192
  parser.add_argument("--extract-section", help="Copy the first section XML part to a path")