python-hwpx 2.7__py3-none-any.whl → 2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hwpx/opc/package.py CHANGED
@@ -7,12 +7,21 @@ import io
7
7
  import os
8
8
  import tempfile
9
9
  from dataclasses import dataclass
10
- from pathlib import Path
10
+ from pathlib import Path, PurePosixPath
11
11
  from typing import BinaryIO, Iterable, Iterator, Mapping, MutableMapping
12
12
  from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile, ZipInfo
13
13
 
14
- from lxml import etree
14
+ from lxml import etree # type: ignore[reportAttributeAccessIssue]
15
15
 
16
+ from .relationships import (
17
+ MAIN_ROOTFILE_MEDIA_TYPE,
18
+ OPF_NS,
19
+ is_header_part_name,
20
+ is_section_part_name,
21
+ normalize_part_name,
22
+ parse_container_rootfiles,
23
+ parse_manifest_relationships,
24
+ )
16
25
  from .xml_utils import (
17
26
  extract_xml_declaration,
18
27
  iter_declared_namespaces,
@@ -24,8 +33,6 @@ __all__ = ["HwpxPackage", "HwpxPackageError", "HwpxStructureError", "RootFile",
24
33
 
25
34
  logger = logging.getLogger(__name__)
26
35
 
27
- _OPF_NS = "http://www.idpf.org/2007/opf/"
28
-
29
36
 
30
37
  class HwpxPackageError(Exception):
31
38
  """Base error raised for issues related to :class:`HwpxPackage`."""
@@ -169,25 +176,10 @@ class HwpxPackage:
169
176
  except Exception:
170
177
  logger.exception("container.xml 파싱에 실패했습니다.")
171
178
  raise
172
- rootfiles = []
173
- for elem in root.findall(".//{*}rootfile"):
174
- full_path_attr = elem.get("full-path")
175
- full_path = full_path_attr or elem.get("fullPath") or elem.get("full_path")
176
- if full_path and not full_path_attr:
177
- logger.warning(
178
- "container.xml rootfile이 비표준 경로 속성명을 사용했습니다: %s",
179
- elem.attrib,
180
- )
181
- if not full_path:
182
- raise HwpxStructureError("container.xml contains a rootfile without 'full-path'.")
183
- media_type_attr = elem.get("media-type")
184
- media_type = media_type_attr or elem.get("mediaType") or elem.get("media_type")
185
- if media_type and not media_type_attr:
186
- logger.warning(
187
- "container.xml rootfile이 비표준 media-type 속성명을 사용했습니다: %s",
188
- elem.attrib,
189
- )
190
- rootfiles.append(RootFile(full_path, media_type))
179
+ rootfiles = [
180
+ RootFile(ref.full_path, ref.media_type)
181
+ for ref in parse_container_rootfiles(root)
182
+ ]
191
183
  if not rootfiles:
192
184
  raise HwpxStructureError("container.xml does not declare any rootfiles.")
193
185
  return rootfiles
@@ -201,10 +193,6 @@ class HwpxPackage:
201
193
  def _validate_structure(self) -> None:
202
194
  for rootfile in self._rootfiles:
203
195
  rootfile.ensure_exists(self._files)
204
- if not any(path.startswith(("Contents/", "Content/")) for path in self._files):
205
- raise HwpxStructureError(
206
- "HWPX package does not contain a 'Contents' directory."
207
- )
208
196
 
209
197
  @property
210
198
  def mimetype(self) -> str:
@@ -220,7 +208,7 @@ class HwpxPackage:
220
208
  @property
221
209
  def main_content(self) -> RootFile:
222
210
  for rootfile in self._rootfiles:
223
- if rootfile.media_type == "application/hwpml-package+xml":
211
+ if rootfile.media_type == MAIN_ROOTFILE_MEDIA_TYPE:
224
212
  return rootfile
225
213
  selected = self._rootfiles[0]
226
214
  logger.warning(
@@ -254,7 +242,6 @@ class HwpxPackage:
254
242
  elif norm_path == self.VERSION_PATH:
255
243
  pending_version = self._parse_version(data)
256
244
  self._files[norm_path] = data
257
- self._invalidate_caches(norm_path)
258
245
  if norm_path == self.MIMETYPE_PATH:
259
246
  self._mimetype = mimetype
260
247
  elif norm_path == self.CONTAINER_PATH:
@@ -263,6 +250,7 @@ class HwpxPackage:
263
250
  elif norm_path == self.VERSION_PATH:
264
251
  assert pending_version is not None
265
252
  self._version = pending_version
253
+ self._invalidate_caches(norm_path)
266
254
  self._validate_structure()
267
255
 
268
256
  def delete(self, path: str) -> None:
@@ -274,11 +262,12 @@ class HwpxPackage:
274
262
  "Cannot remove mandatory files ('mimetype', 'container.xml', 'version.xml')."
275
263
  )
276
264
  del self._files[norm_path]
265
+ self._invalidate_caches(norm_path)
277
266
  self._validate_structure()
278
267
 
279
268
  @staticmethod
280
269
  def _normalize_path(path: str) -> str:
281
- return path.replace("\\", "/")
270
+ return normalize_part_name(path)
282
271
 
283
272
  def files(self) -> list[str]:
284
273
  return sorted(self._files)
@@ -314,13 +303,12 @@ class HwpxPackage:
314
303
 
315
304
  def manifest_tree(self) -> etree._Element:
316
305
  if self._manifest_tree is None:
317
- self._manifest_tree = self.get_xml(self.MANIFEST_PATH)
306
+ self._manifest_tree = self.get_xml(self.main_content.full_path)
318
307
  return self._manifest_tree
319
308
 
320
309
  def _manifest_items(self) -> list[etree._Element]:
321
310
  manifest = self.manifest_tree()
322
- ns = {"opf": _OPF_NS}
323
- return list(manifest.findall("./opf:manifest/opf:item", ns))
311
+ return list(manifest.findall("./opf:manifest/opf:item", OPF_NS))
324
312
 
325
313
  @staticmethod
326
314
  def _normalized_manifest_value(element: etree._Element) -> str:
@@ -339,52 +327,37 @@ class HwpxPackage:
339
327
 
340
328
  def _resolve_spine_paths(self) -> list[str]:
341
329
  if self._spine_cache is None:
342
- manifest = self.manifest_tree()
343
- ns = {"opf": _OPF_NS}
344
- manifest_items: dict[str, str] = {}
345
- for item in manifest.findall("./opf:manifest/opf:item", ns):
346
- item_id = item.attrib.get("id")
347
- href = item.attrib.get("href", "")
348
- if item_id and href:
349
- manifest_items[item_id] = href
350
- spine_paths: list[str] = []
351
- for itemref in manifest.findall("./opf:spine/opf:itemref", ns):
352
- idref = itemref.attrib.get("idref")
353
- if not idref:
354
- continue
355
- href = manifest_items.get(idref)
356
- if href:
357
- spine_paths.append(href)
358
- self._spine_cache = spine_paths
330
+ relationships = parse_manifest_relationships(
331
+ self.manifest_tree(),
332
+ self.main_content.full_path,
333
+ known_parts=self._files.keys(),
334
+ )
335
+ self._spine_cache = list(relationships.spine_paths)
359
336
  return self._spine_cache
360
337
 
361
338
  def section_paths(self) -> list[str]:
362
339
  if self._section_paths_cache is None:
363
- from pathlib import PurePosixPath
364
-
365
340
  paths = [
366
341
  path
367
342
  for path in self._resolve_spine_paths()
368
- if path and PurePosixPath(path).name.startswith("section")
343
+ if path and is_section_part_name(path)
369
344
  ]
370
345
  if not paths:
371
346
  logger.warning("manifest spine에서 section 경로를 찾지 못해 파일명 기반 fallback을 사용합니다.")
372
347
  paths = [
373
348
  name
374
349
  for name in self._files.keys()
375
- if PurePosixPath(name).name.startswith("section")
350
+ if is_section_part_name(name)
376
351
  ]
377
352
  self._section_paths_cache = paths
378
353
  return list(self._section_paths_cache)
379
354
 
380
355
  def header_paths(self) -> list[str]:
381
356
  if self._header_paths_cache is None:
382
- from pathlib import PurePosixPath
383
-
384
357
  paths = [
385
358
  path
386
359
  for path in self._resolve_spine_paths()
387
- if path and PurePosixPath(path).name.startswith("header")
360
+ if path and is_header_part_name(path)
388
361
  ]
389
362
  if not paths and self.has_part(self.HEADER_PATH):
390
363
  logger.warning(
@@ -397,14 +370,13 @@ class HwpxPackage:
397
370
 
398
371
  def master_page_paths(self) -> list[str]:
399
372
  if self._master_page_paths_cache is None:
400
- from pathlib import PurePosixPath
401
-
402
- paths = [
403
- item.attrib.get("href", "")
404
- for item in self._manifest_items()
405
- if self._manifest_matches(item, "masterpage", "master-page")
406
- and item.attrib.get("href")
407
- ]
373
+ paths = list(
374
+ parse_manifest_relationships(
375
+ self.manifest_tree(),
376
+ self.main_content.full_path,
377
+ known_parts=self._files.keys(),
378
+ ).master_page_paths
379
+ )
408
380
  if not paths:
409
381
  logger.warning("manifest에서 masterPage를 찾지 못해 파일명 탐색 fallback을 사용합니다.")
410
382
  paths = [
@@ -418,13 +390,13 @@ class HwpxPackage:
418
390
 
419
391
  def history_paths(self) -> list[str]:
420
392
  if self._history_paths_cache is None:
421
- from pathlib import PurePosixPath
422
-
423
- paths = [
424
- item.attrib.get("href", "")
425
- for item in self._manifest_items()
426
- if self._manifest_matches(item, "history") and item.attrib.get("href")
427
- ]
393
+ paths = list(
394
+ parse_manifest_relationships(
395
+ self.manifest_tree(),
396
+ self.main_content.full_path,
397
+ known_parts=self._files.keys(),
398
+ ).history_paths
399
+ )
428
400
  if not paths:
429
401
  logger.warning("manifest에서 history를 찾지 못해 파일명 탐색 fallback을 사용합니다.")
430
402
  paths = [
@@ -437,13 +409,11 @@ class HwpxPackage:
437
409
 
438
410
  def version_path(self) -> str | None:
439
411
  if not self._version_path_cache_resolved:
440
- path: str | None = None
441
- for item in self._manifest_items():
442
- if self._manifest_matches(item, "version"):
443
- href = item.attrib.get("href", "").strip()
444
- if href:
445
- path = href
446
- break
412
+ path = parse_manifest_relationships(
413
+ self.manifest_tree(),
414
+ self.main_content.full_path,
415
+ known_parts=self._files.keys(),
416
+ ).version_path
447
417
  if path is None and self.has_part(self.VERSION_PATH):
448
418
  logger.warning(
449
419
  "manifest에서 version 파트를 찾지 못해 기본 경로 fallback을 사용합니다: %s",
@@ -461,8 +431,7 @@ class HwpxPackage:
461
431
  def _manifest_element(self) -> etree._Element | None:
462
432
  """Return the ``<opf:manifest>`` element."""
463
433
  manifest = self.manifest_tree()
464
- ns = {"opf": _OPF_NS}
465
- return manifest.find("opf:manifest", ns)
434
+ return manifest.find("opf:manifest", OPF_NS)
466
435
 
467
436
  def add_manifest_item(
468
437
  self,
@@ -475,13 +444,12 @@ class HwpxPackage:
475
444
  if manifest_el is None:
476
445
  raise HwpxStructureError("Manifest does not contain an <opf:manifest> element.")
477
446
 
478
- ns = {"opf": _OPF_NS}
479
- for existing in manifest_el.findall("opf:item", ns):
447
+ for existing in manifest_el.findall("opf:item", OPF_NS):
480
448
  if existing.get("id") == item_id:
481
449
  return # already present
482
450
 
483
451
  new_item = manifest_el.makeelement(
484
- f"{{{_OPF_NS}}}item",
452
+ f"{{{OPF_NS['opf']}}}item",
485
453
  {"id": item_id, "href": href, "media-type": media_type},
486
454
  )
487
455
  manifest_el.append(new_item)
@@ -493,8 +461,7 @@ class HwpxPackage:
493
461
  if manifest_el is None:
494
462
  return False
495
463
 
496
- ns = {"opf": _OPF_NS}
497
- for existing in manifest_el.findall("opf:item", ns):
464
+ for existing in manifest_el.findall("opf:item", OPF_NS):
498
465
  if existing.get("id") == item_id:
499
466
  manifest_el.remove(existing)
500
467
  self._persist_manifest()
@@ -505,20 +472,18 @@ class HwpxPackage:
505
472
  """Write the in-memory manifest tree back to the package."""
506
473
  tree = self._manifest_tree
507
474
  if tree is not None:
508
- self.set_part(self.MANIFEST_PATH, tree)
475
+ self.set_part(self.main_content.full_path, tree)
509
476
 
510
477
  def _invalidate_caches(self, changed_path: str) -> None:
511
- if changed_path == self.MANIFEST_PATH:
478
+ if changed_path in {self.CONTAINER_PATH, self.main_content.full_path}:
512
479
  self._manifest_tree = None
513
- self._spine_cache = None
514
- self._section_paths_cache = None
515
- self._header_paths_cache = None
516
- self._master_page_paths_cache = None
517
- self._history_paths_cache = None
518
- self._version_path_cache = None
519
- self._version_path_cache_resolved = False
520
- elif changed_path == self.VERSION_PATH:
521
- self._version_path_cache_resolved = False
480
+ self._spine_cache = None
481
+ self._section_paths_cache = None
482
+ self._header_paths_cache = None
483
+ self._master_page_paths_cache = None
484
+ self._history_paths_cache = None
485
+ self._version_path_cache = None
486
+ self._version_path_cache_resolved = False
522
487
 
523
488
  def save(
524
489
  self,
@@ -0,0 +1,227 @@
1
+ """Helpers for resolving HWPX container and manifest relationships."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import PurePosixPath
7
+ from typing import Any, Collection, Iterable
8
+
9
+ CONTAINER_NAMESPACES = (
10
+ "urn:oasis:names:tc:opendocument:xmlns:container",
11
+ )
12
+ MAIN_ROOTFILE_MEDIA_TYPE = "application/hwpml-package+xml"
13
+ OPF_NS = {"opf": "http://www.idpf.org/2007/opf/"}
14
+
15
+ __all__ = [
16
+ "CONTAINER_NAMESPACES",
17
+ "MAIN_ROOTFILE_MEDIA_TYPE",
18
+ "OPF_NS",
19
+ "ManifestItemRef",
20
+ "ManifestRelationships",
21
+ "RootFileRef",
22
+ "is_header_part_name",
23
+ "is_section_part_name",
24
+ "normalize_part_name",
25
+ "parse_container_rootfiles",
26
+ "parse_manifest_relationships",
27
+ "resolve_part_name",
28
+ "select_main_rootfile",
29
+ ]
30
+
31
+
32
+ @dataclass(frozen=True)
33
+ class RootFileRef:
34
+ full_path: str
35
+ media_type: str | None = None
36
+
37
+
38
+ @dataclass(frozen=True)
39
+ class ManifestItemRef:
40
+ item_id: str | None
41
+ href: str
42
+ resolved_path: str
43
+ media_type: str | None = None
44
+ properties: str | None = None
45
+
46
+
47
@dataclass(frozen=True)
class ManifestRelationships:
    """Immutable result of resolving a package manifest.

    Instances are produced by :func:`parse_manifest_relationships`.
    """

    # Normalized part name of the manifest document itself.
    manifest_path: str
    # Every manifest item that declared a non-empty ``href``.
    items: tuple[ManifestItemRef, ...]
    # Resolved part names referenced by the spine, in spine order.
    spine_paths: tuple[str, ...]
    # Spine ``idref`` values that did not map to a usable manifest item.
    dangling_idrefs: tuple[str, ...]
    # Subset of ``spine_paths`` whose file name looks like a header part.
    header_paths: tuple[str, ...]
    # Resolved paths of items matching the master-page keywords.
    master_page_paths: tuple[str, ...]
    # Resolved paths of items matching the ``history`` keyword.
    history_paths: tuple[str, ...]
    # Resolved path of the first item matching ``version``; ``None`` if absent.
    version_path: str | None
57
+
58
+
59
def normalize_part_name(path: str) -> str:
    """Return *path* as a canonical, package-relative part name.

    Backslashes are treated as separators and surrounding whitespace is
    stripped.  Empty and ``.`` segments are dropped and ``..`` removes the
    preceding segment; a ``..`` with nothing left to remove is ignored.  The
    result therefore never starts with ``/`` and never contains ``..``.

    Args:
        path: Raw part name or path, possibly using ``\\`` separators.

    Returns:
        The remaining segments joined by ``/`` (``""`` when none remain).
    """
    segments: list[str] = []
    # Split by hand rather than via ``PurePosixPath.parts``: pathlib keeps
    # exactly two leading slashes as a single "//" root part, so inputs such
    # as "//Contents/a.xml" (or UNC-style backslash paths) used to come out
    # with leading slashes instead of a package-relative name.
    for segment in path.replace("\\", "/").strip().split("/"):
        if segment in ("", "."):
            continue
        if segment == "..":
            if segments:
                segments.pop()
            continue
        segments.append(segment)
    return "/".join(segments)
71
+
72
+
73
def resolve_part_name(
    base_part: str,
    href: str,
    *,
    known_parts: Collection[str] | None = None,
) -> str:
    """Resolve *href*, as found inside *base_part*, to a normalized part name.

    Resolution order:

    1. A blank *href* yields ``""``.
    2. An *href* starting with ``/`` is taken from the package root.
    3. With *known_parts*, the *href* read from the package root wins if such
       a part exists; otherwise the *href* read relative to the directory of
       *base_part* wins if that part exists.
    4. If the normalized *href* already starts with the directory of
       *base_part*, it is kept as a package-root path.
    5. Otherwise it is resolved relative to the directory of *base_part*.

    Args:
        base_part: Part name of the document containing the reference.
        href: Reference to resolve; backslashes count as separators.
        known_parts: Optional collection of existing part names used to
            disambiguate step 3.  Entries are normalized before comparison.

    Returns:
        The normalized part name, or ``""`` for a blank *href*.
    """
    cleaned = href.replace("\\", "/").strip()
    if not cleaned:
        return ""
    if cleaned.startswith("/"):
        return normalize_part_name(cleaned)

    parent = PurePosixPath(normalize_part_name(base_part)).parent
    from_package_root = normalize_part_name(cleaned)
    from_base_dir = normalize_part_name(str(parent / cleaned))

    if known_parts is not None:
        existing = {normalize_part_name(name) for name in known_parts}
        # Package-root interpretation is tried before the relative one.
        for candidate in (from_package_root, from_base_dir):
            if candidate in existing:
                return candidate

    parent_name = normalize_part_name(str(parent))
    if parent_name and from_package_root.startswith(f"{parent_name}/"):
        return from_package_root
    return from_base_dir
97
+
98
+
99
def parse_container_rootfiles(container_root: Any) -> tuple[RootFileRef, ...]:
    """Collect the distinct rootfile references declared in ``container.xml``.

    Both un-namespaced ``rootfile`` elements and those in one of
    ``CONTAINER_NAMESPACES`` are considered.  The path is read from
    ``full-path`` (or the ``fullPath`` / ``full_path`` variants) and the
    media type from ``media-type`` (or ``mediaType`` / ``media_type``).

    Entries without a usable path are skipped, and repeated
    ``(path, media type)`` pairs are reported once.

    Args:
        container_root: Parsed root element of ``container.xml``; it must
            offer an ElementTree-style ``findall``.

    Returns:
        The rootfile references in discovery order.
    """
    candidates = list(container_root.findall(".//rootfile"))
    for namespace in CONTAINER_NAMESPACES:
        candidates.extend(container_root.findall(f".//{{{namespace}}}rootfile"))

    rootfiles: list[RootFileRef] = []
    seen: set[tuple[str, str | None]] = set()
    for elem in candidates:
        raw_path = (
            elem.get("full-path")
            or elem.get("fullPath")
            or elem.get("full_path")
        )
        if not raw_path:
            continue
        full_path = normalize_part_name(raw_path)
        if not full_path:
            # Values such as " ", "/" or "." are truthy but normalize to an
            # empty part name; treat them like a missing attribute instead of
            # emitting a reference that points at nothing.
            continue
        media_type = (
            elem.get("media-type")
            or elem.get("mediaType")
            or elem.get("media_type")
        )
        key = (full_path, media_type)
        if key in seen:
            continue
        seen.add(key)
        rootfiles.append(RootFileRef(full_path=full_path, media_type=media_type))
    return tuple(rootfiles)
129
+
130
+
131
def select_main_rootfile(rootfiles: Iterable[RootFileRef]) -> tuple[RootFileRef | None, bool]:
    """Choose the rootfile to treat as the main package document.

    Args:
        rootfiles: Candidate rootfile references, in declaration order.

    Returns:
        A ``(rootfile, used_fallback)`` pair.  The first entry whose media
        type equals ``MAIN_ROOTFILE_MEDIA_TYPE`` is returned with ``False``.
        If none matches, the first entry is returned with ``True``.  An empty
        input gives ``(None, False)``.
    """
    candidates = tuple(rootfiles)
    if not candidates:
        return None, False
    preferred = next(
        (entry for entry in candidates if entry.media_type == MAIN_ROOTFILE_MEDIA_TYPE),
        None,
    )
    if preferred is not None:
        return preferred, False
    return candidates[0], True
139
+
140
+
141
def is_section_part_name(path: str) -> bool:
    """Tell whether the file name of *path* looks like ``section*.xml``.

    Only the final path component is inspected and the comparison ignores
    case.
    """
    basename = PurePosixPath(path).name.lower()
    if not basename.endswith(".xml"):
        return False
    return basename.startswith("section")
144
+
145
+
146
def is_header_part_name(path: str) -> bool:
    """Tell whether the file name of *path* looks like ``header*.xml``.

    Only the final path component is inspected and the comparison ignores
    case.
    """
    basename = PurePosixPath(path).name.lower()
    if not basename.endswith(".xml"):
        return False
    return basename.startswith("header")
149
+
150
+
151
+ def _manifest_matches(item: ManifestItemRef, *candidates: str) -> bool:
152
+ haystack = " ".join(
153
+ part.lower()
154
+ for part in (
155
+ item.item_id or "",
156
+ item.href,
157
+ item.media_type or "",
158
+ item.properties or "",
159
+ )
160
+ if part
161
+ )
162
+ return any(candidate in haystack for candidate in candidates if candidate)
163
+
164
+
165
def parse_manifest_relationships(
    manifest_root: Any,
    manifest_path: str,
    *,
    known_parts: Collection[str] | None = None,
) -> ManifestRelationships:
    """Resolve the items and spine of a manifest into package part names.

    Args:
        manifest_root: Parsed root element of the manifest document; it must
            offer an ElementTree-style ``findall`` with a namespace mapping.
        manifest_path: Part name of the manifest, used as the base for
            resolving each item's ``href``.
        known_parts: Optional collection of existing part names, forwarded to
            :func:`resolve_part_name` to disambiguate references.

    Returns:
        A :class:`ManifestRelationships` describing every item that has a
        non-empty ``href``, the resolved spine order, spine ``idref`` values
        that could not be resolved, and the header, master-page, history and
        version paths derived from them.
    """
    items: list[ManifestItemRef] = []
    path_by_id: dict[str, str] = {}
    for element in manifest_root.findall(".//opf:item", OPF_NS):
        href = (element.get("href") or "").strip()
        if not href:
            continue
        entry = ManifestItemRef(
            item_id=element.get("id"),
            href=href,
            resolved_path=resolve_part_name(manifest_path, href, known_parts=known_parts),
            media_type=element.get("media-type"),
            properties=element.get("properties"),
        )
        items.append(entry)
        if entry.item_id:
            # A repeated id overwrites the earlier mapping.
            path_by_id[entry.item_id] = entry.resolved_path

    spine_paths: list[str] = []
    dangling_idrefs: list[str] = []
    for itemref in manifest_root.findall(".//opf:itemref", OPF_NS):
        idref = (itemref.get("idref") or "").strip()
        if not idref:
            continue
        target = path_by_id.get(idref)
        if target:
            spine_paths.append(target)
        else:
            dangling_idrefs.append(idref)

    def paths_matching(*keywords: str) -> tuple[str, ...]:
        # Resolved paths of all items matching any keyword, in manifest order.
        return tuple(
            entry.resolved_path
            for entry in items
            if _manifest_matches(entry, *keywords)
        )

    version_candidates = paths_matching("version")
    return ManifestRelationships(
        manifest_path=normalize_part_name(manifest_path),
        items=tuple(items),
        spine_paths=tuple(spine_paths),
        dangling_idrefs=tuple(dangling_idrefs),
        header_paths=tuple(path for path in spine_paths if is_header_part_name(path)),
        master_page_paths=paths_matching("masterpage", "master-page"),
        history_paths=paths_matching("history"),
        version_path=version_candidates[0] if version_candidates else None,
    )
hwpx/oxml/document.py CHANGED
@@ -4249,7 +4249,9 @@ class HwpxOxmlDocument:
4249
4249
  master_pages: Sequence[HwpxOxmlMasterPage] | None = None,
4250
4250
  histories: Sequence[HwpxOxmlHistory] | None = None,
4251
4251
  version: HwpxOxmlVersion | None = None,
4252
+ manifest_path: str = "Contents/content.hpf",
4252
4253
  ):
4254
+ self._manifest_path = manifest_path
4253
4255
  self._manifest = manifest
4254
4256
  self._sections = list(sections)
4255
4257
  self._headers = list(headers)
@@ -4277,7 +4279,7 @@ class HwpxOxmlDocument:
4277
4279
  if not isinstance(package, HwpxPackage):
4278
4280
  raise TypeError("package must be an instance of HwpxPackage")
4279
4281
 
4280
- manifest = package.get_xml(package.MANIFEST_PATH)
4282
+ manifest = package.manifest_tree()
4281
4283
  section_paths = package.section_paths()
4282
4284
  header_paths = package.header_paths()
4283
4285
  master_page_paths = package.master_page_paths()
@@ -4342,6 +4344,7 @@ class HwpxOxmlDocument:
4342
4344
  master_pages=master_pages,
4343
4345
  histories=histories,
4344
4346
  version=version,
4347
+ manifest_path=package.main_content.full_path,
4345
4348
  )
4346
4349
 
4347
4350
  @property
@@ -4756,7 +4759,7 @@ class HwpxOxmlDocument:
4756
4759
  """Return a mapping of part names to updated XML payloads."""
4757
4760
  updates: dict[str, bytes] = {}
4758
4761
  if self._manifest_dirty:
4759
- updates["Contents/content.hpf"] = _serialize_xml(self._manifest)
4762
+ updates[self._manifest_path] = _serialize_xml(self._manifest)
4760
4763
  for section in self._sections:
4761
4764
  if section.dirty:
4762
4765
  updates[section.part_name] = section.to_bytes()
hwpx/tools/archive_cli.py CHANGED
@@ -4,13 +4,14 @@ import argparse
4
4
  import json
5
5
  import os
6
6
  import shutil
7
+ import sys
7
8
  import tempfile
8
9
  from dataclasses import asdict, dataclass
9
10
  from pathlib import Path
10
11
  from typing import Sequence
11
12
  from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile
12
13
 
13
- from lxml import etree
14
+ from lxml import etree # type: ignore[reportAttributeAccessIssue]
14
15
 
15
16
  from .package_validator import validate_package
16
17
 
@@ -171,7 +172,7 @@ def _summarize_pack_validation(output_path: Path) -> None:
171
172
  report = validate_package(output_path)
172
173
  if report.ok:
173
174
  return
174
- summary = "\n".join(f"- {issue}" for issue in report.issues[:10])
175
+ summary = "\n".join(f"- {issue}" for issue in report.errors[:10])
175
176
  raise ValueError(f"packed archive failed validation:\n{summary}")
176
177
 
177
178
 
@@ -180,7 +181,7 @@ def unpack_hwpx(
180
181
  output_dir: str | Path,
181
182
  *,
182
183
  overwrite: bool = False,
183
- pretty_xml: bool = True,
184
+ pretty_xml: bool = False,
184
185
  ) -> UnpackResult:
185
186
  source_path = Path(source)
186
187
  if not source_path.is_file():
@@ -203,6 +204,29 @@ def unpack_hwpx(
203
204
  return UnpackResult(output_dir=destination, metadata_path=metadata_path, entries=entries)
204
205
 
205
206
 
207
+ def _add_unpack_xml_format_args(parser: argparse.ArgumentParser) -> None:
208
+ group = parser.add_mutually_exclusive_group()
209
+ group.add_argument(
210
+ "--pretty-xml",
211
+ action="store_true",
212
+ help="Reformat XML/HWPF payloads for easier manual inspection",
213
+ )
214
+ group.add_argument(
215
+ "--no-pretty-xml",
216
+ action="store_true",
217
+ help="Deprecated alias for the default raw-byte preserving behavior",
218
+ )
219
+
220
+
221
+ def _resolve_pretty_xml_flag(args: argparse.Namespace) -> bool:
222
+ if getattr(args, "no_pretty_xml", False):
223
+ print(
224
+ "WARN: --no-pretty-xml is deprecated because raw XML preservation is now the default.",
225
+ file=sys.stderr,
226
+ )
227
+ return bool(getattr(args, "pretty_xml", False))
228
+
229
+
206
230
  def pack_hwpx(
207
231
  input_dir: str | Path,
208
232
  output_path: str | Path,
@@ -251,7 +275,9 @@ def pack_hwpx(
251
275
 
252
276
 
253
277
  def unpack_main(argv: Sequence[str] | None = None) -> int:
254
- parser = argparse.ArgumentParser(description="Unpack an HWPX file into a directory")
278
+ parser = argparse.ArgumentParser(
279
+ description="Unpack an HWPX file into a directory (raw XML bytes are preserved by default)"
280
+ )
255
281
  parser.add_argument("input", help="Input .hwpx path")
256
282
  parser.add_argument("output", help="Output directory")
257
283
  parser.add_argument(
@@ -259,11 +285,7 @@ def unpack_main(argv: Sequence[str] | None = None) -> int:
259
285
  action="store_true",
260
286
  help="Allow deleting an existing non-empty output directory",
261
287
  )
262
- parser.add_argument(
263
- "--no-pretty-xml",
264
- action="store_true",
265
- help="Keep XML payloads in their original byte formatting",
266
- )
288
+ _add_unpack_xml_format_args(parser)
267
289
  args = parser.parse_args(argv)
268
290
 
269
291
  try:
@@ -271,7 +293,7 @@ def unpack_main(argv: Sequence[str] | None = None) -> int:
271
293
  args.input,
272
294
  args.output,
273
295
  overwrite=args.force,
274
- pretty_xml=not args.no_pretty_xml,
296
+ pretty_xml=_resolve_pretty_xml_flag(args),
275
297
  )
276
298
  except Exception as exc:
277
299
  print(f"ERROR: {exc}")
@@ -311,7 +333,7 @@ def main(argv: Sequence[str] | None = None) -> int:
311
333
  unpack_parser.add_argument("input")
312
334
  unpack_parser.add_argument("output")
313
335
  unpack_parser.add_argument("--force", action="store_true")
314
- unpack_parser.add_argument("--no-pretty-xml", action="store_true")
336
+ _add_unpack_xml_format_args(unpack_parser)
315
337
 
316
338
  pack_parser = subparsers.add_parser("pack", help="Pack a directory into HWPX")
317
339
  pack_parser.add_argument("input")
@@ -323,6 +345,8 @@ def main(argv: Sequence[str] | None = None) -> int:
323
345
  forward = [args.input, args.output]
324
346
  if args.force:
325
347
  forward.append("--force")
348
+ if args.pretty_xml:
349
+ forward.append("--pretty-xml")
326
350
  if args.no_pretty_xml:
327
351
  forward.append("--no-pretty-xml")
328
352
  return unpack_main(forward)