python-hwpx 2.10.2__py3-none-any.whl → 2.10.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hwpx/__init__.py CHANGED
@@ -25,6 +25,12 @@ from .tools.text_extractor import (
25
25
  TextExtractor,
26
26
  )
27
27
  from .tools.object_finder import FoundElement, ObjectFinder
28
+ from .tools.package_validator import (
29
+ EditorOpenSafetyReport,
30
+ PackageValidationReport,
31
+ validate_editor_open_safety,
32
+ validate_package,
33
+ )
28
34
  from .document import HwpxDocument
29
35
  from .package import HwpxPackage
30
36
  from .authoring import (
@@ -58,7 +64,9 @@ __all__ = [
58
64
  "DocumentBlock",
59
65
  "DocumentPlan",
60
66
  "DocumentStylePreset",
67
+ "EditorOpenSafetyReport",
61
68
  "ParagraphInfo",
69
+ "PackageValidationReport",
62
70
  "PlanValidationReport",
63
71
  "SectionInfo",
64
72
  "TEMPLATE_FORMFIT_BASELINE_SCHEMA_VERSION",
@@ -76,4 +84,6 @@ __all__ = [
76
84
  "inspect_operating_plan_quality",
77
85
  "normalize_document_plan",
78
86
  "validate_document_plan",
87
+ "validate_editor_open_safety",
88
+ "validate_package",
79
89
  ]
hwpx/builder/core.py CHANGED
@@ -7,6 +7,7 @@ from pathlib import Path
7
7
  from typing import Any, Mapping, Sequence
8
8
 
9
9
  from hwpx.document import HwpxDocument
10
+ from hwpx.tools.package_validator import validate_editor_open_safety
10
11
  from hwpx.tools.package_validator import validate_package
11
12
  from hwpx.tools.validator import validate_document
12
13
 
@@ -576,13 +577,24 @@ def _merge_flags(*flag_sets: dict[str, bool]) -> dict[str, bool]:
576
577
  return merged
577
578
 
578
579
 
579
- def _hard_gates(package_report: object, document_report: object, reopen_report: ReopenReport) -> dict[str, str]:
580
+ def _hard_gates(
581
+ package_report: object,
582
+ document_report: object,
583
+ reopen_report: ReopenReport,
584
+ editor_open_safety_report: object | None = None,
585
+ ) -> dict[str, str]:
580
586
  document_warnings = getattr(document_report, "warnings", ())
587
+ editor_open_safety_ok = (
588
+ True
589
+ if editor_open_safety_report is None
590
+ else bool(getattr(editor_open_safety_report, "ok", False))
591
+ )
581
592
  return {
582
593
  "package_validation": "pass" if getattr(package_report, "ok", False) else "fail",
583
594
  "document_errors": "pass" if getattr(document_report, "ok", False) else "fail",
584
595
  "schema_lint": "warning" if document_warnings else "pass",
585
596
  "reopen": "pass" if reopen_report.ok else "fail",
597
+ "editor_open_safety": "pass" if editor_open_safety_ok else "fail",
586
598
  "id_integrity": "unavailable",
587
599
  }
588
600
 
@@ -696,6 +708,7 @@ class Document:
696
708
  document.save_to_path(path)
697
709
  package_report = validate_package(path)
698
710
  document_report = validate_document(path)
711
+ editor_open_safety_report = validate_editor_open_safety(path)
699
712
  try:
700
713
  reopened_document = HwpxDocument.open(path)
701
714
  reopen_report = ReopenReport(ok=True, document=reopened_document)
@@ -713,8 +726,14 @@ class Document:
713
726
  validate_document=document_report,
714
727
  reopened=reopen_report,
715
728
  metadata=self.metadata.as_dict() if self.metadata is not None else {},
716
- hard_gates=_hard_gates(package_report, document_report, reopen_report),
729
+ hard_gates=_hard_gates(
730
+ package_report,
731
+ document_report,
732
+ reopen_report,
733
+ editor_open_safety_report,
734
+ ),
717
735
  visual_review_required=visual_review_required,
718
736
  feature_flags=feature_flags,
737
+ editor_open_safety=editor_open_safety_report,
719
738
  )
720
739
  return report
hwpx/builder/report.py CHANGED
@@ -6,7 +6,7 @@ from os import PathLike
6
6
  from typing import Any
7
7
 
8
8
  from hwpx.tools.id_integrity import IdIntegrityReport, check_id_integrity
9
- from hwpx.tools.package_validator import PackageValidationReport
9
+ from hwpx.tools.package_validator import EditorOpenSafetyReport, PackageValidationReport
10
10
  from hwpx.tools.validator import ValidationReport
11
11
 
12
12
 
@@ -32,6 +32,7 @@ class BuilderSaveReport:
32
32
  visual_review_required: bool = False
33
33
  feature_flags: dict[str, bool] = field(default_factory=dict)
34
34
  id_integrity: IdIntegrityReport | None = None
35
+ editor_open_safety: EditorOpenSafetyReport | None = None
35
36
 
36
37
  def __post_init__(self) -> None:
37
38
  hard_gates = dict(self.hard_gates)
@@ -52,6 +53,11 @@ class BuilderSaveReport:
52
53
  "hard_gates": dict(self.hard_gates),
53
54
  "visual_review_required": self.visual_review_required,
54
55
  "feature_flags": dict(self.feature_flags),
56
+ "editor_open_safety": (
57
+ None
58
+ if self.editor_open_safety is None
59
+ else self.editor_open_safety.to_dict()
60
+ ),
55
61
  "validate_package": {
56
62
  "ok": self.validate_package.ok,
57
63
  "checked_parts": list(self.validate_package.checked_parts),
hwpx/document.py CHANGED
@@ -5,12 +5,15 @@ from __future__ import annotations
5
5
 
6
6
  import xml.etree.ElementTree as ET
7
7
  import io
8
+ import os
9
+ import tempfile
8
10
  import warnings
9
11
  from datetime import datetime
10
12
  import logging
11
13
  import uuid
12
14
 
13
15
  from os import PathLike
16
+ from pathlib import Path
14
17
  from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
15
18
 
16
19
  from lxml import etree
@@ -39,7 +42,10 @@ from .oxml import (
39
42
  TrackChange,
40
43
  TrackChangeAuthor,
41
44
  )
42
- from .opc.package import HwpxPackage
45
+ from .opc.package import (
46
+ HwpxPackage,
47
+ _UNCHECKED_SAVE_TOKEN,
48
+ )
43
49
  from .oxml.namespaces import HH, HH_NS, HP, HP_NS, register_owpml_namespaces
44
50
  from .templates import blank_document_bytes
45
51
 
@@ -83,6 +89,91 @@ def _png_dimensions(image_data: bytes) -> tuple[int, int] | None:
83
89
  return width, height
84
90
 
85
91
 
92
+ def _write_bytes_atomically(path: str | PathLike[str], data: bytes) -> None:
93
+ target = Path(path)
94
+ fd, tmp_path = tempfile.mkstemp(dir=str(target.parent), suffix=".hwpx.tmp")
95
+ try:
96
+ with os.fdopen(fd, "wb") as tmp_fh:
97
+ tmp_fh.write(data)
98
+ os.replace(tmp_path, str(target))
99
+ except BaseException:
100
+ try:
101
+ os.unlink(tmp_path)
102
+ except OSError:
103
+ pass
104
+ raise
105
+
106
+
107
+ def _capture_stream_checkpoint(stream: BinaryIO) -> tuple[int, bytes] | None:
108
+ try:
109
+ position = stream.tell()
110
+ except (AttributeError, OSError):
111
+ return None
112
+ try:
113
+ tail = stream.read()
114
+ except (AttributeError, OSError):
115
+ try:
116
+ end_position = stream.seek(0, os.SEEK_END)
117
+ except (AttributeError, OSError):
118
+ return None
119
+ try:
120
+ stream.seek(position)
121
+ except (AttributeError, OSError):
122
+ return None
123
+ if end_position == position:
124
+ return position, b""
125
+ return None
126
+ try:
127
+ stream.seek(position)
128
+ except (AttributeError, OSError):
129
+ return None
130
+ return position, tail
131
+
132
+
133
+ def _rollback_stream(stream: BinaryIO, checkpoint: tuple[int, bytes] | None) -> None:
134
+ if checkpoint is None:
135
+ return
136
+ position, tail = checkpoint
137
+ try:
138
+ stream.seek(position)
139
+ if tail:
140
+ stream.write(tail)
141
+ stream.truncate(position + len(tail))
142
+ else:
143
+ stream.truncate(position)
144
+ stream.seek(position)
145
+ except (AttributeError, OSError):
146
+ return
147
+
148
+
149
def _write_stream_or_rollback(stream: BinaryIO, data: bytes) -> None:
    """Write *data* to *stream*, restoring the stream if the write fails.

    A checkpoint of the current position and trailing bytes is taken first.
    If the write raises, reports a short write, or fails while flushing, the
    stream is rolled back to that checkpoint and the error is re-raised.

    Args:
        stream: Seekable binary stream positioned where the archive should go.
        data: Complete archive payload.

    Raises:
        OSError: If the stream cannot be checkpointed, or fewer bytes than
            requested were written.
    """
    checkpoint = _capture_stream_checkpoint(stream)
    if checkpoint is None:
        raise OSError(
            "HWPX stream save requires a checkpointable stream; "
            "use save_to_path() for non-seekable outputs"
        )
    try:
        written = stream.write(data)
        # A None return value is treated as "everything written".
        if written is not None and written != len(data):
            raise OSError(
                "short write while saving HWPX stream: "
                f"wrote {written} of {len(data)} bytes"
            )
        # Buffered streams can accept write() and only fail once the buffer
        # is flushed; flush here so that failure is still rolled back instead
        # of surfacing after the save has been reported as successful.
        flush = getattr(stream, "flush", None)
        if callable(flush):
            flush()
    except BaseException:
        _rollback_stream(stream, checkpoint)
        raise
166
+
167
+
168
+ def _summarize_validation_issues(issues: Sequence[Any], *, limit: int = 5) -> str:
169
+ selected = [str(issue) for issue in issues[:limit]]
170
+ remaining = len(issues) - len(selected)
171
+ summary = "; ".join(selected)
172
+ if remaining > 0:
173
+ summary += f" ... and {remaining} more"
174
+ return summary
175
+
176
+
86
177
  class HwpxDocument:
87
178
  """Provides a user-friendly API for editing HWPX documents."""
88
179
 
@@ -1493,7 +1584,9 @@ class HwpxDocument:
1493
1584
  """
1494
1585
  from .tools.validator import validate_document
1495
1586
 
1496
- return validate_document(self._to_bytes_raw(reset_dirty=False))
1587
+ return validate_document(
1588
+ self._to_bytes_for_validation()
1589
+ )
1497
1590
 
1498
1591
  def _run_pre_save_validation(self) -> None:
1499
1592
  """Raise if validate_on_save is enabled and the document is invalid."""
@@ -1501,29 +1594,38 @@ class HwpxDocument:
1501
1594
  return
1502
1595
  report = self.validate()
1503
1596
  if not report.ok:
1504
- msgs = "; ".join(str(i) for i in report.issues[:5])
1505
- remaining = len(report.issues) - 5
1506
- if remaining > 0:
1507
- msgs += f" … and {remaining} more"
1597
+ msgs = _summarize_validation_issues(report.issues)
1508
1598
  raise ValueError(f"Document validation failed: {msgs}")
1509
1599
 
1600
+ def _run_open_safety_validation(self, archive_bytes: bytes) -> None:
1601
+ """Raise if generated bytes are unsafe to hand to an HWPX editor."""
1602
+
1603
+ from .tools.package_validator import validate_editor_open_safety
1604
+
1605
+ report = validate_editor_open_safety(archive_bytes)
1606
+ if not report.ok:
1607
+ raise ValueError(
1608
+ "Generated HWPX package failed open-safety validation: "
1609
+ + report.summary
1610
+ )
1611
+
1510
1612
  def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
1511
1613
  """Persist pending changes to *path* and return the same path."""
1512
1614
 
1513
1615
  self._run_pre_save_validation()
1514
- updates = self._root.serialize()
1515
- result = self._package.save(path, updates)
1516
- self._root.reset_dirty()
1517
- return path if result is None else result
1616
+ archive_bytes = self._to_bytes_raw(reset_dirty=False)
1617
+ _write_bytes_atomically(path, archive_bytes)
1618
+ self._mark_save_clean()
1619
+ return path
1518
1620
 
1519
1621
  def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
1520
1622
  """Persist pending changes to *stream* and return the same stream."""
1521
1623
 
1522
1624
  self._run_pre_save_validation()
1523
- updates = self._root.serialize()
1524
- result = self._package.save(stream, updates)
1525
- self._root.reset_dirty()
1526
- return stream if result is None else result
1625
+ archive_bytes = self._to_bytes_raw(reset_dirty=False)
1626
+ _write_stream_or_rollback(stream, archive_bytes)
1627
+ self._mark_save_clean()
1628
+ return stream
1527
1629
 
1528
1630
  def to_bytes(self) -> bytes:
1529
1631
  """Serialize pending changes and return the HWPX archive as bytes."""
@@ -1531,20 +1633,44 @@ class HwpxDocument:
1531
1633
  self._run_pre_save_validation()
1532
1634
  return self._to_bytes_raw()
1533
1635
 
1534
- def _to_bytes_raw(self, *, reset_dirty: bool = True) -> bytes:
1535
- """Serialize without validation.
1636
+ def _to_bytes_raw(
1637
+ self,
1638
+ *,
1639
+ reset_dirty: bool = True,
1640
+ ) -> bytes:
1641
+ """Serialize and run editor-open safety validation.
1536
1642
 
1537
1643
  When ``reset_dirty`` is ``False``, the document remains marked as
1538
1644
  modified after the archive snapshot is generated.
1539
1645
  """
1540
1646
  updates = self._root.serialize()
1541
- result = self._package.save(None, updates)
1542
- if reset_dirty:
1543
- self._root.reset_dirty()
1647
+ if updates:
1648
+ for part_name, payload in updates.items():
1649
+ self._package.set_part(part_name, payload)
1650
+ result = self._package._save_to_bytes(
1651
+ verify_open_safety=True,
1652
+ mark_clean=False,
1653
+ )
1544
1654
  if isinstance(result, bytes):
1655
+ self._run_open_safety_validation(result)
1656
+ if reset_dirty:
1657
+ self._mark_save_clean()
1545
1658
  return result
1546
1659
  raise TypeError("package.save(None) must return bytes")
1547
1660
 
1661
+ def _to_bytes_for_validation(self) -> bytes:
1662
+ """Serialize current state for document validation without handing bytes to callers."""
1663
+
1664
+ updates = self._root.serialize()
1665
+ return self._package._save_bytes_unchecked(
1666
+ updates,
1667
+ _unchecked_token=_UNCHECKED_SAVE_TOKEN,
1668
+ )
1669
+
1670
+ def _mark_save_clean(self) -> None:
1671
+ self._root.reset_dirty()
1672
+ self._package.version_info.mark_clean()
1673
+
1548
1674
  @overload
1549
1675
  def save(self, path_or_stream: None = None) -> bytes: ...
1550
1676