python-hwpx 2.10.2__py3-none-any.whl → 2.10.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwpx/__init__.py +10 -0
- hwpx/builder/core.py +21 -2
- hwpx/builder/report.py +7 -1
- hwpx/document.py +145 -19
- hwpx/opc/package.py +331 -9
- hwpx/oxml/document.py +158 -1
- hwpx/template_formfit.py +48 -17
- hwpx/tools/__init__.py +8 -0
- hwpx/tools/archive_cli.py +18 -6
- hwpx/tools/package_validator.py +215 -1
- hwpx/tools/repair.py +91 -8
- {python_hwpx-2.10.2.dist-info → python_hwpx-2.10.3.dist-info}/METADATA +1 -1
- {python_hwpx-2.10.2.dist-info → python_hwpx-2.10.3.dist-info}/RECORD +18 -18
- {python_hwpx-2.10.2.dist-info → python_hwpx-2.10.3.dist-info}/WHEEL +0 -0
- {python_hwpx-2.10.2.dist-info → python_hwpx-2.10.3.dist-info}/entry_points.txt +0 -0
- {python_hwpx-2.10.2.dist-info → python_hwpx-2.10.3.dist-info}/licenses/LICENSE +0 -0
- {python_hwpx-2.10.2.dist-info → python_hwpx-2.10.3.dist-info}/licenses/NOTICE +0 -0
- {python_hwpx-2.10.2.dist-info → python_hwpx-2.10.3.dist-info}/top_level.txt +0 -0
hwpx/__init__.py
CHANGED
|
@@ -25,6 +25,12 @@ from .tools.text_extractor import (
|
|
|
25
25
|
TextExtractor,
|
|
26
26
|
)
|
|
27
27
|
from .tools.object_finder import FoundElement, ObjectFinder
|
|
28
|
+
from .tools.package_validator import (
|
|
29
|
+
EditorOpenSafetyReport,
|
|
30
|
+
PackageValidationReport,
|
|
31
|
+
validate_editor_open_safety,
|
|
32
|
+
validate_package,
|
|
33
|
+
)
|
|
28
34
|
from .document import HwpxDocument
|
|
29
35
|
from .package import HwpxPackage
|
|
30
36
|
from .authoring import (
|
|
@@ -58,7 +64,9 @@ __all__ = [
|
|
|
58
64
|
"DocumentBlock",
|
|
59
65
|
"DocumentPlan",
|
|
60
66
|
"DocumentStylePreset",
|
|
67
|
+
"EditorOpenSafetyReport",
|
|
61
68
|
"ParagraphInfo",
|
|
69
|
+
"PackageValidationReport",
|
|
62
70
|
"PlanValidationReport",
|
|
63
71
|
"SectionInfo",
|
|
64
72
|
"TEMPLATE_FORMFIT_BASELINE_SCHEMA_VERSION",
|
|
@@ -76,4 +84,6 @@ __all__ = [
|
|
|
76
84
|
"inspect_operating_plan_quality",
|
|
77
85
|
"normalize_document_plan",
|
|
78
86
|
"validate_document_plan",
|
|
87
|
+
"validate_editor_open_safety",
|
|
88
|
+
"validate_package",
|
|
79
89
|
]
|
hwpx/builder/core.py
CHANGED
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
from typing import Any, Mapping, Sequence
|
|
8
8
|
|
|
9
9
|
from hwpx.document import HwpxDocument
|
|
10
|
+
from hwpx.tools.package_validator import validate_editor_open_safety
|
|
10
11
|
from hwpx.tools.package_validator import validate_package
|
|
11
12
|
from hwpx.tools.validator import validate_document
|
|
12
13
|
|
|
@@ -576,13 +577,24 @@ def _merge_flags(*flag_sets: dict[str, bool]) -> dict[str, bool]:
|
|
|
576
577
|
return merged
|
|
577
578
|
|
|
578
579
|
|
|
579
|
-
def _hard_gates(
|
|
580
|
+
def _hard_gates(
|
|
581
|
+
package_report: object,
|
|
582
|
+
document_report: object,
|
|
583
|
+
reopen_report: ReopenReport,
|
|
584
|
+
editor_open_safety_report: object | None = None,
|
|
585
|
+
) -> dict[str, str]:
|
|
580
586
|
document_warnings = getattr(document_report, "warnings", ())
|
|
587
|
+
editor_open_safety_ok = (
|
|
588
|
+
True
|
|
589
|
+
if editor_open_safety_report is None
|
|
590
|
+
else bool(getattr(editor_open_safety_report, "ok", False))
|
|
591
|
+
)
|
|
581
592
|
return {
|
|
582
593
|
"package_validation": "pass" if getattr(package_report, "ok", False) else "fail",
|
|
583
594
|
"document_errors": "pass" if getattr(document_report, "ok", False) else "fail",
|
|
584
595
|
"schema_lint": "warning" if document_warnings else "pass",
|
|
585
596
|
"reopen": "pass" if reopen_report.ok else "fail",
|
|
597
|
+
"editor_open_safety": "pass" if editor_open_safety_ok else "fail",
|
|
586
598
|
"id_integrity": "unavailable",
|
|
587
599
|
}
|
|
588
600
|
|
|
@@ -696,6 +708,7 @@ class Document:
|
|
|
696
708
|
document.save_to_path(path)
|
|
697
709
|
package_report = validate_package(path)
|
|
698
710
|
document_report = validate_document(path)
|
|
711
|
+
editor_open_safety_report = validate_editor_open_safety(path)
|
|
699
712
|
try:
|
|
700
713
|
reopened_document = HwpxDocument.open(path)
|
|
701
714
|
reopen_report = ReopenReport(ok=True, document=reopened_document)
|
|
@@ -713,8 +726,14 @@ class Document:
|
|
|
713
726
|
validate_document=document_report,
|
|
714
727
|
reopened=reopen_report,
|
|
715
728
|
metadata=self.metadata.as_dict() if self.metadata is not None else {},
|
|
716
|
-
hard_gates=_hard_gates(
|
|
729
|
+
hard_gates=_hard_gates(
|
|
730
|
+
package_report,
|
|
731
|
+
document_report,
|
|
732
|
+
reopen_report,
|
|
733
|
+
editor_open_safety_report,
|
|
734
|
+
),
|
|
717
735
|
visual_review_required=visual_review_required,
|
|
718
736
|
feature_flags=feature_flags,
|
|
737
|
+
editor_open_safety=editor_open_safety_report,
|
|
719
738
|
)
|
|
720
739
|
return report
|
hwpx/builder/report.py
CHANGED
|
@@ -6,7 +6,7 @@ from os import PathLike
|
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
8
|
from hwpx.tools.id_integrity import IdIntegrityReport, check_id_integrity
|
|
9
|
-
from hwpx.tools.package_validator import PackageValidationReport
|
|
9
|
+
from hwpx.tools.package_validator import EditorOpenSafetyReport, PackageValidationReport
|
|
10
10
|
from hwpx.tools.validator import ValidationReport
|
|
11
11
|
|
|
12
12
|
|
|
@@ -32,6 +32,7 @@ class BuilderSaveReport:
|
|
|
32
32
|
visual_review_required: bool = False
|
|
33
33
|
feature_flags: dict[str, bool] = field(default_factory=dict)
|
|
34
34
|
id_integrity: IdIntegrityReport | None = None
|
|
35
|
+
editor_open_safety: EditorOpenSafetyReport | None = None
|
|
35
36
|
|
|
36
37
|
def __post_init__(self) -> None:
|
|
37
38
|
hard_gates = dict(self.hard_gates)
|
|
@@ -52,6 +53,11 @@ class BuilderSaveReport:
|
|
|
52
53
|
"hard_gates": dict(self.hard_gates),
|
|
53
54
|
"visual_review_required": self.visual_review_required,
|
|
54
55
|
"feature_flags": dict(self.feature_flags),
|
|
56
|
+
"editor_open_safety": (
|
|
57
|
+
None
|
|
58
|
+
if self.editor_open_safety is None
|
|
59
|
+
else self.editor_open_safety.to_dict()
|
|
60
|
+
),
|
|
55
61
|
"validate_package": {
|
|
56
62
|
"ok": self.validate_package.ok,
|
|
57
63
|
"checked_parts": list(self.validate_package.checked_parts),
|
hwpx/document.py
CHANGED
|
@@ -5,12 +5,15 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import xml.etree.ElementTree as ET
|
|
7
7
|
import io
|
|
8
|
+
import os
|
|
9
|
+
import tempfile
|
|
8
10
|
import warnings
|
|
9
11
|
from datetime import datetime
|
|
10
12
|
import logging
|
|
11
13
|
import uuid
|
|
12
14
|
|
|
13
15
|
from os import PathLike
|
|
16
|
+
from pathlib import Path
|
|
14
17
|
from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
|
|
15
18
|
|
|
16
19
|
from lxml import etree
|
|
@@ -39,7 +42,10 @@ from .oxml import (
|
|
|
39
42
|
TrackChange,
|
|
40
43
|
TrackChangeAuthor,
|
|
41
44
|
)
|
|
42
|
-
from .opc.package import
|
|
45
|
+
from .opc.package import (
|
|
46
|
+
HwpxPackage,
|
|
47
|
+
_UNCHECKED_SAVE_TOKEN,
|
|
48
|
+
)
|
|
43
49
|
from .oxml.namespaces import HH, HH_NS, HP, HP_NS, register_owpml_namespaces
|
|
44
50
|
from .templates import blank_document_bytes
|
|
45
51
|
|
|
@@ -83,6 +89,91 @@ def _png_dimensions(image_data: bytes) -> tuple[int, int] | None:
|
|
|
83
89
|
return width, height
|
|
84
90
|
|
|
85
91
|
|
|
92
|
+
def _write_bytes_atomically(path: str | PathLike[str], data: bytes) -> None:
|
|
93
|
+
target = Path(path)
|
|
94
|
+
fd, tmp_path = tempfile.mkstemp(dir=str(target.parent), suffix=".hwpx.tmp")
|
|
95
|
+
try:
|
|
96
|
+
with os.fdopen(fd, "wb") as tmp_fh:
|
|
97
|
+
tmp_fh.write(data)
|
|
98
|
+
os.replace(tmp_path, str(target))
|
|
99
|
+
except BaseException:
|
|
100
|
+
try:
|
|
101
|
+
os.unlink(tmp_path)
|
|
102
|
+
except OSError:
|
|
103
|
+
pass
|
|
104
|
+
raise
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _capture_stream_checkpoint(stream: BinaryIO) -> tuple[int, bytes] | None:
|
|
108
|
+
try:
|
|
109
|
+
position = stream.tell()
|
|
110
|
+
except (AttributeError, OSError):
|
|
111
|
+
return None
|
|
112
|
+
try:
|
|
113
|
+
tail = stream.read()
|
|
114
|
+
except (AttributeError, OSError):
|
|
115
|
+
try:
|
|
116
|
+
end_position = stream.seek(0, os.SEEK_END)
|
|
117
|
+
except (AttributeError, OSError):
|
|
118
|
+
return None
|
|
119
|
+
try:
|
|
120
|
+
stream.seek(position)
|
|
121
|
+
except (AttributeError, OSError):
|
|
122
|
+
return None
|
|
123
|
+
if end_position == position:
|
|
124
|
+
return position, b""
|
|
125
|
+
return None
|
|
126
|
+
try:
|
|
127
|
+
stream.seek(position)
|
|
128
|
+
except (AttributeError, OSError):
|
|
129
|
+
return None
|
|
130
|
+
return position, tail
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _rollback_stream(stream: BinaryIO, checkpoint: tuple[int, bytes] | None) -> None:
|
|
134
|
+
if checkpoint is None:
|
|
135
|
+
return
|
|
136
|
+
position, tail = checkpoint
|
|
137
|
+
try:
|
|
138
|
+
stream.seek(position)
|
|
139
|
+
if tail:
|
|
140
|
+
stream.write(tail)
|
|
141
|
+
stream.truncate(position + len(tail))
|
|
142
|
+
else:
|
|
143
|
+
stream.truncate(position)
|
|
144
|
+
stream.seek(position)
|
|
145
|
+
except (AttributeError, OSError):
|
|
146
|
+
return
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _write_stream_or_rollback(stream: BinaryIO, data: bytes) -> None:
|
|
150
|
+
checkpoint = _capture_stream_checkpoint(stream)
|
|
151
|
+
if checkpoint is None:
|
|
152
|
+
raise OSError(
|
|
153
|
+
"HWPX stream save requires a checkpointable stream; "
|
|
154
|
+
"use save_to_path() for non-seekable outputs"
|
|
155
|
+
)
|
|
156
|
+
try:
|
|
157
|
+
written = stream.write(data)
|
|
158
|
+
if written is not None and written != len(data):
|
|
159
|
+
raise OSError(
|
|
160
|
+
"short write while saving HWPX stream: "
|
|
161
|
+
f"wrote {written} of {len(data)} bytes"
|
|
162
|
+
)
|
|
163
|
+
except BaseException:
|
|
164
|
+
_rollback_stream(stream, checkpoint)
|
|
165
|
+
raise
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _summarize_validation_issues(issues: Sequence[Any], *, limit: int = 5) -> str:
|
|
169
|
+
selected = [str(issue) for issue in issues[:limit]]
|
|
170
|
+
remaining = len(issues) - len(selected)
|
|
171
|
+
summary = "; ".join(selected)
|
|
172
|
+
if remaining > 0:
|
|
173
|
+
summary += f" ... and {remaining} more"
|
|
174
|
+
return summary
|
|
175
|
+
|
|
176
|
+
|
|
86
177
|
class HwpxDocument:
|
|
87
178
|
"""Provides a user-friendly API for editing HWPX documents."""
|
|
88
179
|
|
|
@@ -1493,7 +1584,9 @@ class HwpxDocument:
|
|
|
1493
1584
|
"""
|
|
1494
1585
|
from .tools.validator import validate_document
|
|
1495
1586
|
|
|
1496
|
-
return validate_document(
|
|
1587
|
+
return validate_document(
|
|
1588
|
+
self._to_bytes_for_validation()
|
|
1589
|
+
)
|
|
1497
1590
|
|
|
1498
1591
|
def _run_pre_save_validation(self) -> None:
|
|
1499
1592
|
"""Raise if validate_on_save is enabled and the document is invalid."""
|
|
@@ -1501,29 +1594,38 @@ class HwpxDocument:
|
|
|
1501
1594
|
return
|
|
1502
1595
|
report = self.validate()
|
|
1503
1596
|
if not report.ok:
|
|
1504
|
-
msgs =
|
|
1505
|
-
remaining = len(report.issues) - 5
|
|
1506
|
-
if remaining > 0:
|
|
1507
|
-
msgs += f" … and {remaining} more"
|
|
1597
|
+
msgs = _summarize_validation_issues(report.issues)
|
|
1508
1598
|
raise ValueError(f"Document validation failed: {msgs}")
|
|
1509
1599
|
|
|
1600
|
+
def _run_open_safety_validation(self, archive_bytes: bytes) -> None:
|
|
1601
|
+
"""Raise if generated bytes are unsafe to hand to an HWPX editor."""
|
|
1602
|
+
|
|
1603
|
+
from .tools.package_validator import validate_editor_open_safety
|
|
1604
|
+
|
|
1605
|
+
report = validate_editor_open_safety(archive_bytes)
|
|
1606
|
+
if not report.ok:
|
|
1607
|
+
raise ValueError(
|
|
1608
|
+
"Generated HWPX package failed open-safety validation: "
|
|
1609
|
+
+ report.summary
|
|
1610
|
+
)
|
|
1611
|
+
|
|
1510
1612
|
def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
|
|
1511
1613
|
"""Persist pending changes to *path* and return the same path."""
|
|
1512
1614
|
|
|
1513
1615
|
self._run_pre_save_validation()
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
self.
|
|
1517
|
-
return path
|
|
1616
|
+
archive_bytes = self._to_bytes_raw(reset_dirty=False)
|
|
1617
|
+
_write_bytes_atomically(path, archive_bytes)
|
|
1618
|
+
self._mark_save_clean()
|
|
1619
|
+
return path
|
|
1518
1620
|
|
|
1519
1621
|
def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
|
|
1520
1622
|
"""Persist pending changes to *stream* and return the same stream."""
|
|
1521
1623
|
|
|
1522
1624
|
self._run_pre_save_validation()
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
self.
|
|
1526
|
-
return stream
|
|
1625
|
+
archive_bytes = self._to_bytes_raw(reset_dirty=False)
|
|
1626
|
+
_write_stream_or_rollback(stream, archive_bytes)
|
|
1627
|
+
self._mark_save_clean()
|
|
1628
|
+
return stream
|
|
1527
1629
|
|
|
1528
1630
|
def to_bytes(self) -> bytes:
|
|
1529
1631
|
"""Serialize pending changes and return the HWPX archive as bytes."""
|
|
@@ -1531,20 +1633,44 @@ class HwpxDocument:
|
|
|
1531
1633
|
self._run_pre_save_validation()
|
|
1532
1634
|
return self._to_bytes_raw()
|
|
1533
1635
|
|
|
1534
|
-
def _to_bytes_raw(
|
|
1535
|
-
|
|
1636
|
+
def _to_bytes_raw(
|
|
1637
|
+
self,
|
|
1638
|
+
*,
|
|
1639
|
+
reset_dirty: bool = True,
|
|
1640
|
+
) -> bytes:
|
|
1641
|
+
"""Serialize and run editor-open safety validation.
|
|
1536
1642
|
|
|
1537
1643
|
When ``reset_dirty`` is ``False``, the document remains marked as
|
|
1538
1644
|
modified after the archive snapshot is generated.
|
|
1539
1645
|
"""
|
|
1540
1646
|
updates = self._root.serialize()
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1647
|
+
if updates:
|
|
1648
|
+
for part_name, payload in updates.items():
|
|
1649
|
+
self._package.set_part(part_name, payload)
|
|
1650
|
+
result = self._package._save_to_bytes(
|
|
1651
|
+
verify_open_safety=True,
|
|
1652
|
+
mark_clean=False,
|
|
1653
|
+
)
|
|
1544
1654
|
if isinstance(result, bytes):
|
|
1655
|
+
self._run_open_safety_validation(result)
|
|
1656
|
+
if reset_dirty:
|
|
1657
|
+
self._mark_save_clean()
|
|
1545
1658
|
return result
|
|
1546
1659
|
raise TypeError("package.save(None) must return bytes")
|
|
1547
1660
|
|
|
1661
|
+
def _to_bytes_for_validation(self) -> bytes:
|
|
1662
|
+
"""Serialize current state for document validation without handing bytes to callers."""
|
|
1663
|
+
|
|
1664
|
+
updates = self._root.serialize()
|
|
1665
|
+
return self._package._save_bytes_unchecked(
|
|
1666
|
+
updates,
|
|
1667
|
+
_unchecked_token=_UNCHECKED_SAVE_TOKEN,
|
|
1668
|
+
)
|
|
1669
|
+
|
|
1670
|
+
def _mark_save_clean(self) -> None:
|
|
1671
|
+
self._root.reset_dirty()
|
|
1672
|
+
self._package.version_info.mark_clean()
|
|
1673
|
+
|
|
1548
1674
|
@overload
|
|
1549
1675
|
def save(self, path_or_stream: None = None) -> bytes: ...
|
|
1550
1676
|
|