python-hwpx 2.10.1__py3-none-any.whl → 2.10.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwpx/__init__.py +10 -0
- hwpx/builder/core.py +21 -2
- hwpx/builder/report.py +7 -1
- hwpx/document.py +153 -19
- hwpx/opc/package.py +331 -9
- hwpx/oxml/document.py +305 -3
- hwpx/template_formfit.py +48 -17
- hwpx/tools/__init__.py +8 -0
- hwpx/tools/archive_cli.py +18 -6
- hwpx/tools/markdown_export.py +488 -0
- hwpx/tools/package_validator.py +215 -1
- hwpx/tools/repair.py +91 -8
- hwpx/tools/table_navigation.py +77 -1
- {python_hwpx-2.10.1.dist-info → python_hwpx-2.10.3.dist-info}/METADATA +49 -1
- {python_hwpx-2.10.1.dist-info → python_hwpx-2.10.3.dist-info}/RECORD +20 -19
- {python_hwpx-2.10.1.dist-info → python_hwpx-2.10.3.dist-info}/WHEEL +0 -0
- {python_hwpx-2.10.1.dist-info → python_hwpx-2.10.3.dist-info}/entry_points.txt +0 -0
- {python_hwpx-2.10.1.dist-info → python_hwpx-2.10.3.dist-info}/licenses/LICENSE +0 -0
- {python_hwpx-2.10.1.dist-info → python_hwpx-2.10.3.dist-info}/licenses/NOTICE +0 -0
- {python_hwpx-2.10.1.dist-info → python_hwpx-2.10.3.dist-info}/top_level.txt +0 -0
hwpx/__init__.py
CHANGED
|
@@ -25,6 +25,12 @@ from .tools.text_extractor import (
|
|
|
25
25
|
TextExtractor,
|
|
26
26
|
)
|
|
27
27
|
from .tools.object_finder import FoundElement, ObjectFinder
|
|
28
|
+
from .tools.package_validator import (
|
|
29
|
+
EditorOpenSafetyReport,
|
|
30
|
+
PackageValidationReport,
|
|
31
|
+
validate_editor_open_safety,
|
|
32
|
+
validate_package,
|
|
33
|
+
)
|
|
28
34
|
from .document import HwpxDocument
|
|
29
35
|
from .package import HwpxPackage
|
|
30
36
|
from .authoring import (
|
|
@@ -58,7 +64,9 @@ __all__ = [
|
|
|
58
64
|
"DocumentBlock",
|
|
59
65
|
"DocumentPlan",
|
|
60
66
|
"DocumentStylePreset",
|
|
67
|
+
"EditorOpenSafetyReport",
|
|
61
68
|
"ParagraphInfo",
|
|
69
|
+
"PackageValidationReport",
|
|
62
70
|
"PlanValidationReport",
|
|
63
71
|
"SectionInfo",
|
|
64
72
|
"TEMPLATE_FORMFIT_BASELINE_SCHEMA_VERSION",
|
|
@@ -76,4 +84,6 @@ __all__ = [
|
|
|
76
84
|
"inspect_operating_plan_quality",
|
|
77
85
|
"normalize_document_plan",
|
|
78
86
|
"validate_document_plan",
|
|
87
|
+
"validate_editor_open_safety",
|
|
88
|
+
"validate_package",
|
|
79
89
|
]
|
hwpx/builder/core.py
CHANGED
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
from typing import Any, Mapping, Sequence
|
|
8
8
|
|
|
9
9
|
from hwpx.document import HwpxDocument
|
|
10
|
+
from hwpx.tools.package_validator import validate_editor_open_safety
|
|
10
11
|
from hwpx.tools.package_validator import validate_package
|
|
11
12
|
from hwpx.tools.validator import validate_document
|
|
12
13
|
|
|
@@ -576,13 +577,24 @@ def _merge_flags(*flag_sets: dict[str, bool]) -> dict[str, bool]:
|
|
|
576
577
|
return merged
|
|
577
578
|
|
|
578
579
|
|
|
579
|
-
def _hard_gates(
|
|
580
|
+
def _hard_gates(
|
|
581
|
+
package_report: object,
|
|
582
|
+
document_report: object,
|
|
583
|
+
reopen_report: ReopenReport,
|
|
584
|
+
editor_open_safety_report: object | None = None,
|
|
585
|
+
) -> dict[str, str]:
|
|
580
586
|
document_warnings = getattr(document_report, "warnings", ())
|
|
587
|
+
editor_open_safety_ok = (
|
|
588
|
+
True
|
|
589
|
+
if editor_open_safety_report is None
|
|
590
|
+
else bool(getattr(editor_open_safety_report, "ok", False))
|
|
591
|
+
)
|
|
581
592
|
return {
|
|
582
593
|
"package_validation": "pass" if getattr(package_report, "ok", False) else "fail",
|
|
583
594
|
"document_errors": "pass" if getattr(document_report, "ok", False) else "fail",
|
|
584
595
|
"schema_lint": "warning" if document_warnings else "pass",
|
|
585
596
|
"reopen": "pass" if reopen_report.ok else "fail",
|
|
597
|
+
"editor_open_safety": "pass" if editor_open_safety_ok else "fail",
|
|
586
598
|
"id_integrity": "unavailable",
|
|
587
599
|
}
|
|
588
600
|
|
|
@@ -696,6 +708,7 @@ class Document:
|
|
|
696
708
|
document.save_to_path(path)
|
|
697
709
|
package_report = validate_package(path)
|
|
698
710
|
document_report = validate_document(path)
|
|
711
|
+
editor_open_safety_report = validate_editor_open_safety(path)
|
|
699
712
|
try:
|
|
700
713
|
reopened_document = HwpxDocument.open(path)
|
|
701
714
|
reopen_report = ReopenReport(ok=True, document=reopened_document)
|
|
@@ -713,8 +726,14 @@ class Document:
|
|
|
713
726
|
validate_document=document_report,
|
|
714
727
|
reopened=reopen_report,
|
|
715
728
|
metadata=self.metadata.as_dict() if self.metadata is not None else {},
|
|
716
|
-
hard_gates=_hard_gates(
|
|
729
|
+
hard_gates=_hard_gates(
|
|
730
|
+
package_report,
|
|
731
|
+
document_report,
|
|
732
|
+
reopen_report,
|
|
733
|
+
editor_open_safety_report,
|
|
734
|
+
),
|
|
717
735
|
visual_review_required=visual_review_required,
|
|
718
736
|
feature_flags=feature_flags,
|
|
737
|
+
editor_open_safety=editor_open_safety_report,
|
|
719
738
|
)
|
|
720
739
|
return report
|
hwpx/builder/report.py
CHANGED
|
@@ -6,7 +6,7 @@ from os import PathLike
|
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
8
|
from hwpx.tools.id_integrity import IdIntegrityReport, check_id_integrity
|
|
9
|
-
from hwpx.tools.package_validator import PackageValidationReport
|
|
9
|
+
from hwpx.tools.package_validator import EditorOpenSafetyReport, PackageValidationReport
|
|
10
10
|
from hwpx.tools.validator import ValidationReport
|
|
11
11
|
|
|
12
12
|
|
|
@@ -32,6 +32,7 @@ class BuilderSaveReport:
|
|
|
32
32
|
visual_review_required: bool = False
|
|
33
33
|
feature_flags: dict[str, bool] = field(default_factory=dict)
|
|
34
34
|
id_integrity: IdIntegrityReport | None = None
|
|
35
|
+
editor_open_safety: EditorOpenSafetyReport | None = None
|
|
35
36
|
|
|
36
37
|
def __post_init__(self) -> None:
|
|
37
38
|
hard_gates = dict(self.hard_gates)
|
|
@@ -52,6 +53,11 @@ class BuilderSaveReport:
|
|
|
52
53
|
"hard_gates": dict(self.hard_gates),
|
|
53
54
|
"visual_review_required": self.visual_review_required,
|
|
54
55
|
"feature_flags": dict(self.feature_flags),
|
|
56
|
+
"editor_open_safety": (
|
|
57
|
+
None
|
|
58
|
+
if self.editor_open_safety is None
|
|
59
|
+
else self.editor_open_safety.to_dict()
|
|
60
|
+
),
|
|
55
61
|
"validate_package": {
|
|
56
62
|
"ok": self.validate_package.ok,
|
|
57
63
|
"checked_parts": list(self.validate_package.checked_parts),
|
hwpx/document.py
CHANGED
|
@@ -5,12 +5,15 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import xml.etree.ElementTree as ET
|
|
7
7
|
import io
|
|
8
|
+
import os
|
|
9
|
+
import tempfile
|
|
8
10
|
import warnings
|
|
9
11
|
from datetime import datetime
|
|
10
12
|
import logging
|
|
11
13
|
import uuid
|
|
12
14
|
|
|
13
15
|
from os import PathLike
|
|
16
|
+
from pathlib import Path
|
|
14
17
|
from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
|
|
15
18
|
|
|
16
19
|
from lxml import etree
|
|
@@ -39,7 +42,10 @@ from .oxml import (
|
|
|
39
42
|
TrackChange,
|
|
40
43
|
TrackChangeAuthor,
|
|
41
44
|
)
|
|
42
|
-
from .opc.package import
|
|
45
|
+
from .opc.package import (
|
|
46
|
+
HwpxPackage,
|
|
47
|
+
_UNCHECKED_SAVE_TOKEN,
|
|
48
|
+
)
|
|
43
49
|
from .oxml.namespaces import HH, HH_NS, HP, HP_NS, register_owpml_namespaces
|
|
44
50
|
from .templates import blank_document_bytes
|
|
45
51
|
|
|
@@ -83,6 +89,91 @@ def _png_dimensions(image_data: bytes) -> tuple[int, int] | None:
|
|
|
83
89
|
return width, height
|
|
84
90
|
|
|
85
91
|
|
|
92
|
+
def _write_bytes_atomically(path: str | PathLike[str], data: bytes) -> None:
|
|
93
|
+
target = Path(path)
|
|
94
|
+
fd, tmp_path = tempfile.mkstemp(dir=str(target.parent), suffix=".hwpx.tmp")
|
|
95
|
+
try:
|
|
96
|
+
with os.fdopen(fd, "wb") as tmp_fh:
|
|
97
|
+
tmp_fh.write(data)
|
|
98
|
+
os.replace(tmp_path, str(target))
|
|
99
|
+
except BaseException:
|
|
100
|
+
try:
|
|
101
|
+
os.unlink(tmp_path)
|
|
102
|
+
except OSError:
|
|
103
|
+
pass
|
|
104
|
+
raise
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _capture_stream_checkpoint(stream: BinaryIO) -> tuple[int, bytes] | None:
|
|
108
|
+
try:
|
|
109
|
+
position = stream.tell()
|
|
110
|
+
except (AttributeError, OSError):
|
|
111
|
+
return None
|
|
112
|
+
try:
|
|
113
|
+
tail = stream.read()
|
|
114
|
+
except (AttributeError, OSError):
|
|
115
|
+
try:
|
|
116
|
+
end_position = stream.seek(0, os.SEEK_END)
|
|
117
|
+
except (AttributeError, OSError):
|
|
118
|
+
return None
|
|
119
|
+
try:
|
|
120
|
+
stream.seek(position)
|
|
121
|
+
except (AttributeError, OSError):
|
|
122
|
+
return None
|
|
123
|
+
if end_position == position:
|
|
124
|
+
return position, b""
|
|
125
|
+
return None
|
|
126
|
+
try:
|
|
127
|
+
stream.seek(position)
|
|
128
|
+
except (AttributeError, OSError):
|
|
129
|
+
return None
|
|
130
|
+
return position, tail
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _rollback_stream(stream: BinaryIO, checkpoint: tuple[int, bytes] | None) -> None:
|
|
134
|
+
if checkpoint is None:
|
|
135
|
+
return
|
|
136
|
+
position, tail = checkpoint
|
|
137
|
+
try:
|
|
138
|
+
stream.seek(position)
|
|
139
|
+
if tail:
|
|
140
|
+
stream.write(tail)
|
|
141
|
+
stream.truncate(position + len(tail))
|
|
142
|
+
else:
|
|
143
|
+
stream.truncate(position)
|
|
144
|
+
stream.seek(position)
|
|
145
|
+
except (AttributeError, OSError):
|
|
146
|
+
return
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _write_stream_or_rollback(stream: BinaryIO, data: bytes) -> None:
    """Write *data* to *stream*, undoing the write if anything goes wrong.

    A checkpoint of the stream is captured first; if none can be taken an
    ``OSError`` is raised before any byte is written.  A write that reports
    fewer bytes than ``len(data)`` is treated as a failure.  On any exception
    (including ``BaseException`` subclasses) the stream is rolled back to the
    checkpoint and the exception is re-raised.
    """
    checkpoint = _capture_stream_checkpoint(stream)
    if checkpoint is None:
        raise OSError(
            "HWPX stream save requires a checkpointable stream; "
            "use save_to_path() for non-seekable outputs"
        )

    expected = len(data)
    try:
        count = stream.write(data)
        # Streams may return None from write(); only a concrete mismatch
        # counts as a short write.
        short_write = count is not None and count != expected
        if short_write:
            raise OSError(
                "short write while saving HWPX stream: "
                f"wrote {count} of {expected} bytes"
            )
    except BaseException:
        _rollback_stream(stream, checkpoint)
        raise
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _summarize_validation_issues(issues: Sequence[Any], *, limit: int = 5) -> str:
|
|
169
|
+
selected = [str(issue) for issue in issues[:limit]]
|
|
170
|
+
remaining = len(issues) - len(selected)
|
|
171
|
+
summary = "; ".join(selected)
|
|
172
|
+
if remaining > 0:
|
|
173
|
+
summary += f" ... and {remaining} more"
|
|
174
|
+
return summary
|
|
175
|
+
|
|
176
|
+
|
|
86
177
|
class HwpxDocument:
|
|
87
178
|
"""Provides a user-friendly API for editing HWPX documents."""
|
|
88
179
|
|
|
@@ -1472,6 +1563,14 @@ class HwpxDocument:
|
|
|
1472
1563
|
from .tools.exporter import export_markdown
|
|
1473
1564
|
return export_markdown(self, **kwargs) # type: ignore[arg-type]
|
|
1474
1565
|
|
|
1566
|
+
def export_rich_markdown(self, **kwargs: object) -> str:
    """Return this document rendered as rich Markdown.

    Thin wrapper around
    :func:`~hwpx.tools.markdown_export.export_markdown`: the document itself
    is passed as the first argument and every keyword argument is forwarded
    unchanged.  The exporter is described as preserving inline styles,
    tables, footnotes, hyperlinks, images, and shape text.
    """
    # Imported lazily, inside the method, as in the plain Markdown exporter.
    from .tools.markdown_export import export_markdown as render_rich_markdown

    return render_rich_markdown(self, **kwargs)  # type: ignore[arg-type]
|
|
1573
|
+
|
|
1475
1574
|
# ------------------------------------------------------------------
|
|
1476
1575
|
# Validation
|
|
1477
1576
|
# ------------------------------------------------------------------
|
|
@@ -1485,7 +1584,9 @@ class HwpxDocument:
|
|
|
1485
1584
|
"""
|
|
1486
1585
|
from .tools.validator import validate_document
|
|
1487
1586
|
|
|
1488
|
-
return validate_document(
|
|
1587
|
+
return validate_document(
|
|
1588
|
+
self._to_bytes_for_validation()
|
|
1589
|
+
)
|
|
1489
1590
|
|
|
1490
1591
|
def _run_pre_save_validation(self) -> None:
|
|
1491
1592
|
"""Raise if validate_on_save is enabled and the document is invalid."""
|
|
@@ -1493,29 +1594,38 @@ class HwpxDocument:
|
|
|
1493
1594
|
return
|
|
1494
1595
|
report = self.validate()
|
|
1495
1596
|
if not report.ok:
|
|
1496
|
-
msgs =
|
|
1497
|
-
remaining = len(report.issues) - 5
|
|
1498
|
-
if remaining > 0:
|
|
1499
|
-
msgs += f" … and {remaining} more"
|
|
1597
|
+
msgs = _summarize_validation_issues(report.issues)
|
|
1500
1598
|
raise ValueError(f"Document validation failed: {msgs}")
|
|
1501
1599
|
|
|
1600
|
+
def _run_open_safety_validation(self, archive_bytes: bytes) -> None:
    """Reject *archive_bytes* if the editor-open safety check fails.

    Runs ``validate_editor_open_safety`` on the serialized archive and raises
    ``ValueError`` carrying the report's ``summary`` when the report is not
    ``ok``.  Returns ``None`` otherwise.
    """
    from .tools.package_validator import validate_editor_open_safety

    safety = validate_editor_open_safety(archive_bytes)
    if safety.ok:
        return

    raise ValueError(
        "Generated HWPX package failed open-safety validation: "
        + safety.summary
    )
|
|
1611
|
+
|
|
1502
1612
|
def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
    """Validate, serialize and write the document to *path*; return *path*.

    The archive is generated without clearing the dirty state, handed to
    ``_write_bytes_atomically`` and only then is the document marked clean,
    so a failed write leaves the document flagged as modified.
    """
    self._run_pre_save_validation()

    # Keep the dirty flags until the bytes are safely on disk.
    snapshot = self._to_bytes_raw(reset_dirty=False)
    _write_bytes_atomically(path, snapshot)

    self._mark_save_clean()
    return path
|
|
1510
1620
|
|
|
1511
1621
|
def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
    """Validate, serialize and write the document to *stream*; return *stream*.

    The archive is generated without clearing the dirty state, written via
    ``_write_stream_or_rollback`` and only then is the document marked clean,
    so a failed write leaves the document flagged as modified.
    """
    self._run_pre_save_validation()

    # Keep the dirty flags until the stream write has succeeded.
    snapshot = self._to_bytes_raw(reset_dirty=False)
    _write_stream_or_rollback(stream, snapshot)

    self._mark_save_clean()
    return stream
|
|
1519
1629
|
|
|
1520
1630
|
def to_bytes(self) -> bytes:
|
|
1521
1631
|
"""Serialize pending changes and return the HWPX archive as bytes."""
|
|
@@ -1523,20 +1633,44 @@ class HwpxDocument:
|
|
|
1523
1633
|
self._run_pre_save_validation()
|
|
1524
1634
|
return self._to_bytes_raw()
|
|
1525
1635
|
|
|
1526
|
-
def _to_bytes_raw(
|
|
1527
|
-
|
|
1636
|
+
def _to_bytes_raw(
|
|
1637
|
+
self,
|
|
1638
|
+
*,
|
|
1639
|
+
reset_dirty: bool = True,
|
|
1640
|
+
) -> bytes:
|
|
1641
|
+
"""Serialize and run editor-open safety validation.
|
|
1528
1642
|
|
|
1529
1643
|
When ``reset_dirty`` is ``False``, the document remains marked as
|
|
1530
1644
|
modified after the archive snapshot is generated.
|
|
1531
1645
|
"""
|
|
1532
1646
|
updates = self._root.serialize()
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1647
|
+
if updates:
|
|
1648
|
+
for part_name, payload in updates.items():
|
|
1649
|
+
self._package.set_part(part_name, payload)
|
|
1650
|
+
result = self._package._save_to_bytes(
|
|
1651
|
+
verify_open_safety=True,
|
|
1652
|
+
mark_clean=False,
|
|
1653
|
+
)
|
|
1536
1654
|
if isinstance(result, bytes):
|
|
1655
|
+
self._run_open_safety_validation(result)
|
|
1656
|
+
if reset_dirty:
|
|
1657
|
+
self._mark_save_clean()
|
|
1537
1658
|
return result
|
|
1538
1659
|
raise TypeError("package.save(None) must return bytes")
|
|
1539
1660
|
|
|
1661
|
+
def _to_bytes_for_validation(self) -> bytes:
    """Serialize the current state for document validation only.

    The root's pending updates are handed to the package's
    ``_save_bytes_unchecked`` together with the module-level unchecked-save
    token.  The resulting bytes are meant for the validator, not for callers.
    """
    pending = self._root.serialize()
    # NOTE(review): presumably this path skips the open-safety check that
    # _to_bytes_raw performs -- confirm against HwpxPackage.
    return self._package._save_bytes_unchecked(
        pending,
        _unchecked_token=_UNCHECKED_SAVE_TOKEN,
    )
|
|
1669
|
+
|
|
1670
|
+
def _mark_save_clean(self) -> None:
|
|
1671
|
+
self._root.reset_dirty()
|
|
1672
|
+
self._package.version_info.mark_clean()
|
|
1673
|
+
|
|
1540
1674
|
@overload
|
|
1541
1675
|
def save(self, path_or_stream: None = None) -> bytes: ...
|
|
1542
1676
|
|