python-hwpx 2.5__py3-none-any.whl → 2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwpx/document.py +9 -4
- hwpx/tools/__init__.py +16 -0
- hwpx/tools/archive_cli.py +337 -0
- hwpx/tools/package_validator.py +219 -0
- hwpx/tools/page_guard.py +305 -0
- hwpx/tools/template_analyzer.py +218 -0
- hwpx/tools/text_extract_cli.py +66 -0
- {python_hwpx-2.5.dist-info → python_hwpx-2.7.dist-info}/METADATA +27 -3
- {python_hwpx-2.5.dist-info → python_hwpx-2.7.dist-info}/RECORD +13 -8
- python_hwpx-2.7.dist-info/entry_points.txt +8 -0
- python_hwpx-2.5.dist-info/entry_points.txt +0 -2
- {python_hwpx-2.5.dist-info → python_hwpx-2.7.dist-info}/WHEEL +0 -0
- {python_hwpx-2.5.dist-info → python_hwpx-2.7.dist-info}/licenses/LICENSE +0 -0
- {python_hwpx-2.5.dist-info → python_hwpx-2.7.dist-info}/top_level.txt +0 -0
hwpx/document.py
CHANGED
|
@@ -1280,7 +1280,7 @@ class HwpxDocument:
|
|
|
1280
1280
|
"""
|
|
1281
1281
|
from .tools.validator import validate_document
|
|
1282
1282
|
|
|
1283
|
-
return validate_document(self._to_bytes_raw())
|
|
1283
|
+
return validate_document(self._to_bytes_raw(reset_dirty=False))
|
|
1284
1284
|
|
|
1285
1285
|
def _run_pre_save_validation(self) -> None:
|
|
1286
1286
|
"""Raise if validate_on_save is enabled and the document is invalid."""
|
|
@@ -1318,11 +1318,16 @@ class HwpxDocument:
|
|
|
1318
1318
|
self._run_pre_save_validation()
|
|
1319
1319
|
return self._to_bytes_raw()
|
|
1320
1320
|
|
|
1321
|
-
def _to_bytes_raw(self) -> bytes:
|
|
1322
|
-
"""Serialize without validation
|
|
1321
|
+
def _to_bytes_raw(self, *, reset_dirty: bool = True) -> bytes:
|
|
1322
|
+
"""Serialize without validation.
|
|
1323
|
+
|
|
1324
|
+
When ``reset_dirty`` is ``False``, the document remains marked as
|
|
1325
|
+
modified after the archive snapshot is generated.
|
|
1326
|
+
"""
|
|
1323
1327
|
updates = self._root.serialize()
|
|
1324
1328
|
result = self._package.save(None, updates)
|
|
1325
|
-
|
|
1329
|
+
if reset_dirty:
|
|
1330
|
+
self._root.reset_dirty()
|
|
1326
1331
|
if isinstance(result, bytes):
|
|
1327
1332
|
return result
|
|
1328
1333
|
raise TypeError("package.save(None) must return bytes")
|
hwpx/tools/__init__.py
CHANGED
|
@@ -6,6 +6,16 @@ from .exporter import (
|
|
|
6
6
|
export_text,
|
|
7
7
|
)
|
|
8
8
|
from .object_finder import FoundElement, ObjectFinder
|
|
9
|
+
from .package_validator import (
|
|
10
|
+
PackageValidationIssue,
|
|
11
|
+
PackageValidationReport,
|
|
12
|
+
validate_package,
|
|
13
|
+
)
|
|
14
|
+
from .page_guard import (
|
|
15
|
+
DocumentMetrics,
|
|
16
|
+
collect_metrics,
|
|
17
|
+
compare_metrics,
|
|
18
|
+
)
|
|
9
19
|
from .text_extractor import (
|
|
10
20
|
DEFAULT_NAMESPACES,
|
|
11
21
|
ParagraphInfo,
|
|
@@ -33,6 +43,12 @@ __all__ = [
|
|
|
33
43
|
"strip_namespace",
|
|
34
44
|
"FoundElement",
|
|
35
45
|
"ObjectFinder",
|
|
46
|
+
"PackageValidationIssue",
|
|
47
|
+
"PackageValidationReport",
|
|
48
|
+
"validate_package",
|
|
49
|
+
"DocumentMetrics",
|
|
50
|
+
"collect_metrics",
|
|
51
|
+
"compare_metrics",
|
|
36
52
|
"DocumentSchemas",
|
|
37
53
|
"ValidationIssue",
|
|
38
54
|
"ValidationReport",
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
import tempfile
|
|
8
|
+
from dataclasses import asdict, dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Sequence
|
|
11
|
+
from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile
|
|
12
|
+
|
|
13
|
+
from lxml import etree
|
|
14
|
+
|
|
15
|
+
from .package_validator import validate_package
|
|
16
|
+
|
|
17
|
+
_XML_SUFFIXES = (".xml", ".hpf")
|
|
18
|
+
_PACK_METADATA_NAME = ".hwpx-pack-metadata.json"
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"ArchiveEntryInfo",
|
|
22
|
+
"UnpackResult",
|
|
23
|
+
"PackResult",
|
|
24
|
+
"pack_hwpx",
|
|
25
|
+
"unpack_hwpx",
|
|
26
|
+
"pack_main",
|
|
27
|
+
"unpack_main",
|
|
28
|
+
"main",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class ArchiveEntryInfo:
    """Immutable record of one archive member and its ZIP compression method."""

    # Member name as stored in the archive.
    path: str
    # Numeric ``zipfile`` compression constant recorded for the member.
    compress_type: int
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class UnpackResult:
    """Immutable summary returned after unpacking an archive."""

    # Directory the archive members were written into.
    output_dir: Path
    # Location of the JSON metadata file written next to the members.
    metadata_path: Path
    # One record per file member that was unpacked.
    entries: tuple[ArchiveEntryInfo, ...]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class PackResult:
    """Immutable summary returned after packing a directory."""

    # Path of the archive that was written.
    output_path: Path
    # Member names in the order they were scheduled for writing.
    entries: tuple[str, ...]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _guard_destructive_target(path: Path) -> None:
|
|
52
|
+
resolved = path.resolve()
|
|
53
|
+
if resolved == Path(resolved.anchor):
|
|
54
|
+
raise ValueError(f"refusing to overwrite filesystem root: {resolved}")
|
|
55
|
+
if resolved == Path.cwd().resolve():
|
|
56
|
+
raise ValueError(f"refusing to overwrite current working directory: {resolved}")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _prepare_output_dir(output_dir: Path, *, overwrite: bool) -> None:
|
|
60
|
+
if output_dir.exists() and not output_dir.is_dir():
|
|
61
|
+
raise NotADirectoryError(f"output exists and is not a directory: {output_dir}")
|
|
62
|
+
if output_dir.exists():
|
|
63
|
+
if any(output_dir.iterdir()):
|
|
64
|
+
if not overwrite:
|
|
65
|
+
raise FileExistsError(f"output directory is not empty: {output_dir}")
|
|
66
|
+
_guard_destructive_target(output_dir)
|
|
67
|
+
shutil.rmtree(output_dir)
|
|
68
|
+
else:
|
|
69
|
+
output_dir.rmdir()
|
|
70
|
+
output_dir.mkdir(parents=True, exist_ok=True)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _prepare_output_path(output_path: Path, *, overwrite: bool) -> None:
|
|
74
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
75
|
+
if output_path.exists() and not overwrite:
|
|
76
|
+
raise FileExistsError(f"output file already exists: {output_path}")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _format_xml_bytes(payload: bytes) -> bytes:
    """Return ``payload`` re-serialized as indented, pretty-printed XML.

    Payloads that cannot be parsed as XML are returned unchanged.  Parsed
    payloads are written back as UTF-8 with an XML declaration.
    """
    try:
        tree_root = etree.fromstring(payload)
    except etree.XMLSyntaxError:
        formatted = payload
    else:
        etree.indent(tree_root, space=" ")
        formatted = etree.tostring(
            tree_root,
            pretty_print=True,
            xml_declaration=True,
            encoding="UTF-8",
        )
    return formatted
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _iter_file_entries(archive: ZipFile) -> tuple[ArchiveEntryInfo, ...]:
    """Describe every non-directory member of ``archive`` in archive order."""
    return tuple(
        ArchiveEntryInfo(path=member.filename, compress_type=member.compress_type)
        for member in archive.infolist()
        if not member.is_dir()
    )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _metadata_path(root: Path) -> Path:
    """Return the location of the pack-metadata file directly under ``root``."""
    return root.joinpath(_PACK_METADATA_NAME)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _write_pack_metadata(root: Path, entries: tuple[ArchiveEntryInfo, ...]) -> Path:
    """Write the archive entry records as JSON under ``root``.

    The document carries ``format_version`` 1 and one object per entry.

    Returns:
        The path of the metadata file that was written.
    """
    target = _metadata_path(root)
    document = {
        "format_version": 1,
        "entries": list(map(asdict, entries)),
    }
    serialized = json.dumps(document, indent=2)
    target.write_text(serialized, encoding="utf-8")
    return target
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _read_pack_metadata(root: Path) -> tuple[ArchiveEntryInfo, ...]:
    """Load the archive entry records stored under ``root``.

    A missing metadata file is not an error and yields an empty tuple.
    Entries with a blank ``path`` are skipped; backslashes in paths are
    normalized to forward slashes, and a missing ``compress_type`` defaults
    to ``ZIP_DEFLATED``.

    Args:
        root: Directory that may contain the metadata file.

    Returns:
        The recorded entries, in file order.

    Raises:
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or its structure is not the expected
            object / list-of-objects shape.
    """
    metadata_file = _metadata_path(root)
    if not metadata_file.is_file():
        return ()

    data = json.loads(metadata_file.read_text(encoding="utf-8"))
    # The file is user-editable, so report a malformed shape clearly rather
    # than failing later with an unrelated AttributeError/TypeError.
    if not isinstance(data, dict):
        raise ValueError(f"pack metadata must be a JSON object: {metadata_file}")
    raw_entries = data.get("entries", [])
    if not isinstance(raw_entries, list):
        raise ValueError(f"pack metadata 'entries' must be a list: {metadata_file}")

    entries: list[ArchiveEntryInfo] = []
    for entry in raw_entries:
        if not isinstance(entry, dict):
            raise ValueError(f"pack metadata entry must be an object: {metadata_file}")
        path = str(entry.get("path", "")).strip()
        if not path:
            continue
        try:
            compress_type = int(entry.get("compress_type", ZIP_DEFLATED))
        except (TypeError, ValueError) as exc:
            raise ValueError(
                f"pack metadata has invalid compress_type for {path!r}: {metadata_file}"
            ) from exc
        entries.append(
            ArchiveEntryInfo(
                path=path.replace("\\", "/"),
                compress_type=compress_type,
            )
        )
    return tuple(entries)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _discover_files(root: Path) -> set[str]:
    """Collect the POSIX-style relative path of every file below ``root``.

    The pack-metadata file at the top of ``root`` is excluded.
    """
    relative_paths = (
        candidate.relative_to(root).as_posix()
        for candidate in root.rglob("*")
        if candidate.is_file()
    )
    return {rel_path for rel_path in relative_paths if rel_path != _PACK_METADATA_NAME}
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _resolve_write_order(paths: set[str], metadata: tuple[ArchiveEntryInfo, ...]) -> tuple[str, ...]:
|
|
149
|
+
ordered: list[str] = []
|
|
150
|
+
seen: set[str] = set()
|
|
151
|
+
|
|
152
|
+
if "mimetype" in paths:
|
|
153
|
+
ordered.append("mimetype")
|
|
154
|
+
seen.add("mimetype")
|
|
155
|
+
|
|
156
|
+
for entry in metadata:
|
|
157
|
+
if entry.path in paths and entry.path not in seen:
|
|
158
|
+
ordered.append(entry.path)
|
|
159
|
+
seen.add(entry.path)
|
|
160
|
+
|
|
161
|
+
for path in sorted(paths):
|
|
162
|
+
if path in seen:
|
|
163
|
+
continue
|
|
164
|
+
ordered.append(path)
|
|
165
|
+
seen.add(path)
|
|
166
|
+
|
|
167
|
+
return tuple(ordered)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _summarize_pack_validation(output_path: Path) -> None:
    """Validate the packed archive and fail with a summary when it is invalid.

    Raises:
        ValueError: If validation reports issues; the message lists at most
            the first ten of them.
    """
    report = validate_package(output_path)
    if not report.ok:
        summary = "\n".join(f"- {issue}" for issue in report.issues[:10])
        raise ValueError(f"packed archive failed validation:\n{summary}")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def unpack_hwpx(
    source: str | Path,
    output_dir: str | Path,
    *,
    overwrite: bool = False,
    pretty_xml: bool = True,
) -> UnpackResult:
    """Unpack an HWPX archive into a directory and record its layout.

    Args:
        source: Path of the archive to read.
        output_dir: Directory to unpack into; it is (re)created first.
        overwrite: Allow deleting an existing non-empty ``output_dir``.
        pretty_xml: Re-indent members whose name ends in ``.xml`` or ``.hpf``.

    Returns:
        The output directory, the metadata file path and the entry records.

    Raises:
        FileNotFoundError: If ``source`` is not an existing file.
        ValueError: If a member name would resolve outside ``output_dir``
            (for example via ``..`` components or an absolute path).
    """
    source_path = Path(source)
    if not source_path.is_file():
        raise FileNotFoundError(f"input file not found: {source_path}")

    destination = Path(output_dir)
    _prepare_output_dir(destination, overwrite=overwrite)
    resolved_root = destination.resolve()

    with ZipFile(source_path, "r") as archive:
        entries = _iter_file_entries(archive)

        # Member names come from the archive and are untrusted: check every
        # target before writing anything so a hostile name cannot place
        # files outside the output directory.
        targets: list[Path] = []
        for entry in entries:
            target = destination / entry.path
            if resolved_root not in target.resolve().parents:
                raise ValueError(f"archive entry escapes output directory: {entry.path!r}")
            targets.append(target)

        for entry, target in zip(entries, targets):
            data = archive.read(entry.path)
            if pretty_xml and entry.path.endswith(_XML_SUFFIXES):
                data = _format_xml_bytes(data)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(data)

    metadata_path = _write_pack_metadata(destination, entries)
    return UnpackResult(output_dir=destination, metadata_path=metadata_path, entries=entries)
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def pack_hwpx(
    input_dir: str | Path,
    output_path: str | Path,
    *,
    overwrite: bool = False,
) -> PackResult:
    """Pack a directory into an HWPX archive.

    ``mimetype`` is written first and uncompressed.  Other members follow in
    the resolved write order; a member is stored uncompressed only when the
    metadata recorded ``ZIP_STORED`` for it, otherwise it is deflated.  The
    archive is built in a temporary file beside the destination, validated,
    and then moved into place; the temporary file is removed on any failure.

    Args:
        input_dir: Directory holding the unpacked members.
        output_path: Archive file to create.
        overwrite: Allow replacing an existing ``output_path``.

    Returns:
        The destination path and the ordered member names.

    Raises:
        FileNotFoundError: If ``input_dir`` is not a directory or contains no
            ``mimetype`` file.
    """
    root = Path(input_dir)
    if not root.is_dir():
        raise FileNotFoundError(f"input directory not found: {root}")

    destination = Path(output_path)
    _prepare_output_path(destination, overwrite=overwrite)

    files = _discover_files(root)
    if "mimetype" not in files:
        raise FileNotFoundError(f"missing required 'mimetype' file in {root}")

    metadata = _read_pack_metadata(root)
    recorded_methods = {entry.path: entry.compress_type for entry in metadata}
    ordered_paths = _resolve_write_order(files, metadata)

    handle, tmp_name = tempfile.mkstemp(dir=str(destination.parent), suffix=".hwpx.tmp")
    os.close(handle)
    tmp_path = Path(tmp_name)
    try:
        with ZipFile(tmp_path, "w", ZIP_DEFLATED) as archive:
            archive.write(root / "mimetype", "mimetype", compress_type=ZIP_STORED)
            for rel_path in (name for name in ordered_paths if name != "mimetype"):
                # Anything not explicitly recorded as stored is deflated.
                recorded = recorded_methods.get(rel_path, ZIP_DEFLATED)
                method = ZIP_STORED if recorded == ZIP_STORED else ZIP_DEFLATED
                archive.write(root / rel_path, rel_path, compress_type=method)

        _summarize_pack_validation(tmp_path)
        os.replace(tmp_path, destination)
    except BaseException:
        # Best-effort cleanup; the original failure is what gets re-raised.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise

    return PackResult(output_path=destination, entries=ordered_paths)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def unpack_main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point for unpacking an archive.

    Args:
        argv: Argument list; ``None`` lets ``argparse`` read the process
            arguments.

    Returns:
        ``0`` on success, ``1`` when unpacking raised an exception (the
        error is printed).
    """
    parser = argparse.ArgumentParser(description="Unpack an HWPX file into a directory")
    parser.add_argument("input", help="Input .hwpx path")
    parser.add_argument("output", help="Output directory")
    parser.add_argument(
        "--force",
        action="store_true",
        help="Allow deleting an existing non-empty output directory",
    )
    parser.add_argument(
        "--no-pretty-xml",
        action="store_true",
        help="Keep XML payloads in their original byte formatting",
    )
    args = parser.parse_args(argv)

    try:
        result = unpack_hwpx(
            args.input,
            args.output,
            overwrite=args.force,
            pretty_xml=not args.no_pretty_xml,
        )
    except Exception as exc:
        print(f"ERROR: {exc}")
        return 1
    else:
        print(f"Unpacked {args.input} -> {result.output_dir}")
        print(f"Recorded archive metadata at {result.metadata_path}")
        return 0
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def pack_main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point for packing a directory.

    Args:
        argv: Argument list; ``None`` lets ``argparse`` read the process
            arguments.

    Returns:
        ``0`` on success, ``1`` when packing raised an exception (the error
        is printed).
    """
    parser = argparse.ArgumentParser(description="Pack a directory into an HWPX archive")
    parser.add_argument("input", help="Input directory")
    parser.add_argument("output", help="Output .hwpx path")
    parser.add_argument(
        "--force",
        action="store_true",
        help="Allow replacing an existing output file",
    )
    args = parser.parse_args(argv)

    try:
        result = pack_hwpx(args.input, args.output, overwrite=args.force)
    except Exception as exc:
        print(f"ERROR: {exc}")
        return 1
    else:
        print(f"Packed {args.input} -> {result.output_path}")
        return 0
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Dispatch the ``unpack`` and ``pack`` sub-commands.

    The parsed options are turned back into an argument list and forwarded to
    the matching single-purpose entry point, whose exit code is returned.
    """
    parser = argparse.ArgumentParser(description="HWPX archive utility helpers")
    subparsers = parser.add_subparsers(dest="command", required=True)

    unpack_parser = subparsers.add_parser("unpack", help="Unpack an HWPX file")
    unpack_parser.add_argument("input")
    unpack_parser.add_argument("output")
    unpack_parser.add_argument("--force", action="store_true")
    unpack_parser.add_argument("--no-pretty-xml", action="store_true")

    pack_parser = subparsers.add_parser("pack", help="Pack a directory into HWPX")
    pack_parser.add_argument("input")
    pack_parser.add_argument("output")
    pack_parser.add_argument("--force", action="store_true")

    args = parser.parse_args(argv)

    # Both sub-commands share the positional arguments and the --force flag.
    forward = [args.input, args.output]
    if args.force:
        forward.append("--force")

    if args.command == "unpack":
        if args.no_pretty_xml:
            forward.append("--no-pretty-xml")
        return unpack_main(forward)
    return pack_main(forward)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
if __name__ == "__main__": # pragma: no cover - CLI convenience
|
|
337
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import io
|
|
5
|
+
import xml.etree.ElementTree as ET
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import BinaryIO, Sequence
|
|
9
|
+
from zipfile import ZIP_STORED, BadZipFile, ZipFile
|
|
10
|
+
|
|
11
|
+
EXPECTED_MIMETYPE = "application/hwp+zip"
|
|
12
|
+
CONTAINER_PATH = "META-INF/container.xml"
|
|
13
|
+
MANIFEST_PATH = "Contents/content.hpf"
|
|
14
|
+
HEADER_PATH = "Contents/header.xml"
|
|
15
|
+
VERSION_PATH = "version.xml"
|
|
16
|
+
REQUIRED_CORE_FILES = ("mimetype", CONTAINER_PATH, MANIFEST_PATH, HEADER_PATH, VERSION_PATH)
|
|
17
|
+
OPF_NS = {"opf": "http://www.idpf.org/2007/opf/"}
|
|
18
|
+
CONTAINER_NS = {
|
|
19
|
+
"ct": "urn:oasis:names:tc:opendocument:xmlns:container",
|
|
20
|
+
"ocf": "urn:oasis:names:tc:opendocument:xmlns:container",
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"PackageValidationIssue",
|
|
25
|
+
"PackageValidationReport",
|
|
26
|
+
"validate_package",
|
|
27
|
+
"main",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True)
class PackageValidationIssue:
    """Immutable description of one problem found in a package."""

    # Name of the archive part (or ``"archive"``) the problem relates to.
    part_name: str
    # Human-readable explanation of the problem.
    message: str

    def __str__(self) -> str:  # pragma: no cover - human readable helper
        """Render the issue as ``part_name: message``."""
        return f"{self.part_name}: {self.message}"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class PackageValidationReport:
    """Immutable outcome of a package validation run."""

    # Names of the archive members that were inspected.
    checked_parts: tuple[str, ...]
    # Problems found; empty when the package passed.
    issues: tuple[PackageValidationIssue, ...]

    @property
    def ok(self) -> bool:
        """Whether validation found no issues."""
        return not self.issues

    def __bool__(self) -> bool:  # pragma: no cover - convenience alias
        """Make the report truthy exactly when it is ``ok``."""
        return self.ok
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _open_zip(source: str | Path | bytes | BinaryIO) -> ZipFile:
|
|
54
|
+
if isinstance(source, (str, Path)):
|
|
55
|
+
return ZipFile(source, "r")
|
|
56
|
+
if isinstance(source, bytes):
|
|
57
|
+
return ZipFile(io.BytesIO(source), "r")
|
|
58
|
+
return ZipFile(source, "r")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _parse_xml(payload: bytes) -> ET.Element:
|
|
62
|
+
try:
|
|
63
|
+
return ET.fromstring(payload)
|
|
64
|
+
except ET.ParseError as exc:
|
|
65
|
+
raise ValueError(f"malformed XML: {exc}") from exc
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _container_rootfiles(container_root: ET.Element) -> list[str]:
    """Return the target path of every ``rootfile`` declared in the container.

    The path is read from the ``full-path`` attribute, falling back to
    ``fullPath`` and then ``full_path``; elements without any of them are
    ignored.

    Args:
        container_root: Parsed root element of the container document.

    Returns:
        The declared paths, one per matching element.
    """
    paths: list[str] = []
    # Several prefixes in CONTAINER_NS can map to the same namespace URI.
    # Search each distinct URI once, otherwise every rootfile is collected
    # (and later reported) once per duplicate prefix.
    for namespace in dict.fromkeys(CONTAINER_NS.values()):
        for elem in container_root.findall(f".//{{{namespace}}}rootfile"):
            path = (
                elem.get("full-path")
                or elem.get("fullPath")
                or elem.get("full_path")
            )
            if path:
                paths.append(path)
    return paths
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _manifest_hrefs(manifest_root: ET.Element) -> set[str]:
    """Return the distinct non-empty ``href`` values of all ``opf:item`` elements."""
    declared = (item.get("href") for item in manifest_root.findall(".//opf:item", OPF_NS))
    return {href for href in declared if href}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _spine_hrefs(manifest_root: ET.Element) -> list[str]:
    """Return the ``href`` of each spine ``itemref``, in document order.

    ``opf:item`` elements that carry both ``id`` and ``href`` form a lookup
    table.  Each ``opf:itemref`` whose ``idref`` is in that table contributes
    the matching ``href``; other references are skipped.
    """
    id_to_href: dict[str, str] = {
        item.get("id"): item.get("href")
        for item in manifest_root.findall(".//opf:item", OPF_NS)
        if item.get("id") and item.get("href")
    }
    referenced_ids = (
        itemref.get("idref") for itemref in manifest_root.findall(".//opf:itemref", OPF_NS)
    )
    return [id_to_href[idref] for idref in referenced_ids if idref and idref in id_to_href]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def validate_package(source: str | Path | bytes | BinaryIO) -> PackageValidationReport:
    """Check the structure of an HWPX package and report what is wrong.

    Checks, in order: the source opens as a ZIP; the required core files are
    present; the archive is not empty; ``mimetype`` has the expected content,
    is the first entry and is stored uncompressed; every ``.xml`` / ``.hpf``
    member is well-formed; the container declares rootfiles that exist; the
    manifest's hrefs and spine items exist; and ``header.xml`` is referenced
    by the manifest.

    Args:
        source: Path, raw bytes or binary file object holding the archive.

    Returns:
        A report listing the member names and any issues found.
    """
    checked_parts: list[str] = []
    issues: list[PackageValidationIssue] = []

    def flag(part_name: str, message: str) -> None:
        # Record one problem against the given part.
        issues.append(PackageValidationIssue(part_name, message))

    try:
        archive = _open_zip(source)
    except BadZipFile:
        return PackageValidationReport(
            checked_parts=(),
            issues=(PackageValidationIssue("archive", "not a valid ZIP archive"),),
        )

    with archive as zf:
        names = zf.namelist()
        present = set(names)  # membership tests only; order comes from names
        checked_parts.extend(names)

        for required in REQUIRED_CORE_FILES:
            if required not in present:
                flag(required, "missing required file")

        if not names:
            flag("archive", "empty archive")
            return PackageValidationReport(tuple(checked_parts), tuple(issues))

        if "mimetype" in present:
            try:
                mimetype = zf.read("mimetype").decode("utf-8").strip()
            except UnicodeDecodeError:
                mimetype = "<binary>"
            if mimetype != EXPECTED_MIMETYPE:
                flag("mimetype", f"expected {EXPECTED_MIMETYPE!r}, got {mimetype!r}")
            if names[0] != "mimetype":
                flag("mimetype", "must be the first ZIP entry")
            if zf.getinfo("mimetype").compress_type != ZIP_STORED:
                flag("mimetype", "must use ZIP_STORED")

        xml_roots: dict[str, ET.Element] = {}
        for name in names:
            if not name.endswith((".xml", ".hpf")):
                continue
            try:
                xml_roots[name] = _parse_xml(zf.read(name))
            except ValueError as exc:
                flag(name, str(exc))

        container_root = xml_roots.get(CONTAINER_PATH)
        if container_root is not None:
            rootfiles = _container_rootfiles(container_root)
            if not rootfiles:
                flag(CONTAINER_PATH, "declares no rootfile entries")
            for rootfile in rootfiles:
                if rootfile not in present:
                    flag(CONTAINER_PATH, f"rootfile points to missing part {rootfile!r}")

        manifest_root = xml_roots.get(MANIFEST_PATH)
        if manifest_root is not None:
            hrefs = _manifest_hrefs(manifest_root)
            for href in sorted(hrefs):
                if href not in present:
                    flag(MANIFEST_PATH, f"manifest href missing from archive: {href}")

            spine_hrefs = _spine_hrefs(manifest_root)
            if not spine_hrefs:
                flag(MANIFEST_PATH, "spine declares no section parts")
            for href in spine_hrefs:
                if href not in present:
                    flag(MANIFEST_PATH, f"spine item missing from archive: {href}")

            if HEADER_PATH in present and HEADER_PATH not in hrefs:
                flag(MANIFEST_PATH, "header.xml is not referenced in manifest")

        return PackageValidationReport(tuple(checked_parts), tuple(issues))
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: validate one package and print the outcome.

    Returns:
        ``0`` when no issues were found, ``1`` otherwise (each issue is
        printed on its own line).
    """
    parser = argparse.ArgumentParser(description="Validate HWPX package structure")
    parser.add_argument("source", help="Path to the HWPX file")
    args = parser.parse_args(argv)

    report = validate_package(args.source)
    if not report.issues:
        print("All package validations passed.")
        return 0

    for issue in report.issues:
        print(f"ERROR: {issue}")
    return 1
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
if __name__ == "__main__": # pragma: no cover - CLI convenience
|
|
219
|
+
raise SystemExit(main())
|
hwpx/tools/page_guard.py
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""Proxy checks for layout drift between a reference and an output HWPX.
|
|
2
|
+
|
|
3
|
+
This module does not calculate rendered page counts. It compares structural and
|
|
4
|
+
textual metrics that often correlate with page-layout drift.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import io
|
|
11
|
+
import json
|
|
12
|
+
from dataclasses import asdict, dataclass
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import BinaryIO, Iterable, Sequence
|
|
15
|
+
from zipfile import ZipFile
|
|
16
|
+
|
|
17
|
+
from lxml import etree
|
|
18
|
+
|
|
19
|
+
NS = {
|
|
20
|
+
"hp": "http://www.hancom.co.kr/hwpml/2011/paragraph",
|
|
21
|
+
"hs": "http://www.hancom.co.kr/hwpml/2011/section",
|
|
22
|
+
"opf": "http://www.idpf.org/2007/opf/",
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
_SHAPE_TAGS = {
|
|
26
|
+
"line",
|
|
27
|
+
"rect",
|
|
28
|
+
"ellipse",
|
|
29
|
+
"arc",
|
|
30
|
+
"polygon",
|
|
31
|
+
"curve",
|
|
32
|
+
"connectLine",
|
|
33
|
+
"textart",
|
|
34
|
+
"pic",
|
|
35
|
+
"compose",
|
|
36
|
+
"equation",
|
|
37
|
+
"ole",
|
|
38
|
+
"container",
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
__all__ = [
|
|
42
|
+
"DocumentMetrics",
|
|
43
|
+
"collect_metrics",
|
|
44
|
+
"compare_metrics",
|
|
45
|
+
"main",
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass(frozen=True)
class DocumentMetrics:
    """Structural and textual counts gathered from one document."""

    # Number of section documents that were parsed.
    section_count: int
    # Number of paragraph elements collected.
    paragraph_count: int
    # Paragraphs whose ``pageBreak`` attribute equals "1".
    page_break_count: int
    # Paragraphs whose ``columnBreak`` attribute equals "1".
    column_break_count: int
    # Number of table elements found.
    table_count: int
    # Total number of elements whose local name is a known shape tag.
    shape_count: int
    # Total number of control entries counted.
    control_count: int
    # Per table: (rowCnt, colCnt, width, height, repeatHeader, pageBreak).
    table_shapes: list[tuple[str, str, str, str, str, str]]
    # Sorted (shape tag, occurrences) pairs.
    shape_types: list[tuple[str, int]]
    # Sorted (control name, occurrences) pairs.
    control_types: list[tuple[str, int]]
    # Total characters of paragraph text.
    text_char_total: int
    # Total characters of paragraph text with whitespace removed.
    text_char_total_nospace: int
    # Text length of each paragraph, in collection order.
    paragraph_text_lengths: list[int]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _section_files(zf: ZipFile) -> list[str]:
    """List the archive members to treat as sections.

    When ``Contents/content.hpf`` exists, the result is the ``href`` of each
    ``opf:itemref`` that resolves to an ``opf:item`` with both ``id`` and
    ``href``, in spine order.  When the manifest is missing, the result is
    every member named ``Contents/section*.xml`` in archive order.
    """
    try:
        manifest_root = etree.fromstring(zf.read("Contents/content.hpf"))
    except KeyError:
        return [
            member
            for member in zf.namelist()
            if member.startswith("Contents/section") and member.endswith(".xml")
        ]

    id_to_href: dict[str, str] = {
        item.get("id"): item.get("href")
        for item in manifest_root.findall(".//opf:item", namespaces=NS)
        if item.get("id") and item.get("href")
    }
    referenced_ids = (
        itemref.get("idref")
        for itemref in manifest_root.findall(".//opf:itemref", namespaces=NS)
    )
    return [id_to_href[idref] for idref in referenced_ids if idref and idref in id_to_href]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _text_of_t_node(node: etree._Element) -> str:
|
|
92
|
+
return "".join(node.itertext())
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _local_name(tag: str) -> str:
|
|
96
|
+
if "}" in tag:
|
|
97
|
+
return tag.split("}", 1)[1]
|
|
98
|
+
return tag
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _iter_section_roots(source: str | Path | bytes | BinaryIO) -> Iterable[etree._Element]:
    """Yield the parsed root element of each section in ``source``.

    Raw ``bytes`` are wrapped in an in-memory buffer; anything else is given
    to ``ZipFile`` directly.  The archive is closed when iteration finishes
    or the generator is closed.
    """
    handle = io.BytesIO(source) if isinstance(source, bytes) else source
    archive = ZipFile(handle, "r")
    with archive:
        for member in _section_files(archive):
            yield etree.fromstring(archive.read(member))
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def collect_metrics(source: str | Path | bytes | BinaryIO) -> DocumentMetrics:
    """Gather structural and textual metrics from every section of a document.

    Args:
        source: Path, raw bytes or binary file object holding the archive.

    Returns:
        Counts of sections, paragraphs, breaks, tables, shapes and controls,
        plus per-table attribute tuples, sorted shape/control histograms and
        text length totals.
    """
    section_roots = list(_iter_section_roots(source))

    paragraphs: list[etree._Element] = []
    tables: list[etree._Element] = []
    table_shapes: list[tuple[str, str, str, str, str, str]] = []
    shape_types: dict[str, int] = {}
    control_types: dict[str, int] = {}
    paragraph_text_lengths: list[int] = []
    text_char_total = 0
    text_char_total_nospace = 0
    page_break_count = 0
    column_break_count = 0

    for root in section_roots:
        # NOTE(review): this relative query only matches hs:sec elements
        # *below* the root; if the root itself is hs:sec it finds nothing and
        # the fallback collects every descendant paragraph -- confirm intent.
        section_paragraphs = root.xpath(".//hs:sec/hp:p", namespaces=NS)
        if not section_paragraphs:
            section_paragraphs = root.xpath(".//hp:p", namespaces=NS)
        paragraphs.extend(section_paragraphs)

        section_tables = root.xpath(".//hp:tbl", namespaces=NS)
        tables.extend(section_tables)

        for element in root.iter():
            # lxml also yields comments and processing instructions here;
            # their .tag is not a string and they are not shapes or controls.
            if not isinstance(element.tag, str):
                continue
            name = _local_name(element.tag)
            if name in _SHAPE_TAGS:
                shape_types[name] = shape_types.get(name, 0) + 1
            if name == "ctrl":
                control_counted = False
                for child in element:
                    if not isinstance(child.tag, str):
                        continue  # skip comment / PI children
                    child_name = _local_name(child.tag)
                    control_types[child_name] = control_types.get(child_name, 0) + 1
                    control_counted = True
                if not control_counted:
                    # A ctrl without element children is counted as itself.
                    control_types["ctrl"] = control_types.get("ctrl", 0) + 1

        for table in section_tables:
            size = table.find("hp:sz", namespaces=NS)
            table_shapes.append(
                (
                    table.get("rowCnt", ""),
                    table.get("colCnt", ""),
                    size.get("width", "") if size is not None else "",
                    size.get("height", "") if size is not None else "",
                    table.get("repeatHeader", ""),
                    table.get("pageBreak", ""),
                )
            )

        for paragraph in section_paragraphs:
            if paragraph.get("pageBreak") == "1":
                page_break_count += 1
            if paragraph.get("columnBreak") == "1":
                column_break_count += 1
            paragraph_length = 0
            for text_node in paragraph.xpath(".//hp:t", namespaces=NS):
                text = _text_of_t_node(text_node)
                paragraph_length += len(text)
                text_char_total += len(text)
                text_char_total_nospace += len("".join(text.split()))
            paragraph_text_lengths.append(paragraph_length)

    return DocumentMetrics(
        section_count=len(section_roots),
        paragraph_count=len(paragraphs),
        page_break_count=page_break_count,
        column_break_count=column_break_count,
        table_count=len(tables),
        shape_count=sum(shape_types.values()),
        control_count=sum(control_types.values()),
        table_shapes=table_shapes,
        shape_types=sorted(shape_types.items()),
        control_types=sorted(control_types.items()),
        text_char_total=text_char_total,
        text_char_total_nospace=text_char_total_nospace,
        paragraph_text_lengths=paragraph_text_lengths,
    )
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _ratio_delta(reference_value: int, output_value: int) -> float:
|
|
194
|
+
base = max(reference_value, 1)
|
|
195
|
+
return abs(output_value - reference_value) / base
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def compare_metrics(
    reference: DocumentMetrics,
    output: DocumentMetrics,
    *,
    max_text_delta_ratio: float = 0.15,
    max_paragraph_delta_ratio: float = 0.25,
) -> list[str]:
    """Compare two metric snapshots and describe every detected difference.

    Args:
        reference: Metrics collected from the reference document.
        output: Metrics collected from the generated document.
        max_text_delta_ratio: Largest tolerated relative change of the
            whitespace-free character total.
        max_paragraph_delta_ratio: Largest tolerated relative change of a
            single paragraph's text length.

    Returns:
        Human-readable error messages, in check order. An empty list means
        no check failed.
    """
    problems: list[str] = []

    # Scalar counters have to match exactly; the label is the prefix used in
    # the reported message.
    exact_counters = (
        ("section", "section_count"),
        ("paragraph", "paragraph_count"),
        ("pageBreak", "page_break_count"),
        ("columnBreak", "column_break_count"),
        ("table", "table_count"),
        ("shape", "shape_count"),
        ("control", "control_count"),
    )
    for label, attribute in exact_counters:
        ref_value = getattr(reference, attribute)
        out_value = getattr(output, attribute)
        if ref_value != out_value:
            problems.append(f"{label} count mismatch: ref={ref_value}, out={out_value}")

    # Structured fields are compared wholesale and reported with a fixed message.
    structural_checks = (
        ("table_shapes", "table shape mismatch (rowCnt/colCnt/width/height/repeatHeader/pageBreak)"),
        ("shape_types", "shape type histogram mismatch"),
        ("control_types", "control type histogram mismatch"),
    )
    for attribute, message in structural_checks:
        if getattr(reference, attribute) != getattr(output, attribute):
            problems.append(message)

    # Overall text volume, measured without whitespace.
    ref_chars = reference.text_char_total_nospace
    out_chars = output.text_char_total_nospace
    text_delta = _ratio_delta(ref_chars, out_chars)
    if text_delta > max_text_delta_ratio:
        problems.append(
            f"total text length drift exceeded: ref={ref_chars}, out={out_chars}, "
            f"delta={text_delta:.2%}, limit={max_text_delta_ratio:.2%}"
        )

    # Per-paragraph drift is only checked when both documents have the same
    # number of recorded paragraph lengths, so positions can be paired up.
    ref_lengths = reference.paragraph_text_lengths
    out_lengths = output.paragraph_text_lengths
    if len(ref_lengths) != len(out_lengths):
        return problems

    for position, (ref_len, out_len) in enumerate(zip(ref_lengths, out_lengths), start=1):
        if ref_len == 0 == out_len:
            # Both paragraphs are empty: nothing to measure.
            continue
        paragraph_delta = _ratio_delta(ref_len, out_len)
        if paragraph_delta > max_paragraph_delta_ratio:
            problems.append(
                f"paragraph {position} text drift exceeded: "
                f"ref={ref_len}, out={out_len}, delta={paragraph_delta:.2%}, "
                f"limit={max_paragraph_delta_ratio:.2%}"
            )
    return problems
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the page-guard command line interface.

    Collects metrics for the reference and output documents, optionally
    prints them as JSON, then compares them and prints a PASS/FAIL verdict.
    With ``--json`` the metrics are printed before the verdict, both on
    stdout.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when no check failed; ``1`` when an input path does not exist
        or at least one check failed.
    """
    # Imported locally so this function does not depend on the module's
    # import block, which is outside the reviewed range.
    import sys
    from pathlib import Path

    parser = argparse.ArgumentParser(
        description="Reference-vs-output HWPX layout-drift proxy checker"
    )
    parser.add_argument("--reference", "-r", required=True, help="Reference HWPX path")
    parser.add_argument("--output", "-o", required=True, help="Output HWPX path")
    parser.add_argument("--max-text-delta-ratio", type=float, default=0.15)
    parser.add_argument("--max-paragraph-delta-ratio", type=float, default=0.25)
    parser.add_argument("--json", action="store_true", help="Print collected metrics as JSON")
    args = parser.parse_args(argv)

    # Report missing inputs up front instead of letting the loader fail with
    # a traceback. The message goes to stderr so stdout stays reserved for
    # the metrics and the verdict.
    missing = [path for path in (args.reference, args.output) if not Path(path).exists()]
    if missing:
        for path in missing:
            print(f"ERROR: file not found: {path}", file=sys.stderr)
        return 1

    reference = collect_metrics(args.reference)
    output = collect_metrics(args.output)

    if args.json:
        print(
            json.dumps(
                {"reference": asdict(reference), "output": asdict(output)},
                ensure_ascii=False,
                indent=2,
            )
        )

    errors = compare_metrics(
        reference,
        output,
        max_text_delta_ratio=args.max_text_delta_ratio,
        max_paragraph_delta_ratio=args.max_paragraph_delta_ratio,
    )
    if errors:
        print("FAIL: page guard")
        for error in errors:
            print(f" - {error}")
        return 1

    print("PASS: page guard")
    return 0
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
if __name__ == "__main__": # pragma: no cover - CLI convenience
|
|
305
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import asdict, dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Sequence
|
|
8
|
+
from xml.etree import ElementTree as ET
|
|
9
|
+
|
|
10
|
+
from ..opc.package import HwpxPackage
|
|
11
|
+
from .page_guard import DocumentMetrics, collect_metrics
|
|
12
|
+
|
|
13
|
+
_HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
|
|
14
|
+
_HH = {"hh": _HH_NS}
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"HeaderSummary",
|
|
18
|
+
"TemplateAnalysis",
|
|
19
|
+
"analyze_template",
|
|
20
|
+
"extract_template_parts",
|
|
21
|
+
"main",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class HeaderSummary:
    """Immutable counts of style definitions found in a header XML part.

    Produced by ``_summarize_header``; every count is 0 when no header
    element was available.
    """

    # Number of ``hh:font`` elements nested under an ``hh:fontface`` element.
    font_count: int
    # Number of ``hh:charPr`` elements anywhere below the header root.
    char_pr_count: int
    # Number of ``hh:paraPr`` elements anywhere below the header root.
    para_pr_count: int
    # Number of ``hh:borderFill`` elements anywhere below the header root.
    border_fill_count: int
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class TemplateAnalysis:
    """Immutable result of ``analyze_template`` for one HWPX package."""

    # File name (without directories) of the analysed source path.
    source_name: str
    # Names of the parts reported by the package.
    part_names: tuple[str, ...]
    # ``full_path`` of each rootfile reported by the package.
    rootfiles: tuple[str, ...]
    # ``full_path`` of the package's main content entry.
    manifest_path: str
    # Header part paths reported by the package.
    header_paths: tuple[str, ...]
    # Section part paths reported by the package.
    section_paths: tuple[str, ...]
    # Version part path reported by the package, or None.
    version_path: str | None
    # Style counts taken from the first header part (all zero without one).
    header_summary: HeaderSummary
    # Layout-drift proxy metrics collected from the same source file.
    proxy_metrics: DocumentMetrics
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _summarize_header(element: ET.Element | None) -> HeaderSummary:
    """Count the style definitions below a header root element.

    Args:
        element: Parsed header root, or ``None`` when no header exists.

    Returns:
        A ``HeaderSummary`` with one count per style kind; every count is 0
        when ``element`` is ``None``.
    """
    # Summary field -> element path (in the ``hh`` namespace) to count.
    queries = {
        "font_count": ".//hh:fontface/hh:font",
        "char_pr_count": ".//hh:charPr",
        "para_pr_count": ".//hh:paraPr",
        "border_fill_count": ".//hh:borderFill",
    }
    counts = {
        field: 0 if element is None else len(element.findall(path, _HH))
        for field, path in queries.items()
    }
    return HeaderSummary(**counts)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def analyze_template(source: str | Path) -> TemplateAnalysis:
    """Open an HWPX package and gather its structural summary.

    Args:
        source: Path of the HWPX file to analyse.

    Returns:
        A ``TemplateAnalysis`` holding the package's part listing, the style
        counts of its first header part, and the layout-drift proxy metrics
        collected from the same file.
    """
    path = Path(source)
    pkg = HwpxPackage.open(path)

    headers = tuple(pkg.header_paths())
    # Only the first header part is summarised.
    if headers:
        first_header = pkg.get_xml(headers[0])
    else:
        first_header = None
    manifest = pkg.main_content.full_path
    version = pkg.version_path()

    parts = tuple(pkg.part_names())
    roots = tuple(entry.full_path for entry in pkg.iter_rootfiles())
    sections = tuple(pkg.section_paths())
    summary = _summarize_header(first_header)
    metrics = collect_metrics(path)

    return TemplateAnalysis(
        source_name=path.name,
        part_names=parts,
        rootfiles=roots,
        manifest_path=manifest,
        header_paths=headers,
        section_paths=sections,
        version_path=version,
        header_summary=summary,
        proxy_metrics=metrics,
    )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _write_part(package: HwpxPackage, part_name: str, destination: Path) -> Path:
|
|
85
|
+
destination.parent.mkdir(parents=True, exist_ok=True)
|
|
86
|
+
destination.write_bytes(package.get_part(part_name))
|
|
87
|
+
return destination
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _checked_destination(root: Path, part_name: str) -> Path:
    """Return ``root / part_name`` after checking it cannot leave ``root``.

    Part names are read from the archive being processed, so an absolute
    name or one containing ``..`` would otherwise place the file outside
    the extraction directory.

    Raises:
        ValueError: If ``part_name`` is anchored (absolute or drive-qualified)
            or contains a ``..`` component.
    """
    relative = Path(part_name)
    if relative.anchor or ".." in relative.parts:
        raise ValueError(
            f"unsafe part name, refusing to extract outside {root}: {part_name!r}"
        )
    return root / part_name


def extract_template_parts(
    source: str | Path,
    *,
    extract_dir: str | Path | None = None,
    extract_header: str | Path | None = None,
    extract_section: str | Path | None = None,
    extract_section_dir: str | Path | None = None,
) -> tuple[Path, ...]:
    """Copy selected parts of an HWPX package onto disk.

    Each keyword argument enables one independent extraction; those left as
    ``None`` are skipped.

    Args:
        source: Path of the HWPX file to read.
        extract_dir: Directory that receives the main content part, all
            header and section parts, the version part and the container
            part (the last two only when present), each under its
            package-relative name.
        extract_header: File path that receives the first header part.
        extract_section: File path that receives the first section part.
        extract_section_dir: Directory that receives every section part,
            stored under the part's base file name only.

    Returns:
        The paths written, in the order they were written.

    Raises:
        FileNotFoundError: If ``extract_header`` or ``extract_section`` is
            requested but the package has no such part.
        ValueError: If a part name would resolve outside ``extract_dir``.
    """
    source_path = Path(source)
    package = HwpxPackage.open(source_path)
    written: list[Path] = []

    if extract_dir is not None:
        root = Path(extract_dir)
        root.mkdir(parents=True, exist_ok=True)

        main_part = package.main_content.full_path
        written.append(_write_part(package, main_part, _checked_destination(root, main_part)))
        for part_name in package.header_paths():
            written.append(_write_part(package, part_name, _checked_destination(root, part_name)))
        for part_name in package.section_paths():
            written.append(_write_part(package, part_name, _checked_destination(root, part_name)))
        version_path = package.version_path()
        if version_path and package.has_part(version_path):
            written.append(
                _write_part(package, version_path, _checked_destination(root, version_path))
            )
        if package.has_part(package.CONTAINER_PATH):
            written.append(
                _write_part(
                    package,
                    package.CONTAINER_PATH,
                    _checked_destination(root, package.CONTAINER_PATH),
                )
            )

    if extract_header is not None:
        header_paths = package.header_paths()
        if not header_paths:
            raise FileNotFoundError("package does not contain a header part")
        written.append(_write_part(package, header_paths[0], Path(extract_header)))

    if extract_section is not None:
        section_paths = package.section_paths()
        if not section_paths:
            raise FileNotFoundError("package does not contain a section part")
        written.append(_write_part(package, section_paths[0], Path(extract_section)))

    if extract_section_dir is not None:
        section_root = Path(extract_section_dir)
        section_root.mkdir(parents=True, exist_ok=True)
        for part_name in package.section_paths():
            # Only the base name is used, so nested part paths are flattened.
            written.append(_write_part(package, part_name, section_root / Path(part_name).name))

    return tuple(written)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _print_summary(analysis: TemplateAnalysis) -> None:
|
|
138
|
+
metrics = analysis.proxy_metrics
|
|
139
|
+
print(f"source: {analysis.source_name}")
|
|
140
|
+
print(f"manifest: {analysis.manifest_path}")
|
|
141
|
+
print(f"rootfiles: {', '.join(analysis.rootfiles) or '(none)'}")
|
|
142
|
+
print(f"headers: {', '.join(analysis.header_paths) or '(none)'}")
|
|
143
|
+
print(f"sections: {', '.join(analysis.section_paths) or '(none)'}")
|
|
144
|
+
if analysis.version_path:
|
|
145
|
+
print(f"version part: {analysis.version_path}")
|
|
146
|
+
print(
|
|
147
|
+
"header styles: "
|
|
148
|
+
f"fonts={analysis.header_summary.font_count}, "
|
|
149
|
+
f"charPr={analysis.header_summary.char_pr_count}, "
|
|
150
|
+
f"paraPr={analysis.header_summary.para_pr_count}, "
|
|
151
|
+
f"borderFill={analysis.header_summary.border_fill_count}"
|
|
152
|
+
)
|
|
153
|
+
print(
|
|
154
|
+
"layout-drift proxy: "
|
|
155
|
+
f"paragraphs={metrics.paragraph_count}, "
|
|
156
|
+
f"tables={metrics.table_count}, "
|
|
157
|
+
f"shapes={metrics.shape_count}, "
|
|
158
|
+
f"controls={metrics.control_count}, "
|
|
159
|
+
f"pageBreaks={metrics.page_break_count}, "
|
|
160
|
+
f"columnBreaks={metrics.column_break_count}"
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the template-analyzer command line interface.

    Analyses the input package, performs any requested extractions, then
    prints either a JSON payload (``--json``), a text summary, or nothing
    beyond the extraction list when only ``--output-json`` is given.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success; ``1`` when the input file is missing or analysis or
        extraction raised an exception.
    """
    cli = argparse.ArgumentParser(
        description="Analyze a reference HWPX template for template-preserving workflows"
    )
    cli.add_argument("input", help="Input HWPX path")
    cli.add_argument("--json", action="store_true", help="Print machine-readable JSON summary")
    cli.add_argument("--output-json", help="Write the JSON summary to a file")
    cli.add_argument(
        "--extract-dir",
        help="Copy manifest, header, sections, version, and container.xml into a directory",
    )
    cli.add_argument("--extract-header", help="Copy the first header.xml part to a path")
    cli.add_argument("--extract-section", help="Copy the first section XML part to a path")
    cli.add_argument(
        "--extract-section-dir",
        help="Backward-compatible alias that copies section*.xml files into a directory",
    )
    options = cli.parse_args(argv)

    template_path = Path(options.input)
    if not template_path.is_file():
        print(f"ERROR: file not found: {template_path}")
        return 1

    # CLI boundary: any failure is reported as a single line and a non-zero
    # status rather than a traceback.
    try:
        analysis = analyze_template(template_path)
        extracted = extract_template_parts(
            template_path,
            extract_dir=options.extract_dir,
            extract_header=options.extract_header,
            extract_section=options.extract_section,
            extract_section_dir=options.extract_section_dir,
        )
    except Exception as exc:
        print(f"ERROR: {exc}")
        return 1

    wants_json = options.json or options.output_json
    if not wants_json:
        _print_summary(analysis)
    else:
        payload = json.dumps(asdict(analysis), ensure_ascii=False, indent=2)
        if options.json:
            print(payload)
        if options.output_json:
            json_path = Path(options.output_json)
            json_path.parent.mkdir(parents=True, exist_ok=True)
            json_path.write_text(payload, encoding="utf-8")

    for extracted_path in extracted:
        print(f"extracted: {extracted_path}")
    return 0
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
if __name__ == "__main__": # pragma: no cover - CLI convenience
|
|
218
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Sequence
|
|
7
|
+
|
|
8
|
+
from .text_extractor import TextExtractor
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"extract_plain",
|
|
12
|
+
"extract_markdown",
|
|
13
|
+
"main",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def extract_plain(hwpx_path: str, *, include_tables: bool = False) -> str:
    """Return the document's text as produced by ``TextExtractor.extract_text``.

    Args:
        hwpx_path: Path of the HWPX file to read.
        include_tables: Forwarded as ``include_nested`` to the extractor.

    Objects are skipped and empty entries are dropped, per the fixed
    ``object_behavior="skip"`` and ``skip_empty=True`` options.
    """
    options = {
        "include_nested": include_tables,
        "object_behavior": "skip",
        "skip_empty": True,
    }
    with TextExtractor(hwpx_path) as extractor:
        return extractor.extract_text(**options)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def extract_markdown(hwpx_path: str) -> str:
    """Return the document's paragraphs as newline-joined Markdown-style text.

    A ``---`` rule surrounded by blank lines is emitted before a section
    whenever earlier output exists. Paragraphs whose text is blank are
    dropped, and nested paragraphs are prefixed with a leading space.
    """
    output_lines: list[str] = []
    with TextExtractor(hwpx_path) as extractor:
        for section in extractor.iter_sections():
            if output_lines:
                output_lines += ["", "---", ""]
            for paragraph in extractor.iter_paragraphs(section, include_nested=True):
                text = paragraph.text(object_behavior="skip")
                if text.strip():
                    if paragraph.is_nested:
                        text = f" {text}"
                    output_lines.append(text)
    return "\n".join(output_lines)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the text-extraction command line interface.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` after the text has been printed or written; ``1`` when the
        input file does not exist (the error goes to stderr).
    """
    cli = argparse.ArgumentParser(description="Extract text from an HWPX document")
    cli.add_argument("input", help="Path to the .hwpx file")
    cli.add_argument("--format", "-f", choices=["plain", "markdown"], default="plain")
    cli.add_argument("--include-tables", action="store_true", help="Include nested table text")
    cli.add_argument("--output", "-o", help="Write output to a file instead of stdout")
    options = cli.parse_args(argv)

    source = Path(options.input)
    if not source.is_file():
        print(f"Error: File not found: {options.input}", file=sys.stderr)
        return 1

    # --include-tables only affects the plain format.
    if options.format == "markdown":
        text = extract_markdown(str(source))
    else:
        text = extract_plain(str(source), include_tables=options.include_tables)

    if not options.output:
        print(text)
        return 0

    Path(options.output).write_text(text, encoding="utf-8")
    return 0
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
if __name__ == "__main__": # pragma: no cover - CLI convenience
|
|
66
|
+
raise SystemExit(main())
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-hwpx
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.7
|
|
4
4
|
Summary: Hancom HWPX 패키지를 로드하고 편집하기 위한 Python 유틸리티 모음
|
|
5
5
|
Author: python-hwpx Maintainers
|
|
6
6
|
License: Non-Commercial License
|
|
@@ -165,7 +165,8 @@ doc.save_to_path("결과물.hwpx")
|
|
|
165
165
|
| 🔎 **객체 검색** | 태그/속성/XPath | 특정 요소 탐색, 주석 이터레이터 |
|
|
166
166
|
| 🎨 **스타일 치환** | 서식 기반 필터 | 색상/밑줄/charPrIDRef 기반 Run 검색 및 교체 |
|
|
167
167
|
| 📤 **내보내기** | 텍스트/HTML/Markdown | 문서 변환 출력 |
|
|
168
|
-
| ✅ **유효성 검사** | XSD
|
|
168
|
+
| ✅ **유효성 검사** | XSD + 패키지 구조 | CLI(`hwpx-validate`, `hwpx-validate-package`) 및 API |
|
|
169
|
+
| 🧰 **워크플로 도구** | unpack/pack/template analyze/page guard | 템플릿 보존형 XML-first 작업 보조 |
|
|
169
170
|
| 🏗️ **저수준 XML** | 데이터클래스 매핑 | OWPML 스키마 ↔ Python 객체 직접 조작 |
|
|
170
171
|
| 🔄 **네임스페이스 호환** | 자동 정규화 | HWPML 2016 → 2011 자동 변환 |
|
|
171
172
|
|
|
@@ -262,10 +263,15 @@ python-hwpx
|
|
|
262
263
|
│ ├── body.py # 타입이 지정된 본문 모델
|
|
263
264
|
│ └── common.py # 범용 XML ↔ 데이터클래스
|
|
264
265
|
├── hwpx.tools
|
|
266
|
+
│ ├── archive_cli # unpack/pack CLI 및 재패킹 메타데이터
|
|
265
267
|
│ ├── text_extractor # 텍스트 추출 파이프라인
|
|
268
|
+
│ ├── text_extract_cli # 텍스트 추출 CLI
|
|
266
269
|
│ ├── object_finder # 객체 탐색 유틸리티
|
|
267
270
|
│ ├── exporter # 텍스트/HTML/Markdown 내보내기
|
|
268
|
-
│
|
|
271
|
+
│ ├── validator # 스키마 유효성 검사 (hwpx-validate CLI)
|
|
272
|
+
│ ├── package_validator# ZIP/OPC/HWPX 구조 검사
|
|
273
|
+
│ ├── page_guard # layout-drift proxy
|
|
274
|
+
│ └── template_analyzer# 레퍼런스 문서 분석/추출
|
|
269
275
|
└── hwpx.templates # 내장 빈 문서 템플릿
|
|
270
276
|
```
|
|
271
277
|
|
|
@@ -274,8 +280,26 @@ python-hwpx
|
|
|
274
280
|
```bash
|
|
275
281
|
# HWPX 문서 스키마 유효성 검사
|
|
276
282
|
hwpx-validate 문서.hwpx
|
|
283
|
+
|
|
284
|
+
# ZIP/OPC/HWPX 패키지 구조 검사
|
|
285
|
+
hwpx-validate-package 문서.hwpx
|
|
286
|
+
|
|
287
|
+
# HWPX 풀기 / 다시 묶기
|
|
288
|
+
hwpx-unpack 문서.hwpx ./unpacked
|
|
289
|
+
hwpx-pack ./unpacked ./repacked.hwpx
|
|
290
|
+
|
|
291
|
+
# 레퍼런스 템플릿 분석과 파트 추출
|
|
292
|
+
hwpx-analyze-template 문서.hwpx --extract-dir ./template-parts --json
|
|
293
|
+
|
|
294
|
+
# plain / markdown 텍스트 추출
|
|
295
|
+
hwpx-text-extract 문서.hwpx --format markdown --output 문서.md
|
|
296
|
+
|
|
297
|
+
# 레이아웃 드리프트 프록시 비교
|
|
298
|
+
hwpx-page-guard --reference 원본.hwpx --output 결과.hwpx
|
|
277
299
|
```
|
|
278
300
|
|
|
301
|
+
`hwpx-page-guard`는 렌더된 실제 쪽수를 계산하지 않습니다. 대신 단락 수, 표 수, shape/control 수, 명시적 page/column break, 텍스트 길이 통계를 비교해 레이아웃 드리프트 위험을 탐지하는 프록시 도구입니다.
|
|
302
|
+
|
|
279
303
|
## 문서
|
|
280
304
|
|
|
281
305
|
| | |
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
hwpx/__init__.py,sha256=RZ4O84G3Zp_L8ELArtwO3KVPvhx1vLYyKC2Ka1M5mwc,857
|
|
2
|
-
hwpx/document.py,sha256=
|
|
2
|
+
hwpx/document.py,sha256=UnM61gSf9Hno5n0YWrVSTod9USmA3WtRQeeQadLbYdQ,48133
|
|
3
3
|
hwpx/package.py,sha256=YK4oYEPk7la2BZKZepoVHzrjGIPMDnDdPa02Hh-RTBw,1103
|
|
4
4
|
hwpx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
hwpx/templates.py,sha256=kZ_gV0bP-DIvr5CJuzs-uGnt8XVncJCI3cGFq083uTg,1149
|
|
@@ -20,16 +20,21 @@ hwpx/oxml/schema.py,sha256=THswXdMNpAiSoLxpvUGbdbI66hW-SKuUqSw4vdkIYmA,1246
|
|
|
20
20
|
hwpx/oxml/section.py,sha256=WwxZ6PWPeMrj2L9mz4JlqFGXwd7E7qAuSBuM5dgRjZk,199
|
|
21
21
|
hwpx/oxml/table.py,sha256=pdO2TTAcbEC6Z4cnaOnB-bcmuZ1KVado7J3RiY_zOfE,193
|
|
22
22
|
hwpx/oxml/utils.py,sha256=to0yytS7vtLSvWl-dQyegT6MWClMK55b1Sp1uagEkI4,2591
|
|
23
|
-
hwpx/tools/__init__.py,sha256=
|
|
23
|
+
hwpx/tools/__init__.py,sha256=e1OaIVdbkmjTvLOzQ7qVRfuuQ1611225pNZByB2ln9w,1270
|
|
24
|
+
hwpx/tools/archive_cli.py,sha256=ih14UmayJTpOw14cRBmrPKbfMFNFfoyiHzFQ2CYt_sE,10419
|
|
24
25
|
hwpx/tools/exporter.py,sha256=GcbNtV4rIWOJv5nBcgdX0yfkXQa-xQhfrCzXWgaNbTE,8862
|
|
25
26
|
hwpx/tools/object_finder.py,sha256=vbZ8FuIpGF-2vpbWDeZWi4UgZ2-3PK_ddQCs0oq1dRw,13440
|
|
27
|
+
hwpx/tools/package_validator.py,sha256=JpXLcWxM0orD38G1v_eeWtuScDXtGqT7Tgh6GR-qOto,7420
|
|
28
|
+
hwpx/tools/page_guard.py,sha256=AaKDWet8QHduoB8smIUgBf8muYBuYenj-xAIv1uFbVA,10454
|
|
29
|
+
hwpx/tools/template_analyzer.py,sha256=QCqZRMxLFMTwoyYAzEmtzc8B4AwtqTMHV2hBCWXLKtQ,7919
|
|
30
|
+
hwpx/tools/text_extract_cli.py,sha256=pIBMIFuFX10IEegw7fQ3gtUbQyjNgbAUYkQWh2S3aQs,2150
|
|
26
31
|
hwpx/tools/text_extractor.py,sha256=r2OJRgDOiR6n14hXRcvkYuSFtEHpAV6jasHv-ZLHx1Y,24238
|
|
27
32
|
hwpx/tools/validator.py,sha256=KThqBQKKQfZkuLMGtzONbPkzy877-2FgT22FHPmt_gI,5979
|
|
28
33
|
hwpx/tools/_schemas/header.xsd,sha256=mJXuFMuHGT1JnFFaluUpYUglwjMCNlfbFCRVM26eHXE,664
|
|
29
34
|
hwpx/tools/_schemas/section.xsd,sha256=MgvavVHG05RDfUnVPxVU10H4FQOja5ON04_m9Uk_m7E,522
|
|
30
|
-
python_hwpx-2.
|
|
31
|
-
python_hwpx-2.
|
|
32
|
-
python_hwpx-2.
|
|
33
|
-
python_hwpx-2.
|
|
34
|
-
python_hwpx-2.
|
|
35
|
-
python_hwpx-2.
|
|
35
|
+
python_hwpx-2.7.dist-info/licenses/LICENSE,sha256=3F1-JUTcmjmxMpHGeB77ZzaSdhms3h8p1DBBa3lvV08,1609
|
|
36
|
+
python_hwpx-2.7.dist-info/METADATA,sha256=hHFXogt-RCjDcrLVmYVEwi7N8WT8ysqYyUhNCwnfOuM,14236
|
|
37
|
+
python_hwpx-2.7.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
38
|
+
python_hwpx-2.7.dist-info/entry_points.txt,sha256=zKneV9VceQKwbJUo-mUUbwRmQjNyNSzrv44XuMhsaUU,368
|
|
39
|
+
python_hwpx-2.7.dist-info/top_level.txt,sha256=R1iToqDh80Nf2oQhRjTN0rbN2X6kyDUizIocZjkhuxc,5
|
|
40
|
+
python_hwpx-2.7.dist-info/RECORD,,
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
[console_scripts]
|
|
2
|
+
hwpx-analyze-template = hwpx.tools.template_analyzer:main
|
|
3
|
+
hwpx-pack = hwpx.tools.archive_cli:pack_main
|
|
4
|
+
hwpx-page-guard = hwpx.tools.page_guard:main
|
|
5
|
+
hwpx-text-extract = hwpx.tools.text_extract_cli:main
|
|
6
|
+
hwpx-unpack = hwpx.tools.archive_cli:unpack_main
|
|
7
|
+
hwpx-validate = hwpx.tools.validator:main
|
|
8
|
+
hwpx-validate-package = hwpx.tools.package_validator:main
|
|
File without changes
|
|
File without changes
|
|
File without changes
|