python-hwpx 2.10.1__tar.gz → 2.10.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_hwpx-2.10.1/src/python_hwpx.egg-info → python_hwpx-2.10.3}/PKG-INFO +49 -1
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/README.md +48 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/pyproject.toml +1 -1
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/__init__.py +10 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/builder/core.py +21 -2
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/builder/report.py +7 -1
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/document.py +153 -19
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/opc/package.py +331 -9
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/document.py +305 -3
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/template_formfit.py +48 -17
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/__init__.py +8 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/archive_cli.py +18 -6
- python_hwpx-2.10.3/src/hwpx/tools/markdown_export.py +488 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/package_validator.py +215 -1
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/repair.py +91 -8
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/table_navigation.py +77 -1
- {python_hwpx-2.10.1 → python_hwpx-2.10.3/src/python_hwpx.egg-info}/PKG-INFO +49 -1
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/SOURCES.txt +3 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_builder_core.py +5 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_document_formatting.py +101 -0
- python_hwpx-2.10.3/tests/test_document_save_api.py +333 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_gap_closure_tools.py +164 -3
- python_hwpx-2.10.3/tests/test_markdown_export.py +436 -0
- python_hwpx-2.10.3/tests/test_opc_package.py +549 -0
- python_hwpx-2.10.3/tests/test_open_safety_corpus.py +88 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_recover_broken_zip.py +90 -1
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_repair_repack.py +123 -1
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_table_navigation.py +57 -31
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_template_formfit.py +61 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_version_metadata.py +9 -0
- python_hwpx-2.10.1/tests/test_document_save_api.py +0 -55
- python_hwpx-2.10.1/tests/test_opc_package.py +0 -126
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/LICENSE +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/NOTICE +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/setup.cfg +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/authoring.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/builder/__init__.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/data/Skeleton.hwpx +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/form_fill.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/opc/relationships.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/opc/xml_utils.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/__init__.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/body.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/common.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/header.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/header_part.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/memo.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/namespaces.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/paragraph.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/parser.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/schema.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/section.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/table.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/utils.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/package.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/presets/__init__.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/presets/proposal.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/py.typed +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/templates.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/_schemas/header.xsd +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/_schemas/section.xsd +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/exporter.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/generic_inventory.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/id_integrity.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/object_finder.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/page_guard.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/recover.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/report_parser.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/report_utils.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/roundtrip_diff.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/table_cleanup.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/template_analyzer.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/text_extract_cli.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/text_extractor.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/validator.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/dependency_links.txt +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/entry_points.txt +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/requires.txt +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/top_level.txt +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_builder_plan_v2.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_builder_vertical_slice.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_coverage_promotion.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_coverage_targets.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_deviations_registry.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_document_context_manager.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_document_plan.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_document_plan_computed_fields.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_form_fill_split_run.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_government_report_preset.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_government_table_profile.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_hp_tab_support.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_hwpxlib_corpus_read.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_id_generator_range.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_id_integrity.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_inline_models.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_integration_hwpx_compatibility.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_integration_roundtrip.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_memo_and_style_editing.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_namespace_handling.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_new_features.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_oxml_parsing.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_packaging_license_metadata.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_packaging_py_typed.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_paragraph_section_management.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_proposal_preset.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_report_parser.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_report_utils.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_repr_snapshots.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_roundtrip_fidelity.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_section_headers.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_skeleton_template_ids.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_split_merged_cell.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_table_cleanup.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_tables_default_border.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_text_extractor_annotations.py +0 -0
- {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_validation_severity.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-hwpx
|
|
3
|
-
Version: 2.10.1
|
|
3
|
+
Version: 2.10.3
|
|
4
4
|
Summary: 한글 없이 HWPX 문서를 열고, 편집하고, 생성하고, 검증하는 Python 자동화 라이브러리
|
|
5
5
|
Author: python-hwpx Maintainers
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -115,6 +115,47 @@ hwpx-validate-package 보고서.hwpx
|
|
|
115
115
|
hwpx-analyze-template 보고서.hwpx
|
|
116
116
|
```
|
|
117
117
|
|
|
118
|
+
### 4. 풍부한 Markdown 변환 (서식·표·각주·이미지 보존)
|
|
119
|
+
|
|
120
|
+
`export_markdown()`는 단순 평문 추출이고, `export_rich_markdown()`는 인라인 서식(`**굵게**`, `*기울임*`, `~~취소선~~`),
|
|
121
|
+
표(중첩 포함, colspan/rowspan 안전), 도형 텍스트, 이미지, 각주/미주, 하이퍼링크, 제목(`#`/`##`) 자동 감지까지 보존한다.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
from hwpx import HwpxDocument
|
|
125
|
+
|
|
126
|
+
doc = HwpxDocument.open("보고서.hwpx")
|
|
127
|
+
|
|
128
|
+
md = doc.export_rich_markdown(
|
|
129
|
+
image_dir="out/images", # BinData 이미지를 디스크에 추출
|
|
130
|
+
image_ref_prefix="images/", # 마크다운 내  경로 접두
|
|
131
|
+
detect_headings=True, # Ⅰ./1. 패턴 기반 #/## 자동
|
|
132
|
+
)
|
|
133
|
+
print(md)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
문자열·경로·바이트도 그대로 받는다:
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
from hwpx.tools.markdown_export import export_markdown
|
|
140
|
+
|
|
141
|
+
md = export_markdown("보고서.hwpx") # 경로
|
|
142
|
+
md = export_markdown(open("a.hwpx", "rb").read()) # bytes
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### 5. 각주 본문에 혼합 서식 / 하이퍼링크 추가
|
|
146
|
+
|
|
147
|
+
`HwpxOxmlNote`에 `body_paragraph`, `add_run`, `add_hyperlink` helper가 있어 각주 본문을
|
|
148
|
+
직접 paragraph로 다루지 않고도 인라인 서식·링크를 손쉽게 채울 수 있다.
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
para = section.paragraphs[0]
|
|
152
|
+
note = para.add_footnote("") # 빈 각주 생성 후 본문 구성
|
|
153
|
+
note.add_run("자세한 내용은 ", )
|
|
154
|
+
note.add_run("정부 공식 사이트", bold=True)
|
|
155
|
+
note.add_run("를 참고하라: ")
|
|
156
|
+
note.add_hyperlink("https://www.kasa.go.kr", "우주항공청")
|
|
157
|
+
```
|
|
158
|
+
|
|
118
159
|
처음에는 `open/new -> edit/extract -> save_to_path` 흐름만 잡으면 된다. 패키지 구조, XML 파트, 템플릿 회귀 점검은 필요할 때만 확장하면 된다.
|
|
119
160
|
|
|
120
161
|
## 어디부터 읽으면 되나
|
|
@@ -244,6 +285,7 @@ doc.set_footer_text("1 / 10", page_type="BOTH")
|
|
|
244
285
|
# 표 셀 병합·분할
|
|
245
286
|
table.merge_cells(0, 0, 1, 1) # (0,0)~(1,1) 병합
|
|
246
287
|
table.set_cell_text(0, 0, "병합된 셀", logical=True, split_merged=True)
|
|
288
|
+
table.set_cell_text(0, 0, "line 1\nline 2", split_paragraphs=True)
|
|
247
289
|
|
|
248
290
|
# 양식형 표 자동 채우기
|
|
249
291
|
form = doc.add_table(2, 2)
|
|
@@ -257,6 +299,12 @@ doc.fill_by_path({
|
|
|
257
299
|
})
|
|
258
300
|
```
|
|
259
301
|
|
|
302
|
+
`doc.paragraphs`의 인덱스는 본문 직속 문단 0-based 기준입니다. 표 안 문단은
|
|
303
|
+
본문 `paragraph_index`에 섞지 않고 `get_table_map()`의 cell `location`
|
|
304
|
+
(`table_index`, `row`, `col`, `cell_paragraph_index`)으로 다룹니다.
|
|
305
|
+
`get_table_map()`은 `caption_text`와 `preceding_paragraph_text`를 분리해
|
|
306
|
+
반환하고, 셀 미리보기의 여러 문단은 `\n`으로 유지합니다.
|
|
307
|
+
|
|
260
308
|
### 🔍 텍스트 추출 & 검색
|
|
261
309
|
|
|
262
310
|
```python
|
|
@@ -79,6 +79,47 @@ hwpx-validate-package 보고서.hwpx
|
|
|
79
79
|
hwpx-analyze-template 보고서.hwpx
|
|
80
80
|
```
|
|
81
81
|
|
|
82
|
+
### 4. 풍부한 Markdown 변환 (서식·표·각주·이미지 보존)
|
|
83
|
+
|
|
84
|
+
`export_markdown()`는 단순 평문 추출이고, `export_rich_markdown()`는 인라인 서식(`**굵게**`, `*기울임*`, `~~취소선~~`),
|
|
85
|
+
표(중첩 포함, colspan/rowspan 안전), 도형 텍스트, 이미지, 각주/미주, 하이퍼링크, 제목(`#`/`##`) 자동 감지까지 보존한다.
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from hwpx import HwpxDocument
|
|
89
|
+
|
|
90
|
+
doc = HwpxDocument.open("보고서.hwpx")
|
|
91
|
+
|
|
92
|
+
md = doc.export_rich_markdown(
|
|
93
|
+
image_dir="out/images", # BinData 이미지를 디스크에 추출
|
|
94
|
+
image_ref_prefix="images/", # 마크다운 내  경로 접두
|
|
95
|
+
detect_headings=True, # Ⅰ./1. 패턴 기반 #/## 자동
|
|
96
|
+
)
|
|
97
|
+
print(md)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
문자열·경로·바이트도 그대로 받는다:
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from hwpx.tools.markdown_export import export_markdown
|
|
104
|
+
|
|
105
|
+
md = export_markdown("보고서.hwpx") # 경로
|
|
106
|
+
md = export_markdown(open("a.hwpx", "rb").read()) # bytes
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### 5. 각주 본문에 혼합 서식 / 하이퍼링크 추가
|
|
110
|
+
|
|
111
|
+
`HwpxOxmlNote`에 `body_paragraph`, `add_run`, `add_hyperlink` helper가 있어 각주 본문을
|
|
112
|
+
직접 paragraph로 다루지 않고도 인라인 서식·링크를 손쉽게 채울 수 있다.
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
para = section.paragraphs[0]
|
|
116
|
+
note = para.add_footnote("") # 빈 각주 생성 후 본문 구성
|
|
117
|
+
note.add_run("자세한 내용은 ", )
|
|
118
|
+
note.add_run("정부 공식 사이트", bold=True)
|
|
119
|
+
note.add_run("를 참고하라: ")
|
|
120
|
+
note.add_hyperlink("https://www.kasa.go.kr", "우주항공청")
|
|
121
|
+
```
|
|
122
|
+
|
|
82
123
|
처음에는 `open/new -> edit/extract -> save_to_path` 흐름만 잡으면 된다. 패키지 구조, XML 파트, 템플릿 회귀 점검은 필요할 때만 확장하면 된다.
|
|
83
124
|
|
|
84
125
|
## 어디부터 읽으면 되나
|
|
@@ -208,6 +249,7 @@ doc.set_footer_text("1 / 10", page_type="BOTH")
|
|
|
208
249
|
# 표 셀 병합·분할
|
|
209
250
|
table.merge_cells(0, 0, 1, 1) # (0,0)~(1,1) 병합
|
|
210
251
|
table.set_cell_text(0, 0, "병합된 셀", logical=True, split_merged=True)
|
|
252
|
+
table.set_cell_text(0, 0, "line 1\nline 2", split_paragraphs=True)
|
|
211
253
|
|
|
212
254
|
# 양식형 표 자동 채우기
|
|
213
255
|
form = doc.add_table(2, 2)
|
|
@@ -221,6 +263,12 @@ doc.fill_by_path({
|
|
|
221
263
|
})
|
|
222
264
|
```
|
|
223
265
|
|
|
266
|
+
`doc.paragraphs`의 인덱스는 본문 직속 문단 0-based 기준입니다. 표 안 문단은
|
|
267
|
+
본문 `paragraph_index`에 섞지 않고 `get_table_map()`의 cell `location`
|
|
268
|
+
(`table_index`, `row`, `col`, `cell_paragraph_index`)으로 다룹니다.
|
|
269
|
+
`get_table_map()`은 `caption_text`와 `preceding_paragraph_text`를 분리해
|
|
270
|
+
반환하고, 셀 미리보기의 여러 문단은 `\n`으로 유지합니다.
|
|
271
|
+
|
|
224
272
|
### 🔍 텍스트 추출 & 검색
|
|
225
273
|
|
|
226
274
|
```python
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "python-hwpx"
|
|
7
|
-
version = "2.10.1"
|
|
7
|
+
version = "2.10.3"
|
|
8
8
|
description = "한글 없이 HWPX 문서를 열고, 편집하고, 생성하고, 검증하는 Python 자동화 라이브러리"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
license = "Apache-2.0"
|
|
@@ -25,6 +25,12 @@ from .tools.text_extractor import (
|
|
|
25
25
|
TextExtractor,
|
|
26
26
|
)
|
|
27
27
|
from .tools.object_finder import FoundElement, ObjectFinder
|
|
28
|
+
from .tools.package_validator import (
|
|
29
|
+
EditorOpenSafetyReport,
|
|
30
|
+
PackageValidationReport,
|
|
31
|
+
validate_editor_open_safety,
|
|
32
|
+
validate_package,
|
|
33
|
+
)
|
|
28
34
|
from .document import HwpxDocument
|
|
29
35
|
from .package import HwpxPackage
|
|
30
36
|
from .authoring import (
|
|
@@ -58,7 +64,9 @@ __all__ = [
|
|
|
58
64
|
"DocumentBlock",
|
|
59
65
|
"DocumentPlan",
|
|
60
66
|
"DocumentStylePreset",
|
|
67
|
+
"EditorOpenSafetyReport",
|
|
61
68
|
"ParagraphInfo",
|
|
69
|
+
"PackageValidationReport",
|
|
62
70
|
"PlanValidationReport",
|
|
63
71
|
"SectionInfo",
|
|
64
72
|
"TEMPLATE_FORMFIT_BASELINE_SCHEMA_VERSION",
|
|
@@ -76,4 +84,6 @@ __all__ = [
|
|
|
76
84
|
"inspect_operating_plan_quality",
|
|
77
85
|
"normalize_document_plan",
|
|
78
86
|
"validate_document_plan",
|
|
87
|
+
"validate_editor_open_safety",
|
|
88
|
+
"validate_package",
|
|
79
89
|
]
|
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
from typing import Any, Mapping, Sequence
|
|
8
8
|
|
|
9
9
|
from hwpx.document import HwpxDocument
|
|
10
|
+
from hwpx.tools.package_validator import validate_editor_open_safety
|
|
10
11
|
from hwpx.tools.package_validator import validate_package
|
|
11
12
|
from hwpx.tools.validator import validate_document
|
|
12
13
|
|
|
@@ -576,13 +577,24 @@ def _merge_flags(*flag_sets: dict[str, bool]) -> dict[str, bool]:
|
|
|
576
577
|
return merged
|
|
577
578
|
|
|
578
579
|
|
|
579
|
-
def _hard_gates(
|
|
580
|
+
def _hard_gates(
|
|
581
|
+
package_report: object,
|
|
582
|
+
document_report: object,
|
|
583
|
+
reopen_report: ReopenReport,
|
|
584
|
+
editor_open_safety_report: object | None = None,
|
|
585
|
+
) -> dict[str, str]:
|
|
580
586
|
document_warnings = getattr(document_report, "warnings", ())
|
|
587
|
+
editor_open_safety_ok = (
|
|
588
|
+
True
|
|
589
|
+
if editor_open_safety_report is None
|
|
590
|
+
else bool(getattr(editor_open_safety_report, "ok", False))
|
|
591
|
+
)
|
|
581
592
|
return {
|
|
582
593
|
"package_validation": "pass" if getattr(package_report, "ok", False) else "fail",
|
|
583
594
|
"document_errors": "pass" if getattr(document_report, "ok", False) else "fail",
|
|
584
595
|
"schema_lint": "warning" if document_warnings else "pass",
|
|
585
596
|
"reopen": "pass" if reopen_report.ok else "fail",
|
|
597
|
+
"editor_open_safety": "pass" if editor_open_safety_ok else "fail",
|
|
586
598
|
"id_integrity": "unavailable",
|
|
587
599
|
}
|
|
588
600
|
|
|
@@ -696,6 +708,7 @@ class Document:
|
|
|
696
708
|
document.save_to_path(path)
|
|
697
709
|
package_report = validate_package(path)
|
|
698
710
|
document_report = validate_document(path)
|
|
711
|
+
editor_open_safety_report = validate_editor_open_safety(path)
|
|
699
712
|
try:
|
|
700
713
|
reopened_document = HwpxDocument.open(path)
|
|
701
714
|
reopen_report = ReopenReport(ok=True, document=reopened_document)
|
|
@@ -713,8 +726,14 @@ class Document:
|
|
|
713
726
|
validate_document=document_report,
|
|
714
727
|
reopened=reopen_report,
|
|
715
728
|
metadata=self.metadata.as_dict() if self.metadata is not None else {},
|
|
716
|
-
hard_gates=_hard_gates(
|
|
729
|
+
hard_gates=_hard_gates(
|
|
730
|
+
package_report,
|
|
731
|
+
document_report,
|
|
732
|
+
reopen_report,
|
|
733
|
+
editor_open_safety_report,
|
|
734
|
+
),
|
|
717
735
|
visual_review_required=visual_review_required,
|
|
718
736
|
feature_flags=feature_flags,
|
|
737
|
+
editor_open_safety=editor_open_safety_report,
|
|
719
738
|
)
|
|
720
739
|
return report
|
|
@@ -6,7 +6,7 @@ from os import PathLike
|
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
8
|
from hwpx.tools.id_integrity import IdIntegrityReport, check_id_integrity
|
|
9
|
-
from hwpx.tools.package_validator import PackageValidationReport
|
|
9
|
+
from hwpx.tools.package_validator import EditorOpenSafetyReport, PackageValidationReport
|
|
10
10
|
from hwpx.tools.validator import ValidationReport
|
|
11
11
|
|
|
12
12
|
|
|
@@ -32,6 +32,7 @@ class BuilderSaveReport:
|
|
|
32
32
|
visual_review_required: bool = False
|
|
33
33
|
feature_flags: dict[str, bool] = field(default_factory=dict)
|
|
34
34
|
id_integrity: IdIntegrityReport | None = None
|
|
35
|
+
editor_open_safety: EditorOpenSafetyReport | None = None
|
|
35
36
|
|
|
36
37
|
def __post_init__(self) -> None:
|
|
37
38
|
hard_gates = dict(self.hard_gates)
|
|
@@ -52,6 +53,11 @@ class BuilderSaveReport:
|
|
|
52
53
|
"hard_gates": dict(self.hard_gates),
|
|
53
54
|
"visual_review_required": self.visual_review_required,
|
|
54
55
|
"feature_flags": dict(self.feature_flags),
|
|
56
|
+
"editor_open_safety": (
|
|
57
|
+
None
|
|
58
|
+
if self.editor_open_safety is None
|
|
59
|
+
else self.editor_open_safety.to_dict()
|
|
60
|
+
),
|
|
55
61
|
"validate_package": {
|
|
56
62
|
"ok": self.validate_package.ok,
|
|
57
63
|
"checked_parts": list(self.validate_package.checked_parts),
|
|
@@ -5,12 +5,15 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import xml.etree.ElementTree as ET
|
|
7
7
|
import io
|
|
8
|
+
import os
|
|
9
|
+
import tempfile
|
|
8
10
|
import warnings
|
|
9
11
|
from datetime import datetime
|
|
10
12
|
import logging
|
|
11
13
|
import uuid
|
|
12
14
|
|
|
13
15
|
from os import PathLike
|
|
16
|
+
from pathlib import Path
|
|
14
17
|
from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
|
|
15
18
|
|
|
16
19
|
from lxml import etree
|
|
@@ -39,7 +42,10 @@ from .oxml import (
|
|
|
39
42
|
TrackChange,
|
|
40
43
|
TrackChangeAuthor,
|
|
41
44
|
)
|
|
42
|
-
from .opc.package import HwpxPackage
|
|
45
|
+
from .opc.package import (
|
|
46
|
+
HwpxPackage,
|
|
47
|
+
_UNCHECKED_SAVE_TOKEN,
|
|
48
|
+
)
|
|
43
49
|
from .oxml.namespaces import HH, HH_NS, HP, HP_NS, register_owpml_namespaces
|
|
44
50
|
from .templates import blank_document_bytes
|
|
45
51
|
|
|
@@ -83,6 +89,91 @@ def _png_dimensions(image_data: bytes) -> tuple[int, int] | None:
|
|
|
83
89
|
return width, height
|
|
84
90
|
|
|
85
91
|
|
|
92
|
+
def _write_bytes_atomically(path: str | PathLike[str], data: bytes) -> None:
|
|
93
|
+
target = Path(path)
|
|
94
|
+
fd, tmp_path = tempfile.mkstemp(dir=str(target.parent), suffix=".hwpx.tmp")
|
|
95
|
+
try:
|
|
96
|
+
with os.fdopen(fd, "wb") as tmp_fh:
|
|
97
|
+
tmp_fh.write(data)
|
|
98
|
+
os.replace(tmp_path, str(target))
|
|
99
|
+
except BaseException:
|
|
100
|
+
try:
|
|
101
|
+
os.unlink(tmp_path)
|
|
102
|
+
except OSError:
|
|
103
|
+
pass
|
|
104
|
+
raise
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _capture_stream_checkpoint(stream: BinaryIO) -> tuple[int, bytes] | None:
|
|
108
|
+
try:
|
|
109
|
+
position = stream.tell()
|
|
110
|
+
except (AttributeError, OSError):
|
|
111
|
+
return None
|
|
112
|
+
try:
|
|
113
|
+
tail = stream.read()
|
|
114
|
+
except (AttributeError, OSError):
|
|
115
|
+
try:
|
|
116
|
+
end_position = stream.seek(0, os.SEEK_END)
|
|
117
|
+
except (AttributeError, OSError):
|
|
118
|
+
return None
|
|
119
|
+
try:
|
|
120
|
+
stream.seek(position)
|
|
121
|
+
except (AttributeError, OSError):
|
|
122
|
+
return None
|
|
123
|
+
if end_position == position:
|
|
124
|
+
return position, b""
|
|
125
|
+
return None
|
|
126
|
+
try:
|
|
127
|
+
stream.seek(position)
|
|
128
|
+
except (AttributeError, OSError):
|
|
129
|
+
return None
|
|
130
|
+
return position, tail
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _rollback_stream(stream: BinaryIO, checkpoint: tuple[int, bytes] | None) -> None:
|
|
134
|
+
if checkpoint is None:
|
|
135
|
+
return
|
|
136
|
+
position, tail = checkpoint
|
|
137
|
+
try:
|
|
138
|
+
stream.seek(position)
|
|
139
|
+
if tail:
|
|
140
|
+
stream.write(tail)
|
|
141
|
+
stream.truncate(position + len(tail))
|
|
142
|
+
else:
|
|
143
|
+
stream.truncate(position)
|
|
144
|
+
stream.seek(position)
|
|
145
|
+
except (AttributeError, OSError):
|
|
146
|
+
return
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _write_stream_or_rollback(stream: BinaryIO, data: bytes) -> None:
|
|
150
|
+
checkpoint = _capture_stream_checkpoint(stream)
|
|
151
|
+
if checkpoint is None:
|
|
152
|
+
raise OSError(
|
|
153
|
+
"HWPX stream save requires a checkpointable stream; "
|
|
154
|
+
"use save_to_path() for non-seekable outputs"
|
|
155
|
+
)
|
|
156
|
+
try:
|
|
157
|
+
written = stream.write(data)
|
|
158
|
+
if written is not None and written != len(data):
|
|
159
|
+
raise OSError(
|
|
160
|
+
"short write while saving HWPX stream: "
|
|
161
|
+
f"wrote {written} of {len(data)} bytes"
|
|
162
|
+
)
|
|
163
|
+
except BaseException:
|
|
164
|
+
_rollback_stream(stream, checkpoint)
|
|
165
|
+
raise
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _summarize_validation_issues(issues: Sequence[Any], *, limit: int = 5) -> str:
|
|
169
|
+
selected = [str(issue) for issue in issues[:limit]]
|
|
170
|
+
remaining = len(issues) - len(selected)
|
|
171
|
+
summary = "; ".join(selected)
|
|
172
|
+
if remaining > 0:
|
|
173
|
+
summary += f" ... and {remaining} more"
|
|
174
|
+
return summary
|
|
175
|
+
|
|
176
|
+
|
|
86
177
|
class HwpxDocument:
|
|
87
178
|
"""Provides a user-friendly API for editing HWPX documents."""
|
|
88
179
|
|
|
@@ -1472,6 +1563,14 @@ class HwpxDocument:
|
|
|
1472
1563
|
from .tools.exporter import export_markdown
|
|
1473
1564
|
return export_markdown(self, **kwargs) # type: ignore[arg-type]
|
|
1474
1565
|
|
|
1566
|
+
def export_rich_markdown(self, **kwargs: object) -> str:
|
|
1567
|
+
"""Export rich Markdown preserving inline styles, tables, footnotes, hyperlinks, images, and shape text.
|
|
1568
|
+
|
|
1569
|
+
Keyword args forwarded to :func:`~hwpx.tools.markdown_export.export_markdown`.
|
|
1570
|
+
"""
|
|
1571
|
+
from .tools.markdown_export import export_markdown as _rich
|
|
1572
|
+
return _rich(self, **kwargs) # type: ignore[arg-type]
|
|
1573
|
+
|
|
1475
1574
|
# ------------------------------------------------------------------
|
|
1476
1575
|
# Validation
|
|
1477
1576
|
# ------------------------------------------------------------------
|
|
@@ -1485,7 +1584,9 @@ class HwpxDocument:
|
|
|
1485
1584
|
"""
|
|
1486
1585
|
from .tools.validator import validate_document
|
|
1487
1586
|
|
|
1488
|
-
return validate_document(
|
|
1587
|
+
return validate_document(
|
|
1588
|
+
self._to_bytes_for_validation()
|
|
1589
|
+
)
|
|
1489
1590
|
|
|
1490
1591
|
def _run_pre_save_validation(self) -> None:
|
|
1491
1592
|
"""Raise if validate_on_save is enabled and the document is invalid."""
|
|
@@ -1493,29 +1594,38 @@ class HwpxDocument:
|
|
|
1493
1594
|
return
|
|
1494
1595
|
report = self.validate()
|
|
1495
1596
|
if not report.ok:
|
|
1496
|
-
msgs =
|
|
1497
|
-
remaining = len(report.issues) - 5
|
|
1498
|
-
if remaining > 0:
|
|
1499
|
-
msgs += f" … and {remaining} more"
|
|
1597
|
+
msgs = _summarize_validation_issues(report.issues)
|
|
1500
1598
|
raise ValueError(f"Document validation failed: {msgs}")
|
|
1501
1599
|
|
|
1600
|
+
def _run_open_safety_validation(self, archive_bytes: bytes) -> None:
|
|
1601
|
+
"""Raise if generated bytes are unsafe to hand to an HWPX editor."""
|
|
1602
|
+
|
|
1603
|
+
from .tools.package_validator import validate_editor_open_safety
|
|
1604
|
+
|
|
1605
|
+
report = validate_editor_open_safety(archive_bytes)
|
|
1606
|
+
if not report.ok:
|
|
1607
|
+
raise ValueError(
|
|
1608
|
+
"Generated HWPX package failed open-safety validation: "
|
|
1609
|
+
+ report.summary
|
|
1610
|
+
)
|
|
1611
|
+
|
|
1502
1612
|
def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
|
|
1503
1613
|
"""Persist pending changes to *path* and return the same path."""
|
|
1504
1614
|
|
|
1505
1615
|
self._run_pre_save_validation()
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
self.
|
|
1509
|
-
return path
|
|
1616
|
+
archive_bytes = self._to_bytes_raw(reset_dirty=False)
|
|
1617
|
+
_write_bytes_atomically(path, archive_bytes)
|
|
1618
|
+
self._mark_save_clean()
|
|
1619
|
+
return path
|
|
1510
1620
|
|
|
1511
1621
|
def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
|
|
1512
1622
|
"""Persist pending changes to *stream* and return the same stream."""
|
|
1513
1623
|
|
|
1514
1624
|
self._run_pre_save_validation()
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
self.
|
|
1518
|
-
return stream
|
|
1625
|
+
archive_bytes = self._to_bytes_raw(reset_dirty=False)
|
|
1626
|
+
_write_stream_or_rollback(stream, archive_bytes)
|
|
1627
|
+
self._mark_save_clean()
|
|
1628
|
+
return stream
|
|
1519
1629
|
|
|
1520
1630
|
def to_bytes(self) -> bytes:
|
|
1521
1631
|
"""Serialize pending changes and return the HWPX archive as bytes."""
|
|
@@ -1523,20 +1633,44 @@ class HwpxDocument:
|
|
|
1523
1633
|
self._run_pre_save_validation()
|
|
1524
1634
|
return self._to_bytes_raw()
|
|
1525
1635
|
|
|
1526
|
-
def _to_bytes_raw(
|
|
1527
|
-
|
|
1636
|
+
def _to_bytes_raw(
|
|
1637
|
+
self,
|
|
1638
|
+
*,
|
|
1639
|
+
reset_dirty: bool = True,
|
|
1640
|
+
) -> bytes:
|
|
1641
|
+
"""Serialize and run editor-open safety validation.
|
|
1528
1642
|
|
|
1529
1643
|
When ``reset_dirty`` is ``False``, the document remains marked as
|
|
1530
1644
|
modified after the archive snapshot is generated.
|
|
1531
1645
|
"""
|
|
1532
1646
|
updates = self._root.serialize()
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1647
|
+
if updates:
|
|
1648
|
+
for part_name, payload in updates.items():
|
|
1649
|
+
self._package.set_part(part_name, payload)
|
|
1650
|
+
result = self._package._save_to_bytes(
|
|
1651
|
+
verify_open_safety=True,
|
|
1652
|
+
mark_clean=False,
|
|
1653
|
+
)
|
|
1536
1654
|
if isinstance(result, bytes):
|
|
1655
|
+
self._run_open_safety_validation(result)
|
|
1656
|
+
if reset_dirty:
|
|
1657
|
+
self._mark_save_clean()
|
|
1537
1658
|
return result
|
|
1538
1659
|
raise TypeError("package.save(None) must return bytes")
|
|
1539
1660
|
|
|
1661
|
+
def _to_bytes_for_validation(self) -> bytes:
|
|
1662
|
+
"""Serialize current state for document validation without handing bytes to callers."""
|
|
1663
|
+
|
|
1664
|
+
updates = self._root.serialize()
|
|
1665
|
+
return self._package._save_bytes_unchecked(
|
|
1666
|
+
updates,
|
|
1667
|
+
_unchecked_token=_UNCHECKED_SAVE_TOKEN,
|
|
1668
|
+
)
|
|
1669
|
+
|
|
1670
|
+
def _mark_save_clean(self) -> None:
|
|
1671
|
+
self._root.reset_dirty()
|
|
1672
|
+
self._package.version_info.mark_clean()
|
|
1673
|
+
|
|
1540
1674
|
@overload
|
|
1541
1675
|
def save(self, path_or_stream: None = None) -> bytes: ...
|
|
1542
1676
|
|