python-hwpx 2.10.1__tar.gz → 2.10.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. {python_hwpx-2.10.1/src/python_hwpx.egg-info → python_hwpx-2.10.3}/PKG-INFO +49 -1
  2. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/README.md +48 -0
  3. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/pyproject.toml +1 -1
  4. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/__init__.py +10 -0
  5. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/builder/core.py +21 -2
  6. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/builder/report.py +7 -1
  7. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/document.py +153 -19
  8. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/opc/package.py +331 -9
  9. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/document.py +305 -3
  10. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/template_formfit.py +48 -17
  11. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/__init__.py +8 -0
  12. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/archive_cli.py +18 -6
  13. python_hwpx-2.10.3/src/hwpx/tools/markdown_export.py +488 -0
  14. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/package_validator.py +215 -1
  15. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/repair.py +91 -8
  16. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/table_navigation.py +77 -1
  17. {python_hwpx-2.10.1 → python_hwpx-2.10.3/src/python_hwpx.egg-info}/PKG-INFO +49 -1
  18. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/SOURCES.txt +3 -0
  19. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_builder_core.py +5 -0
  20. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_document_formatting.py +101 -0
  21. python_hwpx-2.10.3/tests/test_document_save_api.py +333 -0
  22. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_gap_closure_tools.py +164 -3
  23. python_hwpx-2.10.3/tests/test_markdown_export.py +436 -0
  24. python_hwpx-2.10.3/tests/test_opc_package.py +549 -0
  25. python_hwpx-2.10.3/tests/test_open_safety_corpus.py +88 -0
  26. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_recover_broken_zip.py +90 -1
  27. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_repair_repack.py +123 -1
  28. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_table_navigation.py +57 -31
  29. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_template_formfit.py +61 -0
  30. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_version_metadata.py +9 -0
  31. python_hwpx-2.10.1/tests/test_document_save_api.py +0 -55
  32. python_hwpx-2.10.1/tests/test_opc_package.py +0 -126
  33. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/LICENSE +0 -0
  34. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/NOTICE +0 -0
  35. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/setup.cfg +0 -0
  36. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/authoring.py +0 -0
  37. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/builder/__init__.py +0 -0
  38. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/data/Skeleton.hwpx +0 -0
  39. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/form_fill.py +0 -0
  40. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/opc/relationships.py +0 -0
  41. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/opc/xml_utils.py +0 -0
  42. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/__init__.py +0 -0
  43. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/body.py +0 -0
  44. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/common.py +0 -0
  45. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/header.py +0 -0
  46. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/header_part.py +0 -0
  47. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/memo.py +0 -0
  48. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/namespaces.py +0 -0
  49. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/paragraph.py +0 -0
  50. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/parser.py +0 -0
  51. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/schema.py +0 -0
  52. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/section.py +0 -0
  53. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/table.py +0 -0
  54. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/oxml/utils.py +0 -0
  55. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/package.py +0 -0
  56. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/presets/__init__.py +0 -0
  57. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/presets/proposal.py +0 -0
  58. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/py.typed +0 -0
  59. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/templates.py +0 -0
  60. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/_schemas/header.xsd +0 -0
  61. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/_schemas/section.xsd +0 -0
  62. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/exporter.py +0 -0
  63. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/generic_inventory.py +0 -0
  64. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/id_integrity.py +0 -0
  65. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/object_finder.py +0 -0
  66. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/page_guard.py +0 -0
  67. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/recover.py +0 -0
  68. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/report_parser.py +0 -0
  69. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/report_utils.py +0 -0
  70. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/roundtrip_diff.py +0 -0
  71. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/table_cleanup.py +0 -0
  72. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/template_analyzer.py +0 -0
  73. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/text_extract_cli.py +0 -0
  74. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/text_extractor.py +0 -0
  75. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/hwpx/tools/validator.py +0 -0
  76. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/dependency_links.txt +0 -0
  77. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/entry_points.txt +0 -0
  78. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/requires.txt +0 -0
  79. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/src/python_hwpx.egg-info/top_level.txt +0 -0
  80. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_builder_plan_v2.py +0 -0
  81. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_builder_vertical_slice.py +0 -0
  82. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_coverage_promotion.py +0 -0
  83. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_coverage_targets.py +0 -0
  84. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_deviations_registry.py +0 -0
  85. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_document_context_manager.py +0 -0
  86. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_document_plan.py +0 -0
  87. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_document_plan_computed_fields.py +0 -0
  88. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_form_fill_split_run.py +0 -0
  89. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_government_report_preset.py +0 -0
  90. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_government_table_profile.py +0 -0
  91. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_hp_tab_support.py +0 -0
  92. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_hwpxlib_corpus_read.py +0 -0
  93. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_id_generator_range.py +0 -0
  94. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_id_integrity.py +0 -0
  95. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_inline_models.py +0 -0
  96. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_integration_hwpx_compatibility.py +0 -0
  97. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_integration_roundtrip.py +0 -0
  98. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_memo_and_style_editing.py +0 -0
  99. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_namespace_handling.py +0 -0
  100. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_new_features.py +0 -0
  101. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_oxml_parsing.py +0 -0
  102. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_packaging_license_metadata.py +0 -0
  103. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_packaging_py_typed.py +0 -0
  104. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_paragraph_section_management.py +0 -0
  105. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_proposal_preset.py +0 -0
  106. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_report_parser.py +0 -0
  107. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_report_utils.py +0 -0
  108. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_repr_snapshots.py +0 -0
  109. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_roundtrip_fidelity.py +0 -0
  110. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_section_headers.py +0 -0
  111. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_skeleton_template_ids.py +0 -0
  112. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_split_merged_cell.py +0 -0
  113. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_table_cleanup.py +0 -0
  114. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_tables_default_border.py +0 -0
  115. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_text_extractor_annotations.py +0 -0
  116. {python_hwpx-2.10.1 → python_hwpx-2.10.3}/tests/test_validation_severity.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-hwpx
3
- Version: 2.10.1
3
+ Version: 2.10.3
4
4
  Summary: 한글 없이 HWPX 문서를 열고, 편집하고, 생성하고, 검증하는 Python 자동화 라이브러리
5
5
  Author: python-hwpx Maintainers
6
6
  License-Expression: Apache-2.0
@@ -115,6 +115,47 @@ hwpx-validate-package 보고서.hwpx
115
115
  hwpx-analyze-template 보고서.hwpx
116
116
  ```
117
117
 
118
+ ### 4. 풍부한 Markdown 변환 (서식·표·각주·이미지 보존)
119
+
120
+ `export_markdown()`는 단순 평문 추출이고, `export_rich_markdown()`는 인라인 서식(`**굵게**`, `*기울임*`, `~~취소선~~`),
121
+ 표(중첩 포함, colspan/rowspan 안전), 도형 텍스트, 이미지, 각주/미주, 하이퍼링크, 제목(`#`/`##`) 자동 감지까지 보존한다.
122
+
123
+ ```python
124
+ from hwpx import HwpxDocument
125
+
126
+ doc = HwpxDocument.open("보고서.hwpx")
127
+
128
+ md = doc.export_rich_markdown(
129
+ image_dir="out/images", # BinData 이미지를 디스크에 추출
130
+ image_ref_prefix="images/", # 마크다운 내 ![](images/...) 경로 접두
131
+ detect_headings=True, # Ⅰ./1. 패턴 기반 #/## 자동
132
+ )
133
+ print(md)
134
+ ```
135
+
136
+ 문자열·경로·바이트도 그대로 받는다:
137
+
138
+ ```python
139
+ from hwpx.tools.markdown_export import export_markdown
140
+
141
+ md = export_markdown("보고서.hwpx") # 경로
142
+ md = export_markdown(open("a.hwpx", "rb").read()) # bytes
143
+ ```
144
+
145
+ ### 5. 각주 본문에 혼합 서식 / 하이퍼링크 추가
146
+
147
+ `HwpxOxmlNote`에 `body_paragraph`, `add_run`, `add_hyperlink` helper가 있어 각주 본문을
148
+ 직접 paragraph로 다루지 않고도 인라인 서식·링크를 손쉽게 채울 수 있다.
149
+
150
+ ```python
151
+ para = section.paragraphs[0]
152
+ note = para.add_footnote("") # 빈 각주 생성 후 본문 구성
153
+ note.add_run("자세한 내용은 ", )
154
+ note.add_run("정부 공식 사이트", bold=True)
155
+ note.add_run("를 참고하라: ")
156
+ note.add_hyperlink("https://www.kasa.go.kr", "우주항공청")
157
+ ```
158
+
118
159
  처음에는 `open/new -> edit/extract -> save_to_path` 흐름만 잡으면 된다. 패키지 구조, XML 파트, 템플릿 회귀 점검은 필요할 때만 확장하면 된다.
119
160
 
120
161
  ## 어디부터 읽으면 되나
@@ -244,6 +285,7 @@ doc.set_footer_text("1 / 10", page_type="BOTH")
244
285
  # 표 셀 병합·분할
245
286
  table.merge_cells(0, 0, 1, 1) # (0,0)~(1,1) 병합
246
287
  table.set_cell_text(0, 0, "병합된 셀", logical=True, split_merged=True)
288
+ table.set_cell_text(0, 0, "line 1\nline 2", split_paragraphs=True)
247
289
 
248
290
  # 양식형 표 자동 채우기
249
291
  form = doc.add_table(2, 2)
@@ -257,6 +299,12 @@ doc.fill_by_path({
257
299
  })
258
300
  ```
259
301
 
302
+ `doc.paragraphs`의 인덱스는 본문 직속 문단 0-based 기준입니다. 표 안 문단은
303
+ 본문 `paragraph_index`에 섞지 않고 `get_table_map()`의 cell `location`
304
+ (`table_index`, `row`, `col`, `cell_paragraph_index`)으로 다룹니다.
305
+ `get_table_map()`은 `caption_text`와 `preceding_paragraph_text`를 분리해
306
+ 반환하고, 셀 미리보기의 여러 문단은 `\n`으로 유지합니다.
307
+
260
308
  ### 🔍 텍스트 추출 & 검색
261
309
 
262
310
  ```python
@@ -79,6 +79,47 @@ hwpx-validate-package 보고서.hwpx
79
79
  hwpx-analyze-template 보고서.hwpx
80
80
  ```
81
81
 
82
+ ### 4. 풍부한 Markdown 변환 (서식·표·각주·이미지 보존)
83
+
84
+ `export_markdown()`는 단순 평문 추출이고, `export_rich_markdown()`는 인라인 서식(`**굵게**`, `*기울임*`, `~~취소선~~`),
85
+ 표(중첩 포함, colspan/rowspan 안전), 도형 텍스트, 이미지, 각주/미주, 하이퍼링크, 제목(`#`/`##`) 자동 감지까지 보존한다.
86
+
87
+ ```python
88
+ from hwpx import HwpxDocument
89
+
90
+ doc = HwpxDocument.open("보고서.hwpx")
91
+
92
+ md = doc.export_rich_markdown(
93
+ image_dir="out/images", # BinData 이미지를 디스크에 추출
94
+ image_ref_prefix="images/", # 마크다운 내 ![](images/...) 경로 접두
95
+ detect_headings=True, # Ⅰ./1. 패턴 기반 #/## 자동
96
+ )
97
+ print(md)
98
+ ```
99
+
100
+ 문자열·경로·바이트도 그대로 받는다:
101
+
102
+ ```python
103
+ from hwpx.tools.markdown_export import export_markdown
104
+
105
+ md = export_markdown("보고서.hwpx") # 경로
106
+ md = export_markdown(open("a.hwpx", "rb").read()) # bytes
107
+ ```
108
+
109
+ ### 5. 각주 본문에 혼합 서식 / 하이퍼링크 추가
110
+
111
+ `HwpxOxmlNote`에 `body_paragraph`, `add_run`, `add_hyperlink` helper가 있어 각주 본문을
112
+ 직접 paragraph로 다루지 않고도 인라인 서식·링크를 손쉽게 채울 수 있다.
113
+
114
+ ```python
115
+ para = section.paragraphs[0]
116
+ note = para.add_footnote("") # 빈 각주 생성 후 본문 구성
117
+ note.add_run("자세한 내용은 ", )
118
+ note.add_run("정부 공식 사이트", bold=True)
119
+ note.add_run("를 참고하라: ")
120
+ note.add_hyperlink("https://www.kasa.go.kr", "우주항공청")
121
+ ```
122
+
82
123
  처음에는 `open/new -> edit/extract -> save_to_path` 흐름만 잡으면 된다. 패키지 구조, XML 파트, 템플릿 회귀 점검은 필요할 때만 확장하면 된다.
83
124
 
84
125
  ## 어디부터 읽으면 되나
@@ -208,6 +249,7 @@ doc.set_footer_text("1 / 10", page_type="BOTH")
208
249
  # 표 셀 병합·분할
209
250
  table.merge_cells(0, 0, 1, 1) # (0,0)~(1,1) 병합
210
251
  table.set_cell_text(0, 0, "병합된 셀", logical=True, split_merged=True)
252
+ table.set_cell_text(0, 0, "line 1\nline 2", split_paragraphs=True)
211
253
 
212
254
  # 양식형 표 자동 채우기
213
255
  form = doc.add_table(2, 2)
@@ -221,6 +263,12 @@ doc.fill_by_path({
221
263
  })
222
264
  ```
223
265
 
266
+ `doc.paragraphs`의 인덱스는 본문 직속 문단 0-based 기준입니다. 표 안 문단은
267
+ 본문 `paragraph_index`에 섞지 않고 `get_table_map()`의 cell `location`
268
+ (`table_index`, `row`, `col`, `cell_paragraph_index`)으로 다룹니다.
269
+ `get_table_map()`은 `caption_text`와 `preceding_paragraph_text`를 분리해
270
+ 반환하고, 셀 미리보기의 여러 문단은 `\n`으로 유지합니다.
271
+
224
272
  ### 🔍 텍스트 추출 & 검색
225
273
 
226
274
  ```python
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "python-hwpx"
7
- version = "2.10.1"
7
+ version = "2.10.3"
8
8
  description = "한글 없이 HWPX 문서를 열고, 편집하고, 생성하고, 검증하는 Python 자동화 라이브러리"
9
9
  readme = { file = "README.md", content-type = "text/markdown" }
10
10
  license = "Apache-2.0"
@@ -25,6 +25,12 @@ from .tools.text_extractor import (
25
25
  TextExtractor,
26
26
  )
27
27
  from .tools.object_finder import FoundElement, ObjectFinder
28
+ from .tools.package_validator import (
29
+ EditorOpenSafetyReport,
30
+ PackageValidationReport,
31
+ validate_editor_open_safety,
32
+ validate_package,
33
+ )
28
34
  from .document import HwpxDocument
29
35
  from .package import HwpxPackage
30
36
  from .authoring import (
@@ -58,7 +64,9 @@ __all__ = [
58
64
  "DocumentBlock",
59
65
  "DocumentPlan",
60
66
  "DocumentStylePreset",
67
+ "EditorOpenSafetyReport",
61
68
  "ParagraphInfo",
69
+ "PackageValidationReport",
62
70
  "PlanValidationReport",
63
71
  "SectionInfo",
64
72
  "TEMPLATE_FORMFIT_BASELINE_SCHEMA_VERSION",
@@ -76,4 +84,6 @@ __all__ = [
76
84
  "inspect_operating_plan_quality",
77
85
  "normalize_document_plan",
78
86
  "validate_document_plan",
87
+ "validate_editor_open_safety",
88
+ "validate_package",
79
89
  ]
@@ -7,6 +7,7 @@ from pathlib import Path
7
7
  from typing import Any, Mapping, Sequence
8
8
 
9
9
  from hwpx.document import HwpxDocument
10
+ from hwpx.tools.package_validator import validate_editor_open_safety
10
11
  from hwpx.tools.package_validator import validate_package
11
12
  from hwpx.tools.validator import validate_document
12
13
 
@@ -576,13 +577,24 @@ def _merge_flags(*flag_sets: dict[str, bool]) -> dict[str, bool]:
576
577
  return merged
577
578
 
578
579
 
579
- def _hard_gates(package_report: object, document_report: object, reopen_report: ReopenReport) -> dict[str, str]:
580
+ def _hard_gates(
581
+ package_report: object,
582
+ document_report: object,
583
+ reopen_report: ReopenReport,
584
+ editor_open_safety_report: object | None = None,
585
+ ) -> dict[str, str]:
580
586
  document_warnings = getattr(document_report, "warnings", ())
587
+ editor_open_safety_ok = (
588
+ True
589
+ if editor_open_safety_report is None
590
+ else bool(getattr(editor_open_safety_report, "ok", False))
591
+ )
581
592
  return {
582
593
  "package_validation": "pass" if getattr(package_report, "ok", False) else "fail",
583
594
  "document_errors": "pass" if getattr(document_report, "ok", False) else "fail",
584
595
  "schema_lint": "warning" if document_warnings else "pass",
585
596
  "reopen": "pass" if reopen_report.ok else "fail",
597
+ "editor_open_safety": "pass" if editor_open_safety_ok else "fail",
586
598
  "id_integrity": "unavailable",
587
599
  }
588
600
 
@@ -696,6 +708,7 @@ class Document:
696
708
  document.save_to_path(path)
697
709
  package_report = validate_package(path)
698
710
  document_report = validate_document(path)
711
+ editor_open_safety_report = validate_editor_open_safety(path)
699
712
  try:
700
713
  reopened_document = HwpxDocument.open(path)
701
714
  reopen_report = ReopenReport(ok=True, document=reopened_document)
@@ -713,8 +726,14 @@ class Document:
713
726
  validate_document=document_report,
714
727
  reopened=reopen_report,
715
728
  metadata=self.metadata.as_dict() if self.metadata is not None else {},
716
- hard_gates=_hard_gates(package_report, document_report, reopen_report),
729
+ hard_gates=_hard_gates(
730
+ package_report,
731
+ document_report,
732
+ reopen_report,
733
+ editor_open_safety_report,
734
+ ),
717
735
  visual_review_required=visual_review_required,
718
736
  feature_flags=feature_flags,
737
+ editor_open_safety=editor_open_safety_report,
719
738
  )
720
739
  return report
@@ -6,7 +6,7 @@ from os import PathLike
6
6
  from typing import Any
7
7
 
8
8
  from hwpx.tools.id_integrity import IdIntegrityReport, check_id_integrity
9
- from hwpx.tools.package_validator import PackageValidationReport
9
+ from hwpx.tools.package_validator import EditorOpenSafetyReport, PackageValidationReport
10
10
  from hwpx.tools.validator import ValidationReport
11
11
 
12
12
 
@@ -32,6 +32,7 @@ class BuilderSaveReport:
32
32
  visual_review_required: bool = False
33
33
  feature_flags: dict[str, bool] = field(default_factory=dict)
34
34
  id_integrity: IdIntegrityReport | None = None
35
+ editor_open_safety: EditorOpenSafetyReport | None = None
35
36
 
36
37
  def __post_init__(self) -> None:
37
38
  hard_gates = dict(self.hard_gates)
@@ -52,6 +53,11 @@ class BuilderSaveReport:
52
53
  "hard_gates": dict(self.hard_gates),
53
54
  "visual_review_required": self.visual_review_required,
54
55
  "feature_flags": dict(self.feature_flags),
56
+ "editor_open_safety": (
57
+ None
58
+ if self.editor_open_safety is None
59
+ else self.editor_open_safety.to_dict()
60
+ ),
55
61
  "validate_package": {
56
62
  "ok": self.validate_package.ok,
57
63
  "checked_parts": list(self.validate_package.checked_parts),
@@ -5,12 +5,15 @@ from __future__ import annotations
5
5
 
6
6
  import xml.etree.ElementTree as ET
7
7
  import io
8
+ import os
9
+ import tempfile
8
10
  import warnings
9
11
  from datetime import datetime
10
12
  import logging
11
13
  import uuid
12
14
 
13
15
  from os import PathLike
16
+ from pathlib import Path
14
17
  from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
15
18
 
16
19
  from lxml import etree
@@ -39,7 +42,10 @@ from .oxml import (
39
42
  TrackChange,
40
43
  TrackChangeAuthor,
41
44
  )
42
- from .opc.package import HwpxPackage
45
+ from .opc.package import (
46
+ HwpxPackage,
47
+ _UNCHECKED_SAVE_TOKEN,
48
+ )
43
49
  from .oxml.namespaces import HH, HH_NS, HP, HP_NS, register_owpml_namespaces
44
50
  from .templates import blank_document_bytes
45
51
 
@@ -83,6 +89,91 @@ def _png_dimensions(image_data: bytes) -> tuple[int, int] | None:
83
89
  return width, height
84
90
 
85
91
 
92
+ def _write_bytes_atomically(path: str | PathLike[str], data: bytes) -> None:
93
+ target = Path(path)
94
+ fd, tmp_path = tempfile.mkstemp(dir=str(target.parent), suffix=".hwpx.tmp")
95
+ try:
96
+ with os.fdopen(fd, "wb") as tmp_fh:
97
+ tmp_fh.write(data)
98
+ os.replace(tmp_path, str(target))
99
+ except BaseException:
100
+ try:
101
+ os.unlink(tmp_path)
102
+ except OSError:
103
+ pass
104
+ raise
105
+
106
+
107
+ def _capture_stream_checkpoint(stream: BinaryIO) -> tuple[int, bytes] | None:
108
+ try:
109
+ position = stream.tell()
110
+ except (AttributeError, OSError):
111
+ return None
112
+ try:
113
+ tail = stream.read()
114
+ except (AttributeError, OSError):
115
+ try:
116
+ end_position = stream.seek(0, os.SEEK_END)
117
+ except (AttributeError, OSError):
118
+ return None
119
+ try:
120
+ stream.seek(position)
121
+ except (AttributeError, OSError):
122
+ return None
123
+ if end_position == position:
124
+ return position, b""
125
+ return None
126
+ try:
127
+ stream.seek(position)
128
+ except (AttributeError, OSError):
129
+ return None
130
+ return position, tail
131
+
132
+
133
+ def _rollback_stream(stream: BinaryIO, checkpoint: tuple[int, bytes] | None) -> None:
134
+ if checkpoint is None:
135
+ return
136
+ position, tail = checkpoint
137
+ try:
138
+ stream.seek(position)
139
+ if tail:
140
+ stream.write(tail)
141
+ stream.truncate(position + len(tail))
142
+ else:
143
+ stream.truncate(position)
144
+ stream.seek(position)
145
+ except (AttributeError, OSError):
146
+ return
147
+
148
+
149
+ def _write_stream_or_rollback(stream: BinaryIO, data: bytes) -> None:
150
+ checkpoint = _capture_stream_checkpoint(stream)
151
+ if checkpoint is None:
152
+ raise OSError(
153
+ "HWPX stream save requires a checkpointable stream; "
154
+ "use save_to_path() for non-seekable outputs"
155
+ )
156
+ try:
157
+ written = stream.write(data)
158
+ if written is not None and written != len(data):
159
+ raise OSError(
160
+ "short write while saving HWPX stream: "
161
+ f"wrote {written} of {len(data)} bytes"
162
+ )
163
+ except BaseException:
164
+ _rollback_stream(stream, checkpoint)
165
+ raise
166
+
167
+
168
+ def _summarize_validation_issues(issues: Sequence[Any], *, limit: int = 5) -> str:
169
+ selected = [str(issue) for issue in issues[:limit]]
170
+ remaining = len(issues) - len(selected)
171
+ summary = "; ".join(selected)
172
+ if remaining > 0:
173
+ summary += f" ... and {remaining} more"
174
+ return summary
175
+
176
+
86
177
  class HwpxDocument:
87
178
  """Provides a user-friendly API for editing HWPX documents."""
88
179
 
@@ -1472,6 +1563,14 @@ class HwpxDocument:
1472
1563
  from .tools.exporter import export_markdown
1473
1564
  return export_markdown(self, **kwargs) # type: ignore[arg-type]
1474
1565
 
1566
+ def export_rich_markdown(self, **kwargs: object) -> str:
1567
+ """Export rich Markdown preserving inline styles, tables, footnotes, hyperlinks, images, and shape text.
1568
+
1569
+ Keyword args forwarded to :func:`~hwpx.tools.markdown_export.export_markdown`.
1570
+ """
1571
+ from .tools.markdown_export import export_markdown as _rich
1572
+ return _rich(self, **kwargs) # type: ignore[arg-type]
1573
+
1475
1574
  # ------------------------------------------------------------------
1476
1575
  # Validation
1477
1576
  # ------------------------------------------------------------------
@@ -1485,7 +1584,9 @@ class HwpxDocument:
1485
1584
  """
1486
1585
  from .tools.validator import validate_document
1487
1586
 
1488
- return validate_document(self._to_bytes_raw(reset_dirty=False))
1587
+ return validate_document(
1588
+ self._to_bytes_for_validation()
1589
+ )
1489
1590
 
1490
1591
  def _run_pre_save_validation(self) -> None:
1491
1592
  """Raise if validate_on_save is enabled and the document is invalid."""
@@ -1493,29 +1594,38 @@ class HwpxDocument:
1493
1594
  return
1494
1595
  report = self.validate()
1495
1596
  if not report.ok:
1496
- msgs = "; ".join(str(i) for i in report.issues[:5])
1497
- remaining = len(report.issues) - 5
1498
- if remaining > 0:
1499
- msgs += f" … and {remaining} more"
1597
+ msgs = _summarize_validation_issues(report.issues)
1500
1598
  raise ValueError(f"Document validation failed: {msgs}")
1501
1599
 
1600
+ def _run_open_safety_validation(self, archive_bytes: bytes) -> None:
1601
+ """Raise if generated bytes are unsafe to hand to an HWPX editor."""
1602
+
1603
+ from .tools.package_validator import validate_editor_open_safety
1604
+
1605
+ report = validate_editor_open_safety(archive_bytes)
1606
+ if not report.ok:
1607
+ raise ValueError(
1608
+ "Generated HWPX package failed open-safety validation: "
1609
+ + report.summary
1610
+ )
1611
+
1502
1612
  def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
1503
1613
  """Persist pending changes to *path* and return the same path."""
1504
1614
 
1505
1615
  self._run_pre_save_validation()
1506
- updates = self._root.serialize()
1507
- result = self._package.save(path, updates)
1508
- self._root.reset_dirty()
1509
- return path if result is None else result
1616
+ archive_bytes = self._to_bytes_raw(reset_dirty=False)
1617
+ _write_bytes_atomically(path, archive_bytes)
1618
+ self._mark_save_clean()
1619
+ return path
1510
1620
 
1511
1621
  def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
1512
1622
  """Persist pending changes to *stream* and return the same stream."""
1513
1623
 
1514
1624
  self._run_pre_save_validation()
1515
- updates = self._root.serialize()
1516
- result = self._package.save(stream, updates)
1517
- self._root.reset_dirty()
1518
- return stream if result is None else result
1625
+ archive_bytes = self._to_bytes_raw(reset_dirty=False)
1626
+ _write_stream_or_rollback(stream, archive_bytes)
1627
+ self._mark_save_clean()
1628
+ return stream
1519
1629
 
1520
1630
  def to_bytes(self) -> bytes:
1521
1631
  """Serialize pending changes and return the HWPX archive as bytes."""
@@ -1523,20 +1633,44 @@ class HwpxDocument:
1523
1633
  self._run_pre_save_validation()
1524
1634
  return self._to_bytes_raw()
1525
1635
 
1526
- def _to_bytes_raw(self, *, reset_dirty: bool = True) -> bytes:
1527
- """Serialize without validation.
1636
+ def _to_bytes_raw(
1637
+ self,
1638
+ *,
1639
+ reset_dirty: bool = True,
1640
+ ) -> bytes:
1641
+ """Serialize and run editor-open safety validation.
1528
1642
 
1529
1643
  When ``reset_dirty`` is ``False``, the document remains marked as
1530
1644
  modified after the archive snapshot is generated.
1531
1645
  """
1532
1646
  updates = self._root.serialize()
1533
- result = self._package.save(None, updates)
1534
- if reset_dirty:
1535
- self._root.reset_dirty()
1647
+ if updates:
1648
+ for part_name, payload in updates.items():
1649
+ self._package.set_part(part_name, payload)
1650
+ result = self._package._save_to_bytes(
1651
+ verify_open_safety=True,
1652
+ mark_clean=False,
1653
+ )
1536
1654
  if isinstance(result, bytes):
1655
+ self._run_open_safety_validation(result)
1656
+ if reset_dirty:
1657
+ self._mark_save_clean()
1537
1658
  return result
1538
1659
  raise TypeError("package.save(None) must return bytes")
1539
1660
 
1661
+ def _to_bytes_for_validation(self) -> bytes:
1662
+ """Serialize current state for document validation without handing bytes to callers."""
1663
+
1664
+ updates = self._root.serialize()
1665
+ return self._package._save_bytes_unchecked(
1666
+ updates,
1667
+ _unchecked_token=_UNCHECKED_SAVE_TOKEN,
1668
+ )
1669
+
1670
+ def _mark_save_clean(self) -> None:
1671
+ self._root.reset_dirty()
1672
+ self._package.version_info.mark_clean()
1673
+
1540
1674
  @overload
1541
1675
  def save(self, path_or_stream: None = None) -> bytes: ...
1542
1676