python-hwpx 2.1__py3-none-any.whl → 2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwpx/__init__.py +36 -36
- hwpx/document.py +890 -890
- hwpx/opc/package.py +514 -514
- hwpx/opc/xml_utils.py +50 -50
- hwpx/oxml/__init__.py +220 -220
- hwpx/oxml/body.py +435 -435
- hwpx/oxml/common.py +36 -36
- hwpx/oxml/document.py +3504 -3445
- hwpx/oxml/header.py +1369 -1369
- hwpx/oxml/header_part.py +10 -10
- hwpx/oxml/memo.py +10 -10
- hwpx/oxml/paragraph.py +10 -10
- hwpx/oxml/parser.py +72 -72
- hwpx/oxml/schema.py +44 -44
- hwpx/oxml/section.py +10 -10
- hwpx/oxml/table.py +10 -10
- hwpx/oxml/utils.py +85 -85
- hwpx/package.py +24 -24
- hwpx/templates.py +33 -33
- hwpx/tools/__init__.py +36 -36
- hwpx/tools/_schemas/header.xsd +14 -14
- hwpx/tools/_schemas/section.xsd +12 -12
- hwpx/tools/object_finder.py +347 -347
- hwpx/tools/text_extractor.py +726 -726
- hwpx/tools/validator.py +184 -184
- {python_hwpx-2.1.dist-info → python_hwpx-2.3.dist-info}/METADATA +257 -257
- python_hwpx-2.3.dist-info/RECORD +33 -0
- {python_hwpx-2.1.dist-info → python_hwpx-2.3.dist-info}/licenses/LICENSE +32 -32
- python_hwpx-2.1.dist-info/RECORD +0 -33
- {python_hwpx-2.1.dist-info → python_hwpx-2.3.dist-info}/WHEEL +0 -0
- {python_hwpx-2.1.dist-info → python_hwpx-2.3.dist-info}/entry_points.txt +0 -0
- {python_hwpx-2.1.dist-info → python_hwpx-2.3.dist-info}/top_level.txt +0 -0
hwpx/document.py
CHANGED
|
@@ -1,890 +1,890 @@
|
|
|
1
|
-
"""High-level representation of an HWPX document."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import xml.etree.ElementTree as ET
|
|
6
|
-
import io
|
|
7
|
-
import warnings
|
|
8
|
-
from datetime import datetime
|
|
9
|
-
import logging
|
|
10
|
-
import uuid
|
|
11
|
-
|
|
12
|
-
from os import PathLike
|
|
13
|
-
from typing import Any, BinaryIO, Iterator, overload
|
|
14
|
-
|
|
15
|
-
from lxml import etree
|
|
16
|
-
|
|
17
|
-
from .oxml import (
|
|
18
|
-
Bullet,
|
|
19
|
-
GenericElement,
|
|
20
|
-
HwpxOxmlDocument,
|
|
21
|
-
HwpxOxmlHeader,
|
|
22
|
-
HwpxOxmlHistory,
|
|
23
|
-
HwpxOxmlInlineObject,
|
|
24
|
-
HwpxOxmlMasterPage,
|
|
25
|
-
HwpxOxmlMemo,
|
|
26
|
-
HwpxOxmlParagraph,
|
|
27
|
-
HwpxOxmlRun,
|
|
28
|
-
HwpxOxmlSection,
|
|
29
|
-
HwpxOxmlSectionHeaderFooter,
|
|
30
|
-
HwpxOxmlTable,
|
|
31
|
-
HwpxOxmlVersion,
|
|
32
|
-
MemoShape,
|
|
33
|
-
ParagraphProperty,
|
|
34
|
-
RunStyle,
|
|
35
|
-
Style,
|
|
36
|
-
TrackChange,
|
|
37
|
-
TrackChangeAuthor,
|
|
38
|
-
)
|
|
39
|
-
from .opc.package import HwpxPackage
|
|
40
|
-
from .templates import blank_document_bytes
|
|
41
|
-
|
|
42
|
-
# Register the HWPML namespace prefixes with ElementTree.
for _prefix, _uri in (
    ("hp", "http://www.hancom.co.kr/hwpml/2011/paragraph"),
    ("hs", "http://www.hancom.co.kr/hwpml/2011/section"),
    ("hc", "http://www.hancom.co.kr/hwpml/2011/core"),
    ("hh", "http://www.hancom.co.kr/hwpml/2011/head"),
):
    ET.register_namespace(_prefix, _uri)
del _prefix, _uri  # keep the module namespace free of loop temporaries

# Namespace URIs and their Clark-notation ("{uri}") tag prefixes.
_HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
_HP = "{" + _HP_NS + "}"
_HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
_HH = "{" + _HH_NS + "}"

logger = logging.getLogger(__name__)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def _append_element(
    parent: Any,
    tag: str,
    attributes: dict[str, str] | None = None,
) -> Any:
    """Build a *tag* child via ``parent.makeelement`` and append it to *parent*.

    Args:
        parent: Element that will own the new child.
        tag: Tag name of the child to create.
        attributes: Attributes for the child; ``None`` (or an empty mapping)
            yields a child without attributes.

    Returns:
        The newly appended child element.
    """

    attrib = attributes if attributes else {}
    node = parent.makeelement(tag, attrib)
    parent.append(node)
    return node
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
class HwpxDocument:
|
|
68
|
-
"""Provides a user-friendly API for editing HWPX documents."""
|
|
69
|
-
|
|
70
|
-
def __init__(
    self,
    package: HwpxPackage,
    root: HwpxOxmlDocument,
    *,
    managed_resources: tuple[Any, ...] = (),
):
    """Bind the document to *package* and its parsed *root* tree.

    Args:
        package: Package object stored as the document's backing package.
        root: Object tree stored as the document's XML root.
        managed_resources: Extra resources stored (as a list copy) so that
            ``close()`` can flush and close them.
    """
    self._closed = False
    self._managed_resources = [*managed_resources]
    self._root = root
    self._package = package
|
|
81
|
-
|
|
82
|
-
def __repr__(self) -> str:
    """Summarise the document as ``ClassName(<collection counts>, closed=...)``."""

    class_name = self.__class__.__name__
    collection_names = (
        "sections",
        "paragraphs",
        "headers",
        "master_pages",
        "histories",
    )
    # Each collection is read and measured in turn, in the listed order.
    counts = [f"{name}={len(getattr(self, name))}" for name in collection_names]
    counts.append(f"closed={self._closed}")
    return f"{class_name}(" + ", ".join(counts) + ")"
|
|
95
|
-
|
|
96
|
-
# ------------------------------------------------------------------
|
|
97
|
-
# construction helpers
|
|
98
|
-
@classmethod
|
|
99
|
-
def open(
    cls,
    source: str | PathLike[str] | bytes | BinaryIO,
) -> "HwpxDocument":
    """Open *source* and return a :class:`HwpxDocument` instance.

    Args:
        source: A path, a binary stream, or raw ``bytes``. Raw bytes are
            wrapped in an in-memory stream that the returned document
            manages and closes in ``close()``.

    Raises:
        HwpxStructureError: Raised when opening an HWPX whose required files
            or structure are invalid.
        HwpxPackageError: Propagated when a general I/O or format error occurs
            while opening the package.
    """
    internal_resources: list[Any] = []
    open_source = source
    if isinstance(source, bytes):
        stream = io.BytesIO(source)
        open_source = stream
        internal_resources.append(stream)
    try:
        package = HwpxPackage.open(open_source)
        root = HwpxOxmlDocument.from_package(package)
    except BaseException:
        # No document exists yet to own the stream we created, so release it
        # here before letting the original error propagate unchanged.
        for resource in internal_resources:
            resource.close()
        raise
    return cls(package, root, managed_resources=tuple(internal_resources))
|
|
118
|
-
|
|
119
|
-
@classmethod
|
|
120
|
-
def new(cls) -> "HwpxDocument":
    """Create a blank document by opening the bytes of the default template."""

    template = blank_document_bytes()
    return cls.open(template)
|
|
124
|
-
|
|
125
|
-
@classmethod
|
|
126
|
-
def from_package(cls, package: HwpxPackage) -> "HwpxDocument":
    """Wrap an already opened :class:`HwpxPackage` in a document.

    Args:
        package: A :class:`hwpx.opc.package.HwpxPackage` instance.
    """
    return cls(package, HwpxOxmlDocument.from_package(package))
|
|
134
|
-
|
|
135
|
-
def __enter__(self) -> "HwpxDocument":
    """Enter the context manager, yielding this document instance itself."""

    return self
|
|
139
|
-
|
|
140
|
-
def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
    """Close the document on context exit, whether or not an exception occurred.

    Returns ``False`` so an in-flight exception is never suppressed.
    """

    suppress_exception = False
    self.close()
    return suppress_exception
|
|
145
|
-
|
|
146
|
-
def close(self) -> None:
    """Release the package and stream resources managed by this document.

    Cleanup policy:
        - Resources exposing ``flush()`` are flushed first.
        - Resources exposing ``close()`` are closed after every flush ran.
        - Errors raised while flushing or closing are logged and ignored so
          the cleanup routine keeps going.
        - Calling ``close()`` repeatedly on the same document is safe.
    """

    if not self._closed:
        # The package always goes first, followed by the managed resources.
        owned = [self._package, *self._managed_resources]
        for item in owned:
            self._flush_resource(item)
        for item in owned:
            self._close_resource(item)

        self._managed_resources.clear()
        self._closed = True
|
|
169
|
-
|
|
170
|
-
@staticmethod
|
|
171
|
-
def _flush_resource(resource: Any) -> None:
    """Call ``resource.flush()`` when it exists; log and ignore any failure."""
    flush = getattr(resource, "flush", None)
    if callable(flush):
        try:
            flush()
        except Exception:
            # Best-effort cleanup: record the failure at debug level only.
            logger.debug("자원 flush 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
|
|
179
|
-
|
|
180
|
-
@staticmethod
|
|
181
|
-
def _close_resource(resource: Any) -> None:
    """Call ``resource.close()`` when it exists; log and ignore any failure."""
    close = getattr(resource, "close", None)
    if callable(close):
        try:
            close()
        except Exception:
            # Best-effort cleanup: record the failure at debug level only.
            logger.debug("자원 close 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
|
|
189
|
-
|
|
190
|
-
# ------------------------------------------------------------------
|
|
191
|
-
# properties exposing document content
|
|
192
|
-
@property
|
|
193
|
-
def package(self) -> HwpxPackage:
    """The :class:`HwpxPackage` object this document was constructed with."""
    backing_package = self._package
    return backing_package
|
|
196
|
-
|
|
197
|
-
@property
|
|
198
|
-
def oxml(self) -> HwpxOxmlDocument:
    """The low-level XML object tree that this document wraps."""
    xml_root = self._root
    return xml_root
|
|
201
|
-
|
|
202
|
-
@property
|
|
203
|
-
def sections(self) -> list[HwpxOxmlSection]:
    """The document's sections, as exposed by the underlying XML tree."""
    xml_root = self._root
    return xml_root.sections
|
|
206
|
-
|
|
207
|
-
@property
|
|
208
|
-
def headers(self) -> list[HwpxOxmlHeader]:
    """The header parts of the document, as exposed by the underlying XML tree."""
    xml_root = self._root
    return xml_root.headers
|
|
211
|
-
|
|
212
|
-
@property
|
|
213
|
-
def master_pages(self) -> list[HwpxOxmlMasterPage]:
    """The master-page parts of the document, as exposed by the underlying XML tree."""
    xml_root = self._root
    return xml_root.master_pages
|
|
216
|
-
|
|
217
|
-
@property
|
|
218
|
-
def histories(self) -> list[HwpxOxmlHistory]:
    """The history parts of the document, as exposed by the underlying XML tree."""
    xml_root = self._root
    return xml_root.histories
|
|
221
|
-
|
|
222
|
-
@property
|
|
223
|
-
def version(self) -> HwpxOxmlVersion | None:
    """The version metadata part, or ``None`` when the XML tree has none."""
    xml_root = self._root
    return xml_root.version
|
|
226
|
-
|
|
227
|
-
@property
|
|
228
|
-
def border_fills(self) -> dict[str, GenericElement]:
    """Mapping of the border-fill definitions exposed by the XML tree."""

    xml_root = self._root
    return xml_root.border_fills
|
|
232
|
-
|
|
233
|
-
def border_fill(self, border_fill_id_ref: int | str | None) -> GenericElement | None:
    """Look up the border-fill definition for *border_fill_id_ref* in the XML tree."""

    lookup = self._root.border_fill
    return lookup(border_fill_id_ref)
|
|
237
|
-
|
|
238
|
-
@property
|
|
239
|
-
def memo_shapes(self) -> dict[str, MemoShape]:
    """Mapping of the memo-shape definitions exposed by the XML tree."""

    xml_root = self._root
    return xml_root.memo_shapes
|
|
243
|
-
|
|
244
|
-
def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
    """Look up the memo-shape definition for *memo_shape_id_ref* in the XML tree."""

    lookup = self._root.memo_shape
    return lookup(memo_shape_id_ref)
|
|
248
|
-
|
|
249
|
-
@property
|
|
250
|
-
def bullets(self) -> dict[str, Bullet]:
    """Mapping of the bullet definitions exposed by the XML tree."""

    xml_root = self._root
    return xml_root.bullets
|
|
254
|
-
|
|
255
|
-
def bullet(self, bullet_id_ref: int | str | None) -> Bullet | None:
    """Look up the bullet definition for *bullet_id_ref* in the XML tree."""

    lookup = self._root.bullet
    return lookup(bullet_id_ref)
|
|
259
|
-
|
|
260
|
-
@property
|
|
261
|
-
def paragraph_properties(self) -> dict[str, ParagraphProperty]:
    """Mapping of the paragraph-property definitions exposed by the XML tree."""

    xml_root = self._root
    return xml_root.paragraph_properties
|
|
265
|
-
|
|
266
|
-
def paragraph_property(
    self, para_pr_id_ref: int | str | None
) -> ParagraphProperty | None:
    """Look up the paragraph property for *para_pr_id_ref* in the XML tree."""

    lookup = self._root.paragraph_property
    return lookup(para_pr_id_ref)
|
|
272
|
-
|
|
273
|
-
@property
|
|
274
|
-
def styles(self) -> dict[str, Style]:
    """Mapping of the style definitions exposed by the XML tree."""

    xml_root = self._root
    return xml_root.styles
|
|
278
|
-
|
|
279
|
-
def style(self, style_id_ref: int | str | None) -> Style | None:
    """Look up the style definition for *style_id_ref* in the XML tree."""

    lookup = self._root.style
    return lookup(style_id_ref)
|
|
283
|
-
|
|
284
|
-
@property
|
|
285
|
-
def track_changes(self) -> dict[str, TrackChange]:
    """Mapping of the tracked-change metadata exposed by the XML tree."""

    xml_root = self._root
    return xml_root.track_changes
|
|
289
|
-
|
|
290
|
-
def track_change(self, change_id_ref: int | str | None) -> TrackChange | None:
    """Look up the tracked-change metadata for *change_id_ref* in the XML tree."""

    lookup = self._root.track_change
    return lookup(change_id_ref)
|
|
294
|
-
|
|
295
|
-
@property
|
|
296
|
-
def track_change_authors(self) -> dict[str, TrackChangeAuthor]:
    """Mapping of the tracked-change author metadata exposed by the XML tree."""

    xml_root = self._root
    return xml_root.track_change_authors
|
|
300
|
-
|
|
301
|
-
def track_change_author(
    self, author_id_ref: int | str | None
) -> TrackChangeAuthor | None:
    """Look up the tracked-change author for *author_id_ref* in the XML tree."""

    lookup = self._root.track_change_author
    return lookup(author_id_ref)
|
|
307
|
-
|
|
308
|
-
@property
|
|
309
|
-
def memos(self) -> list[HwpxOxmlMemo]:
    """Collect the memos of every section, in section order, into one new list."""

    return [entry for part in self._root.sections for entry in part.memos]
|
|
316
|
-
|
|
317
|
-
def add_memo(
    self,
    text: str = "",
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    memo_shape_id_ref: str | int | None = None,
    memo_id: str | None = None,
    char_pr_id_ref: str | int | None = None,
    attributes: dict[str, str] | None = None,
) -> HwpxOxmlMemo:
    """Add a memo to a section and return it.

    The target is *section* when given, otherwise the section at
    *section_index*, otherwise the document's last section. The remaining
    arguments are forwarded to the section's ``add_memo``.

    Raises:
        ValueError: No section was specified and the document has none.
    """

    target = section
    if target is None and section_index is not None:
        target = self._root.sections[section_index]
    if target is None:
        available = self._root.sections
        if not available:
            raise ValueError("document does not contain any sections")
        target = available[-1]

    memo_options = {
        "memo_shape_id_ref": memo_shape_id_ref,
        "memo_id": memo_id,
        "char_pr_id_ref": char_pr_id_ref,
        "attributes": attributes,
    }
    return target.add_memo(text, **memo_options)
|
|
343
|
-
|
|
344
|
-
def remove_memo(self, memo: HwpxOxmlMemo) -> None:
    """Delete *memo* by delegating to its own ``remove()`` method."""

    memo.remove()
    return None
|
|
348
|
-
|
|
349
|
-
def attach_memo_field(
    self,
    paragraph: HwpxOxmlParagraph,
    memo: HwpxOxmlMemo,
    *,
    field_id: str | None = None,
    author: str | None = None,
    created: datetime | str | None = None,
    number: int = 1,
    char_pr_id_ref: str | int | None = None,
) -> str:
    """Anchor *memo* to *paragraph* with a MEMO field begin/end pair.

    A run holding a ``fieldBegin`` control is inserted as the paragraph's
    first child and a run holding the matching ``fieldEnd`` is appended as
    its last child; the paragraph's section is then marked dirty.

    Args:
        paragraph: Paragraph receiving the field; must belong to a section.
        memo: Memo to reference; its group must belong to a section.
        field_id: Field identifier; a random UUID hex string when omitted.
        author: Author text; falls back to the memo's ``author`` attribute,
            then to an empty string.
        created: Creation time; falls back to the memo's ``createDateTime``
            attribute, then to the current local time.
        number: Memo number written to the field; values below 1 become 1.
        char_pr_id_ref: Character property for the inserted runs; falls back
            to the paragraph's, then to the memo's inferred one, then ``"0"``.

    Returns:
        The field identifier that was written.

    Raises:
        ValueError: The paragraph or the memo is not attached to a section.
    """

    if paragraph.section is None:
        raise ValueError("paragraph must belong to a section before anchoring a memo")
    if memo.group.section is None:
        raise ValueError("memo is not attached to a section")

    timestamp_format = "%Y-%m-%d %H:%M:%S"
    field_key = field_id or uuid.uuid4().hex
    author_text = author or memo.attributes.get("author") or ""

    stamp = memo.attributes.get("createDateTime") if created is None else created
    if stamp is None:
        stamp_text = datetime.now().strftime(timestamp_format)
    elif isinstance(stamp, datetime):
        stamp_text = stamp.strftime(timestamp_format)
    else:
        stamp_text = str(stamp)

    shape_id = memo.memo_shape_id_ref or ""

    # Resolve the character property lazily, first non-None source wins.
    run_char = char_pr_id_ref
    if run_char is None:
        run_char = paragraph.char_pr_id_ref
        if run_char is None:
            run_char = memo._infer_char_pr_id_ref()
            if run_char is None:
                run_char = "0"
    run_char = str(run_char)

    memo_id = memo.id
    anchor_name = memo_id or field_key
    host = paragraph.element

    # --- opening run: ctrl > fieldBegin > (parameters, subList) ---
    opening_run = host.makeelement(f"{_HP}run", {"charPrIDRef": run_char})
    opening_ctrl = _append_element(opening_run, f"{_HP}ctrl")
    begin_node = _append_element(
        opening_ctrl,
        f"{_HP}fieldBegin",
        {
            "id": field_key,
            "type": "MEMO",
            "editable": "true",
            "dirty": "false",
            "fieldid": field_key,
        },
    )

    params = _append_element(begin_node, f"{_HP}parameters", {"count": "5", "name": ""})
    for param_tag, param_name, param_text in (
        ("stringParam", "ID", memo_id or ""),
        ("integerParam", "Number", str(max(1, number))),
        ("stringParam", "CreateDateTime", stamp_text),
        ("stringParam", "Author", author_text),
        ("stringParam", "MemoShapeID", shape_id),
    ):
        _append_element(params, f"{_HP}{param_tag}", {"name": param_name}).text = param_text

    nested_list = _append_element(
        begin_node,
        f"{_HP}subList",
        {
            "id": f"memo-field-{anchor_name}",
            "textDirection": "HORIZONTAL",
            "lineWrap": "BREAK",
            "vertAlign": "TOP",
        },
    )
    nested_para = _append_element(
        nested_list,
        f"{_HP}p",
        {
            "id": f"memo-field-{anchor_name}-p",
            "paraPrIDRef": "0",
            "styleIDRef": "0",
            "pageBreak": "0",
            "columnBreak": "0",
            "merged": "0",
        },
    )
    nested_run = _append_element(nested_para, f"{_HP}run", {"charPrIDRef": run_char})
    _append_element(nested_run, f"{_HP}t").text = anchor_name

    # --- closing run: ctrl > fieldEnd ---
    closing_run = host.makeelement(f"{_HP}run", {"charPrIDRef": run_char})
    closing_ctrl = _append_element(closing_run, f"{_HP}ctrl")
    _append_element(
        closing_ctrl,
        f"{_HP}fieldEnd",
        {"beginIDRef": field_key, "fieldid": field_key},
    )

    # The begin run goes first and the end run last in the paragraph.
    paragraph.element.insert(0, opening_run)
    paragraph.element.append(closing_run)
    paragraph.section.mark_dirty()

    return field_key
|
|
445
|
-
|
|
446
|
-
def add_memo_with_anchor(
    self,
    text: str = "",
    *,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    paragraph_text: str | None = None,
    memo_shape_id_ref: str | int | None = None,
    memo_id: str | None = None,
    char_pr_id_ref: str | int | None = None,
    attributes: dict[str, str] | None = None,
    field_id: str | None = None,
    author: str | None = None,
    created: datetime | str | None = None,
    number: int = 1,
    anchor_char_pr_id_ref: str | int | None = None,
) -> tuple[HwpxOxmlMemo, HwpxOxmlParagraph, str]:
    """Create a memo and anchor it to a paragraph with a MEMO field.

    The memo is created through ``add_memo``. When *paragraph* is omitted a
    new paragraph is added to the memo's section and used as the anchor;
    otherwise *paragraph* is used and, if *paragraph_text* is given, its
    text is overwritten. The field itself is written by
    ``attach_memo_field``.

    Args:
        text: Memo text.
        paragraph: Existing paragraph to anchor to, if any.
        section, section_index: Target section selection for the memo.
        paragraph_text: Text for the anchor paragraph (empty for a newly
            created paragraph when omitted).
        memo_shape_id_ref, memo_id, char_pr_id_ref, attributes: Forwarded to
            ``add_memo``.
        field_id, author, created, number: Forwarded to
            ``attach_memo_field``.
        anchor_char_pr_id_ref: Character property for the anchor runs; a new
            anchor paragraph falls back to *char_pr_id_ref* when this is
            ``None``.

    Returns:
        ``(memo, anchor_paragraph, field_id)``.

    Raises:
        ValueError: The created memo does not belong to a section.
    """

    memo = self.add_memo(
        text,
        section=section,
        section_index=section_index,
        memo_shape_id_ref=memo_shape_id_ref,
        memo_id=memo_id,
        char_pr_id_ref=char_pr_id_ref,
        attributes=attributes,
    )

    target_paragraph = paragraph
    if target_paragraph is None:
        memo_section = memo.group.section
        if memo_section is None:
            raise ValueError("memo must belong to a section")
        paragraph_value = "" if paragraph_text is None else paragraph_text
        # Compare against None explicitly: the integer id 0 is a valid
        # reference and must not be discarded by a truthiness test.
        anchor_char = (
            anchor_char_pr_id_ref
            if anchor_char_pr_id_ref is not None
            else char_pr_id_ref
        )
        target_paragraph = self.add_paragraph(
            paragraph_value,
            section=memo_section,
            char_pr_id_ref=anchor_char,
        )
    elif paragraph_text is not None:
        target_paragraph.text = paragraph_text

    field_value = self.attach_memo_field(
        target_paragraph,
        memo,
        field_id=field_id,
        author=author,
        created=created,
        number=number,
        char_pr_id_ref=anchor_char_pr_id_ref,
    )

    return memo, target_paragraph, field_value
|
|
502
|
-
|
|
503
|
-
@property
|
|
504
|
-
def paragraphs(self) -> list[HwpxOxmlParagraph]:
    """The document's paragraphs, as exposed by the underlying XML tree."""
    xml_root = self._root
    return xml_root.paragraphs
|
|
507
|
-
|
|
508
|
-
@property
|
|
509
|
-
def char_properties(self) -> dict[str, RunStyle]:
    """Mapping of the character style definitions exposed by the XML tree."""

    xml_root = self._root
    return xml_root.char_properties
|
|
513
|
-
|
|
514
|
-
def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
    """Look up the character style for *char_pr_id_ref* in the XML tree."""

    lookup = self._root.char_property
    return lookup(char_pr_id_ref)
|
|
518
|
-
|
|
519
|
-
def ensure_run_style(
    self,
    *,
    bold: bool = False,
    italic: bool = False,
    underline: bool = False,
    base_char_pr_id: str | int | None = None,
) -> str:
    """Ask the XML tree for a ``charPr`` identifier matching the given flags.

    All arguments are forwarded unchanged to the tree's ``ensure_run_style``.
    """

    request = {
        "bold": bold,
        "italic": italic,
        "underline": underline,
        "base_char_pr_id": base_char_pr_id,
    }
    return self._root.ensure_run_style(**request)
|
|
535
|
-
|
|
536
|
-
def iter_runs(self) -> Iterator[HwpxOxmlRun]:
    """Lazily yield the runs of every paragraph, in paragraph order."""

    for block in self.paragraphs:
        yield from block.runs
|
|
542
|
-
|
|
543
|
-
def find_runs_by_style(
    self,
    *,
    text_color: str | None = None,
    underline_type: str | None = None,
    underline_color: str | None = None,
    char_pr_id_ref: str | int | None = None,
) -> list[HwpxOxmlRun]:
    """Collect the runs that satisfy every filter that was supplied.

    Filters left as ``None`` are not applied. *char_pr_id_ref* is compared as
    a whitespace-stripped string against the run's own reference. A run
    without a style never satisfies a colour or underline filter.
    """

    wanted_char = None if char_pr_id_ref is None else str(char_pr_id_ref).strip()
    found: list[HwpxOxmlRun] = []

    for candidate in self.iter_runs():
        if wanted_char is not None and (candidate.char_pr_id_ref or "").strip() != wanted_char:
            continue
        look = candidate.style
        if text_color is not None and (look is None or look.text_color() != text_color):
            continue
        if underline_type is not None and (look is None or look.underline_type() != underline_type):
            continue
        if underline_color is not None and (look is None or look.underline_color() != underline_color):
            continue
        found.append(candidate)
    return found
|
|
573
|
-
|
|
574
|
-
def replace_text_in_runs(
    self,
    search: str,
    replacement: str,
    *,
    text_color: str | None = None,
    underline_type: str | None = None,
    underline_color: str | None = None,
    char_pr_id_ref: str | int | None = None,
    limit: int | None = None,
) -> int:
    """Substitute *search* with *replacement* inside the style-matched runs.

    Runs are selected with ``find_runs_by_style`` using the given filters.
    When *limit* is set, replacing stops once that many substitutions were
    made.

    Returns:
        The total number of substitutions performed.

    Raises:
        ValueError: *search* is empty.
    """

    if not search:
        raise ValueError("search must be a non-empty string")

    candidates = self.find_runs_by_style(
        text_color=text_color,
        underline_type=underline_type,
        underline_color=underline_color,
        char_pr_id_ref=char_pr_id_ref,
    )
    capped = limit is not None
    total = 0

    for target in candidates:
        budget = limit - total if capped else None
        if capped and budget <= 0:
            break
        saved_char_pr = target.char_pr_id_ref
        changed = target.replace_text(
            search,
            replacement,
            count=budget,
        )
        if changed and saved_char_pr is not None:
            # Re-assert the run's formatting reference in case the
            # substitution rewrote its XML nodes.
            target.char_pr_id_ref = saved_char_pr
        total += changed
        if capped and total >= limit:
            break
    return total
|
|
618
|
-
|
|
619
|
-
# ------------------------------------------------------------------
|
|
620
|
-
# editing helpers
|
|
621
|
-
def add_paragraph(
    self,
    text: str = "",
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    include_run: bool = True,
    **extra_attrs: str,
) -> HwpxOxmlParagraph:
    """Add a paragraph through the XML tree and return it.

    ``para_pr_id_ref``, ``style_id_ref`` and ``char_pr_id_ref`` override the
    formatting references. Every argument, including any extra keyword
    arguments (raw paragraph attributes), is forwarded unchanged to the
    tree's ``add_paragraph``.
    """
    create_paragraph = self._root.add_paragraph
    return create_paragraph(
        text,
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        run_attributes=run_attributes,
        include_run=include_run,
        **extra_attrs,
    )
|
|
651
|
-
|
|
652
|
-
def add_table(
    self,
    rows: int,
    cols: int,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    width: int | None = None,
    height: int | None = None,
    border_fill_id_ref: str | int | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlTable:
    """Add a run-less paragraph and create a *rows* x *cols* table inside it.

    When *border_fill_id_ref* is ``None`` the identifier returned by the XML
    tree's ``ensure_basic_border_fill()`` is used instead. Extra keyword
    arguments are passed to ``add_paragraph`` for the host paragraph.
    """

    if border_fill_id_ref is None:
        border_ref: str | int | None = self._root.ensure_basic_border_fill()
    else:
        border_ref = border_fill_id_ref

    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    table_options = {
        "width": width,
        "height": height,
        "border_fill_id_ref": border_ref,
        "run_attributes": run_attributes,
        "char_pr_id_ref": char_pr_id_ref,
    }
    return host.add_table(rows, cols, **table_options)
|
|
693
|
-
|
|
694
|
-
def add_shape(
    self,
    shape_type: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    attributes: dict[str, str] | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlInlineObject:
    """Add a run-less paragraph and place an inline *shape_type* shape in it.

    Extra keyword arguments are passed to ``add_paragraph`` for the host
    paragraph; *attributes*, *run_attributes* and *char_pr_id_ref* go to the
    paragraph's ``add_shape``.
    """

    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    shape_options = {
        "attributes": attributes,
        "run_attributes": run_attributes,
        "char_pr_id_ref": char_pr_id_ref,
    }
    return host.add_shape(shape_type, **shape_options)
|
|
725
|
-
|
|
726
|
-
def add_control(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    attributes: dict[str, str] | None = None,
    control_type: str | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlInlineObject:
    """Add a run-less paragraph and place an inline control object in it.

    Extra keyword arguments are passed to ``add_paragraph`` for the host
    paragraph; *attributes*, *control_type*, *run_attributes* and
    *char_pr_id_ref* go to the paragraph's ``add_control``.
    """

    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    control_options = {
        "attributes": attributes,
        "control_type": control_type,
        "run_attributes": run_attributes,
        "char_pr_id_ref": char_pr_id_ref,
    }
    return host.add_control(**control_options)
|
|
757
|
-
|
|
758
|
-
def set_header_text(
    self,
    text: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> HwpxOxmlSectionHeaderFooter:
    """Set the *page_type* header text of a section via its properties.

    The target is *section* when given, otherwise the section at
    *section_index*, otherwise the document's last section.

    Raises:
        ValueError: No section was specified and the document has none.
    """

    chosen = section
    if chosen is None and section_index is not None:
        chosen = self._root.sections[section_index]
    if chosen is None:
        available = self._root.sections
        if not available:
            raise ValueError("document does not contain any sections")
        chosen = available[-1]
    section_properties = chosen.properties
    return section_properties.set_header_text(text, page_type=page_type)
|
|
776
|
-
|
|
777
|
-
def set_footer_text(
    self,
    text: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> HwpxOxmlSectionHeaderFooter:
    """Set the *page_type* footer text of a section via its properties.

    The target is *section* when given, otherwise the section at
    *section_index*, otherwise the document's last section.

    Raises:
        ValueError: No section was specified and the document has none.
    """

    chosen = section
    if chosen is None and section_index is not None:
        chosen = self._root.sections[section_index]
    if chosen is None:
        available = self._root.sections
        if not available:
            raise ValueError("document does not contain any sections")
        chosen = available[-1]
    section_properties = chosen.properties
    return section_properties.set_footer_text(text, page_type=page_type)
|
|
795
|
-
|
|
796
|
-
def remove_header(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> None:
    """Ask one section's properties to remove its header for *page_type*.

    The section is ``section`` when given, otherwise the one at
    ``section_index``, otherwise the last section. When no section was
    specified and the document has none, the call does nothing.
    """
    by_index = section is None and section_index is not None
    resolved = self._root.sections[section_index] if by_index else section
    if resolved is None:
        known = self._root.sections
        if not known:
            return  # nothing to remove a header from
        resolved = known[-1]
    resolved.properties.remove_header(page_type=page_type)
|
|
813
|
-
|
|
814
|
-
def remove_footer(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> None:
    """Ask one section's properties to remove its footer for *page_type*.

    The section is ``section`` when given, otherwise the one at
    ``section_index``, otherwise the last section. When no section was
    specified and the document has none, the call does nothing.
    """
    by_index = section is None and section_index is not None
    resolved = self._root.sections[section_index] if by_index else section
    if resolved is None:
        known = self._root.sections
        if not known:
            return  # nothing to remove a footer from
        resolved = known[-1]
    resolved.properties.remove_footer(page_type=page_type)
|
|
831
|
-
|
|
832
|
-
def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
    """Serialize the document tree, save it to *path* and clear the dirty state.

    Returns whatever the package's ``save`` returned, or *path* itself when
    that call returned ``None``.
    """
    pending = self._root.serialize()
    outcome = self._package.save(path, pending)
    self._root.reset_dirty()
    if outcome is None:
        return path
    return outcome
|
|
839
|
-
|
|
840
|
-
def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
    """Serialize the document tree, save it to *stream* and clear the dirty state.

    Returns whatever the package's ``save`` returned, or *stream* itself
    when that call returned ``None``.
    """
    pending = self._root.serialize()
    outcome = self._package.save(stream, pending)
    self._root.reset_dirty()
    if outcome is None:
        return stream
    return outcome
|
|
847
|
-
|
|
848
|
-
def to_bytes(self) -> bytes:
    """Serialize the document tree and return the saved archive as bytes.

    The package is saved with ``None`` as its target and the dirty state is
    cleared before the result is inspected.

    Raises:
        TypeError: the package's ``save(None, ...)`` did not return ``bytes``.
    """
    payload = self._package.save(None, self._root.serialize())
    self._root.reset_dirty()
    if not isinstance(payload, bytes):
        raise TypeError("package.save(None) must return bytes")
    return payload
|
|
857
|
-
|
|
858
|
-
@overload
def save(self, path_or_stream: None = None) -> bytes: ...

@overload
def save(self, path_or_stream: str | PathLike[str]) -> str | PathLike[str]: ...

@overload
def save(self, path_or_stream: BinaryIO) -> BinaryIO: ...

def save(
    self,
    path_or_stream: str | PathLike[str] | BinaryIO | None = None,
) -> str | PathLike[str] | BinaryIO | bytes:
    """Deprecated compatibility wrapper around save_to_path/save_to_stream/to_bytes.

    Deprecated:
        ``save()`` is kept for backward compatibility and may be removed in
        the future.
        - Saving to a path: ``save_to_path(path)``
        - Saving to a stream: ``save_to_stream(stream)``
        - Getting bytes back: ``to_bytes()``
    """
    # stacklevel=2 attributes the warning to the caller of save().
    warnings.warn(
        "HwpxDocument.save()는 deprecated 예정입니다. "
        "save_to_path()/save_to_stream()/to_bytes() 사용을 권장합니다.",
        DeprecationWarning,
        stacklevel=2,
    )
    if isinstance(path_or_stream, (str, PathLike)):
        return self.save_to_path(path_or_stream)
    if path_or_stream is not None:
        return self.save_to_stream(path_or_stream)
    return self.to_bytes()
|
|
1
|
+
"""High-level representation of an HWPX document."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import xml.etree.ElementTree as ET
|
|
6
|
+
import io
|
|
7
|
+
import warnings
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
import logging
|
|
10
|
+
import uuid
|
|
11
|
+
|
|
12
|
+
from os import PathLike
|
|
13
|
+
from typing import Any, BinaryIO, Iterator, overload
|
|
14
|
+
|
|
15
|
+
from lxml import etree
|
|
16
|
+
|
|
17
|
+
from .oxml import (
|
|
18
|
+
Bullet,
|
|
19
|
+
GenericElement,
|
|
20
|
+
HwpxOxmlDocument,
|
|
21
|
+
HwpxOxmlHeader,
|
|
22
|
+
HwpxOxmlHistory,
|
|
23
|
+
HwpxOxmlInlineObject,
|
|
24
|
+
HwpxOxmlMasterPage,
|
|
25
|
+
HwpxOxmlMemo,
|
|
26
|
+
HwpxOxmlParagraph,
|
|
27
|
+
HwpxOxmlRun,
|
|
28
|
+
HwpxOxmlSection,
|
|
29
|
+
HwpxOxmlSectionHeaderFooter,
|
|
30
|
+
HwpxOxmlTable,
|
|
31
|
+
HwpxOxmlVersion,
|
|
32
|
+
MemoShape,
|
|
33
|
+
ParagraphProperty,
|
|
34
|
+
RunStyle,
|
|
35
|
+
Style,
|
|
36
|
+
TrackChange,
|
|
37
|
+
TrackChangeAuthor,
|
|
38
|
+
)
|
|
39
|
+
from .opc.package import HwpxPackage
|
|
40
|
+
from .templates import blank_document_bytes
|
|
41
|
+
|
|
42
|
+
# Namespace URIs and their Clark-notation ("{uri}") tag prefixes.
_HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
_HP = "{" + _HP_NS + "}"
_HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
_HH = "{" + _HH_NS + "}"

# Register the prefixes ElementTree should use for these URIs when serializing.
for _prefix, _uri in (
    ("hp", _HP_NS),
    ("hs", "http://www.hancom.co.kr/hwpml/2011/section"),
    ("hc", "http://www.hancom.co.kr/hwpml/2011/core"),
    ("hh", _HH_NS),
):
    ET.register_namespace(_prefix, _uri)
del _prefix, _uri  # keep the loop variables out of the module namespace

logger = logging.getLogger(__name__)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _append_element(
    parent: Any,
    tag: str,
    attributes: dict[str, str] | None = None,
) -> Any:
    """Build a *tag* child via ``parent.makeelement`` and append it to *parent*.

    Going through ``makeelement`` lets the parent's own element
    implementation create the child. ``None`` or an empty *attributes*
    mapping yields a child without attributes.

    Returns:
        The newly appended child element.
    """
    attrs = attributes if attributes else {}
    node = parent.makeelement(tag, attrs)
    parent.append(node)
    return node
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class HwpxDocument:
|
|
68
|
+
"""Provides a user-friendly API for editing HWPX documents."""
|
|
69
|
+
|
|
70
|
+
def __init__(
    self,
    package: HwpxPackage,
    root: HwpxOxmlDocument,
    *,
    managed_resources: tuple[Any, ...] = (),
):
    """Bind the document to *package* and its parsed *root* tree.

    Args:
        package: package object stored as the document's backing package.
        root: object tree stored as the document's XML root.
        managed_resources: extra resources owned by this document; they are
            copied into a list and flushed/closed by :meth:`close`.
    """
    self._closed = False
    self._managed_resources = [*managed_resources]
    self._root = root
    self._package = package
|
|
81
|
+
|
|
82
|
+
def __repr__(self) -> str:
    """Summarize the document as item counts plus the closed flag."""
    parts = [
        f"sections={len(self.sections)}",
        f"paragraphs={len(self.paragraphs)}",
        f"headers={len(self.headers)}",
        f"master_pages={len(self.master_pages)}",
        f"histories={len(self.histories)}",
        f"closed={self._closed}",
    ]
    return f"{self.__class__.__name__}({', '.join(parts)})"
|
|
95
|
+
|
|
96
|
+
# ------------------------------------------------------------------
|
|
97
|
+
# construction helpers
|
|
98
|
+
@classmethod
def open(
    cls,
    source: str | PathLike[str] | bytes | BinaryIO,
) -> "HwpxDocument":
    """Open *source* and return a :class:`HwpxDocument` instance.

    A ``bytes`` source is wrapped in an in-memory stream that the returned
    document owns; any other source is handed to ``HwpxPackage.open``
    unchanged.

    Raises:
        HwpxStructureError: raised when opening an HWPX whose required
            files or structure are invalid.
        HwpxPackageError: propagated when a general I/O or format error
            occurs while opening the package.
    """
    owned: list[Any] = []
    if isinstance(source, bytes):
        buffer = io.BytesIO(source)
        owned.append(buffer)
        package = HwpxPackage.open(buffer)
    else:
        package = HwpxPackage.open(source)
    tree = HwpxOxmlDocument.from_package(package)
    return cls(package, tree, managed_resources=tuple(owned))
|
|
118
|
+
|
|
119
|
+
@classmethod
def new(cls) -> "HwpxDocument":
    """Open and return a document built from ``blank_document_bytes()``."""
    template = blank_document_bytes()
    return cls.open(template)
|
|
124
|
+
|
|
125
|
+
@classmethod
def from_package(cls, package: HwpxPackage) -> "HwpxDocument":
    """Wrap an already opened :class:`HwpxPackage` in a document.

    Args:
        package: a :class:`hwpx.opc.package.HwpxPackage` instance.
    """
    tree = HwpxOxmlDocument.from_package(package)
    return cls(package, tree)
|
|
134
|
+
|
|
135
|
+
def __enter__(self) -> "HwpxDocument":
    """Enter the context manager and hand back this document."""
    document = self
    return document
|
|
139
|
+
|
|
140
|
+
def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
    """Close the document whether or not the ``with`` body raised.

    Returns ``False`` so an in-flight exception is never suppressed.
    """
    suppress = False
    self.close()
    return suppress
|
|
145
|
+
|
|
146
|
+
def close(self) -> None:
    """Release the package and stream resources managed by this document.

    Cleanup policy:
        - Resources are passed to the flush helper first (package, then
          managed resources).
        - They are then passed to the close helper in the same order.
        - Exceptions raised while flushing or closing are logged and
          ignored by those helpers so cleanup keeps going.
        - Calling ``close()`` again on the same document is a no-op.
    """
    if self._closed:
        return

    # Package first, then every managed resource, in both passes.
    owned = [self._package, *self._managed_resources]
    for resource in owned:
        self._flush_resource(resource)
    for resource in owned:
        self._close_resource(resource)

    self._managed_resources.clear()
    self._closed = True
|
|
169
|
+
|
|
170
|
+
@staticmethod
def _flush_resource(resource: Any) -> None:
    """Call ``resource.flush()`` when it exists and is callable.

    Any exception from the flush is logged at debug level and swallowed.
    """
    flush = getattr(resource, "flush", None)
    if callable(flush):
        try:
            flush()
        except Exception:
            logger.debug("자원 flush 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
|
|
179
|
+
|
|
180
|
+
@staticmethod
def _close_resource(resource: Any) -> None:
    """Call ``resource.close()`` when it exists and is callable.

    Any exception from the close is logged at debug level and swallowed.
    """
    close = getattr(resource, "close", None)
    if callable(close):
        try:
            close()
        except Exception:
            logger.debug("자원 close 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
|
|
189
|
+
|
|
190
|
+
# ------------------------------------------------------------------
|
|
191
|
+
# properties exposing document content
|
|
192
|
+
@property
def package(self) -> HwpxPackage:
    """The :class:`HwpxPackage` object this document was constructed with."""
    backing = self._package
    return backing
|
|
196
|
+
|
|
197
|
+
@property
def oxml(self) -> HwpxOxmlDocument:
    """The low-level XML object tree this document was constructed with."""
    tree = self._root
    return tree
|
|
201
|
+
|
|
202
|
+
@property
def sections(self) -> list[HwpxOxmlSection]:
    """The ``sections`` list exposed by the underlying XML tree."""
    tree = self._root
    return tree.sections
|
|
206
|
+
|
|
207
|
+
@property
def headers(self) -> list[HwpxOxmlHeader]:
    """The ``headers`` list (header parts) exposed by the underlying XML tree."""
    tree = self._root
    return tree.headers
|
|
211
|
+
|
|
212
|
+
@property
def master_pages(self) -> list[HwpxOxmlMasterPage]:
    """The ``master_pages`` list exposed by the underlying XML tree."""
    tree = self._root
    return tree.master_pages
|
|
216
|
+
|
|
217
|
+
@property
def histories(self) -> list[HwpxOxmlHistory]:
    """The ``histories`` list (document history parts) exposed by the XML tree."""
    tree = self._root
    return tree.histories
|
|
221
|
+
|
|
222
|
+
@property
def version(self) -> HwpxOxmlVersion | None:
    """The version metadata part of the XML tree, or ``None`` when absent."""
    tree = self._root
    return tree.version
|
|
226
|
+
|
|
227
|
+
@property
def border_fills(self) -> dict[str, GenericElement]:
    """Border fill definitions, as exposed by the XML tree's ``border_fills``."""
    tree = self._root
    return tree.border_fills
|
|
232
|
+
|
|
233
|
+
def border_fill(self, border_fill_id_ref: int | str | None) -> GenericElement | None:
    """Look up one border fill by *border_fill_id_ref* through the XML tree."""
    lookup = self._root.border_fill
    return lookup(border_fill_id_ref)
|
|
237
|
+
|
|
238
|
+
@property
def memo_shapes(self) -> dict[str, MemoShape]:
    """Memo shape definitions, as exposed by the XML tree's ``memo_shapes``."""
    tree = self._root
    return tree.memo_shapes
|
|
243
|
+
|
|
244
|
+
def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
    """Look up one memo shape by *memo_shape_id_ref* through the XML tree."""
    lookup = self._root.memo_shape
    return lookup(memo_shape_id_ref)
|
|
248
|
+
|
|
249
|
+
@property
def bullets(self) -> dict[str, Bullet]:
    """Bullet definitions, as exposed by the XML tree's ``bullets``."""
    tree = self._root
    return tree.bullets
|
|
254
|
+
|
|
255
|
+
def bullet(self, bullet_id_ref: int | str | None) -> Bullet | None:
    """Look up one bullet definition by *bullet_id_ref* through the XML tree."""
    lookup = self._root.bullet
    return lookup(bullet_id_ref)
|
|
259
|
+
|
|
260
|
+
@property
def paragraph_properties(self) -> dict[str, ParagraphProperty]:
    """Paragraph property definitions, as exposed by the XML tree."""
    tree = self._root
    return tree.paragraph_properties
|
|
265
|
+
|
|
266
|
+
def paragraph_property(
    self, para_pr_id_ref: int | str | None
) -> ParagraphProperty | None:
    """Look up one paragraph property by *para_pr_id_ref* through the XML tree."""
    lookup = self._root.paragraph_property
    return lookup(para_pr_id_ref)
|
|
272
|
+
|
|
273
|
+
@property
def styles(self) -> dict[str, Style]:
    """Style definitions, as exposed by the XML tree's ``styles``."""
    tree = self._root
    return tree.styles
|
|
278
|
+
|
|
279
|
+
def style(self, style_id_ref: int | str | None) -> Style | None:
    """Look up one style definition by *style_id_ref* through the XML tree."""
    lookup = self._root.style
    return lookup(style_id_ref)
|
|
283
|
+
|
|
284
|
+
@property
def track_changes(self) -> dict[str, TrackChange]:
    """Tracked-change metadata, as exposed by the XML tree's ``track_changes``."""
    tree = self._root
    return tree.track_changes
|
|
289
|
+
|
|
290
|
+
def track_change(self, change_id_ref: int | str | None) -> TrackChange | None:
    """Look up one tracked change by *change_id_ref* through the XML tree."""
    lookup = self._root.track_change
    return lookup(change_id_ref)
|
|
294
|
+
|
|
295
|
+
@property
def track_change_authors(self) -> dict[str, TrackChangeAuthor]:
    """Tracked-change author metadata, as exposed by the XML tree."""
    tree = self._root
    return tree.track_change_authors
|
|
300
|
+
|
|
301
|
+
def track_change_author(
    self, author_id_ref: int | str | None
) -> TrackChangeAuthor | None:
    """Look up one tracked-change author by *author_id_ref* through the XML tree."""
    lookup = self._root.track_change_author
    return lookup(author_id_ref)
|
|
307
|
+
|
|
308
|
+
@property
def memos(self) -> list[HwpxOxmlMemo]:
    """Collect the memos of every section, in section order, into a new list."""
    return [memo for section in self._root.sections for memo in section.memos]
|
|
316
|
+
|
|
317
|
+
def add_memo(
    self,
    text: str = "",
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    memo_shape_id_ref: str | int | None = None,
    memo_id: str | None = None,
    char_pr_id_ref: str | int | None = None,
    attributes: dict[str, str] | None = None,
) -> HwpxOxmlMemo:
    """Create a memo in one section and return it.

    The section is ``section`` when given, otherwise the one at
    ``section_index``, otherwise the last section. The remaining keyword
    arguments are forwarded unchanged to the section's ``add_memo``.

    Raises:
        ValueError: no section was specified and the document has none.
    """
    by_index = section is None and section_index is not None
    host = self._root.sections[section_index] if by_index else section
    if host is None:
        known = self._root.sections
        if not known:
            raise ValueError("document does not contain any sections")
        host = known[-1]
    return host.add_memo(
        text,
        memo_shape_id_ref=memo_shape_id_ref,
        memo_id=memo_id,
        char_pr_id_ref=char_pr_id_ref,
        attributes=attributes,
    )
|
|
343
|
+
|
|
344
|
+
def remove_memo(self, memo: HwpxOxmlMemo) -> None:
    """Delete *memo* by delegating to its own ``remove()`` method."""
    detach = memo.remove
    detach()
|
|
348
|
+
|
|
349
|
+
def attach_memo_field(
    self,
    paragraph: HwpxOxmlParagraph,
    memo: HwpxOxmlMemo,
    *,
    field_id: str | None = None,
    author: str | None = None,
    created: datetime | str | None = None,
    number: int = 1,
    char_pr_id_ref: str | int | None = None,
) -> str:
    """Wrap *paragraph* in a MEMO field pair that references *memo*.

    A run holding a ``fieldBegin`` control is inserted as the first child of
    the paragraph element and a run holding the matching ``fieldEnd`` is
    appended as its last child; the paragraph's section is then marked
    dirty.

    Args:
        paragraph: anchor paragraph; must already belong to a section.
        memo: memo to reference; its group must belong to a section.
        field_id: identifier for the field pair; a random UUID hex string
            is generated when omitted or empty.
        author: author name; falls back to the memo's ``author`` attribute,
            then to an empty string.
        created: creation time; falls back to the memo's ``createDateTime``
            attribute, then to ``datetime.now()``. ``datetime`` values are
            rendered as ``YYYY-MM-DD HH:MM:SS``.
        number: memo number; values below 1 are written as 1.
        char_pr_id_ref: character property id for the generated runs; falls
            back to the paragraph's id, then to the id inferred from the
            memo, then to ``"0"``.

    Returns:
        The field identifier that was written.

    Raises:
        ValueError: the paragraph or the memo is not attached to a section.
    """
    if paragraph.section is None:
        raise ValueError("paragraph must belong to a section before anchoring a memo")
    if memo.group.section is None:
        raise ValueError("memo is not attached to a section")

    stamp_format = "%Y-%m-%d %H:%M:%S"
    field_value = field_id or uuid.uuid4().hex
    author_value = author or memo.attributes.get("author") or ""

    if created is None:
        created_value = memo.attributes.get("createDateTime")
    else:
        created_value = created
    if created_value is None:
        created_value = datetime.now().strftime(stamp_format)
    elif isinstance(created_value, datetime):
        created_value = created_value.strftime(stamp_format)
    else:
        created_value = str(created_value)

    memo_shape_id = memo.memo_shape_id_ref or ""

    # The first non-None candidate wins; later lookups only run when needed.
    char_ref = char_pr_id_ref
    for fallback in (
        lambda: paragraph.char_pr_id_ref,
        lambda: memo._infer_char_pr_id_ref(),
        lambda: "0",
    ):
        if char_ref is not None:
            break
        char_ref = fallback()
    char_ref = str(char_ref)

    host = paragraph.element
    memo_key = memo.id or field_value

    # Opening run: <run><ctrl><fieldBegin type="MEMO">...</fieldBegin></ctrl></run>
    opening_run = host.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
    opening_ctrl = _append_element(opening_run, f"{_HP}ctrl")
    field_begin = _append_element(
        opening_ctrl,
        f"{_HP}fieldBegin",
        {
            "id": field_value,
            "type": "MEMO",
            "editable": "true",
            "dirty": "false",
            "fieldid": field_value,
        },
    )

    parameters = _append_element(field_begin, f"{_HP}parameters", {"count": "5", "name": ""})
    for kind, label, value in (
        ("stringParam", "ID", memo.id or ""),
        ("integerParam", "Number", str(max(1, number))),
        ("stringParam", "CreateDateTime", created_value),
        ("stringParam", "Author", author_value),
        ("stringParam", "MemoShapeID", memo_shape_id),
    ):
        _append_element(parameters, f"{_HP}{kind}", {"name": label}).text = value

    # Nested sub-list with a single paragraph whose text is the memo key.
    sub_list = _append_element(
        field_begin,
        f"{_HP}subList",
        {
            "id": f"memo-field-{memo_key}",
            "textDirection": "HORIZONTAL",
            "lineWrap": "BREAK",
            "vertAlign": "TOP",
        },
    )
    sub_para = _append_element(
        sub_list,
        f"{_HP}p",
        {
            "id": f"memo-field-{memo_key}-p",
            "paraPrIDRef": "0",
            "styleIDRef": "0",
            "pageBreak": "0",
            "columnBreak": "0",
            "merged": "0",
        },
    )
    sub_run = _append_element(sub_para, f"{_HP}run", {"charPrIDRef": char_ref})
    _append_element(sub_run, f"{_HP}t").text = memo_key

    # Closing run: <run><ctrl><fieldEnd/></ctrl></run>
    closing_run = host.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
    closing_ctrl = _append_element(closing_run, f"{_HP}ctrl")
    _append_element(
        closing_ctrl,
        f"{_HP}fieldEnd",
        {"beginIDRef": field_value, "fieldid": field_value},
    )

    host.insert(0, opening_run)
    host.append(closing_run)
    paragraph.section.mark_dirty()

    return field_value
|
|
445
|
+
|
|
446
|
+
def add_memo_with_anchor(
    self,
    text: str = "",
    *,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    paragraph_text: str | None = None,
    memo_shape_id_ref: str | int | None = None,
    memo_id: str | None = None,
    char_pr_id_ref: str | int | None = None,
    attributes: dict[str, str] | None = None,
    field_id: str | None = None,
    author: str | None = None,
    created: datetime | str | None = None,
    number: int = 1,
    anchor_char_pr_id_ref: str | int | None = None,
) -> tuple[HwpxOxmlMemo, HwpxOxmlParagraph, str]:
    """Create a memo and anchor it to a paragraph with a MEMO field.

    The memo is created through :meth:`add_memo`. When *paragraph* is not
    given, a new paragraph is added to the memo's section and used as the
    anchor; when it is given and *paragraph_text* is not ``None``, its text
    is replaced. The field is then written by :meth:`attach_memo_field`.

    Args:
        text: memo text passed to :meth:`add_memo`.
        paragraph: existing paragraph to anchor to, or ``None`` to create one.
        section, section_index: target section for the memo (see
            :meth:`add_memo`).
        paragraph_text: text for the anchor paragraph; ``None`` means an
            empty new paragraph, or leaving an existing one untouched.
        memo_shape_id_ref, memo_id, char_pr_id_ref, attributes: forwarded to
            :meth:`add_memo`.
        field_id, author, created, number: forwarded to
            :meth:`attach_memo_field`.
        anchor_char_pr_id_ref: character property id for the anchor runs;
            when ``None`` or empty, a newly created paragraph uses
            *char_pr_id_ref* instead.

    Returns:
        ``(memo, anchor_paragraph, field_id)``.

    Raises:
        ValueError: *paragraph* is detached from any section, the created
            memo has no section, or the document has no section for the memo.
    """
    # Reject a detached paragraph before mutating the document; otherwise
    # attach_memo_field would fail only after the memo had been added,
    # leaving an orphan memo behind.
    if paragraph is not None and paragraph.section is None:
        raise ValueError("paragraph must belong to a section before anchoring a memo")

    memo = self.add_memo(
        text,
        section=section,
        section_index=section_index,
        memo_shape_id_ref=memo_shape_id_ref,
        memo_id=memo_id,
        char_pr_id_ref=char_pr_id_ref,
        attributes=attributes,
    )

    target_paragraph = paragraph
    if target_paragraph is None:
        memo_section = memo.group.section
        if memo_section is None:
            raise ValueError("memo must belong to a section")
        # An explicit integer 0 is a legitimate id, so only None and the
        # empty string fall back to the memo's char_pr_id_ref (a plain
        # ``or`` would also discard 0).
        if anchor_char_pr_id_ref is None or anchor_char_pr_id_ref == "":
            anchor_char = char_pr_id_ref
        else:
            anchor_char = anchor_char_pr_id_ref
        target_paragraph = self.add_paragraph(
            "" if paragraph_text is None else paragraph_text,
            section=memo_section,
            char_pr_id_ref=anchor_char,
        )
    elif paragraph_text is not None:
        target_paragraph.text = paragraph_text

    field_value = self.attach_memo_field(
        target_paragraph,
        memo,
        field_id=field_id,
        author=author,
        created=created,
        number=number,
        char_pr_id_ref=anchor_char_pr_id_ref,
    )

    return memo, target_paragraph, field_value
|
|
502
|
+
|
|
503
|
+
@property
def paragraphs(self) -> list[HwpxOxmlParagraph]:
    """The ``paragraphs`` list exposed by the underlying XML tree."""
    tree = self._root
    return tree.paragraphs
|
|
507
|
+
|
|
508
|
+
@property
def char_properties(self) -> dict[str, RunStyle]:
    """Character style definitions, as exposed by the XML tree's ``char_properties``."""
    tree = self._root
    return tree.char_properties
|
|
513
|
+
|
|
514
|
+
def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
    """Look up one character style by *char_pr_id_ref* through the XML tree."""
    lookup = self._root.char_property
    return lookup(char_pr_id_ref)
|
|
518
|
+
|
|
519
|
+
def ensure_run_style(
    self,
    *,
    bold: bool = False,
    italic: bool = False,
    underline: bool = False,
    base_char_pr_id: str | int | None = None,
) -> str:
    """Return a ``charPr`` identifier for the requested flags.

    All arguments are forwarded unchanged to the XML tree's
    ``ensure_run_style``.
    """
    request = {
        "bold": bold,
        "italic": italic,
        "underline": underline,
        "base_char_pr_id": base_char_pr_id,
    }
    return self._root.ensure_run_style(**request)
|
|
535
|
+
|
|
536
|
+
def iter_runs(self) -> Iterator[HwpxOxmlRun]:
    """Lazily yield the runs of every paragraph, in paragraph order."""
    for paragraph in self.paragraphs:
        yield from paragraph.runs
|
|
542
|
+
|
|
543
|
+
def find_runs_by_style(
    self,
    *,
    text_color: str | None = None,
    underline_type: str | None = None,
    underline_color: str | None = None,
    char_pr_id_ref: str | int | None = None,
) -> list[HwpxOxmlRun]:
    """Return the runs that satisfy every criterion that was supplied.

    Criteria left as ``None`` are ignored. ``char_pr_id_ref`` is compared as
    a whitespace-stripped string against the run's own reference. The
    style-based criteria compare against ``run.style``; a run without a
    style never matches a style criterion.
    """
    wanted_char = None if char_pr_id_ref is None else str(char_pr_id_ref).strip()
    # (style accessor name, expected value) for each active style criterion.
    style_checks = [
        (accessor, expected)
        for accessor, expected in (
            ("text_color", text_color),
            ("underline_type", underline_type),
            ("underline_color", underline_color),
        )
        if expected is not None
    ]

    found: list[HwpxOxmlRun] = []
    for run in self.iter_runs():
        if wanted_char is not None and (run.char_pr_id_ref or "").strip() != wanted_char:
            continue
        style = run.style
        rejected = any(
            style is None or getattr(style, accessor)() != expected
            for accessor, expected in style_checks
        )
        if not rejected:
            found.append(run)
    return found
|
|
573
|
+
|
|
574
|
+
def replace_text_in_runs(
    self,
    search: str,
    replacement: str,
    *,
    text_color: str | None = None,
    underline_type: str | None = None,
    underline_color: str | None = None,
    char_pr_id_ref: str | int | None = None,
    limit: int | None = None,
) -> int:
    """Replace *search* with *replacement* in the runs selected by the filters.

    Runs are selected with :meth:`find_runs_by_style` and processed in the
    order it returns them. With *limit* set, at most that many replacements
    are made in total.

    Returns:
        The number of replacements performed.

    Raises:
        ValueError: *search* is empty.
    """
    if not search:
        raise ValueError("search must be a non-empty string")

    candidates = self.find_runs_by_style(
        text_color=text_color,
        underline_type=underline_type,
        underline_color=underline_color,
        char_pr_id_ref=char_pr_id_ref,
    )

    total = 0
    for run in candidates:
        if limit is None:
            budget = None
        else:
            budget = limit - total
            if budget <= 0:
                break
        saved_ref = run.char_pr_id_ref
        done = run.replace_text(search, replacement, count=budget)
        if done and saved_ref is not None:
            # Re-apply the formatting reference in case the substitution
            # rewrote the run's XML nodes.
            run.char_pr_id_ref = saved_ref
        total += done
    return total
|
|
618
|
+
|
|
619
|
+
# ------------------------------------------------------------------
|
|
620
|
+
# editing helpers
|
|
621
|
+
def add_paragraph(
    self,
    text: str = "",
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    include_run: bool = True,
    **extra_attrs: str,
) -> HwpxOxmlParagraph:
    """Add a paragraph through the XML tree and return it.

    Every argument is forwarded unchanged to the tree's ``add_paragraph``.
    ``para_pr_id_ref``, ``style_id_ref`` and ``char_pr_id_ref`` override the
    formatting references; any additional keyword arguments are added as
    raw paragraph attributes.
    """
    forwarded = {
        "section": section,
        "section_index": section_index,
        "para_pr_id_ref": para_pr_id_ref,
        "style_id_ref": style_id_ref,
        "char_pr_id_ref": char_pr_id_ref,
        "run_attributes": run_attributes,
        "include_run": include_run,
    }
    return self._root.add_paragraph(text, **forwarded, **extra_attrs)
|
|
651
|
+
|
|
652
|
+
def add_table(
    self,
    rows: int,
    cols: int,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    width: int | None = None,
    height: int | None = None,
    border_fill_id_ref: str | int | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlTable:
    """Add a run-less paragraph and create a *rows* x *cols* table in it.

    When ``border_fill_id_ref`` is ``None`` the id returned by the XML
    tree's ``ensure_basic_border_fill()`` is used. Extra keyword arguments
    are passed to :meth:`add_paragraph` as paragraph attributes.
    """
    if border_fill_id_ref is None:
        fill_ref: str | int | None = self._root.ensure_basic_border_fill()
    else:
        fill_ref = border_fill_id_ref

    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    table_options = {
        "width": width,
        "height": height,
        "border_fill_id_ref": fill_ref,
        "run_attributes": run_attributes,
        "char_pr_id_ref": char_pr_id_ref,
    }
    return host.add_table(rows, cols, **table_options)
|
|
693
|
+
|
|
694
|
+
def add_shape(
    self,
    shape_type: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    attributes: dict[str, str] | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlInlineObject:
    """Add a run-less paragraph and insert a *shape_type* shape into it.

    Extra keyword arguments are passed to :meth:`add_paragraph` as
    paragraph attributes; ``attributes``, ``run_attributes`` and
    ``char_pr_id_ref`` go to the paragraph's ``add_shape``.
    """
    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    shape_options = {
        "attributes": attributes,
        "run_attributes": run_attributes,
        "char_pr_id_ref": char_pr_id_ref,
    }
    return host.add_shape(shape_type, **shape_options)
|
|
725
|
+
|
|
726
|
+
def add_control(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    attributes: dict[str, str] | None = None,
    control_type: str | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlInlineObject:
    """Add a new paragraph and place an inline control object inside it.

    A host paragraph is requested from ``add_paragraph`` with empty text
    and ``include_run=False``; the control is then created by calling
    ``add_control`` on that paragraph.

    Args:
        section: Forwarded to ``add_paragraph``.
        section_index: Forwarded to ``add_paragraph``.
        attributes: Forwarded to the paragraph's ``add_control``.
        control_type: Forwarded to the paragraph's ``add_control``.
        para_pr_id_ref: Forwarded to ``add_paragraph``.
        style_id_ref: Forwarded to ``add_paragraph``.
        char_pr_id_ref: Forwarded to both ``add_paragraph`` and the
            paragraph's ``add_control``.
        run_attributes: Forwarded to the paragraph's ``add_control``.
        **extra_attrs: Additional keyword arguments forwarded to
            ``add_paragraph``.

    Returns:
        The value returned by the paragraph's ``add_control``.
    """
    paragraph_options = {
        "section": section,
        "section_index": section_index,
        "para_pr_id_ref": para_pr_id_ref,
        "style_id_ref": style_id_ref,
        "char_pr_id_ref": char_pr_id_ref,
        "include_run": False,
    }
    # Two separate unpackings (rather than one merged dict) so that a
    # duplicate keyword in extra_attrs still raises TypeError.
    host = self.add_paragraph("", **paragraph_options, **extra_attrs)

    control_options = {
        "attributes": attributes,
        "control_type": control_type,
        "run_attributes": run_attributes,
        "char_pr_id_ref": char_pr_id_ref,
    }
    return host.add_control(**control_options)
|
|
757
|
+
|
|
758
|
+
def set_header_text(
    self,
    text: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> HwpxOxmlSectionHeaderFooter:
    """Set the header text for *page_type* on the selected section.

    Section selection: an explicit *section* wins; otherwise
    *section_index* indexes ``self._root.sections``; otherwise the last
    section of the document is used.

    Args:
        text: Header text, forwarded to the section properties.
        section: Section to modify, if given.
        section_index: Index into the document's sections, used only when
            *section* is ``None``.
        page_type: Forwarded as the ``page_type`` keyword.

    Returns:
        The value returned by ``properties.set_header_text`` of the
        selected section.

    Raises:
        ValueError: If no section was specified and the document has no
            sections.
    """
    if section is None and section_index is not None:
        chosen = self._root.sections[section_index]
    else:
        chosen = section

    if chosen is None:
        # Nothing was specified: fall back to the document's last section.
        if self._root.sections:
            chosen = self._root.sections[-1]
        else:
            raise ValueError("document does not contain any sections")

    return chosen.properties.set_header_text(text, page_type=page_type)
|
|
776
|
+
|
|
777
|
+
def set_footer_text(
    self,
    text: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> HwpxOxmlSectionHeaderFooter:
    """Set the footer text for *page_type* on the selected section.

    Section selection: an explicit *section* wins; otherwise
    *section_index* indexes ``self._root.sections``; otherwise the last
    section of the document is used.

    Args:
        text: Footer text, forwarded to the section properties.
        section: Section to modify, if given.
        section_index: Index into the document's sections, used only when
            *section* is ``None``.
        page_type: Forwarded as the ``page_type`` keyword.

    Returns:
        The value returned by ``properties.set_footer_text`` of the
        selected section.

    Raises:
        ValueError: If no section was specified and the document has no
            sections.
    """
    if section is None and section_index is not None:
        chosen = self._root.sections[section_index]
    else:
        chosen = section

    if chosen is None:
        # Nothing was specified: fall back to the document's last section.
        if self._root.sections:
            chosen = self._root.sections[-1]
        else:
            raise ValueError("document does not contain any sections")

    return chosen.properties.set_footer_text(text, page_type=page_type)
|
|
795
|
+
|
|
796
|
+
def remove_header(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> None:
    """Remove the header for *page_type* from the selected section.

    Section selection: an explicit *section* wins; otherwise
    *section_index* indexes ``self._root.sections``; otherwise the last
    section of the document is used. When no section was specified and
    the document has no sections, the call does nothing.

    Args:
        section: Section to modify, if given.
        section_index: Index into the document's sections, used only when
            *section* is ``None``.
        page_type: Forwarded as the ``page_type`` keyword to
            ``properties.remove_header``.
    """
    if section is None and section_index is not None:
        chosen = self._root.sections[section_index]
    else:
        chosen = section

    if chosen is None:
        if self._root.sections:
            chosen = self._root.sections[-1]
        else:
            # No sections at all: nothing to remove, so return quietly.
            return

    chosen.properties.remove_header(page_type=page_type)
|
|
813
|
+
|
|
814
|
+
def remove_footer(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> None:
    """Remove the footer for *page_type* from the selected section.

    Section selection: an explicit *section* wins; otherwise
    *section_index* indexes ``self._root.sections``; otherwise the last
    section of the document is used. When no section was specified and
    the document has no sections, the call does nothing.

    Args:
        section: Section to modify, if given.
        section_index: Index into the document's sections, used only when
            *section* is ``None``.
        page_type: Forwarded as the ``page_type`` keyword to
            ``properties.remove_footer``.
    """
    if section is None and section_index is not None:
        chosen = self._root.sections[section_index]
    else:
        chosen = section

    if chosen is None:
        if self._root.sections:
            chosen = self._root.sections[-1]
        else:
            # No sections at all: nothing to remove, so return quietly.
            return

    chosen.properties.remove_footer(page_type=page_type)
|
|
831
|
+
|
|
832
|
+
def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
|
|
833
|
+
"""Persist pending changes to *path* and return the same path."""
|
|
834
|
+
|
|
835
|
+
updates = self._root.serialize()
|
|
836
|
+
result = self._package.save(path, updates)
|
|
837
|
+
self._root.reset_dirty()
|
|
838
|
+
return path if result is None else result
|
|
839
|
+
|
|
840
|
+
def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
    """Write pending changes to *stream*.

    The root is serialized, the result is handed to the package's
    ``save`` together with *stream*, and the root's dirty state is reset
    afterwards.

    Returns:
        The value returned by the package's ``save``, or *stream* itself
        when that value is ``None``.
    """
    pending = self._root.serialize()
    outcome = self._package.save(stream, pending)
    self._root.reset_dirty()
    if outcome is None:
        return stream
    return outcome
|
|
847
|
+
|
|
848
|
+
def to_bytes(self) -> bytes:
    """Serialize pending changes and return the archive as ``bytes``.

    The root is serialized and passed to the package's ``save`` with
    ``None`` as the target; the root's dirty state is reset afterwards.

    Raises:
        TypeError: If the package's ``save`` did not return ``bytes``.
    """
    pending = self._root.serialize()
    payload = self._package.save(None, pending)
    # The dirty state is reset before the type check, so it is cleared
    # even when the TypeError below is raised.
    self._root.reset_dirty()
    if not isinstance(payload, bytes):
        raise TypeError("package.save(None) must return bytes")
    return payload
|
|
857
|
+
|
|
858
|
+
@overload
|
|
859
|
+
def save(self, path_or_stream: None = None) -> bytes: ...
|
|
860
|
+
|
|
861
|
+
@overload
|
|
862
|
+
def save(self, path_or_stream: str | PathLike[str]) -> str | PathLike[str]: ...
|
|
863
|
+
|
|
864
|
+
@overload
|
|
865
|
+
def save(self, path_or_stream: BinaryIO) -> BinaryIO: ...
|
|
866
|
+
|
|
867
|
+
def save(
|
|
868
|
+
self,
|
|
869
|
+
path_or_stream: str | PathLike[str] | BinaryIO | None = None,
|
|
870
|
+
) -> str | PathLike[str] | BinaryIO | bytes:
|
|
871
|
+
"""Deprecated compatibility wrapper around save_to_path/save_to_stream/to_bytes.
|
|
872
|
+
|
|
873
|
+
Deprecated:
|
|
874
|
+
``save()``는 하위 호환을 위해 유지되며 향후 제거될 수 있습니다.
|
|
875
|
+
- 경로 저장: ``save_to_path(path)``
|
|
876
|
+
- 스트림 저장: ``save_to_stream(stream)``
|
|
877
|
+
- 바이트 반환: ``to_bytes()``
|
|
878
|
+
"""
|
|
879
|
+
|
|
880
|
+
warnings.warn(
|
|
881
|
+
"HwpxDocument.save()는 deprecated 예정입니다. "
|
|
882
|
+
"save_to_path()/save_to_stream()/to_bytes() 사용을 권장합니다.",
|
|
883
|
+
DeprecationWarning,
|
|
884
|
+
stacklevel=2,
|
|
885
|
+
)
|
|
886
|
+
if path_or_stream is None:
|
|
887
|
+
return self.to_bytes()
|
|
888
|
+
if isinstance(path_or_stream, (str, PathLike)):
|
|
889
|
+
return self.save_to_path(path_or_stream)
|
|
890
|
+
return self.save_to_stream(path_or_stream)
|