python-hwpx 2.1__py3-none-any.whl → 2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hwpx/document.py CHANGED
@@ -1,890 +1,890 @@
1
- """High-level representation of an HWPX document."""
2
-
3
- from __future__ import annotations
4
-
5
- import xml.etree.ElementTree as ET
6
- import io
7
- import warnings
8
- from datetime import datetime
9
- import logging
10
- import uuid
11
-
12
- from os import PathLike
13
- from typing import Any, BinaryIO, Iterator, overload
14
-
15
- from lxml import etree
16
-
17
- from .oxml import (
18
- Bullet,
19
- GenericElement,
20
- HwpxOxmlDocument,
21
- HwpxOxmlHeader,
22
- HwpxOxmlHistory,
23
- HwpxOxmlInlineObject,
24
- HwpxOxmlMasterPage,
25
- HwpxOxmlMemo,
26
- HwpxOxmlParagraph,
27
- HwpxOxmlRun,
28
- HwpxOxmlSection,
29
- HwpxOxmlSectionHeaderFooter,
30
- HwpxOxmlTable,
31
- HwpxOxmlVersion,
32
- MemoShape,
33
- ParagraphProperty,
34
- RunStyle,
35
- Style,
36
- TrackChange,
37
- TrackChangeAuthor,
38
- )
39
- from .opc.package import HwpxPackage
40
- from .templates import blank_document_bytes
41
-
42
- ET.register_namespace("hp", "http://www.hancom.co.kr/hwpml/2011/paragraph")
43
- ET.register_namespace("hs", "http://www.hancom.co.kr/hwpml/2011/section")
44
- ET.register_namespace("hc", "http://www.hancom.co.kr/hwpml/2011/core")
45
- ET.register_namespace("hh", "http://www.hancom.co.kr/hwpml/2011/head")
46
-
47
- _HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
48
- _HP = f"{{{_HP_NS}}}"
49
- _HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
50
- _HH = f"{{{_HH_NS}}}"
51
-
52
- logger = logging.getLogger(__name__)
53
-
54
-
55
- def _append_element(
56
- parent: Any,
57
- tag: str,
58
- attributes: dict[str, str] | None = None,
59
- ) -> Any:
60
- """Create and append a child element that matches *parent*'s element type."""
61
-
62
- child = parent.makeelement(tag, attributes or {})
63
- parent.append(child)
64
- return child
65
-
66
-
67
- class HwpxDocument:
68
- """Provides a user-friendly API for editing HWPX documents."""
69
-
70
- def __init__(
71
- self,
72
- package: HwpxPackage,
73
- root: HwpxOxmlDocument,
74
- *,
75
- managed_resources: tuple[Any, ...] = (),
76
- ):
77
- self._package = package
78
- self._root = root
79
- self._managed_resources = list(managed_resources)
80
- self._closed = False
81
-
82
- def __repr__(self) -> str:
83
- """Return a compact and safe summary of the document state."""
84
-
85
- return (
86
- f"{self.__class__.__name__}("
87
- f"sections={len(self.sections)}, "
88
- f"paragraphs={len(self.paragraphs)}, "
89
- f"headers={len(self.headers)}, "
90
- f"master_pages={len(self.master_pages)}, "
91
- f"histories={len(self.histories)}, "
92
- f"closed={self._closed}"
93
- ")"
94
- )
95
-
96
- # ------------------------------------------------------------------
97
- # construction helpers
98
- @classmethod
99
- def open(
100
- cls,
101
- source: str | PathLike[str] | bytes | BinaryIO,
102
- ) -> "HwpxDocument":
103
- """Open *source* and return a :class:`HwpxDocument` instance.
104
-
105
- Raises:
106
- HwpxStructureError: 필수 파일이나 구조가 올바르지 않은 HWPX를 열 때 발생합니다.
107
- HwpxPackageError: 패키지를 여는 과정에서 일반적인 I/O/포맷 오류가 발생하면 전달됩니다.
108
- """
109
- internal_resources: list[Any] = []
110
- open_source = source
111
- if isinstance(source, bytes):
112
- stream = io.BytesIO(source)
113
- open_source = stream
114
- internal_resources.append(stream)
115
- package = HwpxPackage.open(open_source)
116
- root = HwpxOxmlDocument.from_package(package)
117
- return cls(package, root, managed_resources=tuple(internal_resources))
118
-
119
- @classmethod
120
- def new(cls) -> "HwpxDocument":
121
- """Return a new blank document based on the default skeleton template."""
122
-
123
- return cls.open(blank_document_bytes())
124
-
125
- @classmethod
126
- def from_package(cls, package: HwpxPackage) -> "HwpxDocument":
127
- """Create a document backed by an existing :class:`HwpxPackage`.
128
-
129
- Args:
130
- package: :class:`hwpx.opc.package.HwpxPackage` 인스턴스.
131
- """
132
- root = HwpxOxmlDocument.from_package(package)
133
- return cls(package, root)
134
-
135
- def __enter__(self) -> "HwpxDocument":
136
- """컨텍스트 매니저 진입 시 현재 문서 인스턴스를 반환합니다."""
137
-
138
- return self
139
-
140
- def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
141
- """예외 발생 여부와 무관하게 내부 자원을 안전하게 정리합니다."""
142
-
143
- self.close()
144
- return False
145
-
146
- def close(self) -> None:
147
- """문서가 관리하는 내부 패키지/스트림 자원을 정리합니다.
148
-
149
- 정리 정책:
150
- - ``flush()`` 가능한 자원은 먼저 flush를 시도합니다.
151
- - ``close()`` 가능한 자원은 flush 이후 close를 시도합니다.
152
- - flush/close 중 발생한 예외는 로깅하고 무시하여 정리 루틴을 계속 진행합니다.
153
- - 같은 문서에서 ``close()``를 여러 번 호출해도 안전합니다.
154
- """
155
-
156
- if self._closed:
157
- return
158
-
159
- self._flush_resource(self._package)
160
- for resource in self._managed_resources:
161
- self._flush_resource(resource)
162
-
163
- self._close_resource(self._package)
164
- for resource in self._managed_resources:
165
- self._close_resource(resource)
166
-
167
- self._managed_resources.clear()
168
- self._closed = True
169
-
170
- @staticmethod
171
- def _flush_resource(resource: Any) -> None:
172
- flush = getattr(resource, "flush", None)
173
- if not callable(flush):
174
- return
175
- try:
176
- flush()
177
- except Exception:
178
- logger.debug("자원 flush 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
179
-
180
- @staticmethod
181
- def _close_resource(resource: Any) -> None:
182
- close = getattr(resource, "close", None)
183
- if not callable(close):
184
- return
185
- try:
186
- close()
187
- except Exception:
188
- logger.debug("자원 close 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
189
-
190
- # ------------------------------------------------------------------
191
- # properties exposing document content
192
- @property
193
- def package(self) -> HwpxPackage:
194
- """Return the :class:`HwpxPackage` backing this document."""
195
- return self._package
196
-
197
- @property
198
- def oxml(self) -> HwpxOxmlDocument:
199
- """Return the low-level XML object tree representing the document."""
200
- return self._root
201
-
202
- @property
203
- def sections(self) -> list[HwpxOxmlSection]:
204
- """Return the sections contained in the document."""
205
- return self._root.sections
206
-
207
- @property
208
- def headers(self) -> list[HwpxOxmlHeader]:
209
- """Return the header parts referenced by the document."""
210
- return self._root.headers
211
-
212
- @property
213
- def master_pages(self) -> list[HwpxOxmlMasterPage]:
214
- """Return the master-page parts declared in the manifest."""
215
- return self._root.master_pages
216
-
217
- @property
218
- def histories(self) -> list[HwpxOxmlHistory]:
219
- """Return document history parts referenced by the manifest."""
220
- return self._root.histories
221
-
222
- @property
223
- def version(self) -> HwpxOxmlVersion | None:
224
- """Return the version metadata part if present."""
225
- return self._root.version
226
-
227
- @property
228
- def border_fills(self) -> dict[str, GenericElement]:
229
- """Return border fill definitions declared in the headers."""
230
-
231
- return self._root.border_fills
232
-
233
- def border_fill(self, border_fill_id_ref: int | str | None) -> GenericElement | None:
234
- """Return the border fill definition referenced by *border_fill_id_ref*."""
235
-
236
- return self._root.border_fill(border_fill_id_ref)
237
-
238
- @property
239
- def memo_shapes(self) -> dict[str, MemoShape]:
240
- """Return memo shapes available in the header reference lists."""
241
-
242
- return self._root.memo_shapes
243
-
244
- def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
245
- """Return the memo shape definition referenced by *memo_shape_id_ref*."""
246
-
247
- return self._root.memo_shape(memo_shape_id_ref)
248
-
249
- @property
250
- def bullets(self) -> dict[str, Bullet]:
251
- """Return bullet definitions declared in header reference lists."""
252
-
253
- return self._root.bullets
254
-
255
- def bullet(self, bullet_id_ref: int | str | None) -> Bullet | None:
256
- """Return the bullet definition referenced by *bullet_id_ref*."""
257
-
258
- return self._root.bullet(bullet_id_ref)
259
-
260
- @property
261
- def paragraph_properties(self) -> dict[str, ParagraphProperty]:
262
- """Return paragraph property definitions declared in headers."""
263
-
264
- return self._root.paragraph_properties
265
-
266
- def paragraph_property(
267
- self, para_pr_id_ref: int | str | None
268
- ) -> ParagraphProperty | None:
269
- """Return the paragraph property referenced by *para_pr_id_ref*."""
270
-
271
- return self._root.paragraph_property(para_pr_id_ref)
272
-
273
- @property
274
- def styles(self) -> dict[str, Style]:
275
- """Return style definitions available in the document."""
276
-
277
- return self._root.styles
278
-
279
- def style(self, style_id_ref: int | str | None) -> Style | None:
280
- """Return the style definition referenced by *style_id_ref*."""
281
-
282
- return self._root.style(style_id_ref)
283
-
284
- @property
285
- def track_changes(self) -> dict[str, TrackChange]:
286
- """Return tracked change metadata declared in the headers."""
287
-
288
- return self._root.track_changes
289
-
290
- def track_change(self, change_id_ref: int | str | None) -> TrackChange | None:
291
- """Return tracked change metadata referenced by *change_id_ref*."""
292
-
293
- return self._root.track_change(change_id_ref)
294
-
295
- @property
296
- def track_change_authors(self) -> dict[str, TrackChangeAuthor]:
297
- """Return tracked change author metadata declared in the headers."""
298
-
299
- return self._root.track_change_authors
300
-
301
- def track_change_author(
302
- self, author_id_ref: int | str | None
303
- ) -> TrackChangeAuthor | None:
304
- """Return tracked change author details referenced by *author_id_ref*."""
305
-
306
- return self._root.track_change_author(author_id_ref)
307
-
308
- @property
309
- def memos(self) -> list[HwpxOxmlMemo]:
310
- """Return all memo entries declared in every section."""
311
-
312
- memos: list[HwpxOxmlMemo] = []
313
- for section in self._root.sections:
314
- memos.extend(section.memos)
315
- return memos
316
-
317
- def add_memo(
318
- self,
319
- text: str = "",
320
- *,
321
- section: HwpxOxmlSection | None = None,
322
- section_index: int | None = None,
323
- memo_shape_id_ref: str | int | None = None,
324
- memo_id: str | None = None,
325
- char_pr_id_ref: str | int | None = None,
326
- attributes: dict[str, str] | None = None,
327
- ) -> HwpxOxmlMemo:
328
- """Create a memo entry inside *section* (or the last section by default)."""
329
-
330
- if section is None and section_index is not None:
331
- section = self._root.sections[section_index]
332
- if section is None:
333
- if not self._root.sections:
334
- raise ValueError("document does not contain any sections")
335
- section = self._root.sections[-1]
336
- return section.add_memo(
337
- text,
338
- memo_shape_id_ref=memo_shape_id_ref,
339
- memo_id=memo_id,
340
- char_pr_id_ref=char_pr_id_ref,
341
- attributes=attributes,
342
- )
343
-
344
- def remove_memo(self, memo: HwpxOxmlMemo) -> None:
345
- """Remove *memo* from the section it belongs to."""
346
-
347
- memo.remove()
348
-
349
- def attach_memo_field(
350
- self,
351
- paragraph: HwpxOxmlParagraph,
352
- memo: HwpxOxmlMemo,
353
- *,
354
- field_id: str | None = None,
355
- author: str | None = None,
356
- created: datetime | str | None = None,
357
- number: int = 1,
358
- char_pr_id_ref: str | int | None = None,
359
- ) -> str:
360
- """Attach a MEMO field control to *paragraph* so Hangul shows *memo*."""
361
-
362
- if paragraph.section is None:
363
- raise ValueError("paragraph must belong to a section before anchoring a memo")
364
- if memo.group.section is None:
365
- raise ValueError("memo is not attached to a section")
366
-
367
- field_value = field_id or uuid.uuid4().hex
368
- author_value = author or memo.attributes.get("author") or ""
369
-
370
- created_value = created if created is not None else memo.attributes.get("createDateTime")
371
- if isinstance(created_value, datetime):
372
- created_value = created_value.strftime("%Y-%m-%d %H:%M:%S")
373
- elif created_value is None:
374
- created_value = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
375
- else:
376
- created_value = str(created_value)
377
-
378
- memo_shape_id = memo.memo_shape_id_ref or ""
379
-
380
- char_ref = char_pr_id_ref
381
- if char_ref is None:
382
- char_ref = paragraph.char_pr_id_ref
383
- if char_ref is None:
384
- char_ref = memo._infer_char_pr_id_ref()
385
- if char_ref is None:
386
- char_ref = "0"
387
- char_ref = str(char_ref)
388
-
389
- paragraph_element = paragraph.element
390
- run_begin = paragraph_element.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
391
- ctrl_begin = _append_element(run_begin, f"{_HP}ctrl")
392
- field_begin = _append_element(
393
- ctrl_begin,
394
- f"{_HP}fieldBegin",
395
- {
396
- "id": field_value,
397
- "type": "MEMO",
398
- "editable": "true",
399
- "dirty": "false",
400
- "fieldid": field_value,
401
- },
402
- )
403
-
404
- parameters = _append_element(field_begin, f"{_HP}parameters", {"count": "5", "name": ""})
405
- _append_element(parameters, f"{_HP}stringParam", {"name": "ID"}).text = memo.id or ""
406
- _append_element(parameters, f"{_HP}integerParam", {"name": "Number"}).text = str(max(1, number))
407
- _append_element(parameters, f"{_HP}stringParam", {"name": "CreateDateTime"}).text = created_value
408
- _append_element(parameters, f"{_HP}stringParam", {"name": "Author"}).text = author_value
409
- _append_element(parameters, f"{_HP}stringParam", {"name": "MemoShapeID"}).text = memo_shape_id
410
-
411
- sub_list = _append_element(
412
- field_begin,
413
- f"{_HP}subList",
414
- {
415
- "id": f"memo-field-{memo.id or field_value}",
416
- "textDirection": "HORIZONTAL",
417
- "lineWrap": "BREAK",
418
- "vertAlign": "TOP",
419
- },
420
- )
421
- sub_para = _append_element(
422
- sub_list,
423
- f"{_HP}p",
424
- {
425
- "id": f"memo-field-{(memo.id or field_value)}-p",
426
- "paraPrIDRef": "0",
427
- "styleIDRef": "0",
428
- "pageBreak": "0",
429
- "columnBreak": "0",
430
- "merged": "0",
431
- },
432
- )
433
- sub_run = _append_element(sub_para, f"{_HP}run", {"charPrIDRef": char_ref})
434
- _append_element(sub_run, f"{_HP}t").text = memo.id or field_value
435
-
436
- run_end = paragraph_element.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
437
- ctrl_end = _append_element(run_end, f"{_HP}ctrl")
438
- _append_element(ctrl_end, f"{_HP}fieldEnd", {"beginIDRef": field_value, "fieldid": field_value})
439
-
440
- paragraph.element.insert(0, run_begin)
441
- paragraph.element.append(run_end)
442
- paragraph.section.mark_dirty()
443
-
444
- return field_value
445
-
446
- def add_memo_with_anchor(
447
- self,
448
- text: str = "",
449
- *,
450
- paragraph: HwpxOxmlParagraph | None = None,
451
- section: HwpxOxmlSection | None = None,
452
- section_index: int | None = None,
453
- paragraph_text: str | None = None,
454
- memo_shape_id_ref: str | int | None = None,
455
- memo_id: str | None = None,
456
- char_pr_id_ref: str | int | None = None,
457
- attributes: dict[str, str] | None = None,
458
- field_id: str | None = None,
459
- author: str | None = None,
460
- created: datetime | str | None = None,
461
- number: int = 1,
462
- anchor_char_pr_id_ref: str | int | None = None,
463
- ) -> tuple[HwpxOxmlMemo, HwpxOxmlParagraph, str]:
464
- """Create a memo and ensure it is visible by anchoring a MEMO field."""
465
-
466
- memo = self.add_memo(
467
- text,
468
- section=section,
469
- section_index=section_index,
470
- memo_shape_id_ref=memo_shape_id_ref,
471
- memo_id=memo_id,
472
- char_pr_id_ref=char_pr_id_ref,
473
- attributes=attributes,
474
- )
475
-
476
- target_paragraph = paragraph
477
- if target_paragraph is None:
478
- memo_section = memo.group.section
479
- if memo_section is None:
480
- raise ValueError("memo must belong to a section")
481
- paragraph_value = "" if paragraph_text is None else paragraph_text
482
- anchor_char = anchor_char_pr_id_ref or char_pr_id_ref
483
- target_paragraph = self.add_paragraph(
484
- paragraph_value,
485
- section=memo_section,
486
- char_pr_id_ref=anchor_char,
487
- )
488
- elif paragraph_text is not None:
489
- target_paragraph.text = paragraph_text
490
-
491
- field_value = self.attach_memo_field(
492
- target_paragraph,
493
- memo,
494
- field_id=field_id,
495
- author=author,
496
- created=created,
497
- number=number,
498
- char_pr_id_ref=anchor_char_pr_id_ref,
499
- )
500
-
501
- return memo, target_paragraph, field_value
502
-
503
- @property
504
- def paragraphs(self) -> list[HwpxOxmlParagraph]:
505
- """Return all paragraphs across every section."""
506
- return self._root.paragraphs
507
-
508
- @property
509
- def char_properties(self) -> dict[str, RunStyle]:
510
- """Return the resolved character style definitions available to the document."""
511
-
512
- return self._root.char_properties
513
-
514
- def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
515
- """Return the style referenced by *char_pr_id_ref* if known."""
516
-
517
- return self._root.char_property(char_pr_id_ref)
518
-
519
- def ensure_run_style(
520
- self,
521
- *,
522
- bold: bool = False,
523
- italic: bool = False,
524
- underline: bool = False,
525
- base_char_pr_id: str | int | None = None,
526
- ) -> str:
527
- """Return a ``charPr`` identifier matching the requested flags."""
528
-
529
- return self._root.ensure_run_style(
530
- bold=bold,
531
- italic=italic,
532
- underline=underline,
533
- base_char_pr_id=base_char_pr_id,
534
- )
535
-
536
- def iter_runs(self) -> Iterator[HwpxOxmlRun]:
537
- """Yield every run element contained in the document."""
538
-
539
- for paragraph in self.paragraphs:
540
- for run in paragraph.runs:
541
- yield run
542
-
543
- def find_runs_by_style(
544
- self,
545
- *,
546
- text_color: str | None = None,
547
- underline_type: str | None = None,
548
- underline_color: str | None = None,
549
- char_pr_id_ref: str | int | None = None,
550
- ) -> list[HwpxOxmlRun]:
551
- """Return runs matching the requested style criteria."""
552
-
553
- matches: list[HwpxOxmlRun] = []
554
- target_char = str(char_pr_id_ref).strip() if char_pr_id_ref is not None else None
555
-
556
- for run in self.iter_runs():
557
- if target_char is not None:
558
- run_char = (run.char_pr_id_ref or "").strip()
559
- if run_char != target_char:
560
- continue
561
- style = run.style
562
- if text_color is not None:
563
- if style is None or style.text_color() != text_color:
564
- continue
565
- if underline_type is not None:
566
- if style is None or style.underline_type() != underline_type:
567
- continue
568
- if underline_color is not None:
569
- if style is None or style.underline_color() != underline_color:
570
- continue
571
- matches.append(run)
572
- return matches
573
-
574
- def replace_text_in_runs(
575
- self,
576
- search: str,
577
- replacement: str,
578
- *,
579
- text_color: str | None = None,
580
- underline_type: str | None = None,
581
- underline_color: str | None = None,
582
- char_pr_id_ref: str | int | None = None,
583
- limit: int | None = None,
584
- ) -> int:
585
- """Replace occurrences of *search* in runs matching the provided style filters."""
586
-
587
- if not search:
588
- raise ValueError("search must be a non-empty string")
589
-
590
- replacements = 0
591
- runs = self.find_runs_by_style(
592
- text_color=text_color,
593
- underline_type=underline_type,
594
- underline_color=underline_color,
595
- char_pr_id_ref=char_pr_id_ref,
596
- )
597
-
598
- for run in runs:
599
- remaining = None
600
- if limit is not None:
601
- remaining = limit - replacements
602
- if remaining <= 0:
603
- break
604
- original_char_pr = run.char_pr_id_ref
605
- replaced_here = run.replace_text(
606
- search,
607
- replacement,
608
- count=remaining,
609
- )
610
- if replaced_here and original_char_pr is not None:
611
- # Ensure the run retains its original formatting reference even
612
- # if XML nodes were rewritten during substitution.
613
- run.char_pr_id_ref = original_char_pr
614
- replacements += replaced_here
615
- if limit is not None and replacements >= limit:
616
- break
617
- return replacements
618
-
619
- # ------------------------------------------------------------------
620
- # editing helpers
621
- def add_paragraph(
622
- self,
623
- text: str = "",
624
- *,
625
- section: HwpxOxmlSection | None = None,
626
- section_index: int | None = None,
627
- para_pr_id_ref: str | int | None = None,
628
- style_id_ref: str | int | None = None,
629
- char_pr_id_ref: str | int | None = None,
630
- run_attributes: dict[str, str] | None = None,
631
- include_run: bool = True,
632
- **extra_attrs: str,
633
- ) -> HwpxOxmlParagraph:
634
- """Append a paragraph to the document and return it.
635
-
636
- Formatting references may be overridden via ``para_pr_id_ref``,
637
- ``style_id_ref`` and ``char_pr_id_ref``. Any additional keyword
638
- arguments are added as raw paragraph attributes.
639
- """
640
- return self._root.add_paragraph(
641
- text,
642
- section=section,
643
- section_index=section_index,
644
- para_pr_id_ref=para_pr_id_ref,
645
- style_id_ref=style_id_ref,
646
- char_pr_id_ref=char_pr_id_ref,
647
- run_attributes=run_attributes,
648
- include_run=include_run,
649
- **extra_attrs,
650
- )
651
-
652
- def add_table(
653
- self,
654
- rows: int,
655
- cols: int,
656
- *,
657
- section: HwpxOxmlSection | None = None,
658
- section_index: int | None = None,
659
- width: int | None = None,
660
- height: int | None = None,
661
- border_fill_id_ref: str | int | None = None,
662
- para_pr_id_ref: str | int | None = None,
663
- style_id_ref: str | int | None = None,
664
- char_pr_id_ref: str | int | None = None,
665
- run_attributes: dict[str, str] | None = None,
666
- **extra_attrs: str,
667
- ) -> HwpxOxmlTable:
668
- """Create a table in a new paragraph and return it."""
669
-
670
- resolved_border_fill: str | int | None = border_fill_id_ref
671
- if resolved_border_fill is None:
672
- resolved_border_fill = self._root.ensure_basic_border_fill()
673
-
674
- paragraph = self.add_paragraph(
675
- "",
676
- section=section,
677
- section_index=section_index,
678
- para_pr_id_ref=para_pr_id_ref,
679
- style_id_ref=style_id_ref,
680
- char_pr_id_ref=char_pr_id_ref,
681
- include_run=False,
682
- **extra_attrs,
683
- )
684
- return paragraph.add_table(
685
- rows,
686
- cols,
687
- width=width,
688
- height=height,
689
- border_fill_id_ref=resolved_border_fill,
690
- run_attributes=run_attributes,
691
- char_pr_id_ref=char_pr_id_ref,
692
- )
693
-
694
- def add_shape(
695
- self,
696
- shape_type: str,
697
- *,
698
- section: HwpxOxmlSection | None = None,
699
- section_index: int | None = None,
700
- attributes: dict[str, str] | None = None,
701
- para_pr_id_ref: str | int | None = None,
702
- style_id_ref: str | int | None = None,
703
- char_pr_id_ref: str | int | None = None,
704
- run_attributes: dict[str, str] | None = None,
705
- **extra_attrs: str,
706
- ) -> HwpxOxmlInlineObject:
707
- """Insert an inline shape into a new paragraph."""
708
-
709
- paragraph = self.add_paragraph(
710
- "",
711
- section=section,
712
- section_index=section_index,
713
- para_pr_id_ref=para_pr_id_ref,
714
- style_id_ref=style_id_ref,
715
- char_pr_id_ref=char_pr_id_ref,
716
- include_run=False,
717
- **extra_attrs,
718
- )
719
- return paragraph.add_shape(
720
- shape_type,
721
- attributes=attributes,
722
- run_attributes=run_attributes,
723
- char_pr_id_ref=char_pr_id_ref,
724
- )
725
-
726
- def add_control(
727
- self,
728
- *,
729
- section: HwpxOxmlSection | None = None,
730
- section_index: int | None = None,
731
- attributes: dict[str, str] | None = None,
732
- control_type: str | None = None,
733
- para_pr_id_ref: str | int | None = None,
734
- style_id_ref: str | int | None = None,
735
- char_pr_id_ref: str | int | None = None,
736
- run_attributes: dict[str, str] | None = None,
737
- **extra_attrs: str,
738
- ) -> HwpxOxmlInlineObject:
739
- """Insert a control inline object into a new paragraph."""
740
-
741
- paragraph = self.add_paragraph(
742
- "",
743
- section=section,
744
- section_index=section_index,
745
- para_pr_id_ref=para_pr_id_ref,
746
- style_id_ref=style_id_ref,
747
- char_pr_id_ref=char_pr_id_ref,
748
- include_run=False,
749
- **extra_attrs,
750
- )
751
- return paragraph.add_control(
752
- attributes=attributes,
753
- control_type=control_type,
754
- run_attributes=run_attributes,
755
- char_pr_id_ref=char_pr_id_ref,
756
- )
757
-
758
- def set_header_text(
759
- self,
760
- text: str,
761
- *,
762
- section: HwpxOxmlSection | None = None,
763
- section_index: int | None = None,
764
- page_type: str = "BOTH",
765
- ) -> HwpxOxmlSectionHeaderFooter:
766
- """Ensure the requested section contains a header for *page_type* and set its text."""
767
-
768
- target_section = section
769
- if target_section is None and section_index is not None:
770
- target_section = self._root.sections[section_index]
771
- if target_section is None:
772
- if not self._root.sections:
773
- raise ValueError("document does not contain any sections")
774
- target_section = self._root.sections[-1]
775
- return target_section.properties.set_header_text(text, page_type=page_type)
776
-
777
- def set_footer_text(
778
- self,
779
- text: str,
780
- *,
781
- section: HwpxOxmlSection | None = None,
782
- section_index: int | None = None,
783
- page_type: str = "BOTH",
784
- ) -> HwpxOxmlSectionHeaderFooter:
785
- """Ensure the requested section contains a footer for *page_type* and set its text."""
786
-
787
- target_section = section
788
- if target_section is None and section_index is not None:
789
- target_section = self._root.sections[section_index]
790
- if target_section is None:
791
- if not self._root.sections:
792
- raise ValueError("document does not contain any sections")
793
- target_section = self._root.sections[-1]
794
- return target_section.properties.set_footer_text(text, page_type=page_type)
795
-
796
- def remove_header(
797
- self,
798
- *,
799
- section: HwpxOxmlSection | None = None,
800
- section_index: int | None = None,
801
- page_type: str = "BOTH",
802
- ) -> None:
803
- """Remove the header linked to *page_type* from the requested section if present."""
804
-
805
- target_section = section
806
- if target_section is None and section_index is not None:
807
- target_section = self._root.sections[section_index]
808
- if target_section is None:
809
- if not self._root.sections:
810
- return
811
- target_section = self._root.sections[-1]
812
- target_section.properties.remove_header(page_type=page_type)
813
-
814
- def remove_footer(
815
- self,
816
- *,
817
- section: HwpxOxmlSection | None = None,
818
- section_index: int | None = None,
819
- page_type: str = "BOTH",
820
- ) -> None:
821
- """Remove the footer linked to *page_type* from the requested section if present."""
822
-
823
- target_section = section
824
- if target_section is None and section_index is not None:
825
- target_section = self._root.sections[section_index]
826
- if target_section is None:
827
- if not self._root.sections:
828
- return
829
- target_section = self._root.sections[-1]
830
- target_section.properties.remove_footer(page_type=page_type)
831
-
832
- def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
833
- """Persist pending changes to *path* and return the same path."""
834
-
835
- updates = self._root.serialize()
836
- result = self._package.save(path, updates)
837
- self._root.reset_dirty()
838
- return path if result is None else result
839
-
840
- def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
841
- """Persist pending changes to *stream* and return the same stream."""
842
-
843
- updates = self._root.serialize()
844
- result = self._package.save(stream, updates)
845
- self._root.reset_dirty()
846
- return stream if result is None else result
847
-
848
- def to_bytes(self) -> bytes:
849
- """Serialize pending changes and return the HWPX archive as bytes."""
850
-
851
- updates = self._root.serialize()
852
- result = self._package.save(None, updates)
853
- self._root.reset_dirty()
854
- if isinstance(result, bytes):
855
- return result
856
- raise TypeError("package.save(None) must return bytes")
857
-
858
- @overload
859
- def save(self, path_or_stream: None = None) -> bytes: ...
860
-
861
- @overload
862
- def save(self, path_or_stream: str | PathLike[str]) -> str | PathLike[str]: ...
863
-
864
- @overload
865
- def save(self, path_or_stream: BinaryIO) -> BinaryIO: ...
866
-
867
- def save(
868
- self,
869
- path_or_stream: str | PathLike[str] | BinaryIO | None = None,
870
- ) -> str | PathLike[str] | BinaryIO | bytes:
871
- """Deprecated compatibility wrapper around save_to_path/save_to_stream/to_bytes.
872
-
873
- Deprecated:
874
- ``save()``는 하위 호환을 위해 유지되며 향후 제거될 수 있습니다.
875
- - 경로 저장: ``save_to_path(path)``
876
- - 스트림 저장: ``save_to_stream(stream)``
877
- - 바이트 반환: ``to_bytes()``
878
- """
879
-
880
- warnings.warn(
881
- "HwpxDocument.save()는 deprecated 예정입니다. "
882
- "save_to_path()/save_to_stream()/to_bytes() 사용을 권장합니다.",
883
- DeprecationWarning,
884
- stacklevel=2,
885
- )
886
- if path_or_stream is None:
887
- return self.to_bytes()
888
- if isinstance(path_or_stream, (str, PathLike)):
889
- return self.save_to_path(path_or_stream)
890
- return self.save_to_stream(path_or_stream)
1
+ """High-level representation of an HWPX document."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import xml.etree.ElementTree as ET
6
+ import io
7
+ import warnings
8
+ from datetime import datetime
9
+ import logging
10
+ import uuid
11
+
12
+ from os import PathLike
13
+ from typing import Any, BinaryIO, Iterator, overload
14
+
15
+ from lxml import etree
16
+
17
+ from .oxml import (
18
+ Bullet,
19
+ GenericElement,
20
+ HwpxOxmlDocument,
21
+ HwpxOxmlHeader,
22
+ HwpxOxmlHistory,
23
+ HwpxOxmlInlineObject,
24
+ HwpxOxmlMasterPage,
25
+ HwpxOxmlMemo,
26
+ HwpxOxmlParagraph,
27
+ HwpxOxmlRun,
28
+ HwpxOxmlSection,
29
+ HwpxOxmlSectionHeaderFooter,
30
+ HwpxOxmlTable,
31
+ HwpxOxmlVersion,
32
+ MemoShape,
33
+ ParagraphProperty,
34
+ RunStyle,
35
+ Style,
36
+ TrackChange,
37
+ TrackChangeAuthor,
38
+ )
39
+ from .opc.package import HwpxPackage
40
+ from .templates import blank_document_bytes
41
+
42
+ ET.register_namespace("hp", "http://www.hancom.co.kr/hwpml/2011/paragraph")
43
+ ET.register_namespace("hs", "http://www.hancom.co.kr/hwpml/2011/section")
44
+ ET.register_namespace("hc", "http://www.hancom.co.kr/hwpml/2011/core")
45
+ ET.register_namespace("hh", "http://www.hancom.co.kr/hwpml/2011/head")
46
+
47
+ _HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
48
+ _HP = f"{{{_HP_NS}}}"
49
+ _HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
50
+ _HH = f"{{{_HH_NS}}}"
51
+
52
+ logger = logging.getLogger(__name__)
53
+
54
+
55
+ def _append_element(
56
+ parent: Any,
57
+ tag: str,
58
+ attributes: dict[str, str] | None = None,
59
+ ) -> Any:
60
+ """Create and append a child element that matches *parent*'s element type."""
61
+
62
+ child = parent.makeelement(tag, attributes or {})
63
+ parent.append(child)
64
+ return child
65
+
66
+
67
class HwpxDocument:
    """Provides a user-friendly API for editing HWPX documents.

    A document pairs an :class:`HwpxPackage` (the archive being edited) with
    an :class:`HwpxOxmlDocument` (the parsed XML object tree).  Most methods
    are thin wrappers that forward to the object tree; saving serializes the
    tree and hands the result to the package.
    """

    def __init__(
        self,
        package: HwpxPackage,
        root: HwpxOxmlDocument,
        *,
        managed_resources: tuple[Any, ...] = (),
    ):
        """Bind the document to *package* and its parsed *root* tree.

        Args:
            package: Package backing this document.
            root: Object tree parsed from *package*.
            managed_resources: Extra resources (for example streams created
                on the caller's behalf) that :meth:`close` flushes and closes.
        """
        self._package = package
        self._root = root
        # Copied into a list so close() can clear it without touching the
        # caller's tuple.
        self._managed_resources = list(managed_resources)
        self._closed = False

    def __repr__(self) -> str:
        """Return a compact and safe summary of the document state."""

        return (
            f"{self.__class__.__name__}("
            f"sections={len(self.sections)}, "
            f"paragraphs={len(self.paragraphs)}, "
            f"headers={len(self.headers)}, "
            f"master_pages={len(self.master_pages)}, "
            f"histories={len(self.histories)}, "
            f"closed={self._closed}"
            ")"
        )

    # ------------------------------------------------------------------
    # construction helpers
    @classmethod
    def open(
        cls,
        source: str | PathLike[str] | bytes | BinaryIO,
    ) -> "HwpxDocument":
        """Open *source* and return a :class:`HwpxDocument` instance.

        Raw ``bytes`` are wrapped in an internal :class:`io.BytesIO` that the
        returned document owns and closes in :meth:`close`.

        Raises:
            HwpxStructureError: Raised when the HWPX is missing required
                files or has an invalid structure.
            HwpxPackageError: Propagated when a general I/O or format error
                occurs while opening the package.
        """
        internal_resources: list[Any] = []
        open_source = source
        if isinstance(source, bytes):
            stream = io.BytesIO(source)
            open_source = stream
            internal_resources.append(stream)
        try:
            package = HwpxPackage.open(open_source)
            root = HwpxOxmlDocument.from_package(package)
        except Exception:
            # No document will exist to own the stream we created, so release
            # it here before propagating the failure.
            for resource in internal_resources:
                cls._close_resource(resource)
            raise
        return cls(package, root, managed_resources=tuple(internal_resources))

    @classmethod
    def new(cls) -> "HwpxDocument":
        """Return a new blank document based on the default skeleton template."""

        return cls.open(blank_document_bytes())

    @classmethod
    def from_package(cls, package: HwpxPackage) -> "HwpxDocument":
        """Create a document backed by an existing :class:`HwpxPackage`.

        Args:
            package: A :class:`hwpx.opc.package.HwpxPackage` instance.
        """
        root = HwpxOxmlDocument.from_package(package)
        return cls(package, root)

    def __enter__(self) -> "HwpxDocument":
        """Return this document instance when entering a ``with`` block."""

        return self

    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
        """Release internal resources whether or not an exception occurred.

        Returns ``False`` so any in-flight exception keeps propagating.
        """

        self.close()
        return False

    def close(self) -> None:
        """Release the package and stream resources managed by the document.

        Cleanup policy:
        - Resources exposing ``flush()`` are flushed first.
        - Resources exposing ``close()`` are closed after the flush pass.
        - Exceptions raised while flushing/closing are logged and ignored so
          the remaining cleanup still runs.
        - Calling ``close()`` more than once on the same document is safe.
        """

        if self._closed:
            return

        self._flush_resource(self._package)
        for resource in self._managed_resources:
            self._flush_resource(resource)

        self._close_resource(self._package)
        for resource in self._managed_resources:
            self._close_resource(resource)

        self._managed_resources.clear()
        self._closed = True

    @staticmethod
    def _flush_resource(resource: Any) -> None:
        """Call ``resource.flush()`` if it exists, logging any failure."""
        flush = getattr(resource, "flush", None)
        if not callable(flush):
            return
        try:
            flush()
        except Exception:
            # Best-effort cleanup: a failing flush must not abort close().
            logger.debug("자원 flush 중 예외를 무시합니다: resource=%r", resource, exc_info=True)

    @staticmethod
    def _close_resource(resource: Any) -> None:
        """Call ``resource.close()`` if it exists, logging any failure."""
        close = getattr(resource, "close", None)
        if not callable(close):
            return
        try:
            close()
        except Exception:
            # Best-effort cleanup: a failing close must not abort close().
            logger.debug("자원 close 중 예외를 무시합니다: resource=%r", resource, exc_info=True)

    def _resolve_section(
        self,
        section: HwpxOxmlSection | None,
        section_index: int | None,
        *,
        required: bool = True,
    ) -> HwpxOxmlSection | None:
        """Pick the section an editing helper should operate on.

        Precedence: an explicit *section*, then ``sections[section_index]``,
        then the last section of the document.

        Args:
            section: Explicit target section, if any.
            section_index: Index into the document's sections, used only when
                *section* is ``None``.
            required: When the document has no sections and nothing was
                selected, raise ``ValueError`` if true, else return ``None``.

        Raises:
            ValueError: No section could be resolved and *required* is true.
            IndexError: *section_index* is out of range.
        """
        if section is None and section_index is not None:
            section = self._root.sections[section_index]
        if section is not None:
            return section
        sections = self._root.sections
        if not sections:
            if required:
                raise ValueError("document does not contain any sections")
            return None
        return sections[-1]

    # ------------------------------------------------------------------
    # properties exposing document content
    @property
    def package(self) -> HwpxPackage:
        """Return the :class:`HwpxPackage` backing this document."""
        return self._package

    @property
    def oxml(self) -> HwpxOxmlDocument:
        """Return the low-level XML object tree representing the document."""
        return self._root

    @property
    def sections(self) -> list[HwpxOxmlSection]:
        """Return the sections contained in the document."""
        return self._root.sections

    @property
    def headers(self) -> list[HwpxOxmlHeader]:
        """Return the header parts referenced by the document."""
        return self._root.headers

    @property
    def master_pages(self) -> list[HwpxOxmlMasterPage]:
        """Return the master-page parts declared in the manifest."""
        return self._root.master_pages

    @property
    def histories(self) -> list[HwpxOxmlHistory]:
        """Return document history parts referenced by the manifest."""
        return self._root.histories

    @property
    def version(self) -> HwpxOxmlVersion | None:
        """Return the version metadata part if present."""
        return self._root.version

    @property
    def border_fills(self) -> dict[str, GenericElement]:
        """Return border fill definitions declared in the headers."""

        return self._root.border_fills

    def border_fill(self, border_fill_id_ref: int | str | None) -> GenericElement | None:
        """Return the border fill definition referenced by *border_fill_id_ref*."""

        return self._root.border_fill(border_fill_id_ref)

    @property
    def memo_shapes(self) -> dict[str, MemoShape]:
        """Return memo shapes available in the header reference lists."""

        return self._root.memo_shapes

    def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
        """Return the memo shape definition referenced by *memo_shape_id_ref*."""

        return self._root.memo_shape(memo_shape_id_ref)

    @property
    def bullets(self) -> dict[str, Bullet]:
        """Return bullet definitions declared in header reference lists."""

        return self._root.bullets

    def bullet(self, bullet_id_ref: int | str | None) -> Bullet | None:
        """Return the bullet definition referenced by *bullet_id_ref*."""

        return self._root.bullet(bullet_id_ref)

    @property
    def paragraph_properties(self) -> dict[str, ParagraphProperty]:
        """Return paragraph property definitions declared in headers."""

        return self._root.paragraph_properties

    def paragraph_property(
        self, para_pr_id_ref: int | str | None
    ) -> ParagraphProperty | None:
        """Return the paragraph property referenced by *para_pr_id_ref*."""

        return self._root.paragraph_property(para_pr_id_ref)

    @property
    def styles(self) -> dict[str, Style]:
        """Return style definitions available in the document."""

        return self._root.styles

    def style(self, style_id_ref: int | str | None) -> Style | None:
        """Return the style definition referenced by *style_id_ref*."""

        return self._root.style(style_id_ref)

    @property
    def track_changes(self) -> dict[str, TrackChange]:
        """Return tracked change metadata declared in the headers."""

        return self._root.track_changes

    def track_change(self, change_id_ref: int | str | None) -> TrackChange | None:
        """Return tracked change metadata referenced by *change_id_ref*."""

        return self._root.track_change(change_id_ref)

    @property
    def track_change_authors(self) -> dict[str, TrackChangeAuthor]:
        """Return tracked change author metadata declared in the headers."""

        return self._root.track_change_authors

    def track_change_author(
        self, author_id_ref: int | str | None
    ) -> TrackChangeAuthor | None:
        """Return tracked change author details referenced by *author_id_ref*."""

        return self._root.track_change_author(author_id_ref)

    @property
    def memos(self) -> list[HwpxOxmlMemo]:
        """Return all memo entries declared in every section."""

        memos: list[HwpxOxmlMemo] = []
        for section in self._root.sections:
            memos.extend(section.memos)
        return memos

    def add_memo(
        self,
        text: str = "",
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        memo_shape_id_ref: str | int | None = None,
        memo_id: str | None = None,
        char_pr_id_ref: str | int | None = None,
        attributes: dict[str, str] | None = None,
    ) -> HwpxOxmlMemo:
        """Create a memo entry inside *section* (or the last section by default).

        Raises:
            ValueError: The document does not contain any sections.
        """

        target_section = self._resolve_section(section, section_index)
        return target_section.add_memo(
            text,
            memo_shape_id_ref=memo_shape_id_ref,
            memo_id=memo_id,
            char_pr_id_ref=char_pr_id_ref,
            attributes=attributes,
        )

    def remove_memo(self, memo: HwpxOxmlMemo) -> None:
        """Remove *memo* from the section it belongs to."""

        memo.remove()

    def attach_memo_field(
        self,
        paragraph: HwpxOxmlParagraph,
        memo: HwpxOxmlMemo,
        *,
        field_id: str | None = None,
        author: str | None = None,
        created: datetime | str | None = None,
        number: int = 1,
        char_pr_id_ref: str | int | None = None,
    ) -> str:
        """Attach a MEMO field control to *paragraph* so Hangul shows *memo*.

        A run holding a ``fieldBegin`` control is inserted as the first child
        of the paragraph element and a run holding the matching ``fieldEnd``
        is appended as the last child; the paragraph's section is then marked
        dirty.

        Args:
            paragraph: Paragraph that anchors the memo.
            memo: Memo the field refers to.
            field_id: Identifier for the field; a random UUID hex string is
                generated when omitted or empty.
            author: Author name; falls back to the memo's ``author``
                attribute, then to an empty string.
            created: Creation timestamp; falls back to the memo's
                ``createDateTime`` attribute, then to the current local time.
                ``datetime`` values are formatted as ``%Y-%m-%d %H:%M:%S``.
            number: Memo number written to the field; values below 1 are
                raised to 1.
            char_pr_id_ref: Character style for the inserted runs; falls back
                to the paragraph's reference, then to the one inferred from
                the memo, then to ``"0"``.

        Returns:
            The field identifier that was written.

        Raises:
            ValueError: *paragraph* or *memo* is not attached to a section.
        """

        if paragraph.section is None:
            raise ValueError("paragraph must belong to a section before anchoring a memo")
        if memo.group.section is None:
            raise ValueError("memo is not attached to a section")

        field_value = field_id or uuid.uuid4().hex
        author_value = author or memo.attributes.get("author") or ""

        created_value = created if created is not None else memo.attributes.get("createDateTime")
        if isinstance(created_value, datetime):
            created_value = created_value.strftime("%Y-%m-%d %H:%M:%S")
        elif created_value is None:
            created_value = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        else:
            created_value = str(created_value)

        memo_shape_id = memo.memo_shape_id_ref or ""

        # Resolve the character style with ``is None`` checks so that an
        # explicit id of 0 is honoured at every step of the fallback chain.
        char_ref = char_pr_id_ref
        if char_ref is None:
            char_ref = paragraph.char_pr_id_ref
        if char_ref is None:
            char_ref = memo._infer_char_pr_id_ref()
        if char_ref is None:
            char_ref = "0"
        char_ref = str(char_ref)

        paragraph_element = paragraph.element
        run_begin = paragraph_element.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
        ctrl_begin = _append_element(run_begin, f"{_HP}ctrl")
        field_begin = _append_element(
            ctrl_begin,
            f"{_HP}fieldBegin",
            {
                "id": field_value,
                "type": "MEMO",
                "editable": "true",
                "dirty": "false",
                "fieldid": field_value,
            },
        )

        # ``count`` matches the five parameter children appended below.
        parameters = _append_element(field_begin, f"{_HP}parameters", {"count": "5", "name": ""})
        _append_element(parameters, f"{_HP}stringParam", {"name": "ID"}).text = memo.id or ""
        _append_element(parameters, f"{_HP}integerParam", {"name": "Number"}).text = str(max(1, number))
        _append_element(parameters, f"{_HP}stringParam", {"name": "CreateDateTime"}).text = created_value
        _append_element(parameters, f"{_HP}stringParam", {"name": "Author"}).text = author_value
        _append_element(parameters, f"{_HP}stringParam", {"name": "MemoShapeID"}).text = memo_shape_id

        sub_list = _append_element(
            field_begin,
            f"{_HP}subList",
            {
                "id": f"memo-field-{memo.id or field_value}",
                "textDirection": "HORIZONTAL",
                "lineWrap": "BREAK",
                "vertAlign": "TOP",
            },
        )
        sub_para = _append_element(
            sub_list,
            f"{_HP}p",
            {
                "id": f"memo-field-{(memo.id or field_value)}-p",
                "paraPrIDRef": "0",
                "styleIDRef": "0",
                "pageBreak": "0",
                "columnBreak": "0",
                "merged": "0",
            },
        )
        sub_run = _append_element(sub_para, f"{_HP}run", {"charPrIDRef": char_ref})
        _append_element(sub_run, f"{_HP}t").text = memo.id or field_value

        run_end = paragraph_element.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
        ctrl_end = _append_element(run_end, f"{_HP}ctrl")
        _append_element(ctrl_end, f"{_HP}fieldEnd", {"beginIDRef": field_value, "fieldid": field_value})

        # The begin/end runs bracket the paragraph's existing children.
        paragraph.element.insert(0, run_begin)
        paragraph.element.append(run_end)
        paragraph.section.mark_dirty()

        return field_value

    def add_memo_with_anchor(
        self,
        text: str = "",
        *,
        paragraph: HwpxOxmlParagraph | None = None,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        paragraph_text: str | None = None,
        memo_shape_id_ref: str | int | None = None,
        memo_id: str | None = None,
        char_pr_id_ref: str | int | None = None,
        attributes: dict[str, str] | None = None,
        field_id: str | None = None,
        author: str | None = None,
        created: datetime | str | None = None,
        number: int = 1,
        anchor_char_pr_id_ref: str | int | None = None,
    ) -> tuple[HwpxOxmlMemo, HwpxOxmlParagraph, str]:
        """Create a memo and ensure it is visible by anchoring a MEMO field.

        When *paragraph* is omitted a new paragraph (containing
        *paragraph_text*, or empty) is appended to the memo's section and
        used as the anchor.  When *paragraph* is given and *paragraph_text*
        is not ``None``, the paragraph's text is replaced first.

        Returns:
            ``(memo, anchor_paragraph, field_id)``.

        Raises:
            ValueError: The document has no sections, or the created memo is
                not attached to a section.
        """

        memo = self.add_memo(
            text,
            section=section,
            section_index=section_index,
            memo_shape_id_ref=memo_shape_id_ref,
            memo_id=memo_id,
            char_pr_id_ref=char_pr_id_ref,
            attributes=attributes,
        )

        target_paragraph = paragraph
        if target_paragraph is None:
            memo_section = memo.group.section
            if memo_section is None:
                raise ValueError("memo must belong to a section")
            paragraph_value = "" if paragraph_text is None else paragraph_text
            # Compare against None rather than relying on truthiness: an
            # integer id of 0 is a legitimate reference and must not be
            # replaced by the memo's character style.
            anchor_char = (
                anchor_char_pr_id_ref
                if anchor_char_pr_id_ref is not None
                else char_pr_id_ref
            )
            target_paragraph = self.add_paragraph(
                paragraph_value,
                section=memo_section,
                char_pr_id_ref=anchor_char,
            )
        elif paragraph_text is not None:
            target_paragraph.text = paragraph_text

        field_value = self.attach_memo_field(
            target_paragraph,
            memo,
            field_id=field_id,
            author=author,
            created=created,
            number=number,
            char_pr_id_ref=anchor_char_pr_id_ref,
        )

        return memo, target_paragraph, field_value

    @property
    def paragraphs(self) -> list[HwpxOxmlParagraph]:
        """Return all paragraphs across every section."""
        return self._root.paragraphs

    @property
    def char_properties(self) -> dict[str, RunStyle]:
        """Return the resolved character style definitions available to the document."""

        return self._root.char_properties

    def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
        """Return the style referenced by *char_pr_id_ref* if known."""

        return self._root.char_property(char_pr_id_ref)

    def ensure_run_style(
        self,
        *,
        bold: bool = False,
        italic: bool = False,
        underline: bool = False,
        base_char_pr_id: str | int | None = None,
    ) -> str:
        """Return a ``charPr`` identifier matching the requested flags."""

        return self._root.ensure_run_style(
            bold=bold,
            italic=italic,
            underline=underline,
            base_char_pr_id=base_char_pr_id,
        )

    def iter_runs(self) -> Iterator[HwpxOxmlRun]:
        """Yield every run element contained in the document."""

        for paragraph in self.paragraphs:
            yield from paragraph.runs

    def find_runs_by_style(
        self,
        *,
        text_color: str | None = None,
        underline_type: str | None = None,
        underline_color: str | None = None,
        char_pr_id_ref: str | int | None = None,
    ) -> list[HwpxOxmlRun]:
        """Return runs matching the requested style criteria.

        Every criterion that is not ``None`` must match.  A run without a
        resolved style never matches a colour/underline criterion.  The
        ``char_pr_id_ref`` comparison is done on stripped string values.
        """

        matches: list[HwpxOxmlRun] = []
        target_char = str(char_pr_id_ref).strip() if char_pr_id_ref is not None else None

        for run in self.iter_runs():
            if target_char is not None:
                run_char = (run.char_pr_id_ref or "").strip()
                if run_char != target_char:
                    continue
            style = run.style
            if text_color is not None:
                if style is None or style.text_color() != text_color:
                    continue
            if underline_type is not None:
                if style is None or style.underline_type() != underline_type:
                    continue
            if underline_color is not None:
                if style is None or style.underline_color() != underline_color:
                    continue
            matches.append(run)
        return matches

    def replace_text_in_runs(
        self,
        search: str,
        replacement: str,
        *,
        text_color: str | None = None,
        underline_type: str | None = None,
        underline_color: str | None = None,
        char_pr_id_ref: str | int | None = None,
        limit: int | None = None,
    ) -> int:
        """Replace occurrences of *search* in runs matching the provided style filters.

        Args:
            search: Non-empty text to look for.
            replacement: Text substituted for each occurrence.
            text_color, underline_type, underline_color, char_pr_id_ref:
                Style filters, as for :meth:`find_runs_by_style`.
            limit: Maximum number of replacements across all runs; ``None``
                means unlimited, and values ``<= 0`` replace nothing.

        Returns:
            The number of replacements performed.

        Raises:
            ValueError: *search* is empty.
        """

        if not search:
            raise ValueError("search must be a non-empty string")

        replacements = 0
        runs = self.find_runs_by_style(
            text_color=text_color,
            underline_type=underline_type,
            underline_color=underline_color,
            char_pr_id_ref=char_pr_id_ref,
        )

        for run in runs:
            remaining = None
            if limit is not None:
                remaining = limit - replacements
                if remaining <= 0:
                    break
            original_char_pr = run.char_pr_id_ref
            replaced_here = run.replace_text(
                search,
                replacement,
                count=remaining,
            )
            if replaced_here and original_char_pr is not None:
                # Ensure the run retains its original formatting reference even
                # if XML nodes were rewritten during substitution.
                run.char_pr_id_ref = original_char_pr
            replacements += replaced_here
            if limit is not None and replacements >= limit:
                break
        return replacements

    # ------------------------------------------------------------------
    # editing helpers
    def add_paragraph(
        self,
        text: str = "",
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        para_pr_id_ref: str | int | None = None,
        style_id_ref: str | int | None = None,
        char_pr_id_ref: str | int | None = None,
        run_attributes: dict[str, str] | None = None,
        include_run: bool = True,
        **extra_attrs: str,
    ) -> HwpxOxmlParagraph:
        """Append a paragraph to the document and return it.

        Formatting references may be overridden via ``para_pr_id_ref``,
        ``style_id_ref`` and ``char_pr_id_ref``. Any additional keyword
        arguments are added as raw paragraph attributes.
        """
        return self._root.add_paragraph(
            text,
            section=section,
            section_index=section_index,
            para_pr_id_ref=para_pr_id_ref,
            style_id_ref=style_id_ref,
            char_pr_id_ref=char_pr_id_ref,
            run_attributes=run_attributes,
            include_run=include_run,
            **extra_attrs,
        )

    def add_table(
        self,
        rows: int,
        cols: int,
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        width: int | None = None,
        height: int | None = None,
        border_fill_id_ref: str | int | None = None,
        para_pr_id_ref: str | int | None = None,
        style_id_ref: str | int | None = None,
        char_pr_id_ref: str | int | None = None,
        run_attributes: dict[str, str] | None = None,
        **extra_attrs: str,
    ) -> HwpxOxmlTable:
        """Create a table in a new paragraph and return it.

        When *border_fill_id_ref* is ``None`` the identifier returned by the
        object tree's ``ensure_basic_border_fill()`` is used instead.
        """

        resolved_border_fill: str | int | None = border_fill_id_ref
        if resolved_border_fill is None:
            resolved_border_fill = self._root.ensure_basic_border_fill()

        # The host paragraph is created without a run; the table call below
        # receives the run attributes instead.
        paragraph = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            para_pr_id_ref=para_pr_id_ref,
            style_id_ref=style_id_ref,
            char_pr_id_ref=char_pr_id_ref,
            include_run=False,
            **extra_attrs,
        )
        return paragraph.add_table(
            rows,
            cols,
            width=width,
            height=height,
            border_fill_id_ref=resolved_border_fill,
            run_attributes=run_attributes,
            char_pr_id_ref=char_pr_id_ref,
        )

    def add_shape(
        self,
        shape_type: str,
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        attributes: dict[str, str] | None = None,
        para_pr_id_ref: str | int | None = None,
        style_id_ref: str | int | None = None,
        char_pr_id_ref: str | int | None = None,
        run_attributes: dict[str, str] | None = None,
        **extra_attrs: str,
    ) -> HwpxOxmlInlineObject:
        """Insert an inline shape into a new paragraph."""

        paragraph = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            para_pr_id_ref=para_pr_id_ref,
            style_id_ref=style_id_ref,
            char_pr_id_ref=char_pr_id_ref,
            include_run=False,
            **extra_attrs,
        )
        return paragraph.add_shape(
            shape_type,
            attributes=attributes,
            run_attributes=run_attributes,
            char_pr_id_ref=char_pr_id_ref,
        )

    def add_control(
        self,
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        attributes: dict[str, str] | None = None,
        control_type: str | None = None,
        para_pr_id_ref: str | int | None = None,
        style_id_ref: str | int | None = None,
        char_pr_id_ref: str | int | None = None,
        run_attributes: dict[str, str] | None = None,
        **extra_attrs: str,
    ) -> HwpxOxmlInlineObject:
        """Insert a control inline object into a new paragraph."""

        paragraph = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            para_pr_id_ref=para_pr_id_ref,
            style_id_ref=style_id_ref,
            char_pr_id_ref=char_pr_id_ref,
            include_run=False,
            **extra_attrs,
        )
        return paragraph.add_control(
            attributes=attributes,
            control_type=control_type,
            run_attributes=run_attributes,
            char_pr_id_ref=char_pr_id_ref,
        )

    def set_header_text(
        self,
        text: str,
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        page_type: str = "BOTH",
    ) -> HwpxOxmlSectionHeaderFooter:
        """Ensure the requested section contains a header for *page_type* and set its text.

        Raises:
            ValueError: The document does not contain any sections.
        """

        target_section = self._resolve_section(section, section_index)
        return target_section.properties.set_header_text(text, page_type=page_type)

    def set_footer_text(
        self,
        text: str,
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        page_type: str = "BOTH",
    ) -> HwpxOxmlSectionHeaderFooter:
        """Ensure the requested section contains a footer for *page_type* and set its text.

        Raises:
            ValueError: The document does not contain any sections.
        """

        target_section = self._resolve_section(section, section_index)
        return target_section.properties.set_footer_text(text, page_type=page_type)

    def remove_header(
        self,
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        page_type: str = "BOTH",
    ) -> None:
        """Remove the header linked to *page_type* from the requested section if present.

        Does nothing when no section was selected and the document has no
        sections.
        """

        target_section = self._resolve_section(section, section_index, required=False)
        if target_section is None:
            return
        target_section.properties.remove_header(page_type=page_type)

    def remove_footer(
        self,
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        page_type: str = "BOTH",
    ) -> None:
        """Remove the footer linked to *page_type* from the requested section if present.

        Does nothing when no section was selected and the document has no
        sections.
        """

        target_section = self._resolve_section(section, section_index, required=False)
        if target_section is None:
            return
        target_section.properties.remove_footer(page_type=page_type)

    def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
        """Persist pending changes to *path* and return the same path."""

        updates = self._root.serialize()
        result = self._package.save(path, updates)
        self._root.reset_dirty()
        return path if result is None else result

    def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
        """Persist pending changes to *stream* and return the same stream."""

        updates = self._root.serialize()
        result = self._package.save(stream, updates)
        self._root.reset_dirty()
        return stream if result is None else result

    def to_bytes(self) -> bytes:
        """Serialize pending changes and return the HWPX archive as bytes.

        Raises:
            TypeError: The package's ``save(None, ...)`` did not return bytes.
        """

        updates = self._root.serialize()
        result = self._package.save(None, updates)
        self._root.reset_dirty()
        if isinstance(result, bytes):
            return result
        raise TypeError("package.save(None) must return bytes")

    @overload
    def save(self, path_or_stream: None = None) -> bytes: ...

    @overload
    def save(self, path_or_stream: str | PathLike[str]) -> str | PathLike[str]: ...

    @overload
    def save(self, path_or_stream: BinaryIO) -> BinaryIO: ...

    def save(
        self,
        path_or_stream: str | PathLike[str] | BinaryIO | None = None,
    ) -> str | PathLike[str] | BinaryIO | bytes:
        """Deprecated compatibility wrapper around save_to_path/save_to_stream/to_bytes.

        Deprecated:
            ``save()`` is kept for backward compatibility and may be removed
            in the future.  Use instead:
            - saving to a path: ``save_to_path(path)``
            - saving to a stream: ``save_to_stream(stream)``
            - getting bytes back: ``to_bytes()``
        """

        warnings.warn(
            "HwpxDocument.save()는 deprecated 예정입니다. "
            "save_to_path()/save_to_stream()/to_bytes() 사용을 권장합니다.",
            DeprecationWarning,
            stacklevel=2,
        )
        if path_or_stream is None:
            return self.to_bytes()
        if isinstance(path_or_stream, (str, PathLike)):
            return self.save_to_path(path_or_stream)
        return self.save_to_stream(path_or_stream)