python-hwpx 2.3__py3-none-any.whl → 2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hwpx/document.py CHANGED
@@ -1,890 +1,1362 @@
1
- """High-level representation of an HWPX document."""
2
-
3
- from __future__ import annotations
4
-
5
- import xml.etree.ElementTree as ET
6
- import io
7
- import warnings
8
- from datetime import datetime
9
- import logging
10
- import uuid
11
-
12
- from os import PathLike
13
- from typing import Any, BinaryIO, Iterator, overload
14
-
15
- from lxml import etree
16
-
17
- from .oxml import (
18
- Bullet,
19
- GenericElement,
20
- HwpxOxmlDocument,
21
- HwpxOxmlHeader,
22
- HwpxOxmlHistory,
23
- HwpxOxmlInlineObject,
24
- HwpxOxmlMasterPage,
25
- HwpxOxmlMemo,
26
- HwpxOxmlParagraph,
27
- HwpxOxmlRun,
28
- HwpxOxmlSection,
29
- HwpxOxmlSectionHeaderFooter,
30
- HwpxOxmlTable,
31
- HwpxOxmlVersion,
32
- MemoShape,
33
- ParagraphProperty,
34
- RunStyle,
35
- Style,
36
- TrackChange,
37
- TrackChangeAuthor,
38
- )
39
- from .opc.package import HwpxPackage
40
- from .templates import blank_document_bytes
41
-
42
- ET.register_namespace("hp", "http://www.hancom.co.kr/hwpml/2011/paragraph")
43
- ET.register_namespace("hs", "http://www.hancom.co.kr/hwpml/2011/section")
44
- ET.register_namespace("hc", "http://www.hancom.co.kr/hwpml/2011/core")
45
- ET.register_namespace("hh", "http://www.hancom.co.kr/hwpml/2011/head")
46
-
47
- _HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
48
- _HP = f"{{{_HP_NS}}}"
49
- _HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
50
- _HH = f"{{{_HH_NS}}}"
51
-
52
- logger = logging.getLogger(__name__)
53
-
54
-
55
- def _append_element(
56
- parent: Any,
57
- tag: str,
58
- attributes: dict[str, str] | None = None,
59
- ) -> Any:
60
- """Create and append a child element that matches *parent*'s element type."""
61
-
62
- child = parent.makeelement(tag, attributes or {})
63
- parent.append(child)
64
- return child
65
-
66
-
67
- class HwpxDocument:
68
- """Provides a user-friendly API for editing HWPX documents."""
69
-
70
- def __init__(
71
- self,
72
- package: HwpxPackage,
73
- root: HwpxOxmlDocument,
74
- *,
75
- managed_resources: tuple[Any, ...] = (),
76
- ):
77
- self._package = package
78
- self._root = root
79
- self._managed_resources = list(managed_resources)
80
- self._closed = False
81
-
82
- def __repr__(self) -> str:
83
- """Return a compact and safe summary of the document state."""
84
-
85
- return (
86
- f"{self.__class__.__name__}("
87
- f"sections={len(self.sections)}, "
88
- f"paragraphs={len(self.paragraphs)}, "
89
- f"headers={len(self.headers)}, "
90
- f"master_pages={len(self.master_pages)}, "
91
- f"histories={len(self.histories)}, "
92
- f"closed={self._closed}"
93
- ")"
94
- )
95
-
96
- # ------------------------------------------------------------------
97
- # construction helpers
98
- @classmethod
99
- def open(
100
- cls,
101
- source: str | PathLike[str] | bytes | BinaryIO,
102
- ) -> "HwpxDocument":
103
- """Open *source* and return a :class:`HwpxDocument` instance.
104
-
105
- Raises:
106
- HwpxStructureError: 필수 파일이나 구조가 올바르지 않은 HWPX를 열 때 발생합니다.
107
- HwpxPackageError: 패키지를 여는 과정에서 일반적인 I/O/포맷 오류가 발생하면 전달됩니다.
108
- """
109
- internal_resources: list[Any] = []
110
- open_source = source
111
- if isinstance(source, bytes):
112
- stream = io.BytesIO(source)
113
- open_source = stream
114
- internal_resources.append(stream)
115
- package = HwpxPackage.open(open_source)
116
- root = HwpxOxmlDocument.from_package(package)
117
- return cls(package, root, managed_resources=tuple(internal_resources))
118
-
119
- @classmethod
120
- def new(cls) -> "HwpxDocument":
121
- """Return a new blank document based on the default skeleton template."""
122
-
123
- return cls.open(blank_document_bytes())
124
-
125
- @classmethod
126
- def from_package(cls, package: HwpxPackage) -> "HwpxDocument":
127
- """Create a document backed by an existing :class:`HwpxPackage`.
128
-
129
- Args:
130
- package: :class:`hwpx.opc.package.HwpxPackage` 인스턴스.
131
- """
132
- root = HwpxOxmlDocument.from_package(package)
133
- return cls(package, root)
134
-
135
- def __enter__(self) -> "HwpxDocument":
136
- """컨텍스트 매니저 진입 시 현재 문서 인스턴스를 반환합니다."""
137
-
138
- return self
139
-
140
- def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
141
- """예외 발생 여부와 무관하게 내부 자원을 안전하게 정리합니다."""
142
-
143
- self.close()
144
- return False
145
-
146
- def close(self) -> None:
147
- """문서가 관리하는 내부 패키지/스트림 자원을 정리합니다.
148
-
149
- 정리 정책:
150
- - ``flush()`` 가능한 자원은 먼저 flush를 시도합니다.
151
- - ``close()`` 가능한 자원은 flush 이후 close를 시도합니다.
152
- - flush/close 중 발생한 예외는 로깅하고 무시하여 정리 루틴을 계속 진행합니다.
153
- - 같은 문서에서 ``close()``를 여러 번 호출해도 안전합니다.
154
- """
155
-
156
- if self._closed:
157
- return
158
-
159
- self._flush_resource(self._package)
160
- for resource in self._managed_resources:
161
- self._flush_resource(resource)
162
-
163
- self._close_resource(self._package)
164
- for resource in self._managed_resources:
165
- self._close_resource(resource)
166
-
167
- self._managed_resources.clear()
168
- self._closed = True
169
-
170
- @staticmethod
171
- def _flush_resource(resource: Any) -> None:
172
- flush = getattr(resource, "flush", None)
173
- if not callable(flush):
174
- return
175
- try:
176
- flush()
177
- except Exception:
178
- logger.debug("자원 flush 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
179
-
180
- @staticmethod
181
- def _close_resource(resource: Any) -> None:
182
- close = getattr(resource, "close", None)
183
- if not callable(close):
184
- return
185
- try:
186
- close()
187
- except Exception:
188
- logger.debug("자원 close 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
189
-
190
- # ------------------------------------------------------------------
191
- # properties exposing document content
192
- @property
193
- def package(self) -> HwpxPackage:
194
- """Return the :class:`HwpxPackage` backing this document."""
195
- return self._package
196
-
197
- @property
198
- def oxml(self) -> HwpxOxmlDocument:
199
- """Return the low-level XML object tree representing the document."""
200
- return self._root
201
-
202
- @property
203
- def sections(self) -> list[HwpxOxmlSection]:
204
- """Return the sections contained in the document."""
205
- return self._root.sections
206
-
207
- @property
208
- def headers(self) -> list[HwpxOxmlHeader]:
209
- """Return the header parts referenced by the document."""
210
- return self._root.headers
211
-
212
- @property
213
- def master_pages(self) -> list[HwpxOxmlMasterPage]:
214
- """Return the master-page parts declared in the manifest."""
215
- return self._root.master_pages
216
-
217
- @property
218
- def histories(self) -> list[HwpxOxmlHistory]:
219
- """Return document history parts referenced by the manifest."""
220
- return self._root.histories
221
-
222
- @property
223
- def version(self) -> HwpxOxmlVersion | None:
224
- """Return the version metadata part if present."""
225
- return self._root.version
226
-
227
- @property
228
- def border_fills(self) -> dict[str, GenericElement]:
229
- """Return border fill definitions declared in the headers."""
230
-
231
- return self._root.border_fills
232
-
233
- def border_fill(self, border_fill_id_ref: int | str | None) -> GenericElement | None:
234
- """Return the border fill definition referenced by *border_fill_id_ref*."""
235
-
236
- return self._root.border_fill(border_fill_id_ref)
237
-
238
- @property
239
- def memo_shapes(self) -> dict[str, MemoShape]:
240
- """Return memo shapes available in the header reference lists."""
241
-
242
- return self._root.memo_shapes
243
-
244
- def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
245
- """Return the memo shape definition referenced by *memo_shape_id_ref*."""
246
-
247
- return self._root.memo_shape(memo_shape_id_ref)
248
-
249
- @property
250
- def bullets(self) -> dict[str, Bullet]:
251
- """Return bullet definitions declared in header reference lists."""
252
-
253
- return self._root.bullets
254
-
255
- def bullet(self, bullet_id_ref: int | str | None) -> Bullet | None:
256
- """Return the bullet definition referenced by *bullet_id_ref*."""
257
-
258
- return self._root.bullet(bullet_id_ref)
259
-
260
- @property
261
- def paragraph_properties(self) -> dict[str, ParagraphProperty]:
262
- """Return paragraph property definitions declared in headers."""
263
-
264
- return self._root.paragraph_properties
265
-
266
- def paragraph_property(
267
- self, para_pr_id_ref: int | str | None
268
- ) -> ParagraphProperty | None:
269
- """Return the paragraph property referenced by *para_pr_id_ref*."""
270
-
271
- return self._root.paragraph_property(para_pr_id_ref)
272
-
273
- @property
274
- def styles(self) -> dict[str, Style]:
275
- """Return style definitions available in the document."""
276
-
277
- return self._root.styles
278
-
279
- def style(self, style_id_ref: int | str | None) -> Style | None:
280
- """Return the style definition referenced by *style_id_ref*."""
281
-
282
- return self._root.style(style_id_ref)
283
-
284
- @property
285
- def track_changes(self) -> dict[str, TrackChange]:
286
- """Return tracked change metadata declared in the headers."""
287
-
288
- return self._root.track_changes
289
-
290
- def track_change(self, change_id_ref: int | str | None) -> TrackChange | None:
291
- """Return tracked change metadata referenced by *change_id_ref*."""
292
-
293
- return self._root.track_change(change_id_ref)
294
-
295
- @property
296
- def track_change_authors(self) -> dict[str, TrackChangeAuthor]:
297
- """Return tracked change author metadata declared in the headers."""
298
-
299
- return self._root.track_change_authors
300
-
301
- def track_change_author(
302
- self, author_id_ref: int | str | None
303
- ) -> TrackChangeAuthor | None:
304
- """Return tracked change author details referenced by *author_id_ref*."""
305
-
306
- return self._root.track_change_author(author_id_ref)
307
-
308
- @property
309
- def memos(self) -> list[HwpxOxmlMemo]:
310
- """Return all memo entries declared in every section."""
311
-
312
- memos: list[HwpxOxmlMemo] = []
313
- for section in self._root.sections:
314
- memos.extend(section.memos)
315
- return memos
316
-
317
- def add_memo(
318
- self,
319
- text: str = "",
320
- *,
321
- section: HwpxOxmlSection | None = None,
322
- section_index: int | None = None,
323
- memo_shape_id_ref: str | int | None = None,
324
- memo_id: str | None = None,
325
- char_pr_id_ref: str | int | None = None,
326
- attributes: dict[str, str] | None = None,
327
- ) -> HwpxOxmlMemo:
328
- """Create a memo entry inside *section* (or the last section by default)."""
329
-
330
- if section is None and section_index is not None:
331
- section = self._root.sections[section_index]
332
- if section is None:
333
- if not self._root.sections:
334
- raise ValueError("document does not contain any sections")
335
- section = self._root.sections[-1]
336
- return section.add_memo(
337
- text,
338
- memo_shape_id_ref=memo_shape_id_ref,
339
- memo_id=memo_id,
340
- char_pr_id_ref=char_pr_id_ref,
341
- attributes=attributes,
342
- )
343
-
344
- def remove_memo(self, memo: HwpxOxmlMemo) -> None:
345
- """Remove *memo* from the section it belongs to."""
346
-
347
- memo.remove()
348
-
349
- def attach_memo_field(
350
- self,
351
- paragraph: HwpxOxmlParagraph,
352
- memo: HwpxOxmlMemo,
353
- *,
354
- field_id: str | None = None,
355
- author: str | None = None,
356
- created: datetime | str | None = None,
357
- number: int = 1,
358
- char_pr_id_ref: str | int | None = None,
359
- ) -> str:
360
- """Attach a MEMO field control to *paragraph* so Hangul shows *memo*."""
361
-
362
- if paragraph.section is None:
363
- raise ValueError("paragraph must belong to a section before anchoring a memo")
364
- if memo.group.section is None:
365
- raise ValueError("memo is not attached to a section")
366
-
367
- field_value = field_id or uuid.uuid4().hex
368
- author_value = author or memo.attributes.get("author") or ""
369
-
370
- created_value = created if created is not None else memo.attributes.get("createDateTime")
371
- if isinstance(created_value, datetime):
372
- created_value = created_value.strftime("%Y-%m-%d %H:%M:%S")
373
- elif created_value is None:
374
- created_value = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
375
- else:
376
- created_value = str(created_value)
377
-
378
- memo_shape_id = memo.memo_shape_id_ref or ""
379
-
380
- char_ref = char_pr_id_ref
381
- if char_ref is None:
382
- char_ref = paragraph.char_pr_id_ref
383
- if char_ref is None:
384
- char_ref = memo._infer_char_pr_id_ref()
385
- if char_ref is None:
386
- char_ref = "0"
387
- char_ref = str(char_ref)
388
-
389
- paragraph_element = paragraph.element
390
- run_begin = paragraph_element.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
391
- ctrl_begin = _append_element(run_begin, f"{_HP}ctrl")
392
- field_begin = _append_element(
393
- ctrl_begin,
394
- f"{_HP}fieldBegin",
395
- {
396
- "id": field_value,
397
- "type": "MEMO",
398
- "editable": "true",
399
- "dirty": "false",
400
- "fieldid": field_value,
401
- },
402
- )
403
-
404
- parameters = _append_element(field_begin, f"{_HP}parameters", {"count": "5", "name": ""})
405
- _append_element(parameters, f"{_HP}stringParam", {"name": "ID"}).text = memo.id or ""
406
- _append_element(parameters, f"{_HP}integerParam", {"name": "Number"}).text = str(max(1, number))
407
- _append_element(parameters, f"{_HP}stringParam", {"name": "CreateDateTime"}).text = created_value
408
- _append_element(parameters, f"{_HP}stringParam", {"name": "Author"}).text = author_value
409
- _append_element(parameters, f"{_HP}stringParam", {"name": "MemoShapeID"}).text = memo_shape_id
410
-
411
- sub_list = _append_element(
412
- field_begin,
413
- f"{_HP}subList",
414
- {
415
- "id": f"memo-field-{memo.id or field_value}",
416
- "textDirection": "HORIZONTAL",
417
- "lineWrap": "BREAK",
418
- "vertAlign": "TOP",
419
- },
420
- )
421
- sub_para = _append_element(
422
- sub_list,
423
- f"{_HP}p",
424
- {
425
- "id": f"memo-field-{(memo.id or field_value)}-p",
426
- "paraPrIDRef": "0",
427
- "styleIDRef": "0",
428
- "pageBreak": "0",
429
- "columnBreak": "0",
430
- "merged": "0",
431
- },
432
- )
433
- sub_run = _append_element(sub_para, f"{_HP}run", {"charPrIDRef": char_ref})
434
- _append_element(sub_run, f"{_HP}t").text = memo.id or field_value
435
-
436
- run_end = paragraph_element.makeelement(f"{_HP}run", {"charPrIDRef": char_ref})
437
- ctrl_end = _append_element(run_end, f"{_HP}ctrl")
438
- _append_element(ctrl_end, f"{_HP}fieldEnd", {"beginIDRef": field_value, "fieldid": field_value})
439
-
440
- paragraph.element.insert(0, run_begin)
441
- paragraph.element.append(run_end)
442
- paragraph.section.mark_dirty()
443
-
444
- return field_value
445
-
446
- def add_memo_with_anchor(
447
- self,
448
- text: str = "",
449
- *,
450
- paragraph: HwpxOxmlParagraph | None = None,
451
- section: HwpxOxmlSection | None = None,
452
- section_index: int | None = None,
453
- paragraph_text: str | None = None,
454
- memo_shape_id_ref: str | int | None = None,
455
- memo_id: str | None = None,
456
- char_pr_id_ref: str | int | None = None,
457
- attributes: dict[str, str] | None = None,
458
- field_id: str | None = None,
459
- author: str | None = None,
460
- created: datetime | str | None = None,
461
- number: int = 1,
462
- anchor_char_pr_id_ref: str | int | None = None,
463
- ) -> tuple[HwpxOxmlMemo, HwpxOxmlParagraph, str]:
464
- """Create a memo and ensure it is visible by anchoring a MEMO field."""
465
-
466
- memo = self.add_memo(
467
- text,
468
- section=section,
469
- section_index=section_index,
470
- memo_shape_id_ref=memo_shape_id_ref,
471
- memo_id=memo_id,
472
- char_pr_id_ref=char_pr_id_ref,
473
- attributes=attributes,
474
- )
475
-
476
- target_paragraph = paragraph
477
- if target_paragraph is None:
478
- memo_section = memo.group.section
479
- if memo_section is None:
480
- raise ValueError("memo must belong to a section")
481
- paragraph_value = "" if paragraph_text is None else paragraph_text
482
- anchor_char = anchor_char_pr_id_ref or char_pr_id_ref
483
- target_paragraph = self.add_paragraph(
484
- paragraph_value,
485
- section=memo_section,
486
- char_pr_id_ref=anchor_char,
487
- )
488
- elif paragraph_text is not None:
489
- target_paragraph.text = paragraph_text
490
-
491
- field_value = self.attach_memo_field(
492
- target_paragraph,
493
- memo,
494
- field_id=field_id,
495
- author=author,
496
- created=created,
497
- number=number,
498
- char_pr_id_ref=anchor_char_pr_id_ref,
499
- )
500
-
501
- return memo, target_paragraph, field_value
502
-
503
- @property
504
- def paragraphs(self) -> list[HwpxOxmlParagraph]:
505
- """Return all paragraphs across every section."""
506
- return self._root.paragraphs
507
-
508
- @property
509
- def char_properties(self) -> dict[str, RunStyle]:
510
- """Return the resolved character style definitions available to the document."""
511
-
512
- return self._root.char_properties
513
-
514
- def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
515
- """Return the style referenced by *char_pr_id_ref* if known."""
516
-
517
- return self._root.char_property(char_pr_id_ref)
518
-
519
- def ensure_run_style(
520
- self,
521
- *,
522
- bold: bool = False,
523
- italic: bool = False,
524
- underline: bool = False,
525
- base_char_pr_id: str | int | None = None,
526
- ) -> str:
527
- """Return a ``charPr`` identifier matching the requested flags."""
528
-
529
- return self._root.ensure_run_style(
530
- bold=bold,
531
- italic=italic,
532
- underline=underline,
533
- base_char_pr_id=base_char_pr_id,
534
- )
535
-
536
- def iter_runs(self) -> Iterator[HwpxOxmlRun]:
537
- """Yield every run element contained in the document."""
538
-
539
- for paragraph in self.paragraphs:
540
- for run in paragraph.runs:
541
- yield run
542
-
543
- def find_runs_by_style(
544
- self,
545
- *,
546
- text_color: str | None = None,
547
- underline_type: str | None = None,
548
- underline_color: str | None = None,
549
- char_pr_id_ref: str | int | None = None,
550
- ) -> list[HwpxOxmlRun]:
551
- """Return runs matching the requested style criteria."""
552
-
553
- matches: list[HwpxOxmlRun] = []
554
- target_char = str(char_pr_id_ref).strip() if char_pr_id_ref is not None else None
555
-
556
- for run in self.iter_runs():
557
- if target_char is not None:
558
- run_char = (run.char_pr_id_ref or "").strip()
559
- if run_char != target_char:
560
- continue
561
- style = run.style
562
- if text_color is not None:
563
- if style is None or style.text_color() != text_color:
564
- continue
565
- if underline_type is not None:
566
- if style is None or style.underline_type() != underline_type:
567
- continue
568
- if underline_color is not None:
569
- if style is None or style.underline_color() != underline_color:
570
- continue
571
- matches.append(run)
572
- return matches
573
-
574
- def replace_text_in_runs(
575
- self,
576
- search: str,
577
- replacement: str,
578
- *,
579
- text_color: str | None = None,
580
- underline_type: str | None = None,
581
- underline_color: str | None = None,
582
- char_pr_id_ref: str | int | None = None,
583
- limit: int | None = None,
584
- ) -> int:
585
- """Replace occurrences of *search* in runs matching the provided style filters."""
586
-
587
- if not search:
588
- raise ValueError("search must be a non-empty string")
589
-
590
- replacements = 0
591
- runs = self.find_runs_by_style(
592
- text_color=text_color,
593
- underline_type=underline_type,
594
- underline_color=underline_color,
595
- char_pr_id_ref=char_pr_id_ref,
596
- )
597
-
598
- for run in runs:
599
- remaining = None
600
- if limit is not None:
601
- remaining = limit - replacements
602
- if remaining <= 0:
603
- break
604
- original_char_pr = run.char_pr_id_ref
605
- replaced_here = run.replace_text(
606
- search,
607
- replacement,
608
- count=remaining,
609
- )
610
- if replaced_here and original_char_pr is not None:
611
- # Ensure the run retains its original formatting reference even
612
- # if XML nodes were rewritten during substitution.
613
- run.char_pr_id_ref = original_char_pr
614
- replacements += replaced_here
615
- if limit is not None and replacements >= limit:
616
- break
617
- return replacements
618
-
619
- # ------------------------------------------------------------------
620
- # editing helpers
621
- def add_paragraph(
622
- self,
623
- text: str = "",
624
- *,
625
- section: HwpxOxmlSection | None = None,
626
- section_index: int | None = None,
627
- para_pr_id_ref: str | int | None = None,
628
- style_id_ref: str | int | None = None,
629
- char_pr_id_ref: str | int | None = None,
630
- run_attributes: dict[str, str] | None = None,
631
- include_run: bool = True,
632
- **extra_attrs: str,
633
- ) -> HwpxOxmlParagraph:
634
- """Append a paragraph to the document and return it.
635
-
636
- Formatting references may be overridden via ``para_pr_id_ref``,
637
- ``style_id_ref`` and ``char_pr_id_ref``. Any additional keyword
638
- arguments are added as raw paragraph attributes.
639
- """
640
- return self._root.add_paragraph(
641
- text,
642
- section=section,
643
- section_index=section_index,
644
- para_pr_id_ref=para_pr_id_ref,
645
- style_id_ref=style_id_ref,
646
- char_pr_id_ref=char_pr_id_ref,
647
- run_attributes=run_attributes,
648
- include_run=include_run,
649
- **extra_attrs,
650
- )
651
-
652
- def add_table(
653
- self,
654
- rows: int,
655
- cols: int,
656
- *,
657
- section: HwpxOxmlSection | None = None,
658
- section_index: int | None = None,
659
- width: int | None = None,
660
- height: int | None = None,
661
- border_fill_id_ref: str | int | None = None,
662
- para_pr_id_ref: str | int | None = None,
663
- style_id_ref: str | int | None = None,
664
- char_pr_id_ref: str | int | None = None,
665
- run_attributes: dict[str, str] | None = None,
666
- **extra_attrs: str,
667
- ) -> HwpxOxmlTable:
668
- """Create a table in a new paragraph and return it."""
669
-
670
- resolved_border_fill: str | int | None = border_fill_id_ref
671
- if resolved_border_fill is None:
672
- resolved_border_fill = self._root.ensure_basic_border_fill()
673
-
674
- paragraph = self.add_paragraph(
675
- "",
676
- section=section,
677
- section_index=section_index,
678
- para_pr_id_ref=para_pr_id_ref,
679
- style_id_ref=style_id_ref,
680
- char_pr_id_ref=char_pr_id_ref,
681
- include_run=False,
682
- **extra_attrs,
683
- )
684
- return paragraph.add_table(
685
- rows,
686
- cols,
687
- width=width,
688
- height=height,
689
- border_fill_id_ref=resolved_border_fill,
690
- run_attributes=run_attributes,
691
- char_pr_id_ref=char_pr_id_ref,
692
- )
693
-
694
- def add_shape(
695
- self,
696
- shape_type: str,
697
- *,
698
- section: HwpxOxmlSection | None = None,
699
- section_index: int | None = None,
700
- attributes: dict[str, str] | None = None,
701
- para_pr_id_ref: str | int | None = None,
702
- style_id_ref: str | int | None = None,
703
- char_pr_id_ref: str | int | None = None,
704
- run_attributes: dict[str, str] | None = None,
705
- **extra_attrs: str,
706
- ) -> HwpxOxmlInlineObject:
707
- """Insert an inline shape into a new paragraph."""
708
-
709
- paragraph = self.add_paragraph(
710
- "",
711
- section=section,
712
- section_index=section_index,
713
- para_pr_id_ref=para_pr_id_ref,
714
- style_id_ref=style_id_ref,
715
- char_pr_id_ref=char_pr_id_ref,
716
- include_run=False,
717
- **extra_attrs,
718
- )
719
- return paragraph.add_shape(
720
- shape_type,
721
- attributes=attributes,
722
- run_attributes=run_attributes,
723
- char_pr_id_ref=char_pr_id_ref,
724
- )
725
-
726
- def add_control(
727
- self,
728
- *,
729
- section: HwpxOxmlSection | None = None,
730
- section_index: int | None = None,
731
- attributes: dict[str, str] | None = None,
732
- control_type: str | None = None,
733
- para_pr_id_ref: str | int | None = None,
734
- style_id_ref: str | int | None = None,
735
- char_pr_id_ref: str | int | None = None,
736
- run_attributes: dict[str, str] | None = None,
737
- **extra_attrs: str,
738
- ) -> HwpxOxmlInlineObject:
739
- """Insert a control inline object into a new paragraph."""
740
-
741
- paragraph = self.add_paragraph(
742
- "",
743
- section=section,
744
- section_index=section_index,
745
- para_pr_id_ref=para_pr_id_ref,
746
- style_id_ref=style_id_ref,
747
- char_pr_id_ref=char_pr_id_ref,
748
- include_run=False,
749
- **extra_attrs,
750
- )
751
- return paragraph.add_control(
752
- attributes=attributes,
753
- control_type=control_type,
754
- run_attributes=run_attributes,
755
- char_pr_id_ref=char_pr_id_ref,
756
- )
757
-
758
- def set_header_text(
759
- self,
760
- text: str,
761
- *,
762
- section: HwpxOxmlSection | None = None,
763
- section_index: int | None = None,
764
- page_type: str = "BOTH",
765
- ) -> HwpxOxmlSectionHeaderFooter:
766
- """Ensure the requested section contains a header for *page_type* and set its text."""
767
-
768
- target_section = section
769
- if target_section is None and section_index is not None:
770
- target_section = self._root.sections[section_index]
771
- if target_section is None:
772
- if not self._root.sections:
773
- raise ValueError("document does not contain any sections")
774
- target_section = self._root.sections[-1]
775
- return target_section.properties.set_header_text(text, page_type=page_type)
776
-
777
- def set_footer_text(
778
- self,
779
- text: str,
780
- *,
781
- section: HwpxOxmlSection | None = None,
782
- section_index: int | None = None,
783
- page_type: str = "BOTH",
784
- ) -> HwpxOxmlSectionHeaderFooter:
785
- """Ensure the requested section contains a footer for *page_type* and set its text."""
786
-
787
- target_section = section
788
- if target_section is None and section_index is not None:
789
- target_section = self._root.sections[section_index]
790
- if target_section is None:
791
- if not self._root.sections:
792
- raise ValueError("document does not contain any sections")
793
- target_section = self._root.sections[-1]
794
- return target_section.properties.set_footer_text(text, page_type=page_type)
795
-
796
- def remove_header(
797
- self,
798
- *,
799
- section: HwpxOxmlSection | None = None,
800
- section_index: int | None = None,
801
- page_type: str = "BOTH",
802
- ) -> None:
803
- """Remove the header linked to *page_type* from the requested section if present."""
804
-
805
- target_section = section
806
- if target_section is None and section_index is not None:
807
- target_section = self._root.sections[section_index]
808
- if target_section is None:
809
- if not self._root.sections:
810
- return
811
- target_section = self._root.sections[-1]
812
- target_section.properties.remove_header(page_type=page_type)
813
-
814
- def remove_footer(
815
- self,
816
- *,
817
- section: HwpxOxmlSection | None = None,
818
- section_index: int | None = None,
819
- page_type: str = "BOTH",
820
- ) -> None:
821
- """Remove the footer linked to *page_type* from the requested section if present."""
822
-
823
- target_section = section
824
- if target_section is None and section_index is not None:
825
- target_section = self._root.sections[section_index]
826
- if target_section is None:
827
- if not self._root.sections:
828
- return
829
- target_section = self._root.sections[-1]
830
- target_section.properties.remove_footer(page_type=page_type)
831
-
832
- def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
833
- """Persist pending changes to *path* and return the same path."""
834
-
835
- updates = self._root.serialize()
836
- result = self._package.save(path, updates)
837
- self._root.reset_dirty()
838
- return path if result is None else result
839
-
840
- def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
841
- """Persist pending changes to *stream* and return the same stream."""
842
-
843
- updates = self._root.serialize()
844
- result = self._package.save(stream, updates)
845
- self._root.reset_dirty()
846
- return stream if result is None else result
847
-
848
- def to_bytes(self) -> bytes:
849
- """Serialize pending changes and return the HWPX archive as bytes."""
850
-
851
- updates = self._root.serialize()
852
- result = self._package.save(None, updates)
853
- self._root.reset_dirty()
854
- if isinstance(result, bytes):
855
- return result
856
- raise TypeError("package.save(None) must return bytes")
857
-
858
- @overload
859
- def save(self, path_or_stream: None = None) -> bytes: ...
860
-
861
- @overload
862
- def save(self, path_or_stream: str | PathLike[str]) -> str | PathLike[str]: ...
863
-
864
- @overload
865
- def save(self, path_or_stream: BinaryIO) -> BinaryIO: ...
866
-
867
- def save(
868
- self,
869
- path_or_stream: str | PathLike[str] | BinaryIO | None = None,
870
- ) -> str | PathLike[str] | BinaryIO | bytes:
871
- """Deprecated compatibility wrapper around save_to_path/save_to_stream/to_bytes.
872
-
873
- Deprecated:
874
- ``save()``는 하위 호환을 위해 유지되며 향후 제거될 수 있습니다.
875
- - 경로 저장: ``save_to_path(path)``
876
- - 스트림 저장: ``save_to_stream(stream)``
877
- - 바이트 반환: ``to_bytes()``
878
- """
879
-
880
- warnings.warn(
881
- "HwpxDocument.save()는 deprecated 예정입니다. "
882
- "save_to_path()/save_to_stream()/to_bytes() 사용을 권장합니다.",
883
- DeprecationWarning,
884
- stacklevel=2,
885
- )
886
- if path_or_stream is None:
887
- return self.to_bytes()
888
- if isinstance(path_or_stream, (str, PathLike)):
889
- return self.save_to_path(path_or_stream)
890
- return self.save_to_stream(path_or_stream)
1
+ """High-level representation of an HWPX document."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import xml.etree.ElementTree as ET
6
+ import io
7
+ import warnings
8
+ from datetime import datetime
9
+ import logging
10
+ import uuid
11
+
12
+ from os import PathLike
13
+ from typing import Any, BinaryIO, Iterator, Sequence, overload
14
+
15
+ from lxml import etree
16
+
17
+ from .oxml import (
18
+ Bullet,
19
+ GenericElement,
20
+ HwpxOxmlDocument,
21
+ HwpxOxmlHeader,
22
+ HwpxOxmlHistory,
23
+ HwpxOxmlInlineObject,
24
+ HwpxOxmlMasterPage,
25
+ HwpxOxmlMemo,
26
+ HwpxOxmlNote,
27
+ HwpxOxmlParagraph,
28
+ HwpxOxmlRun,
29
+ HwpxOxmlSection,
30
+ HwpxOxmlSectionHeaderFooter,
31
+ HwpxOxmlShape,
32
+ HwpxOxmlTable,
33
+ HwpxOxmlVersion,
34
+ MemoShape,
35
+ ParagraphProperty,
36
+ RunStyle,
37
+ Style,
38
+ TrackChange,
39
+ TrackChangeAuthor,
40
+ )
41
+ from .opc.package import HwpxPackage
42
+ from .templates import blank_document_bytes
43
+
44
+ ET.register_namespace("hp", "http://www.hancom.co.kr/hwpml/2011/paragraph")
45
+ ET.register_namespace("hs", "http://www.hancom.co.kr/hwpml/2011/section")
46
+ ET.register_namespace("hc", "http://www.hancom.co.kr/hwpml/2011/core")
47
+ ET.register_namespace("hh", "http://www.hancom.co.kr/hwpml/2011/head")
48
+
49
+ _HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
50
+ _HP = f"{{{_HP_NS}}}"
51
+ _HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
52
+ _HH = f"{{{_HH_NS}}}"
53
+
54
+ logger = logging.getLogger(__name__)
55
+
56
+
57
+ def _append_element(
58
+ parent: Any,
59
+ tag: str,
60
+ attributes: dict[str, str] | None = None,
61
+ ) -> Any:
62
+ """Create and append a child element that matches *parent*'s element type."""
63
+
64
+ child = parent.makeelement(tag, attributes or {})
65
+ parent.append(child)
66
+ return child
67
+
68
+
69
+ class HwpxDocument:
70
+ """Provides a user-friendly API for editing HWPX documents."""
71
+
72
def __init__(
    self,
    package: HwpxPackage,
    root: HwpxOxmlDocument,
    *,
    managed_resources: tuple[Any, ...] = (),
    validate_on_save: bool = False,
):
    """Bind the document to *package* and its parsed *root* tree.

    Args:
        package: Package backing this document.
        root: Object tree built from *package*.
        managed_resources: Extra resources the document owns; they are
            copied into a private list and released by :meth:`close`.
        validate_on_save: Stored as the public ``validate_on_save`` flag.
    """
    self.validate_on_save = validate_on_save
    self._closed = False
    self._managed_resources = [*managed_resources]
    self._root = root
    self._package = package
85
+
86
def __repr__(self) -> str:
    """Summarise the document as element counts plus its closed flag."""

    counted = (
        ("sections", self.sections),
        ("paragraphs", self.paragraphs),
        ("headers", self.headers),
        ("master_pages", self.master_pages),
        ("histories", self.histories),
    )
    summary = "".join(f"{label}={len(items)}, " for label, items in counted)
    return f"{self.__class__.__name__}({summary}closed={self._closed})"
99
+
100
+ # ------------------------------------------------------------------
101
+ # construction helpers
102
@classmethod
def open(
    cls,
    source: str | PathLike[str] | bytes | BinaryIO,
) -> "HwpxDocument":
    """Open *source* and return a :class:`HwpxDocument` instance.

    Raw ``bytes`` are wrapped in an in-memory stream that the returned
    document owns and releases in :meth:`close`; any other source is
    handed to ``HwpxPackage.open`` unchanged.

    If opening the package or building the object tree fails, whatever
    was acquired so far (the package and the internal stream) is closed
    on a best-effort basis before the original exception propagates.

    Raises:
        HwpxStructureError: Raised when opening an HWPX whose required
            files or structure are invalid.
        HwpxPackageError: Propagated when a general I/O or format error
            occurs while opening the package.
    """
    internal_resources: list[Any] = []
    open_source = source
    if isinstance(source, bytes):
        stream = io.BytesIO(source)
        open_source = stream
        internal_resources.append(stream)

    package: Any = None
    try:
        package = HwpxPackage.open(open_source)
        root = HwpxOxmlDocument.from_package(package)
    except Exception:
        # No document exists yet to take ownership, so release what was
        # acquired here instead of leaking it to the garbage collector.
        if package is not None:
            cls._close_resource(package)
        for resource in internal_resources:
            cls._close_resource(resource)
        raise
    return cls(package, root, managed_resources=tuple(internal_resources))
122
+
123
@classmethod
def new(cls) -> "HwpxDocument":
    """Create a blank document by opening the bundled skeleton template bytes."""

    skeleton = blank_document_bytes()
    return cls.open(skeleton)
128
+
129
@classmethod
def from_package(cls, package: HwpxPackage) -> "HwpxDocument":
    """Wrap an already opened :class:`HwpxPackage` in a document.

    Args:
        package: A :class:`hwpx.opc.package.HwpxPackage` instance.
    """
    document_root = HwpxOxmlDocument.from_package(package)
    return cls(package, document_root)
138
+
139
def __enter__(self) -> "HwpxDocument":
    """Enter the context manager and hand back this document instance."""

    return self
143
+
144
def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool:
    """Release internal resources whether or not an exception occurred.

    Always returns ``False`` so an in-flight exception keeps propagating.
    """

    self.close()
    return False
149
+
150
def close(self) -> None:
    """Release the package and stream resources owned by this document.

    Cleanup policy:
        - Every resource is flushed first (package, then managed resources).
        - Every resource is closed afterwards, in the same order.
        - Errors raised while flushing or closing are handled by
          ``_flush_resource`` / ``_close_resource`` so cleanup continues.
        - Calling ``close()`` again on the same document is a no-op.
    """

    if self._closed:
        return

    owned = [self._package, *self._managed_resources]
    for release_step in (self._flush_resource, self._close_resource):
        for resource in owned:
            release_step(resource)

    self._managed_resources.clear()
    self._closed = True
173
+
174
@staticmethod
def _flush_resource(resource: Any) -> None:
    """Call ``resource.flush()`` when it exists; log and ignore any failure."""
    flusher = getattr(resource, "flush", None)
    if callable(flusher):
        try:
            flusher()
        except Exception:
            # Best-effort cleanup: a failing flush must not abort close().
            logger.debug("자원 flush 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
183
+
184
@staticmethod
def _close_resource(resource: Any) -> None:
    """Call ``resource.close()`` when it exists; log and ignore any failure."""
    closer = getattr(resource, "close", None)
    if callable(closer):
        try:
            closer()
        except Exception:
            # Best-effort cleanup: a failing close must not abort the rest.
            logger.debug("자원 close 중 예외를 무시합니다: resource=%r", resource, exc_info=True)
193
+
194
+ # ------------------------------------------------------------------
195
+ # properties exposing document content
196
@property
def package(self) -> HwpxPackage:
    """The :class:`HwpxPackage` this document was constructed with."""
    return self._package
200
+
201
@property
def oxml(self) -> HwpxOxmlDocument:
    """The low-level XML object tree that this document wraps."""
    return self._root
205
+
206
@property
def sections(self) -> list[HwpxOxmlSection]:
    """Sections of the document, as exposed by the underlying object tree."""
    return self._root.sections
210
+
211
@property
def headers(self) -> list[HwpxOxmlHeader]:
    """Header parts of the document, as exposed by the underlying object tree."""
    return self._root.headers
215
+
216
@property
def master_pages(self) -> list[HwpxOxmlMasterPage]:
    """Master-page parts of the document, as exposed by the object tree."""
    return self._root.master_pages
220
+
221
@property
def histories(self) -> list[HwpxOxmlHistory]:
    """Document history parts, as exposed by the underlying object tree."""
    return self._root.histories
225
+
226
@property
def version(self) -> HwpxOxmlVersion | None:
    """Version metadata part of the document, or ``None`` when absent."""
    return self._root.version
230
+
231
@property
def border_fills(self) -> dict[str, GenericElement]:
    """Border fill definitions, as exposed by the underlying object tree."""
    return self._root.border_fills
236
+
237
def border_fill(self, border_fill_id_ref: int | str | None) -> GenericElement | None:
    """Look up one border fill by *border_fill_id_ref* via the object tree."""
    return self._root.border_fill(border_fill_id_ref)
241
+
242
@property
def memo_shapes(self) -> dict[str, MemoShape]:
    """Memo shape definitions, as exposed by the underlying object tree."""
    return self._root.memo_shapes
247
+
248
def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
    """Look up one memo shape by *memo_shape_id_ref* via the object tree."""
    return self._root.memo_shape(memo_shape_id_ref)
252
+
253
@property
def bullets(self) -> dict[str, Bullet]:
    """Bullet definitions, as exposed by the underlying object tree."""
    return self._root.bullets
258
+
259
def bullet(self, bullet_id_ref: int | str | None) -> Bullet | None:
    """Look up one bullet definition by *bullet_id_ref* via the object tree."""
    return self._root.bullet(bullet_id_ref)
263
+
264
@property
def paragraph_properties(self) -> dict[str, ParagraphProperty]:
    """Paragraph property definitions, as exposed by the object tree."""
    return self._root.paragraph_properties
269
+
270
def paragraph_property(
    self, para_pr_id_ref: int | str | None
) -> ParagraphProperty | None:
    """Look up one paragraph property by *para_pr_id_ref* via the object tree."""
    return self._root.paragraph_property(para_pr_id_ref)
276
+
277
@property
def styles(self) -> dict[str, Style]:
    """Style definitions, as exposed by the underlying object tree."""
    return self._root.styles
282
+
283
def style(self, style_id_ref: int | str | None) -> Style | None:
    """Look up one style definition by *style_id_ref* via the object tree."""
    return self._root.style(style_id_ref)
287
+
288
@property
def track_changes(self) -> dict[str, TrackChange]:
    """Tracked-change metadata, as exposed by the underlying object tree."""
    return self._root.track_changes
293
+
294
def track_change(self, change_id_ref: int | str | None) -> TrackChange | None:
    """Look up one tracked change by *change_id_ref* via the object tree."""
    return self._root.track_change(change_id_ref)
298
+
299
@property
def track_change_authors(self) -> dict[str, TrackChangeAuthor]:
    """Tracked-change author metadata, as exposed by the object tree."""
    return self._root.track_change_authors
304
+
305
def track_change_author(
    self, author_id_ref: int | str | None
) -> TrackChangeAuthor | None:
    """Look up one tracked-change author by *author_id_ref* via the object tree."""
    return self._root.track_change_author(author_id_ref)
311
+
312
@property
def memos(self) -> list[HwpxOxmlMemo]:
    """Collect the memos of every section, in section order, into a new list."""

    return [memo for section in self._root.sections for memo in section.memos]
320
+
321
def add_memo(
    self,
    text: str = "",
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    memo_shape_id_ref: str | int | None = None,
    memo_id: str | None = None,
    char_pr_id_ref: str | int | None = None,
    attributes: dict[str, str] | None = None,
) -> HwpxOxmlMemo:
    """Create a memo in the chosen section and return it.

    The target is *section* when given, otherwise the section at
    *section_index*, otherwise the last section of the document.  The
    remaining keyword arguments are forwarded to ``section.add_memo``.

    Raises:
        ValueError: If no section was selected and the document has none.
    """

    target = section
    if target is None and section_index is not None:
        target = self._root.sections[section_index]
    if target is None:
        available = self._root.sections
        if not available:
            raise ValueError("document does not contain any sections")
        target = available[-1]

    memo_options = {
        "memo_shape_id_ref": memo_shape_id_ref,
        "memo_id": memo_id,
        "char_pr_id_ref": char_pr_id_ref,
        "attributes": attributes,
    }
    return target.add_memo(text, **memo_options)
347
+
348
def remove_memo(self, memo: HwpxOxmlMemo) -> None:
    """Delete *memo* by delegating to its own ``remove()`` method."""

    memo.remove()
352
+
353
def attach_memo_field(
    self,
    paragraph: HwpxOxmlParagraph,
    memo: HwpxOxmlMemo,
    *,
    field_id: str | None = None,
    author: str | None = None,
    created: datetime | str | None = None,
    number: int = 1,
    char_pr_id_ref: str | int | None = None,
) -> str:
    """Wrap *paragraph* in a MEMO field pair that points at *memo*.

    A run holding ``fieldBegin`` is inserted as the first child of the
    paragraph element and a run holding ``fieldEnd`` is appended as the
    last child; the paragraph's section is then marked dirty.

    Args:
        paragraph: Anchor paragraph; must belong to a section.
        memo: Memo to reference; its group must belong to a section.
        field_id: Field identifier; a random UUID hex string when falsy.
        author: Author name; falls back to the memo's ``author``
            attribute, then to an empty string.
        created: Creation time; falls back to the memo's
            ``createDateTime`` attribute, then to the current local time.
            ``datetime`` values are formatted ``YYYY-MM-DD HH:MM:SS``.
        number: Memo number; values below 1 are written as 1.
        char_pr_id_ref: Character property for both runs; falls back to
            the paragraph's, then the memo's inferred one, then ``"0"``.

    Returns:
        The field identifier that was written.

    Raises:
        ValueError: If the paragraph or the memo is not in a section.
    """

    if paragraph.section is None:
        raise ValueError("paragraph must belong to a section before anchoring a memo")
    if memo.group.section is None:
        raise ValueError("memo is not attached to a section")

    field_value = field_id or uuid.uuid4().hex
    author_value = author or memo.attributes.get("author") or ""

    timestamp_format = "%Y-%m-%d %H:%M:%S"
    stamp = memo.attributes.get("createDateTime") if created is None else created
    if stamp is None:
        created_value = datetime.now().strftime(timestamp_format)
    elif isinstance(stamp, datetime):
        created_value = stamp.strftime(timestamp_format)
    else:
        created_value = str(stamp)

    memo_shape_id = memo.memo_shape_id_ref or ""

    # Walk the fallback chain lazily; each source is consulted only when
    # every earlier one produced ``None``.
    resolved_char = char_pr_id_ref
    fallbacks = (
        lambda: paragraph.char_pr_id_ref,
        memo._infer_char_pr_id_ref,
        lambda: "0",
    )
    for fallback in fallbacks:
        if resolved_char is not None:
            break
        resolved_char = fallback()
    char_ref = str(resolved_char)
    run_attrs = {"charPrIDRef": char_ref}

    host = paragraph.element
    memo_key = memo.id or field_value

    # Opening run: ctrl > fieldBegin > (parameters, subList).
    run_begin = host.makeelement(f"{_HP}run", dict(run_attrs))
    ctrl_begin = _append_element(run_begin, f"{_HP}ctrl")
    begin_attrs = {
        "id": field_value,
        "type": "MEMO",
        "editable": "true",
        "dirty": "false",
        "fieldid": field_value,
    }
    field_begin = _append_element(ctrl_begin, f"{_HP}fieldBegin", begin_attrs)

    parameters = _append_element(field_begin, f"{_HP}parameters", {"count": "5", "name": ""})
    parameter_rows = (
        ("stringParam", "ID", memo.id or ""),
        ("integerParam", "Number", str(max(1, number))),
        ("stringParam", "CreateDateTime", created_value),
        ("stringParam", "Author", author_value),
        ("stringParam", "MemoShapeID", memo_shape_id),
    )
    for kind, name, value in parameter_rows:
        _append_element(parameters, f"{_HP}{kind}", {"name": name}).text = value

    sub_list_attrs = {
        "id": f"memo-field-{memo_key}",
        "textDirection": "HORIZONTAL",
        "lineWrap": "BREAK",
        "vertAlign": "TOP",
    }
    sub_list = _append_element(field_begin, f"{_HP}subList", sub_list_attrs)
    sub_para_attrs = {
        "id": f"memo-field-{memo_key}-p",
        "paraPrIDRef": "0",
        "styleIDRef": "0",
        "pageBreak": "0",
        "columnBreak": "0",
        "merged": "0",
    }
    sub_para = _append_element(sub_list, f"{_HP}p", sub_para_attrs)
    sub_run = _append_element(sub_para, f"{_HP}run", dict(run_attrs))
    _append_element(sub_run, f"{_HP}t").text = memo_key

    # Closing run: ctrl > fieldEnd referencing the opening field.
    run_end = host.makeelement(f"{_HP}run", dict(run_attrs))
    ctrl_end = _append_element(run_end, f"{_HP}ctrl")
    _append_element(ctrl_end, f"{_HP}fieldEnd", {"beginIDRef": field_value, "fieldid": field_value})

    host.insert(0, run_begin)
    host.append(run_end)
    paragraph.section.mark_dirty()

    return field_value
449
+
450
def add_memo_with_anchor(
    self,
    text: str = "",
    *,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    paragraph_text: str | None = None,
    memo_shape_id_ref: str | int | None = None,
    memo_id: str | None = None,
    char_pr_id_ref: str | int | None = None,
    attributes: dict[str, str] | None = None,
    field_id: str | None = None,
    author: str | None = None,
    created: datetime | str | None = None,
    number: int = 1,
    anchor_char_pr_id_ref: str | int | None = None,
) -> tuple[HwpxOxmlMemo, HwpxOxmlParagraph, str]:
    """Create a memo and anchor it with a MEMO field in one call.

    The memo is created through :meth:`add_memo`.  When *paragraph* is
    ``None`` a new anchor paragraph is appended to the memo's section
    (holding *paragraph_text*, or an empty string); otherwise the given
    paragraph is used and its text is replaced only when
    *paragraph_text* is not ``None``.  The field is then attached with
    :meth:`attach_memo_field`.

    Args:
        text: Memo text.
        paragraph: Existing paragraph to anchor on, if any.
        section, section_index: Section selection forwarded to
            :meth:`add_memo`.
        paragraph_text: Text for the anchor paragraph.
        memo_shape_id_ref, memo_id, char_pr_id_ref, attributes:
            Forwarded to :meth:`add_memo`.
        field_id, author, created, number: Forwarded to
            :meth:`attach_memo_field`.
        anchor_char_pr_id_ref: Character property for the anchor.  When
            unset (``None`` or ``""``) a newly created anchor paragraph
            uses *char_pr_id_ref* instead.

    Returns:
        ``(memo, anchor_paragraph, field_id)``.

    Raises:
        ValueError: If the created memo does not belong to a section.
    """

    memo = self.add_memo(
        text,
        section=section,
        section_index=section_index,
        memo_shape_id_ref=memo_shape_id_ref,
        memo_id=memo_id,
        char_pr_id_ref=char_pr_id_ref,
        attributes=attributes,
    )

    target_paragraph = paragraph
    if target_paragraph is None:
        memo_section = memo.group.section
        if memo_section is None:
            raise ValueError("memo must belong to a section")
        # Compare against the "unset" values explicitly: a plain ``or``
        # would discard the valid integer id 0 and silently fall back.
        if anchor_char_pr_id_ref is None or anchor_char_pr_id_ref == "":
            anchor_char = char_pr_id_ref
        else:
            anchor_char = anchor_char_pr_id_ref
        target_paragraph = self.add_paragraph(
            "" if paragraph_text is None else paragraph_text,
            section=memo_section,
            char_pr_id_ref=anchor_char,
        )
    elif paragraph_text is not None:
        target_paragraph.text = paragraph_text

    field_value = self.attach_memo_field(
        target_paragraph,
        memo,
        field_id=field_id,
        author=author,
        created=created,
        number=number,
        char_pr_id_ref=anchor_char_pr_id_ref,
    )

    return memo, target_paragraph, field_value
506
+
507
def remove_paragraph(
    self,
    paragraph: HwpxOxmlParagraph | int,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> None:
    """Delete a paragraph by delegating to the underlying object tree.

    *paragraph* is either a :class:`HwpxOxmlParagraph` or an integer
    index into the paragraphs of the selected (or last) section.

    Raises ``ValueError`` if the target section would become empty.
    """
    location = {"section": section, "section_index": section_index}
    self._root.remove_paragraph(paragraph, **location)
527
+
528
def add_section(self, *, after: int | None = None) -> HwpxOxmlSection:
    """Create a new empty section through the object tree and return it.

    With *after* set, the section is inserted after the section at that
    index; otherwise it is appended.
    """
    created_section = self._root.add_section(after=after)
    return created_section
535
+
536
def remove_section(
    self, section: HwpxOxmlSection | int,
) -> None:
    """Delete *section* (an instance or an index) via the object tree.

    Raises ``ValueError`` if the document would have no sections left.
    """
    document_root = self._root
    document_root.remove_section(section)
544
+
545
@property
def paragraphs(self) -> list[HwpxOxmlParagraph]:
    """Paragraphs of the whole document, as exposed by the object tree."""
    return self._root.paragraphs
549
+
550
@property
def char_properties(self) -> dict[str, RunStyle]:
    """Character style definitions, as exposed by the underlying object tree."""
    return self._root.char_properties
555
+
556
def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
    """Look up one character style by *char_pr_id_ref* via the object tree."""
    return self._root.char_property(char_pr_id_ref)
560
+
561
def ensure_run_style(
    self,
    *,
    bold: bool = False,
    italic: bool = False,
    underline: bool = False,
    base_char_pr_id: str | int | None = None,
) -> str:
    """Obtain a ``charPr`` identifier for the requested flags.

    All arguments are forwarded unchanged to the object tree's
    ``ensure_run_style``; its result is returned as is.
    """
    requested = {
        "bold": bold,
        "italic": italic,
        "underline": underline,
        "base_char_pr_id": base_char_pr_id,
    }
    return self._root.ensure_run_style(**requested)
577
+
578
def iter_runs(self) -> Iterator[HwpxOxmlRun]:
    """Lazily yield the runs of each paragraph, in document order."""

    for paragraph in self.paragraphs:
        yield from paragraph.runs
584
+
585
def find_runs_by_style(
    self,
    *,
    text_color: str | None = None,
    underline_type: str | None = None,
    underline_color: str | None = None,
    char_pr_id_ref: str | int | None = None,
) -> list[HwpxOxmlRun]:
    """Collect the runs that satisfy every supplied style criterion.

    Criteria left as ``None`` are ignored.  *char_pr_id_ref* is compared
    as a stripped string against the run's own reference.  The colour
    and underline criteria are compared against the run's resolved
    style; a run without a style fails any such criterion.
    """

    wanted_char = None if char_pr_id_ref is None else str(char_pr_id_ref).strip()
    # (style accessor name, expected value) for each active criterion.
    criteria = [
        (accessor, expected)
        for accessor, expected in (
            ("text_color", text_color),
            ("underline_type", underline_type),
            ("underline_color", underline_color),
        )
        if expected is not None
    ]

    found: list[HwpxOxmlRun] = []
    for run in self.iter_runs():
        if wanted_char is not None and (run.char_pr_id_ref or "").strip() != wanted_char:
            continue
        run_style = run.style
        if criteria and run_style is None:
            continue
        if any(getattr(run_style, accessor)() != expected for accessor, expected in criteria):
            continue
        found.append(run)
    return found
615
+
616
def replace_text_in_runs(
    self,
    search: str,
    replacement: str,
    *,
    text_color: str | None = None,
    underline_type: str | None = None,
    underline_color: str | None = None,
    char_pr_id_ref: str | int | None = None,
    limit: int | None = None,
) -> int:
    """Substitute *search* with *replacement* inside style-filtered runs.

    Runs are selected with :meth:`find_runs_by_style` using the given
    filters.  At most *limit* substitutions are made in total when
    *limit* is not ``None``.

    Returns:
        The number of substitutions performed.

    Raises:
        ValueError: If *search* is empty.
    """

    if not search:
        raise ValueError("search must be a non-empty string")

    candidates = self.find_runs_by_style(
        text_color=text_color,
        underline_type=underline_type,
        underline_color=underline_color,
        char_pr_id_ref=char_pr_id_ref,
    )

    total = 0
    for run in candidates:
        budget = None if limit is None else limit - total
        if budget is not None and budget <= 0:
            break
        saved_char_pr = run.char_pr_id_ref
        changed = run.replace_text(search, replacement, count=budget)
        if changed and saved_char_pr is not None:
            # Re-apply the formatting reference in case the substitution
            # rewrote the run's XML nodes.
            run.char_pr_id_ref = saved_char_pr
        total += changed
    return total
660
+
661
+ # ------------------------------------------------------------------
662
+ # editing helpers
663
def add_paragraph(
    self,
    text: str = "",
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    include_run: bool = True,
    inherit_style: bool = True,
    **extra_attrs: str,
) -> HwpxOxmlParagraph:
    """Append a paragraph through the object tree and return it.

    With *inherit_style* left at ``True`` and no explicit style
    references, the new paragraph takes ``paraPrIDRef``, ``styleIDRef``
    and ``charPrIDRef`` from the last paragraph of the target section,
    so consecutive paragraphs share their formatting.

    ``para_pr_id_ref``, ``style_id_ref`` and ``char_pr_id_ref`` override
    those references.  Extra keyword arguments become raw paragraph
    attributes.
    """
    options = {
        "section": section,
        "section_index": section_index,
        "para_pr_id_ref": para_pr_id_ref,
        "style_id_ref": style_id_ref,
        "char_pr_id_ref": char_pr_id_ref,
        "run_attributes": run_attributes,
        "include_run": include_run,
        "inherit_style": inherit_style,
    }
    return self._root.add_paragraph(text, **options, **extra_attrs)
701
+
702
def add_table(
    self,
    rows: int,
    cols: int,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    width: int | None = None,
    height: int | None = None,
    border_fill_id_ref: str | int | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlTable:
    """Append a run-less paragraph and place a *rows* x *cols* table in it.

    When *border_fill_id_ref* is ``None`` the object tree's
    ``ensure_basic_border_fill()`` supplies the border fill.  Extra
    keyword arguments are passed to :meth:`add_paragraph` as paragraph
    attributes.
    """

    if border_fill_id_ref is None:
        border_fill: str | int | None = self._root.ensure_basic_border_fill()
    else:
        border_fill = border_fill_id_ref

    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    table_options = {
        "width": width,
        "height": height,
        "border_fill_id_ref": border_fill,
        "run_attributes": run_attributes,
        "char_pr_id_ref": char_pr_id_ref,
    }
    return host.add_table(rows, cols, **table_options)
743
+
744
def add_shape(
    self,
    shape_type: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    attributes: dict[str, str] | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlInlineObject:
    """Append a run-less paragraph and add an inline *shape_type* shape to it.

    Extra keyword arguments are passed to :meth:`add_paragraph` as
    paragraph attributes.
    """

    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    shape_options = {
        "attributes": attributes,
        "run_attributes": run_attributes,
        "char_pr_id_ref": char_pr_id_ref,
    }
    return host.add_shape(shape_type, **shape_options)
775
+
776
def add_control(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    attributes: dict[str, str] | None = None,
    control_type: str | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    char_pr_id_ref: str | int | None = None,
    run_attributes: dict[str, str] | None = None,
    **extra_attrs: str,
) -> HwpxOxmlInlineObject:
    """Append a run-less paragraph and add an inline control object to it.

    Extra keyword arguments are passed to :meth:`add_paragraph` as
    paragraph attributes.
    """

    host = self.add_paragraph(
        "",
        section=section,
        section_index=section_index,
        para_pr_id_ref=para_pr_id_ref,
        style_id_ref=style_id_ref,
        char_pr_id_ref=char_pr_id_ref,
        include_run=False,
        **extra_attrs,
    )
    control_options = {
        "attributes": attributes,
        "control_type": control_type,
        "run_attributes": run_attributes,
        "char_pr_id_ref": char_pr_id_ref,
    }
    return host.add_control(**control_options)
807
+
808
+ # ------------------------------------------------------------------
809
+ # Footnote / Endnote helpers
810
+ # ------------------------------------------------------------------
811
+
812
def add_footnote(
    self,
    text: str,
    paragraph: HwpxOxmlParagraph | None = None,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    char_pr_id_ref: str | int | None = None,
) -> HwpxOxmlNote:
    """Attach a footnote holding *text* to a paragraph.

    With *paragraph* omitted, a new run-less paragraph is first appended
    to the given (or last) section and the footnote is added there.
    """

    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_footnote(text, char_pr_id_ref=char_pr_id_ref)
835
+
836
def add_endnote(
    self,
    text: str,
    paragraph: HwpxOxmlParagraph | None = None,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    char_pr_id_ref: str | int | None = None,
) -> HwpxOxmlNote:
    """Attach an endnote holding *text* to a paragraph.

    With *paragraph* omitted, a new run-less paragraph is first appended
    via :meth:`add_paragraph` and the endnote is added there.
    """

    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_endnote(text, char_pr_id_ref=char_pr_id_ref)
855
+
856
+ # ------------------------------------------------------------------
857
+ # Drawing shapes
858
+ # ------------------------------------------------------------------
859
+
860
def add_line(
    self,
    start_x: int = 0,
    start_y: int = 0,
    end_x: int = 14400,
    end_y: int = 0,
    *,
    line_color: str = "#000000",
    line_width: str = "283",
    treat_as_char: bool = True,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlShape:
    """Draw a line from ``(start_x, start_y)`` to ``(end_x, end_y)``.

    Coordinates are in HWPUNIT (7200 per inch).  The shape is added to
    *paragraph*, or to a newly appended run-less paragraph when
    *paragraph* is ``None``.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_line(
        start_x,
        start_y,
        end_x,
        end_y,
        line_color=line_color,
        line_width=line_width,
        treat_as_char=treat_as_char,
    )
888
+
889
def add_rectangle(
    self,
    width: int = 14400,
    height: int = 7200,
    *,
    ratio: int = 0,
    line_color: str = "#000000",
    line_width: str = "283",
    fill_color: str | None = None,
    treat_as_char: bool = True,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlShape:
    """Draw a *width* x *height* rectangle.

    Dimensions are in HWPUNIT.  *ratio* controls corner roundness
    (0 = sharp, 50 = semicircle).  The shape is added to *paragraph*, or
    to a newly appended run-less paragraph when *paragraph* is ``None``.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_rectangle(
        width,
        height,
        ratio=ratio,
        line_color=line_color,
        line_width=line_width,
        fill_color=fill_color,
        treat_as_char=treat_as_char,
    )
918
+
919
def add_ellipse(
    self,
    width: int = 14400,
    height: int = 7200,
    *,
    line_color: str = "#000000",
    line_width: str = "283",
    fill_color: str | None = None,
    treat_as_char: bool = True,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlShape:
    """Draw a *width* x *height* ellipse.

    Dimensions are in HWPUNIT.  The shape is added to *paragraph*, or to
    a newly appended run-less paragraph when *paragraph* is ``None``.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_ellipse(
        width,
        height,
        line_color=line_color,
        line_width=line_width,
        fill_color=fill_color,
        treat_as_char=treat_as_char,
    )
946
+
947
+ # ------------------------------------------------------------------
948
+ # Column layout
949
+ # ------------------------------------------------------------------
950
+
951
def set_columns(
    self,
    col_count: int = 2,
    *,
    col_type: str = "NEWSPAPER",
    layout: str = "LEFT",
    same_size: bool = True,
    same_gap: int = 1200,
    column_widths: "Sequence[tuple[int, int]] | None" = None,
    separator_type: str | None = None,
    separator_width: str | None = None,
    separator_color: str | None = None,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlInlineObject:
    """Add a column definition control to a paragraph.

    The control (``hp:ctrl`` wrapping ``hp:colPr``) is added to
    *paragraph*, or to a newly appended run-less paragraph when
    *paragraph* is ``None``.  Text that follows is laid out in the
    requested number of columns.

    Args:
        col_count: Number of columns (1–255).
        col_type: ``NEWSPAPER``, ``BALANCED_NEWSPAPER``, or ``PARALLEL``.
        same_gap: Gap in HWPUNIT (7200 = 1 inch).
        separator_type: Optional column separator line type (e.g. ``SOLID``).

    The remaining layout arguments are forwarded unchanged to
    ``paragraph.add_column_definition``.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    column_options = {
        "col_type": col_type,
        "layout": layout,
        "same_size": same_size,
        "same_gap": same_gap,
        "column_widths": column_widths,
        "separator_type": separator_type,
        "separator_width": separator_width,
        "separator_color": separator_color,
    }
    return host.add_column_definition(col_count, **column_options)
994
+
995
+ # ------------------------------------------------------------------
996
+ # Bookmarks and hyperlinks
997
+ # ------------------------------------------------------------------
998
+
999
def add_bookmark(
    self,
    name: str,
    *,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlInlineObject:
    """Place a bookmark called *name* in a paragraph.

    The bookmark goes into *paragraph*, or into a newly appended
    run-less paragraph when *paragraph* is ``None``.

    Returns the ``hp:ctrl`` wrapper element.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_bookmark(name)
1017
+
1018
def add_hyperlink(
    self,
    url: str,
    display_text: str,
    *,
    paragraph: HwpxOxmlParagraph | None = None,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> HwpxOxmlInlineObject:
    """Place a hyperlink (fieldBegin + text + fieldEnd) in a paragraph.

    The link goes into *paragraph*, or into a newly appended run-less
    paragraph when *paragraph* is ``None``.

    Returns the ``hp:ctrl`` wrapper containing the ``hp:fieldBegin``.
    """
    host = paragraph
    if host is None:
        host = self.add_paragraph(
            "",
            section=section,
            section_index=section_index,
            include_run=False,
        )
    return host.add_hyperlink(url, display_text)
1037
+
1038
def set_header_text(
    self,
    text: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> HwpxOxmlSectionHeaderFooter:
    """Set the *page_type* header text of a section and return the header.

    The target is *section* when given, otherwise the section at
    *section_index*, otherwise the last section.  The work is delegated
    to ``section.properties.set_header_text``.

    Raises:
        ValueError: If no section was selected and the document has none.
    """

    chosen = section
    if chosen is None and section_index is not None:
        chosen = self._root.sections[section_index]
    if chosen is None:
        available = self._root.sections
        if not available:
            raise ValueError("document does not contain any sections")
        chosen = available[-1]
    return chosen.properties.set_header_text(text, page_type=page_type)
1056
+
1057
def set_footer_text(
    self,
    text: str,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> HwpxOxmlSectionHeaderFooter:
    """Set the footer text for *page_type* on one section.

    The section is chosen in this order: the explicit *section*, then
    ``sections[section_index]``, then the last section of the document.

    Raises:
        ValueError: If neither *section* nor *section_index* is given and
            the document has no sections.
    """

    chosen = section
    if chosen is None:
        if section_index is not None:
            chosen = self._root.sections[section_index]
        if chosen is None:
            # Nothing selected explicitly: fall back to the last section.
            if not self._root.sections:
                raise ValueError("document does not contain any sections")
            chosen = self._root.sections[-1]
    return chosen.properties.set_footer_text(text, page_type=page_type)
1075
+
1076
def remove_header(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> None:
    """Remove the header for *page_type* from one section, if present.

    The section is chosen in this order: the explicit *section*, then
    ``sections[section_index]``, then the last section of the document.
    When no section is selected and the document has none, the call
    returns without doing anything.
    """

    chosen = section
    if chosen is None:
        if section_index is not None:
            chosen = self._root.sections[section_index]
        if chosen is None:
            # Nothing selected explicitly: fall back to the last section,
            # or quietly give up when there is none.
            if not self._root.sections:
                return
            chosen = self._root.sections[-1]
    chosen.properties.remove_header(page_type=page_type)
1093
+
1094
def remove_footer(
    self,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
    page_type: str = "BOTH",
) -> None:
    """Remove the footer for *page_type* from one section, if present.

    The section is chosen in this order: the explicit *section*, then
    ``sections[section_index]``, then the last section of the document.
    When no section is selected and the document has none, the call
    returns without doing anything.
    """

    chosen = section
    if chosen is None:
        if section_index is not None:
            chosen = self._root.sections[section_index]
        if chosen is None:
            # Nothing selected explicitly: fall back to the last section,
            # or quietly give up when there is none.
            if not self._root.sections:
                return
            chosen = self._root.sections[-1]
    chosen.properties.remove_footer(page_type=page_type)
1111
+
1112
+ # ------------------------------------------------------------------
1113
+ # BinData / Image management
1114
+ # ------------------------------------------------------------------
1115
+
1116
# Maps a lower-case image extension (without the dot) to its media type.
# Extensions missing here fall back to ``image/<ext>`` in ``add_image``.
_FORMAT_TO_MEDIA_TYPE: dict[str, str] = {
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "png": "image/png",
    "gif": "image/gif",
    "bmp": "image/bmp",
    "tiff": "image/tiff",
    "tif": "image/tiff",
    "svg": "image/svg+xml",
}

def add_image(
    self,
    image_data: bytes,
    image_format: str,
    *,
    item_id: str | None = None,
) -> str:
    """Embed an image file and return the manifest item id.

    The image is written to ``BinData/<item_id>.<format>`` in the package,
    registered in the manifest, and (when the document has a header)
    registered in the first header's bin item list.

    Args:
        image_data: Raw image bytes.
        image_format: Image format extension (``jpg``, ``png``, ...). Case
            and a leading dot are ignored.
        item_id: Optional explicit manifest item id. When omitted an
            auto-generated ``BIN####`` id is used. An explicit id is used
            as given and is not checked for clashes.

    Returns:
        The manifest item id that can be passed to
        ``binaryItemIDRef`` when constructing a ``<hp:pic>`` element.

    Raises:
        ValueError: If *image_format* is empty after normalisation.
    """

    fmt = image_format.lower().lstrip(".")
    if not fmt:
        # An empty extension would yield "BinData/<id>." and "image/".
        raise ValueError("image_format must not be empty")
    media_type = self._FORMAT_TO_MEDIA_TYPE.get(fmt, f"image/{fmt}")

    header = self._root.headers[0] if self._root.headers else None

    # Determine a unique item id
    if item_id is None:
        entries = header.list_bin_items() if header is not None else []
        header_ids = {entry.get("id", "") for entry in entries}
        # The header assigns its own ``id`` values, so they need not look
        # like ``BIN####``. The generated id ends up as the stem of the
        # ``BinData`` file name, so those stems must be reserved as well;
        # otherwise an id freed by a removal can be handed out again while
        # a later one is still in use.
        taken = set(header_ids)
        for entry in entries:
            bin_data = entry.get("BinData", "")
            if bin_data:
                taken.add(bin_data.rsplit(".", 1)[0])
        n = len(header_ids) + 1
        while True:
            candidate = f"BIN{n:04d}"
            # Also probe the package so an existing part is never overwritten.
            if candidate not in taken and not self._package.has_part(
                f"BinData/{candidate}.{fmt}"
            ):
                break
            n += 1
        item_id = candidate

    # File path inside the ZIP
    bin_data_name = f"{item_id}.{fmt}"
    bin_data_path = f"BinData/{bin_data_name}"

    # 1) Write image bytes into the package
    self._package.write(bin_data_path, image_data)

    # 2) Register in manifest
    self._package.add_manifest_item(item_id, bin_data_path, media_type)

    # 3) Register in header binDataList
    if header is not None:
        header.add_bin_item(
            item_type="Embedding",
            bin_data_id=bin_data_name,
            format=fmt,
        )

    return item_id
1184
+
1185
def list_images(self) -> list[dict[str, str]]:
    """Return the bin item metadata recorded in the first header.

    Each dict contains the ``<hh:binItem>`` attributes (``id``, ``Type``,
    ``BinData``, ``Format``, ...). An empty list is returned when the
    document has no header.
    """

    primary = self._root.headers[0] if self._root.headers else None
    return [] if primary is None else primary.list_bin_items()
1196
+
1197
def remove_image(self, item_id: str) -> bool:
    """Remove an embedded image by its manifest item id.

    This removes the binary data from the ZIP, the manifest entry, and
    the header binItem entry.

    The header entry is found by comparing *item_id* with the ``BinData``
    file name, either in full or without its extension. The match is
    exact, so ``"BIN001"`` never selects ``"BIN0010.jpg"``. If the header
    does not yield a file path, the manifest ``href`` for *item_id* is
    used instead.

    Args:
        item_id: Manifest item id of the image. An empty id matches
            nothing.

    Returns:
        ``True`` if any component was removed.
    """

    if not item_id:
        # A prefix test on "" used to match (and remove) the first bin item.
        return False

    removed = False
    header = self._root.headers[0] if self._root.headers else None

    # Find file path and binItem id from header metadata
    bin_data_path: str | None = None
    bin_item_numeric_id: str | None = None
    if header is not None:
        for entry in header.list_bin_items():
            bin_data_val = entry.get("BinData", "")
            if not bin_data_val:
                continue
            # Compare whole names, not prefixes: "BIN0001" matches
            # "BIN0001.jpg" but not "BIN00010.jpg".
            stem = bin_data_val.rsplit(".", 1)[0]
            if item_id in (bin_data_val, stem):
                bin_item_numeric_id = entry.get("id")
                bin_data_path = f"BinData/{bin_data_val}"
                break

    # Also try manifest-based lookup for the file path
    if bin_data_path is None:
        # NOTE(review): relies on the package's private _manifest_element().
        manifest_el = self._package._manifest_element()
        if manifest_el is not None:
            ns = {"opf": "http://www.idpf.org/2007/opf/"}
            for it in manifest_el.findall("opf:item", ns):
                if it.get("id") == item_id:
                    href = it.get("href", "")
                    if href:
                        bin_data_path = href
                    break

    # Remove from header binDataList (use the header's own id)
    if header is not None and bin_item_numeric_id is not None:
        if header.remove_bin_item(bin_item_numeric_id):
            removed = True

    # Remove from manifest
    if self._package.remove_manifest_item(item_id):
        removed = True

    # Remove from ZIP
    if bin_data_path and self._package.has_part(bin_data_path):
        self._package.delete(bin_data_path)
        removed = True

    return removed
1250
+
1251
+ # ------------------------------------------------------------------
1252
+ # Export helpers
1253
+ # ------------------------------------------------------------------
1254
+
1255
def export_text(self, **kwargs: object) -> str:
    """Render this document as plain text.

    All keyword arguments are passed through to
    :func:`~hwpx.tools.exporter.export_text`.
    """
    # Imported here rather than at module level, as in the original.
    from .tools.exporter import export_text as render_text

    return render_text(self, **kwargs)  # type: ignore[arg-type]
1259
+
1260
def export_html(self, **kwargs: object) -> str:
    """Render this document as HTML.

    All keyword arguments are passed through to
    :func:`~hwpx.tools.exporter.export_html`.
    """
    # Imported here rather than at module level, as in the original.
    from .tools.exporter import export_html as render_html

    return render_html(self, **kwargs)  # type: ignore[arg-type]
1264
+
1265
def export_markdown(self, **kwargs: object) -> str:
    """Render this document as Markdown.

    All keyword arguments are passed through to
    :func:`~hwpx.tools.exporter.export_markdown`.
    """
    # Imported here rather than at module level, as in the original.
    from .tools.exporter import export_markdown as render_markdown

    return render_markdown(self, **kwargs)  # type: ignore[arg-type]
1269
+
1270
+ # ------------------------------------------------------------------
1271
+ # Validation
1272
+ # ------------------------------------------------------------------
1273
+
1274
def validate(self) -> "ValidationReport":
    """Validate the current document state against the XML schema.

    The document is serialized with :meth:`_to_bytes_raw` and the bytes
    are handed to :func:`~hwpx.tools.validator.validate_document`.
    ``validate_on_save`` does **not** need to be enabled for this.

    Returns:
        The :class:`~hwpx.tools.validator.ValidationReport` with any
        issues found.
    """
    from .tools.validator import validate_document as check_archive

    archive = self._to_bytes_raw()
    return check_archive(archive)
1284
+
1285
+ def _run_pre_save_validation(self) -> None:
1286
+ """Raise if validate_on_save is enabled and the document is invalid."""
1287
+ if not self.validate_on_save:
1288
+ return
1289
+ report = self.validate()
1290
+ if not report.ok:
1291
+ msgs = "; ".join(str(i) for i in report.issues[:5])
1292
+ remaining = len(report.issues) - 5
1293
+ if remaining > 0:
1294
+ msgs += f" … and {remaining} more"
1295
+ raise ValueError(f"Document validation failed: {msgs}")
1296
+
1297
def save_to_path(self, path: str | PathLike[str]) -> str | PathLike[str]:
    """Write pending changes to *path*.

    Runs the optional pre-save validation, serializes the document,
    saves it through the package, and then clears the dirty state.

    Returns:
        The value returned by the package's ``save`` call, or *path*
        itself when that call returns ``None``.
    """

    self._run_pre_save_validation()
    pending = self._root.serialize()
    outcome = self._package.save(path, pending)
    self._root.reset_dirty()
    if outcome is None:
        return path
    return outcome
1305
+
1306
def save_to_stream(self, stream: BinaryIO) -> BinaryIO:
    """Write pending changes to *stream*.

    Runs the optional pre-save validation, serializes the document,
    saves it through the package, and then clears the dirty state.

    Returns:
        The value returned by the package's ``save`` call, or *stream*
        itself when that call returns ``None``.
    """

    self._run_pre_save_validation()
    pending = self._root.serialize()
    outcome = self._package.save(stream, pending)
    self._root.reset_dirty()
    if outcome is None:
        return stream
    return outcome
1314
+
1315
def to_bytes(self) -> bytes:
    """Return the HWPX archive, including pending changes, as bytes.

    The optional pre-save validation runs first; the serialization itself
    is delegated to :meth:`_to_bytes_raw`.
    """

    self._run_pre_save_validation()
    archive = self._to_bytes_raw()
    return archive
1320
+
1321
+ def _to_bytes_raw(self) -> bytes:
1322
+ """Serialize without validation (used by :meth:`validate`)."""
1323
+ updates = self._root.serialize()
1324
+ result = self._package.save(None, updates)
1325
+ self._root.reset_dirty()
1326
+ if isinstance(result, bytes):
1327
+ return result
1328
+ raise TypeError("package.save(None) must return bytes")
1329
+
1330
@overload
def save(self, path_or_stream: None = None) -> bytes: ...

@overload
def save(self, path_or_stream: str | PathLike[str]) -> str | PathLike[str]: ...

@overload
def save(self, path_or_stream: BinaryIO) -> BinaryIO: ...

def save(
    self,
    path_or_stream: str | PathLike[str] | BinaryIO | None = None,
) -> str | PathLike[str] | BinaryIO | bytes:
    """Deprecated compatibility wrapper around save_to_path/save_to_stream/to_bytes.

    Deprecated:
        ``save()`` is kept for backward compatibility and may be removed
        in the future. Use instead:

        - Saving to a path: ``save_to_path(path)``
        - Saving to a stream: ``save_to_stream(stream)``
        - Getting bytes back: ``to_bytes()``

    A :class:`DeprecationWarning` is emitted on every call. The argument
    then selects the target: ``None`` returns bytes, a ``str`` or
    ``PathLike`` is saved as a path, and anything else is treated as a
    stream.
    """

    warnings.warn(
        "HwpxDocument.save()는 deprecated 예정입니다. "
        "save_to_path()/save_to_stream()/to_bytes() 사용을 권장합니다.",
        DeprecationWarning,
        stacklevel=2,
    )
    if isinstance(path_or_stream, (str, PathLike)):
        return self.save_to_path(path_or_stream)
    if path_or_stream is not None:
        return self.save_to_stream(path_or_stream)
    return self.to_bytes()