python-hwpx 1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwpx/__init__.py +23 -0
- hwpx/document.py +518 -0
- hwpx/opc/package.py +274 -0
- hwpx/oxml/__init__.py +138 -0
- hwpx/oxml/body.py +151 -0
- hwpx/oxml/common.py +31 -0
- hwpx/oxml/document.py +1932 -0
- hwpx/oxml/header.py +543 -0
- hwpx/oxml/parser.py +62 -0
- hwpx/oxml/schema.py +41 -0
- hwpx/oxml/utils.py +82 -0
- hwpx/package.py +202 -0
- hwpx/tools/__init__.py +36 -0
- hwpx/tools/_schemas/header.xsd +14 -0
- hwpx/tools/_schemas/section.xsd +12 -0
- hwpx/tools/object_finder.py +347 -0
- hwpx/tools/text_extractor.py +726 -0
- hwpx/tools/validator.py +184 -0
- python_hwpx-1.0.dist-info/LICENSE +32 -0
- python_hwpx-1.0.dist-info/METADATA +199 -0
- python_hwpx-1.0.dist-info/RECORD +24 -0
- python_hwpx-1.0.dist-info/WHEEL +5 -0
- python_hwpx-1.0.dist-info/entry_points.txt +2 -0
- python_hwpx-1.0.dist-info/top_level.txt +1 -0
hwpx/oxml/document.py
ADDED
|
@@ -0,0 +1,1932 @@
|
|
|
1
|
+
"""Object model mapping for the XML parts of an HWPX document."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Dict, Iterable, Iterator, List, Optional, Sequence, Tuple
|
|
7
|
+
from uuid import uuid4
|
|
8
|
+
import xml.etree.ElementTree as ET
|
|
9
|
+
|
|
10
|
+
from .header import MemoProperties, MemoShape, memo_shape_from_attributes
|
|
11
|
+
from .utils import parse_int
|
|
12
|
+
|
|
13
|
+
# XML namespace URIs used by this module, plus their ElementTree "{uri}"
# prefixes so qualified tags can be written as f"{_HP}p" or f"{_HH}charPr".
_HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
_HP = "{" + _HP_NS + "}"
_HH_NS = "http://www.hancom.co.kr/hwpml/2011/head"
_HH = "{" + _HH_NS + "}"

# Attribute values given to every paragraph element this module creates
# (the "id" attribute is generated separately for each paragraph).
_DEFAULT_PARAGRAPH_ATTRS = dict(
    paraPrIDRef="0",
    styleIDRef="0",
    pageBreak="0",
    columnBreak="0",
    merged="0",
)

# Fallback table-cell dimensions; presumably HWP units, used outside this
# part of the file -- TODO confirm against the table helpers.
_DEFAULT_CELL_WIDTH = 7200
_DEFAULT_CELL_HEIGHT = 3600
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _serialize_xml(element: ET.Element) -> bytes:
    """Serialize *element* to UTF-8 bytes, including the XML declaration."""
    options = {"encoding": "utf-8", "xml_declaration": True}
    return ET.tostring(element, **options)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _paragraph_id() -> str:
    """Return a fresh paragraph identifier.

    The identifier is the decimal form of the low 32 bits of a random UUID.
    """
    low_bits = uuid4().int % (1 << 32)
    return str(low_bits)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _object_id() -> str:
    """Return a fresh identifier for table and shape objects.

    The identifier is the decimal form of the low 32 bits of a random UUID.
    """
    return f"{uuid4().int & 0xFFFFFFFF:d}"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _memo_id() -> str:
    """Return a fresh identifier for a memo element.

    The identifier is the decimal form of the low 32 bits of a random UUID.
    """
    mask = (1 << 32) - 1
    random_bits = uuid4().int
    return str(random_bits & mask)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _create_paragraph_element(
    text: str,
    *,
    char_pr_id_ref: str | int | None = None,
    para_pr_id_ref: str | int | None = None,
    style_id_ref: str | int | None = None,
    paragraph_attributes: Optional[dict[str, str]] = None,
    run_attributes: Optional[dict[str, str]] = None,
) -> ET.Element:
    """Build an ``<hp:p>`` element holding one ``<hp:run>`` with one ``<hp:t>``.

    Paragraph attributes start from a generated ``id`` plus the module
    defaults, are then overlaid with *paragraph_attributes*, and finally with
    *para_pr_id_ref* / *style_id_ref* when those are given.

    For the run, a ``charPrIDRef`` already present in *run_attributes* wins;
    otherwise *char_pr_id_ref* is used, falling back to ``"0"``.
    """

    paragraph_attrs = {"id": _paragraph_id(), **_DEFAULT_PARAGRAPH_ATTRS}
    if paragraph_attributes:
        paragraph_attrs.update(paragraph_attributes)

    # Explicit keyword references take precedence over the attribute mapping.
    explicit_refs = (
        ("paraPrIDRef", para_pr_id_ref),
        ("styleIDRef", style_id_ref),
    )
    for attr_name, reference in explicit_refs:
        if reference is not None:
            paragraph_attrs[attr_name] = str(reference)

    paragraph = ET.Element(f"{_HP}p", paragraph_attrs)

    run_attrs: dict[str, str] = dict(run_attributes) if run_attributes else {}
    if "charPrIDRef" not in run_attrs:
        run_attrs["charPrIDRef"] = "0" if char_pr_id_ref is None else str(char_pr_id_ref)

    run = ET.SubElement(paragraph, f"{_HP}run", run_attrs)
    ET.SubElement(run, f"{_HP}t").text = text
    return paragraph
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _element_local_name(node: ET.Element) -> str:
    """Return the tag of *node* without its leading ``{namespace}`` part.

    Everything after the first ``}`` is returned; a tag without ``}`` is
    returned unchanged.
    """
    _, closing_brace, local = node.tag.partition("}")
    return local if closing_brace else node.tag
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _distribute_size(total: int, parts: int) -> List[int]:
    """Split *total* into *parts* integers that are as even as possible.

    The first ``total % parts`` entries receive one extra unit. Each entry is
    clamped to be non-negative, and a non-positive *parts* yields ``[]``.
    """

    if parts <= 0:
        return []

    share, extra = divmod(total, parts)
    return [
        max(share + 1 if position < extra else share, 0)
        for position in range(parts)
    ]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _default_cell_attributes(border_fill_id_ref: str) -> dict[str, str]:
    """Return the default attribute mapping for a table cell element.

    The cell gets an empty ``name``, every flag set to ``"0"``, and
    ``borderFillIDRef`` set to *border_fill_id_ref*.
    """
    attrs: dict[str, str] = {"name": ""}
    flag_names = ("header", "hasMargin", "protect", "editable", "dirty")
    attrs.update(dict.fromkeys(flag_names, "0"))
    attrs["borderFillIDRef"] = border_fill_id_ref
    return attrs
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _default_sublist_attributes() -> dict[str, str]:
    """Return a fresh default attribute mapping for an ``<hp:subList>`` element."""
    attrs = {
        "id": "",
        "textDirection": "HORIZONTAL",
        "lineWrap": "BREAK",
        "vertAlign": "CENTER",
    }
    # The remaining link, size and reference attributes all default to "0".
    zero_valued = (
        "linkListIDRef",
        "linkListNextIDRef",
        "textWidth",
        "textHeight",
        "hasTextRef",
        "hasNumRef",
    )
    for attr_name in zero_valued:
        attrs[attr_name] = "0"
    return attrs
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _default_cell_paragraph_attributes() -> dict[str, str]:
    """Return the default paragraph attributes plus a newly generated ``id``."""
    return {**_DEFAULT_PARAGRAPH_ATTRS, "id": _paragraph_id()}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _default_cell_margin_attributes() -> dict[str, str]:
    """Return a fresh mapping with all four cell margins set to ``"0"``."""
    return dict.fromkeys(("left", "right", "top", "bottom"), "0")
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _get_int_attr(element: ET.Element, name: str, default: int = 0) -> int:
    """Read attribute *name* of *element* as an ``int``.

    *default* is returned when the attribute is absent or when its value
    cannot be parsed by ``int()``.
    """

    raw = element.get(name)
    if raw is not None:
        try:
            return int(raw)
        except ValueError:
            pass  # unparsable value: fall through to the default
    return default
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@dataclass(slots=True)
|
|
158
|
+
class PageSize:
|
|
159
|
+
"""Represents the size and orientation of a page."""
|
|
160
|
+
|
|
161
|
+
width: int
|
|
162
|
+
height: int
|
|
163
|
+
orientation: str
|
|
164
|
+
gutter_type: str
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@dataclass(slots=True)
|
|
168
|
+
class PageMargins:
|
|
169
|
+
"""Encapsulates page margin values in HWP units."""
|
|
170
|
+
|
|
171
|
+
left: int
|
|
172
|
+
right: int
|
|
173
|
+
top: int
|
|
174
|
+
bottom: int
|
|
175
|
+
header: int
|
|
176
|
+
footer: int
|
|
177
|
+
gutter: int
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@dataclass(slots=True)
|
|
181
|
+
class SectionStartNumbering:
|
|
182
|
+
"""Starting numbers for section-level counters."""
|
|
183
|
+
|
|
184
|
+
page_starts_on: str
|
|
185
|
+
page: int
|
|
186
|
+
picture: int
|
|
187
|
+
table: int
|
|
188
|
+
equation: int
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@dataclass(slots=True)
|
|
192
|
+
class DocumentNumbering:
|
|
193
|
+
"""Document-wide numbering initial values defined in ``<hh:beginNum>``."""
|
|
194
|
+
|
|
195
|
+
page: int = 1
|
|
196
|
+
footnote: int = 1
|
|
197
|
+
endnote: int = 1
|
|
198
|
+
picture: int = 1
|
|
199
|
+
table: int = 1
|
|
200
|
+
equation: int = 1
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
@dataclass(slots=True)
|
|
204
|
+
class RunStyle:
|
|
205
|
+
"""Represents the resolved character style applied to a run."""
|
|
206
|
+
|
|
207
|
+
id: str
|
|
208
|
+
attributes: Dict[str, str]
|
|
209
|
+
child_attributes: Dict[str, Dict[str, str]]
|
|
210
|
+
|
|
211
|
+
def text_color(self) -> str | None:
|
|
212
|
+
return self.attributes.get("textColor")
|
|
213
|
+
|
|
214
|
+
def underline_type(self) -> str | None:
|
|
215
|
+
underline = self.child_attributes.get("underline")
|
|
216
|
+
if underline is None:
|
|
217
|
+
return None
|
|
218
|
+
return underline.get("type")
|
|
219
|
+
|
|
220
|
+
def underline_color(self) -> str | None:
|
|
221
|
+
underline = self.child_attributes.get("underline")
|
|
222
|
+
if underline is None:
|
|
223
|
+
return None
|
|
224
|
+
return underline.get("color")
|
|
225
|
+
|
|
226
|
+
def matches(
|
|
227
|
+
self,
|
|
228
|
+
*,
|
|
229
|
+
text_color: str | None = None,
|
|
230
|
+
underline_type: str | None = None,
|
|
231
|
+
underline_color: str | None = None,
|
|
232
|
+
) -> bool:
|
|
233
|
+
if text_color is not None and self.text_color() != text_color:
|
|
234
|
+
return False
|
|
235
|
+
if underline_type is not None and self.underline_type() != underline_type:
|
|
236
|
+
return False
|
|
237
|
+
if underline_color is not None and self.underline_color() != underline_color:
|
|
238
|
+
return False
|
|
239
|
+
return True
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _char_properties_from_header(element: ET.Element) -> Dict[str, RunStyle]:
    """Collect the ``charPr`` entries of a header element as ``RunStyle`` objects.

    Entries are read from ``refList/charProperties/charPr``. Each style is
    registered under its literal ``id`` and, when that id parses as an integer,
    also under the canonical decimal form (so ``"007"`` is reachable as
    ``"7"``). The first entry seen for a key wins. An empty mapping is returned
    when either container element is missing.
    """
    styles: Dict[str, RunStyle] = {}

    ref_list = element.find(f"{_HH}refList")
    container = None if ref_list is None else ref_list.find(f"{_HH}charProperties")
    if container is None:
        return styles

    for char_pr in container.findall(f"{_HH}charPr"):
        char_id = char_pr.get("id")
        if not char_id:
            continue

        own_attrs = dict(char_pr.attrib)
        own_attrs.pop("id", None)

        # Only children with no sub-elements and no non-blank text are kept.
        leaf_attrs: Dict[str, Dict[str, str]] = {}
        for leaf in char_pr:
            has_children = len(leaf) > 0
            has_text = bool(leaf.text and leaf.text.strip())
            if has_children or has_text:
                continue
            leaf_attrs[_element_local_name(leaf)] = dict(leaf.attrib)

        style = RunStyle(id=char_id, attributes=own_attrs, child_attributes=leaf_attrs)
        styles.setdefault(char_id, style)

        try:
            canonical = str(int(char_id))
        except (TypeError, ValueError):
            continue
        styles.setdefault(canonical, style)

    return styles
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class HwpxOxmlSectionHeaderFooter:
    """Wraps a ``<hp:header>`` or ``<hp:footer>`` element.

    Every mutation made through this wrapper marks the owning section dirty
    via ``properties.section.mark_dirty()``.
    """

    def __init__(
        self,
        element: ET.Element,
        properties: "HwpxOxmlSectionProperties",
    ):
        self.element = element
        self._properties = properties

    @property
    def id(self) -> str | None:
        """Return the identifier assigned to the header/footer element."""

        return self.element.get("id")

    @id.setter
    def id(self, value: str | None) -> None:
        # ``None`` removes the attribute; any other value is stored as a string.
        if value is None:
            if "id" in self.element.attrib:
                del self.element.attrib["id"]
                self._properties.section.mark_dirty()
            return

        new_value = str(value)
        if self.element.get("id") != new_value:
            self.element.set("id", new_value)
            self._properties.section.mark_dirty()

    @property
    def apply_page_type(self) -> str:
        """Return the page type the header/footer applies to (default ``"BOTH"``)."""

        return self.element.get("applyPageType", "BOTH")

    @apply_page_type.setter
    def apply_page_type(self, value: str) -> None:
        if self.element.get("applyPageType") != value:
            self.element.set("applyPageType", value)
            self._properties.section.mark_dirty()

    def _initial_sublist_attributes(self) -> dict[str, str]:
        """Return sub-list defaults: headers align to the top, footers to the bottom."""
        attrs = dict(_default_sublist_attributes())
        attrs["vertAlign"] = "TOP" if self.element.tag.endswith("header") else "BOTTOM"
        return attrs

    def _ensure_text_element(self) -> ET.Element:
        """Return the first ``subList/p/run/t`` node, creating any missing level."""
        sublist = self.element.find(f"{_HP}subList")
        if sublist is None:
            sublist = ET.SubElement(self.element, f"{_HP}subList", self._initial_sublist_attributes())
        paragraph = sublist.find(f"{_HP}p")
        if paragraph is None:
            paragraph_attrs = dict(_DEFAULT_PARAGRAPH_ATTRS)
            paragraph_attrs["id"] = _paragraph_id()
            paragraph = ET.SubElement(sublist, f"{_HP}p", paragraph_attrs)
        run = paragraph.find(f"{_HP}run")
        if run is None:
            run = ET.SubElement(paragraph, f"{_HP}run", {"charPrIDRef": "0"})
        text = run.find(f"{_HP}t")
        if text is None:
            text = ET.SubElement(run, f"{_HP}t")
        return text

    @property
    def text(self) -> str:
        """Return the concatenated text content of the header/footer.

        All text inside each ``<hp:t>`` node is collected with ``itertext()``,
        the same way ``HwpxOxmlRun.text`` does it. Reading only ``node.text``
        would drop text that follows an inline child element, because
        ElementTree stores that text as the child's ``tail``.
        """

        return "".join(
            "".join(node.itertext())
            for node in self.element.findall(f".//{_HP}t")
        )

    @text.setter
    def text(self, value: str) -> None:
        # Replace existing content with a simple paragraph: drop every direct
        # <hp:subList> child, then rebuild a minimal subList/p/run/t chain.
        for child in list(self.element):
            if child.tag == f"{_HP}subList":
                self.element.remove(child)
        text_node = self._ensure_text_element()
        text_node.text = value
        self._properties.section.mark_dirty()
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class HwpxOxmlSectionProperties:
    """Accessor for the settings stored in a section's ``<hp:secPr>`` element.

    Readers return plain value objects. Writers edit the XML in place and call
    ``section.mark_dirty()`` whenever the tree actually changes.
    """

    def __init__(self, element: ET.Element, section: "HwpxOxmlSection"):
        self.element = element
        self.section = section

    @staticmethod
    def _assign(target: ET.Element, attr_name: str, attr_value: str) -> bool:
        """Set *attr_name* on *target* and report whether the value changed."""
        if target.get(attr_name) == attr_value:
            return False
        target.set(attr_name, attr_value)
        return True

    # -- page configuration -------------------------------------------------
    def _page_pr_element(self, create: bool = False) -> ET.Element | None:
        """Return the ``<hp:pagePr>`` child, adding a default one if *create*."""
        found = self.element.find(f"{_HP}pagePr")
        if found is not None or not create:
            return found
        defaults = {"landscape": "PORTRAIT", "width": "0", "height": "0", "gutterType": "LEFT_ONLY"}
        found = ET.SubElement(self.element, f"{_HP}pagePr", defaults)
        self.section.mark_dirty()
        return found

    def _margin_element(self, create: bool = False) -> ET.Element | None:
        """Return ``<hp:margin>`` under ``<hp:pagePr>``, adding a zeroed one if *create*."""
        page_pr = self._page_pr_element(create=create)
        if page_pr is None:
            return None
        found = page_pr.find(f"{_HP}margin")
        if found is not None or not create:
            return found
        zeroed = dict.fromkeys(
            ("left", "right", "top", "bottom", "header", "footer", "gutter"), "0"
        )
        found = ET.SubElement(page_pr, f"{_HP}margin", zeroed)
        self.section.mark_dirty()
        return found

    @property
    def page_size(self) -> PageSize:
        """Return the page size, using zero/portrait defaults for missing data."""
        page_pr = self._page_pr_element()
        if page_pr is None:
            return PageSize(width=0, height=0, orientation="PORTRAIT", gutter_type="LEFT_ONLY")
        width = _get_int_attr(page_pr, "width", 0)
        height = _get_int_attr(page_pr, "height", 0)
        orientation = page_pr.get("landscape", "PORTRAIT")
        gutter_type = page_pr.get("gutterType", "LEFT_ONLY")
        return PageSize(width=width, height=height, orientation=orientation, gutter_type=gutter_type)

    def set_page_size(
        self,
        *,
        width: int | None = None,
        height: int | None = None,
        orientation: str | None = None,
        gutter_type: str | None = None,
    ) -> None:
        """Update the given page-size fields; negative dimensions are stored as 0."""
        page_pr = self._page_pr_element(create=True)
        if page_pr is None:
            return

        dirty = False
        if width is not None:
            dirty |= self._assign(page_pr, "width", str(max(width, 0)))
        if height is not None:
            dirty |= self._assign(page_pr, "height", str(max(height, 0)))
        if orientation is not None:
            dirty |= self._assign(page_pr, "landscape", orientation)
        if gutter_type is not None:
            dirty |= self._assign(page_pr, "gutterType", gutter_type)
        if dirty:
            self.section.mark_dirty()

    @property
    def page_margins(self) -> PageMargins:
        """Return the page margins, with zero for every missing or invalid value."""
        margin = self._margin_element()
        if margin is None:
            return PageMargins(left=0, right=0, top=0, bottom=0, header=0, footer=0, gutter=0)
        sides = ("left", "right", "top", "bottom", "header", "footer", "gutter")
        return PageMargins(**{side: _get_int_attr(margin, side, 0) for side in sides})

    def set_page_margins(
        self,
        *,
        left: int | None = None,
        right: int | None = None,
        top: int | None = None,
        bottom: int | None = None,
        header: int | None = None,
        footer: int | None = None,
        gutter: int | None = None,
    ) -> None:
        """Update the given margins; negative values are stored as 0."""
        margin = self._margin_element(create=True)
        if margin is None:
            return

        requested = {
            "left": left,
            "right": right,
            "top": top,
            "bottom": bottom,
            "header": header,
            "footer": footer,
            "gutter": gutter,
        }
        dirty = False
        for side, amount in requested.items():
            if amount is not None:
                dirty |= self._assign(margin, side, str(max(amount, 0)))
        if dirty:
            self.section.mark_dirty()

    # -- numbering ----------------------------------------------------------
    @property
    def start_numbering(self) -> SectionStartNumbering:
        """Return the ``<hp:startNum>`` counters, or zeros when it is absent."""
        start_num = self.element.find(f"{_HP}startNum")
        if start_num is None:
            return SectionStartNumbering(page_starts_on="BOTH", page=0, picture=0, table=0, equation=0)
        # Dataclass field name -> XML attribute name.
        counter_attrs = {"page": "page", "picture": "pic", "table": "tbl", "equation": "equation"}
        starts_on = start_num.get("pageStartsOn", "BOTH")
        counters = {
            field: _get_int_attr(start_num, attr_name, 0)
            for field, attr_name in counter_attrs.items()
        }
        return SectionStartNumbering(page_starts_on=starts_on, **counters)

    def set_start_numbering(
        self,
        *,
        page_starts_on: str | None = None,
        page: int | None = None,
        picture: int | None = None,
        table: int | None = None,
        equation: int | None = None,
    ) -> None:
        """Update the given start counters, creating ``<hp:startNum>`` if needed.

        Negative counters are stored as 0.
        """
        start_num = self.element.find(f"{_HP}startNum")
        if start_num is None:
            initial = {"pageStartsOn": "BOTH"}
            initial.update(dict.fromkeys(("page", "pic", "tbl", "equation"), "0"))
            start_num = ET.SubElement(self.element, f"{_HP}startNum", initial)
            self.section.mark_dirty()

        dirty = False
        if page_starts_on is not None:
            dirty |= self._assign(start_num, "pageStartsOn", page_starts_on)

        counters = (("page", page), ("pic", picture), ("tbl", table), ("equation", equation))
        for attr_name, number in counters:
            if number is not None:
                dirty |= self._assign(start_num, attr_name, str(max(number, 0)))

        if dirty:
            self.section.mark_dirty()

    # -- header/footer helpers ---------------------------------------------
    def _find_header_footer(self, tag: str, page_type: str) -> ET.Element | None:
        """Return the first *tag* child whose ``applyPageType`` equals *page_type*."""
        candidates = self.element.findall(f"{_HP}{tag}")
        return next(
            (node for node in candidates if node.get("applyPageType", "BOTH") == page_type),
            None,
        )

    def _ensure_header_footer(self, tag: str, page_type: str) -> ET.Element:
        """Return the matching header/footer element, creating it when missing."""
        existing = self._find_header_footer(tag, page_type)
        if existing is not None:
            return existing
        created = ET.SubElement(
            self.element,
            f"{_HP}{tag}",
            {"id": _object_id(), "applyPageType": page_type},
        )
        self.section.mark_dirty()
        return created

    def _wrap_optional(self, found: ET.Element | None) -> Optional[HwpxOxmlSectionHeaderFooter]:
        """Wrap *found* in a header/footer wrapper, passing ``None`` through."""
        return None if found is None else HwpxOxmlSectionHeaderFooter(found, self)

    def _write_text(self, tag: str, text: str, page_type: str) -> HwpxOxmlSectionHeaderFooter:
        """Set the text of the *tag* element for *page_type*, creating it if needed."""
        wrapper = HwpxOxmlSectionHeaderFooter(self._ensure_header_footer(tag, page_type), self)
        wrapper.text = text
        return wrapper

    def _discard(self, tag: str, page_type: str) -> None:
        """Remove the *tag* element for *page_type*; do nothing when it is absent."""
        found = self._find_header_footer(tag, page_type)
        if found is None:
            return
        self.element.remove(found)
        self.section.mark_dirty()

    @property
    def headers(self) -> List[HwpxOxmlSectionHeaderFooter]:
        """Return wrappers for every direct ``<hp:header>`` child."""
        return [
            HwpxOxmlSectionHeaderFooter(node, self)
            for node in self.element.findall(f"{_HP}header")
        ]

    @property
    def footers(self) -> List[HwpxOxmlSectionHeaderFooter]:
        """Return wrappers for every direct ``<hp:footer>`` child."""
        return [
            HwpxOxmlSectionHeaderFooter(node, self)
            for node in self.element.findall(f"{_HP}footer")
        ]

    def get_header(self, page_type: str = "BOTH") -> Optional[HwpxOxmlSectionHeaderFooter]:
        """Return the header for *page_type*, or ``None`` when there is none."""
        return self._wrap_optional(self._find_header_footer("header", page_type))

    def get_footer(self, page_type: str = "BOTH") -> Optional[HwpxOxmlSectionHeaderFooter]:
        """Return the footer for *page_type*, or ``None`` when there is none."""
        return self._wrap_optional(self._find_header_footer("footer", page_type))

    def set_header_text(self, text: str, page_type: str = "BOTH") -> HwpxOxmlSectionHeaderFooter:
        """Set the header text for *page_type* and return its wrapper."""
        return self._write_text("header", text, page_type)

    def set_footer_text(self, text: str, page_type: str = "BOTH") -> HwpxOxmlSectionHeaderFooter:
        """Set the footer text for *page_type* and return its wrapper."""
        return self._write_text("footer", text, page_type)

    def remove_header(self, page_type: str = "BOTH") -> None:
        """Remove the header for *page_type* if one exists."""
        self._discard("header", page_type)

    def remove_footer(self, page_type: str = "BOTH") -> None:
        """Remove the footer for *page_type* if one exists."""
        self._discard("footer", page_type)
|
|
619
|
+
|
|
620
|
+
|
|
621
|
+
class HwpxOxmlRun:
    """Lightweight wrapper around an ``<hp:run>`` element.

    Mutations mark the owning section dirty through
    ``paragraph.section.mark_dirty()``.
    """

    def __init__(self, element: ET.Element, paragraph: "HwpxOxmlParagraph"):
        self.element = element
        self.paragraph = paragraph

    @property
    def char_pr_id_ref(self) -> str | None:
        """Return the character property reference applied to the run."""
        return self.element.get("charPrIDRef")

    @char_pr_id_ref.setter
    def char_pr_id_ref(self, value: str | int | None) -> None:
        # ``None`` removes the attribute; any other value is stored as a string.
        if value is None:
            if "charPrIDRef" in self.element.attrib:
                del self.element.attrib["charPrIDRef"]
                self.paragraph.section.mark_dirty()
            return

        new_value = str(value)
        if self.element.get("charPrIDRef") != new_value:
            self.element.set("charPrIDRef", new_value)
            self.paragraph.section.mark_dirty()

    def _plain_text_nodes(self) -> List[ET.Element]:
        """Return the direct ``<hp:t>`` children that have no child elements."""
        return [
            node
            for node in self.element.findall(f"{_HP}t")
            if len(node) == 0
        ]

    def _ensure_plain_text_node(self) -> ET.Element:
        """Return the first plain ``<hp:t>`` child, appending one if none exists."""
        nodes = self._plain_text_nodes()
        if nodes:
            return nodes[0]
        return ET.SubElement(self.element, f"{_HP}t")

    @property
    def text(self) -> str:
        """Return all text under the run's direct ``<hp:t>`` children."""
        return "".join(
            "".join(node.itertext())
            for node in self.element.findall(f"{_HP}t")
        )

    @text.setter
    def text(self, value: str) -> None:
        # The first plain <hp:t> receives the text; later plain nodes are
        # blanked. <hp:t> nodes with child elements are left untouched.
        plain_nodes = self._plain_text_nodes()
        if plain_nodes:
            primary = plain_nodes[0]
            changed = (primary.text or "") != value
        else:
            # Appending a node changes the tree even when *value* is empty,
            # so the section has to be flagged for saving.
            primary = ET.SubElement(self.element, f"{_HP}t")
            plain_nodes = [primary]
            changed = True
        primary.text = value
        for extra in plain_nodes[1:]:
            if extra.text:
                extra.text = ""
                changed = True
        if changed:
            self.paragraph.section.mark_dirty()

    @property
    def style(self) -> RunStyle | None:
        """Return the run's resolved character style.

        ``None`` is returned when the section has no document or the run has
        no ``charPrIDRef``.
        """
        document = self.paragraph.section.document
        if document is None:
            return None
        char_pr_id = self.char_pr_id_ref
        if char_pr_id is None:
            return None
        return document.char_property(char_pr_id)

    def replace_text(
        self,
        search: str,
        replacement: str,
        *,
        count: int | None = None,
    ) -> int:
        """Replace ``search`` with ``replacement`` in-place.

        Only plain ``<hp:t>`` nodes (those without child elements) are edited,
        one node at a time, so a match spanning two nodes is not found.

        *count* caps the total number of replacements across all nodes;
        ``None`` means no cap, and a value of zero or less replaces nothing.

        Returns the number of replacements that were performed.

        Raises ``ValueError`` when *search* is empty.
        """

        if not search:
            raise ValueError("search text must be a non-empty string")

        total = 0
        remaining = count
        for node in self._plain_text_nodes():
            if remaining is not None and remaining <= 0:
                break
            current = node.text or ""
            occurrences = current.count(search)
            if not occurrences:
                continue
            if remaining is None:
                replaced_here = occurrences
                node.text = current.replace(search, replacement)
            else:
                replaced_here = min(remaining, occurrences)
                node.text = current.replace(search, replacement, replaced_here)
                remaining -= replaced_here
            total += replaced_here
        if total:
            self.paragraph.section.mark_dirty()
        return total

    def remove(self) -> None:
        """Detach the run from its paragraph and mark the section dirty."""
        parent = self.paragraph.element
        try:
            parent.remove(self.element)
        except ValueError:  # pragma: no cover - defensive branch
            return
        self.paragraph.section.mark_dirty()
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
class HwpxOxmlMemoGroup:
    """Wrapper for an ``<hp:memogroup>`` container and the memos inside it."""

    def __init__(self, element: ET.Element, section: "HwpxOxmlSection"):
        self.element = element
        self.section = section

    @property
    def memos(self) -> List["HwpxOxmlMemo"]:
        """Return a wrapper for each direct ``<hp:memo>`` child."""
        wrapped: List[HwpxOxmlMemo] = []
        for memo_element in self.element.findall(f"{_HP}memo"):
            wrapped.append(HwpxOxmlMemo(memo_element, self))
        return wrapped

    def add_memo(
        self,
        text: str = "",
        *,
        memo_shape_id_ref: str | int | None = None,
        memo_id: str | None = None,
        char_pr_id_ref: str | int | None = None,
        attributes: Optional[dict[str, str]] = None,
    ) -> "HwpxOxmlMemo":
        """Append a new memo holding *text* and return its wrapper.

        Keys already present in *attributes* win over *memo_id* and
        *memo_shape_id_ref*. Without either source of an ``id``, a fresh one
        is generated.
        """
        memo_attrs = dict(attributes) if attributes else {}
        if "id" not in memo_attrs:
            memo_attrs["id"] = memo_id or _memo_id()
        if memo_shape_id_ref is not None and "memoShapeIDRef" not in memo_attrs:
            memo_attrs["memoShapeIDRef"] = str(memo_shape_id_ref)

        new_memo = HwpxOxmlMemo(ET.SubElement(self.element, f"{_HP}memo", memo_attrs), self)
        new_memo.set_text(text, char_pr_id_ref=char_pr_id_ref)
        self.section.mark_dirty()
        return new_memo

    def _cleanup(self) -> None:
        """Remove this group from the section element once it has no children."""
        if len(self.element) > 0:
            return
        try:
            self.section.element.remove(self.element)
        except ValueError:  # pragma: no cover - defensive branch
            return
        else:
            self.section.mark_dirty()
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
class HwpxOxmlMemo:
    """Represents a memo entry contained within a memo group.

    The wrapper keeps a reference to the memo XML element and to the owning
    group.  Every mutation flags ``group.section`` as dirty.
    """

    def __init__(self, element: ET.Element, group: HwpxOxmlMemoGroup):
        self.element = element
        self.group = group

    @property
    def id(self) -> str | None:
        """Return the ``id`` attribute of the memo element, if present."""
        return self.element.get("id")

    @id.setter
    def id(self, value: str | None) -> None:
        self.set_attribute("id", value)

    @property
    def memo_shape_id_ref(self) -> str | None:
        """Return the ``memoShapeIDRef`` attribute, if present."""
        return self.element.get("memoShapeIDRef")

    @memo_shape_id_ref.setter
    def memo_shape_id_ref(self, value: str | int | None) -> None:
        self.set_attribute("memoShapeIDRef", value)

    @property
    def attributes(self) -> dict[str, str]:
        """Return a copy of the memo element attributes."""
        return dict(self.element.attrib)

    def set_attribute(self, name: str, value: str | int | None) -> None:
        """Set attribute *name* to ``str(value)``, or delete it for ``None``.

        The section is flagged dirty only when the element actually changes.
        """
        if value is None:
            if name in self.element.attrib:
                del self.element.attrib[name]
                self.group.section.mark_dirty()
            return
        new_value = str(value)
        if self.element.get(name) != new_value:
            self.element.set(name, new_value)
            self.group.section.mark_dirty()

    def _infer_char_pr_id_ref(self) -> str | None:
        """Return the first truthy ``char_pr_id_ref`` found on any run."""
        for paragraph in self.paragraphs:
            for run in paragraph.runs:
                if run.char_pr_id_ref:
                    return run.char_pr_id_ref
        return None

    @property
    def paragraphs(self) -> List["HwpxOxmlParagraph"]:
        """Return wrappers for every paragraph element below the memo.

        The search is recursive (``.//``), so nested paragraphs are included.
        """
        return [
            HwpxOxmlParagraph(node, self.group.section)
            for node in self.element.findall(f".//{_HP}p")
        ]

    @property
    def text(self) -> str:
        """Return the non-empty paragraph texts joined by newlines."""
        parts = [paragraph.text for paragraph in self.paragraphs]
        return "\n".join(part for part in parts if part)

    def set_text(
        self,
        value: str,
        *,
        char_pr_id_ref: str | int | None = None,
    ) -> None:
        """Replace the memo body with a single paragraph containing *value*.

        Existing ``paraList`` and ``p`` children are dropped and a fresh
        ``paraList`` holding one paragraph is appended.  When *char_pr_id_ref*
        is not supplied, the reference is inferred from the existing runs and
        falls back to ``"0"``.
        """
        desired = value or ""
        # Only None / "" mean "not supplied"; the integer 0 is an explicit id
        # and must not be discarded by a truthiness test.
        if char_pr_id_ref is None or char_pr_id_ref == "":
            # Inference has to run before the old paragraphs are removed.
            resolved_char = self._infer_char_pr_id_ref()
        else:
            resolved_char = char_pr_id_ref
        for child in list(self.element):
            if _element_local_name(child) in {"paraList", "p"}:
                self.element.remove(child)
        para_list = ET.SubElement(self.element, f"{_HP}paraList")
        paragraph = _create_paragraph_element(
            desired,
            char_pr_id_ref=str(resolved_char) if resolved_char is not None else "0",
        )
        para_list.append(paragraph)
        self.group.section.mark_dirty()

    @text.setter
    def text(self, value: str) -> None:
        self.set_text(value)

    def remove(self) -> None:
        """Remove the memo from its group and let the group clean itself up.

        Does nothing when the memo element is no longer a child of the group.
        """
        try:
            self.group.element.remove(self.element)
        except ValueError:  # pragma: no cover - defensive branch
            return
        self.group.section.mark_dirty()
        self.group._cleanup()
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
class HwpxOxmlInlineObject:
    """Thin attribute-level wrapper for an inline object element.

    Holds the wrapped XML element together with the paragraph wrapper that
    contains it; attribute changes flag ``paragraph.section`` as dirty.
    """

    def __init__(self, element: ET.Element, paragraph: "HwpxOxmlParagraph"):
        self.paragraph = paragraph
        self.element = element

    @property
    def tag(self) -> str:
        """Fully qualified XML tag of the wrapped element."""
        return self.element.tag

    @property
    def attributes(self) -> dict[str, str]:
        """Snapshot (copy) of the wrapped element's attributes."""
        return dict(self.element.attrib)

    def get_attribute(self, name: str) -> str | None:
        """Look up attribute *name*; ``None`` when it is absent."""
        return self.element.get(name)

    def set_attribute(self, name: str, value: str | int | None) -> None:
        """Write or delete attribute *name*.

        ``None`` deletes the attribute; any other value is stored as
        ``str(value)``.  The section is flagged dirty only on a real change.
        """
        attrs = self.element.attrib
        if value is not None:
            text = str(value)
            if attrs.get(name) != text:
                self.element.set(name, text)
                self.paragraph.section.mark_dirty()
        elif name in attrs:
            del attrs[name]
            self.paragraph.section.mark_dirty()
|
|
930
|
+
|
|
931
|
+
|
|
932
|
+
class HwpxOxmlTableCell:
    """Represents an individual table cell.

    The wrapper stores the cell element, the owning table wrapper and the row
    element that contains the cell.  Mutating helpers flag the table (and so
    its section) as dirty.  Read-only properties never modify the XML tree.
    """

    def __init__(
        self,
        element: ET.Element,
        table: "HwpxOxmlTable",
        row_element: ET.Element,
    ):
        self.element = element
        self.table = table
        self._row_element = row_element

    def _addr_element(self) -> ET.Element | None:
        """Return the ``cellAddr`` child, or ``None`` when it is missing."""
        return self.element.find(f"{_HP}cellAddr")

    def _span_element(self) -> ET.Element:
        """Return the ``cellSpan`` child, creating a 1x1 span if missing."""
        span = self.element.find(f"{_HP}cellSpan")
        if span is None:
            span = ET.SubElement(self.element, f"{_HP}cellSpan", {"colSpan": "1", "rowSpan": "1"})
        return span

    def _size_element(self) -> ET.Element:
        """Return the ``cellSz`` child, creating a zero-sized one if missing."""
        size = self.element.find(f"{_HP}cellSz")
        if size is None:
            size = ET.SubElement(self.element, f"{_HP}cellSz", {"width": "0", "height": "0"})
        return size

    def _ensure_text_element(self) -> ET.Element:
        """Return the first text node of the cell, building the chain if needed.

        Walks ``subList`` -> ``p`` -> ``run`` -> ``t`` and creates every
        missing level with the default attributes.
        """
        sublist = self.element.find(f"{_HP}subList")
        if sublist is None:
            sublist = ET.SubElement(self.element, f"{_HP}subList", _default_sublist_attributes())
        paragraph = sublist.find(f"{_HP}p")
        if paragraph is None:
            paragraph = ET.SubElement(sublist, f"{_HP}p", _default_cell_paragraph_attributes())
        run = paragraph.find(f"{_HP}run")
        if run is None:
            run = ET.SubElement(paragraph, f"{_HP}run", {"charPrIDRef": "0"})
        text = run.find(f"{_HP}t")
        if text is None:
            text = ET.SubElement(run, f"{_HP}t")
        return text

    @property
    def address(self) -> Tuple[int, int]:
        """Return ``(row, column)`` from ``cellAddr``; ``(0, 0)`` if absent."""
        addr = self._addr_element()
        if addr is None:
            return (0, 0)
        row = int(addr.get("rowAddr", "0"))
        col = int(addr.get("colAddr", "0"))
        return (row, col)

    @property
    def span(self) -> Tuple[int, int]:
        """Return ``(row_span, col_span)``; ``(1, 1)`` when no span is stored."""
        # Read directly instead of going through ``_span_element``: a getter
        # must not insert elements into the document as a side effect.
        span = self.element.find(f"{_HP}cellSpan")
        if span is None:
            return (1, 1)
        row_span = int(span.get("rowSpan", "1"))
        col_span = int(span.get("colSpan", "1"))
        return (row_span, col_span)

    def set_span(self, row_span: int, col_span: int) -> None:
        """Store the span, clamping both values to a minimum of 1."""
        span = self._span_element()
        span.set("rowSpan", str(max(row_span, 1)))
        span.set("colSpan", str(max(col_span, 1)))
        self.table.mark_dirty()

    def _size_attribute(self, name: str) -> int:
        """Read integer attribute *name* from ``cellSz`` without creating it."""
        size = self.element.find(f"{_HP}cellSz")
        if size is None:
            return 0
        return int(size.get(name, "0"))

    @property
    def width(self) -> int:
        """Return the stored cell width, or 0 when no size is stored."""
        return self._size_attribute("width")

    @property
    def height(self) -> int:
        """Return the stored cell height, or 0 when no size is stored."""
        return self._size_attribute("height")

    def set_size(self, width: int | None = None, height: int | None = None) -> None:
        """Update width and/or height (clamped to >= 0) and mark the table dirty.

        A ``None`` argument leaves the corresponding dimension untouched.
        """
        size = self._size_element()
        if width is not None:
            size.set("width", str(max(width, 0)))
        if height is not None:
            size.set("height", str(max(height, 0)))
        self.table.mark_dirty()

    @property
    def text(self) -> str:
        """Return the text of the first text node found anywhere in the cell."""
        text_element = self.element.find(f".//{_HP}t")
        if text_element is None or text_element.text is None:
            return ""
        return text_element.text

    @text.setter
    def text(self, value: str) -> None:
        text_element = self._ensure_text_element()
        text_element.text = value
        self.table.mark_dirty()

    def remove(self) -> None:
        """Remove the cell from its row and mark the table dirty.

        Raises:
            ValueError: if the cell element is no longer a child of the row.
        """
        self._row_element.remove(self.element)
        self.table.mark_dirty()
|
|
1031
|
+
|
|
1032
|
+
|
|
1033
|
+
class HwpxOxmlTableRow:
    """Wrapper around a single table row element.

    Keeps the row element and the owning table wrapper.
    """

    def __init__(self, element: ET.Element, table: "HwpxOxmlTable"):
        self.table = table
        self.element = element

    @property
    def cells(self) -> List[HwpxOxmlTableCell]:
        """Build a fresh cell wrapper for every direct ``tc`` child of the row."""
        wrapped: List[HwpxOxmlTableCell] = []
        for tc in self.element.findall(f"{_HP}tc"):
            wrapped.append(HwpxOxmlTableCell(tc, self.table, self.element))
        return wrapped
|
|
1046
|
+
|
|
1047
|
+
|
|
1048
|
+
class HwpxOxmlTable:
    """Wrapper for an ``<hp:tbl>`` inline object.

    Stores the table element and the paragraph wrapper hosting it; changes are
    reported to ``paragraph.section`` through :meth:`mark_dirty`.
    """

    def __init__(self, element: ET.Element, paragraph: "HwpxOxmlParagraph"):
        self.paragraph = paragraph
        self.element = element

    @classmethod
    def create(
        cls,
        rows: int,
        cols: int,
        *,
        width: int | None = None,
        height: int | None = None,
        border_fill_id_ref: str | int = "0",
    ) -> ET.Element:
        """Build a detached ``tbl`` element holding ``rows`` x ``cols`` cells.

        When *width* / *height* are omitted, the size is derived from the
        default cell dimensions.  Negative sizes are clamped to 0.

        Raises:
            ValueError: if *rows* or *cols* is not positive.
        """
        if cols <= 0 or rows <= 0:
            raise ValueError("rows and cols must be positive integers")

        if width is None:
            width = cols * _DEFAULT_CELL_WIDTH
        if height is None:
            height = rows * _DEFAULT_CELL_HEIGHT
        safe_width = max(width, 0)
        safe_height = max(height, 0)
        border_fill = str(border_fill_id_ref)

        table = ET.Element(
            f"{_HP}tbl",
            {
                "id": _object_id(),
                "zOrder": "0",
                "numberingType": "TABLE",
                "textWrap": "TOP_AND_BOTTOM",
                "textFlow": "BOTH_SIDES",
                "lock": "0",
                "dropcapstyle": "None",
                "pageBreak": "CELL",
                "repeatHeader": "0",
                "rowCnt": str(rows),
                "colCnt": str(cols),
                "cellSpacing": "0",
                "borderFillIDRef": border_fill,
                "noAdjust": "0",
            },
        )

        size_attrs = {
            "width": str(safe_width),
            "widthRelTo": "ABSOLUTE",
            "height": str(safe_height),
            "heightRelTo": "ABSOLUTE",
            "protect": "0",
        }
        ET.SubElement(table, f"{_HP}sz", size_attrs)

        position_attrs = {
            "treatAsChar": "1",
            "affectLSpacing": "0",
            "flowWithText": "1",
            "allowOverlap": "0",
            "holdAnchorAndSO": "0",
            "vertRelTo": "PARA",
            "horzRelTo": "COLUMN",
            "vertAlign": "TOP",
            "horzAlign": "LEFT",
            "vertOffset": "0",
            "horzOffset": "0",
        }
        ET.SubElement(table, f"{_HP}pos", position_attrs)

        ET.SubElement(table, f"{_HP}outMargin", _default_cell_margin_attributes())
        ET.SubElement(table, f"{_HP}inMargin", _default_cell_margin_attributes())

        column_widths = _distribute_size(safe_width, cols)
        row_heights = _distribute_size(safe_height, rows)

        for row_index in range(rows):
            tr = ET.SubElement(table, f"{_HP}tr")
            for col_index in range(cols):
                tc = ET.SubElement(tr, f"{_HP}tc", _default_cell_attributes(border_fill))

                # Empty text chain: subList -> p -> run -> t.
                sublist = ET.SubElement(tc, f"{_HP}subList", _default_sublist_attributes())
                para = ET.SubElement(sublist, f"{_HP}p", _default_cell_paragraph_attributes())
                run = ET.SubElement(para, f"{_HP}run", {"charPrIDRef": "0"})
                ET.SubElement(run, f"{_HP}t")

                address = {"colAddr": str(col_index), "rowAddr": str(row_index)}
                ET.SubElement(tc, f"{_HP}cellAddr", address)
                ET.SubElement(tc, f"{_HP}cellSpan", {"colSpan": "1", "rowSpan": "1"})

                cell_width = column_widths[col_index] if column_widths else 0
                cell_height = row_heights[row_index] if row_heights else 0
                ET.SubElement(
                    tc,
                    f"{_HP}cellSz",
                    {"width": str(cell_width), "height": str(cell_height)},
                )
                ET.SubElement(tc, f"{_HP}cellMargin", _default_cell_margin_attributes())
        return table

    def mark_dirty(self) -> None:
        """Flag the section of the hosting paragraph as modified."""
        self.paragraph.section.mark_dirty()

    @property
    def row_count(self) -> int:
        """Row count from ``rowCnt``; falls back to counting ``tr`` children."""
        declared = self.element.get("rowCnt")
        if declared is None or not declared.isdigit():
            return len(self.element.findall(f"{_HP}tr"))
        return int(declared)

    @property
    def column_count(self) -> int:
        """Column count from ``colCnt``; falls back to the first row's cells."""
        declared = self.element.get("colCnt")
        if declared is not None and declared.isdigit():
            return int(declared)
        leading_row = self.element.find(f"{_HP}tr")
        return 0 if leading_row is None else len(leading_row.findall(f"{_HP}tc"))

    @property
    def rows(self) -> List[HwpxOxmlTableRow]:
        """Fresh row wrappers for every direct ``tr`` child."""
        wrapped: List[HwpxOxmlTableRow] = []
        for tr in self.element.findall(f"{_HP}tr"):
            wrapped.append(HwpxOxmlTableRow(tr, self))
        return wrapped

    def cell(self, row_index: int, col_index: int) -> HwpxOxmlTableCell:
        """Return the cell whose address/span rectangle covers the coordinate.

        Raises:
            IndexError: for negative coordinates or when no cell covers them.
        """
        if row_index < 0 or col_index < 0:
            raise IndexError("row_index and col_index must be non-negative")

        for tr in self.element.findall(f"{_HP}tr"):
            for tc in tr.findall(f"{_HP}tc"):
                candidate = HwpxOxmlTableCell(tc, self, tr)
                top, left = candidate.address
                rows_spanned, cols_spanned = candidate.span
                covers_row = top <= row_index < top + rows_spanned
                covers_col = left <= col_index < left + cols_spanned
                if covers_row and covers_col:
                    return candidate
        raise IndexError("cell coordinates out of range")

    def set_cell_text(self, row_index: int, col_index: int, text: str) -> None:
        """Assign *text* to the cell covering ``(row_index, col_index)``."""
        self.cell(row_index, col_index).text = text

    def merge_cells(
        self,
        start_row: int,
        start_col: int,
        end_row: int,
        end_col: int,
    ) -> HwpxOxmlTableCell:
        """Merge the inclusive rectangle into its top-left cell and return it.

        Every other cell covering the rectangle is removed from its row.  The
        surviving cell receives the combined span; its size is the sum of the
        widths along the top row and the heights along the left column.

        Raises:
            ValueError: if the rectangle is inverted, the top-left coordinate
                is not the origin of a cell, or a covered cell extends beyond
                the rectangle.
            IndexError: if a coordinate is negative or outside the table.
        """
        if start_row > end_row or start_col > end_col:
            raise ValueError("merge coordinates must describe a valid rectangle")
        if start_row < 0 or start_col < 0:
            raise IndexError("merge coordinates must be non-negative")
        if end_row >= self.row_count or end_col >= self.column_count:
            raise IndexError("merge coordinates exceed table bounds")

        anchor = self.cell(start_row, start_col)
        if anchor.address != (start_row, start_col):
            raise ValueError("top-left cell must align with merge starting position")

        merged_row_span = end_row - start_row + 1
        merged_col_span = end_col - start_col + 1

        # Remember which row owns each cell so the cell can be detached later.
        owning_row: dict[ET.Element, ET.Element] = {
            tc: tr
            for tr in self.element.findall(f"{_HP}tr")
            for tc in tr.findall(f"{_HP}tc")
        }

        doomed: set[ET.Element] = set()
        counted_for_width: set[ET.Element] = set()
        counted_for_height: set[ET.Element] = set()
        width_sum = 0
        height_sum = 0

        # Validation and bookkeeping only; nothing is mutated in this pass.
        for row_index in range(start_row, end_row + 1):
            for col_index in range(start_col, end_col + 1):
                member = self.cell(row_index, col_index)
                top, left = member.address
                rows_spanned, cols_spanned = member.span
                bottom = top + rows_spanned - 1
                right = left + cols_spanned - 1
                if top < start_row or left < start_col or bottom > end_row or right > end_col:
                    raise ValueError("Cells to merge must be entirely inside the merge region")
                node = member.element
                # A spanning cell is hit several times; count it only once.
                if row_index == start_row and node not in counted_for_width:
                    counted_for_width.add(node)
                    width_sum += member.width
                if col_index == start_col and node not in counted_for_height:
                    counted_for_height.add(node)
                    height_sum += member.height
                if node is not anchor.element:
                    doomed.add(node)

        if not doomed and anchor.span == (merged_row_span, merged_col_span):
            # The rectangle is already a single cell with the requested span.
            return anchor

        for node in doomed:
            owner = owning_row.get(node)
            if owner is None:
                continue
            try:
                owner.remove(node)
            except ValueError:
                continue

        anchor.set_span(merged_row_span, merged_col_span)
        anchor.set_size(width_sum or anchor.width, height_sum or anchor.height)
        self.mark_dirty()
        return anchor
|
|
1262
|
+
|
|
1263
|
+
@dataclass
class HwpxOxmlParagraph:
    """Lightweight wrapper around an ``<hp:p>`` element."""

    # The wrapped paragraph element.
    element: ET.Element
    # Owning section wrapper; flagged dirty whenever the paragraph changes.
    section: HwpxOxmlSection

    def _run_elements(self) -> List[ET.Element]:
        """Return the ``run`` elements that are direct children."""
        return self.element.findall(f"{_HP}run")

    def _ensure_run(self) -> ET.Element:
        """Return the first run, appending a new one when none exists.

        The caller is responsible for marking the section dirty.
        """
        runs = self._run_elements()
        if runs:
            return runs[0]
        run_attrs = {"charPrIDRef": self.char_pr_id_ref or "0"}
        return ET.SubElement(self.element, f"{_HP}run", run_attrs)

    def _set_reference(self, name: str, value: str | int | None) -> None:
        """Set or delete paragraph attribute *name*; dirty only on real change."""
        if value is None:
            if name in self.element.attrib:
                del self.element.attrib[name]
                self.section.mark_dirty()
            return
        new_value = str(value)
        if self.element.get(name) != new_value:
            self.element.set(name, new_value)
            self.section.mark_dirty()

    @property
    def runs(self) -> List[HwpxOxmlRun]:
        """Return the runs contained in this paragraph."""
        return [HwpxOxmlRun(run, self) for run in self._run_elements()]

    @property
    def text(self) -> str:
        """Return the concatenated textual content of this paragraph.

        Text nodes are collected recursively (``.//``), so text inside nested
        structures is included.
        """
        return "".join(
            text_element.text
            for text_element in self.element.findall(f".//{_HP}t")
            if text_element.text
        )

    @text.setter
    def text(self, value: str) -> None:
        """Replace the textual contents of this paragraph."""
        # Remove existing text nodes but preserve other children (e.g. controls).
        for run in self._run_elements():
            for child in list(run):
                if child.tag == f"{_HP}t":
                    run.remove(child)
        run = self._ensure_run()
        text_element = ET.SubElement(run, f"{_HP}t")
        text_element.text = value
        self.section.mark_dirty()

    def _create_run_for_object(
        self,
        run_attributes: dict[str, str] | None = None,
        *,
        char_pr_id_ref: str | int | None = None,
    ) -> ET.Element:
        """Append a new run intended to host an inline object.

        A ``charPrIDRef`` already present in *run_attributes* wins.  Otherwise
        *char_pr_id_ref* is used, then the paragraph's uniform reference, then
        ``"0"``.
        """
        attrs = dict(run_attributes or {})
        if "charPrIDRef" not in attrs:
            if char_pr_id_ref is not None:
                attrs["charPrIDRef"] = str(char_pr_id_ref)
            else:
                attrs["charPrIDRef"] = str(self.char_pr_id_ref or "0")
        return ET.SubElement(self.element, f"{_HP}run", attrs)

    @property
    def tables(self) -> List["HwpxOxmlTable"]:
        """Return the tables embedded within this paragraph."""
        return [
            HwpxOxmlTable(child, self)
            for run in self._run_elements()
            for child in run
            if child.tag == f"{_HP}tbl"
        ]

    def add_table(
        self,
        rows: int,
        cols: int,
        *,
        width: int | None = None,
        height: int | None = None,
        border_fill_id_ref: str | int = "0",
        run_attributes: dict[str, str] | None = None,
        char_pr_id_ref: str | int | None = None,
    ) -> HwpxOxmlTable:
        """Append a new run containing a freshly created table.

        The run is appended before the table element is built, matching the
        order of the original implementation.
        """
        run = self._create_run_for_object(
            run_attributes,
            char_pr_id_ref=char_pr_id_ref,
        )
        table_element = HwpxOxmlTable.create(
            rows,
            cols,
            width=width,
            height=height,
            border_fill_id_ref=border_fill_id_ref,
        )
        run.append(table_element)
        self.section.mark_dirty()
        return HwpxOxmlTable(table_element, self)

    def add_shape(
        self,
        shape_type: str,
        attributes: dict[str, str] | None = None,
        *,
        run_attributes: dict[str, str] | None = None,
        char_pr_id_ref: str | int | None = None,
    ) -> HwpxOxmlInlineObject:
        """Append a new run containing an element named *shape_type*.

        Raises:
            ValueError: if *shape_type* is empty.
        """
        if not shape_type:
            raise ValueError("shape_type must be a non-empty string")
        run = self._create_run_for_object(
            run_attributes,
            char_pr_id_ref=char_pr_id_ref,
        )
        element = ET.SubElement(run, f"{_HP}{shape_type}", dict(attributes or {}))
        self.section.mark_dirty()
        return HwpxOxmlInlineObject(element, self)

    def add_control(
        self,
        attributes: dict[str, str] | None = None,
        *,
        control_type: str | None = None,
        run_attributes: dict[str, str] | None = None,
        char_pr_id_ref: str | int | None = None,
    ) -> HwpxOxmlInlineObject:
        """Append a new run containing a ``ctrl`` element.

        *control_type* fills the ``type`` attribute unless *attributes*
        already supplies one.
        """
        attrs = dict(attributes or {})
        if control_type is not None:
            attrs.setdefault("type", control_type)
        run = self._create_run_for_object(
            run_attributes,
            char_pr_id_ref=char_pr_id_ref,
        )
        element = ET.SubElement(run, f"{_HP}ctrl", attrs)
        self.section.mark_dirty()
        return HwpxOxmlInlineObject(element, self)

    @property
    def para_pr_id_ref(self) -> str | None:
        """Return the paragraph property reference applied to this paragraph."""
        return self.element.get("paraPrIDRef")

    @para_pr_id_ref.setter
    def para_pr_id_ref(self, value: str | int | None) -> None:
        self._set_reference("paraPrIDRef", value)

    @property
    def style_id_ref(self) -> str | None:
        """Return the style reference applied to this paragraph."""
        return self.element.get("styleIDRef")

    @style_id_ref.setter
    def style_id_ref(self, value: str | int | None) -> None:
        self._set_reference("styleIDRef", value)

    @property
    def char_pr_id_ref(self) -> str | None:
        """Return the shared character property reference across runs.

        If runs use multiple different references the value ``None`` is
        returned, indicating the paragraph does not have a uniform character
        style applied.  ``None`` is also returned when no run carries a
        reference.
        """
        values = {
            run.get("charPrIDRef")
            for run in self._run_elements()
            if run.get("charPrIDRef") is not None
        }
        if len(values) == 1:
            return next(iter(values))
        return None

    @char_pr_id_ref.setter
    def char_pr_id_ref(self, value: str | int | None) -> None:
        """Apply *value* to every run; ``None`` removes the reference."""
        new_value = None if value is None else str(value)
        runs = self._run_elements()
        # Creating a run modifies the tree, so that alone counts as a change
        # even if the new run already carries the requested reference.
        changed = not runs
        if not runs:
            runs = [self._ensure_run()]

        for run in runs:
            if new_value is None:
                if "charPrIDRef" in run.attrib:
                    del run.attrib["charPrIDRef"]
                    changed = True
            elif run.get("charPrIDRef") != new_value:
                run.set("charPrIDRef", new_value)
                changed = True

        if changed:
            self.section.mark_dirty()
|
|
1477
|
+
|
|
1478
|
+
|
|
1479
|
+
class HwpxOxmlSection:
|
|
1480
|
+
"""Represents the contents of a section XML part."""
|
|
1481
|
+
|
|
1482
|
+
def __init__(
|
|
1483
|
+
self,
|
|
1484
|
+
part_name: str,
|
|
1485
|
+
element: ET.Element,
|
|
1486
|
+
document: "HwpxOxmlDocument" | None = None,
|
|
1487
|
+
):
|
|
1488
|
+
self.part_name = part_name
|
|
1489
|
+
self._element = element
|
|
1490
|
+
self._dirty = False
|
|
1491
|
+
self._properties_cache: HwpxOxmlSectionProperties | None = None
|
|
1492
|
+
self._document = document
|
|
1493
|
+
|
|
1494
|
+
def _section_properties_element(self) -> ET.Element | None:
|
|
1495
|
+
return self._element.find(f".//{_HP}secPr")
|
|
1496
|
+
|
|
1497
|
+
def _ensure_section_properties_element(self) -> ET.Element:
|
|
1498
|
+
element = self._section_properties_element()
|
|
1499
|
+
if element is not None:
|
|
1500
|
+
return element
|
|
1501
|
+
|
|
1502
|
+
paragraph = self._element.find(f"{_HP}p")
|
|
1503
|
+
if paragraph is None:
|
|
1504
|
+
paragraph_attrs = dict(_DEFAULT_PARAGRAPH_ATTRS)
|
|
1505
|
+
paragraph_attrs["id"] = _paragraph_id()
|
|
1506
|
+
paragraph = ET.SubElement(self._element, f"{_HP}p", paragraph_attrs)
|
|
1507
|
+
run = paragraph.find(f"{_HP}run")
|
|
1508
|
+
if run is None:
|
|
1509
|
+
run = ET.SubElement(paragraph, f"{_HP}run", {"charPrIDRef": "0"})
|
|
1510
|
+
element = ET.SubElement(run, f"{_HP}secPr")
|
|
1511
|
+
self._properties_cache = None
|
|
1512
|
+
self.mark_dirty()
|
|
1513
|
+
return element
|
|
1514
|
+
|
|
1515
|
+
@property
|
|
1516
|
+
def properties(self) -> HwpxOxmlSectionProperties:
|
|
1517
|
+
"""Return a wrapper exposing section-level options."""
|
|
1518
|
+
|
|
1519
|
+
if self._properties_cache is None:
|
|
1520
|
+
element = self._section_properties_element()
|
|
1521
|
+
if element is None:
|
|
1522
|
+
element = self._ensure_section_properties_element()
|
|
1523
|
+
self._properties_cache = HwpxOxmlSectionProperties(element, self)
|
|
1524
|
+
return self._properties_cache
|
|
1525
|
+
|
|
1526
|
+
def _paragraph_elements(self) -> Iterable[ET.Element]:
|
|
1527
|
+
return self._element.findall(f"{_HP}p")
|
|
1528
|
+
|
|
1529
|
+
@property
|
|
1530
|
+
def element(self) -> ET.Element:
|
|
1531
|
+
"""Return the underlying XML element."""
|
|
1532
|
+
return self._element
|
|
1533
|
+
|
|
1534
|
+
@property
|
|
1535
|
+
def document(self) -> "HwpxOxmlDocument" | None:
|
|
1536
|
+
return self._document
|
|
1537
|
+
|
|
1538
|
+
def attach_document(self, document: "HwpxOxmlDocument") -> None:
|
|
1539
|
+
self._document = document
|
|
1540
|
+
|
|
1541
|
+
@property
|
|
1542
|
+
def paragraphs(self) -> List[HwpxOxmlParagraph]:
|
|
1543
|
+
"""Return the paragraphs defined in this section."""
|
|
1544
|
+
return [HwpxOxmlParagraph(elm, self) for elm in self._paragraph_elements()]
|
|
1545
|
+
|
|
1546
|
+
def _memo_group_element(self, create: bool = False) -> ET.Element | None:
|
|
1547
|
+
element = self._element.find(f"{_HP}memogroup")
|
|
1548
|
+
if element is None and create:
|
|
1549
|
+
element = ET.SubElement(self._element, f"{_HP}memogroup")
|
|
1550
|
+
self.mark_dirty()
|
|
1551
|
+
return element
|
|
1552
|
+
|
|
1553
|
+
@property
|
|
1554
|
+
def memo_group(self) -> HwpxOxmlMemoGroup | None:
|
|
1555
|
+
element = self._memo_group_element()
|
|
1556
|
+
if element is None:
|
|
1557
|
+
return None
|
|
1558
|
+
return HwpxOxmlMemoGroup(element, self)
|
|
1559
|
+
|
|
1560
|
+
@property
|
|
1561
|
+
def memos(self) -> List[HwpxOxmlMemo]:
|
|
1562
|
+
group = self.memo_group
|
|
1563
|
+
if group is None:
|
|
1564
|
+
return []
|
|
1565
|
+
return group.memos
|
|
1566
|
+
|
|
1567
|
+
def add_memo(
|
|
1568
|
+
self,
|
|
1569
|
+
text: str = "",
|
|
1570
|
+
*,
|
|
1571
|
+
memo_shape_id_ref: str | int | None = None,
|
|
1572
|
+
memo_id: str | None = None,
|
|
1573
|
+
char_pr_id_ref: str | int | None = None,
|
|
1574
|
+
attributes: Optional[dict[str, str]] = None,
|
|
1575
|
+
) -> HwpxOxmlMemo:
|
|
1576
|
+
element = self._memo_group_element(create=True)
|
|
1577
|
+
if element is None: # pragma: no cover - defensive branch
|
|
1578
|
+
raise RuntimeError("failed to create memo group element")
|
|
1579
|
+
group = HwpxOxmlMemoGroup(element, self)
|
|
1580
|
+
return group.add_memo(
|
|
1581
|
+
text,
|
|
1582
|
+
memo_shape_id_ref=memo_shape_id_ref,
|
|
1583
|
+
memo_id=memo_id,
|
|
1584
|
+
char_pr_id_ref=char_pr_id_ref,
|
|
1585
|
+
attributes=attributes,
|
|
1586
|
+
)
|
|
1587
|
+
|
|
1588
|
+
def add_paragraph(
|
|
1589
|
+
self,
|
|
1590
|
+
text: str = "",
|
|
1591
|
+
*,
|
|
1592
|
+
para_pr_id_ref: str | int | None = None,
|
|
1593
|
+
style_id_ref: str | int | None = None,
|
|
1594
|
+
char_pr_id_ref: str | int | None = None,
|
|
1595
|
+
run_attributes: dict[str, str] | None = None,
|
|
1596
|
+
include_run: bool = True,
|
|
1597
|
+
**extra_attrs: str,
|
|
1598
|
+
) -> HwpxOxmlParagraph:
|
|
1599
|
+
"""Create a new paragraph element appended to this section.
|
|
1600
|
+
|
|
1601
|
+
The optional ``para_pr_id_ref`` and ``style_id_ref`` parameters
|
|
1602
|
+
control the paragraph-level references, while ``char_pr_id_ref`` and
|
|
1603
|
+
``run_attributes`` customise the initial ``<hp:run>`` element when
|
|
1604
|
+
``include_run`` is :data:`True`.
|
|
1605
|
+
"""
|
|
1606
|
+
attrs = {"id": _paragraph_id(), **_DEFAULT_PARAGRAPH_ATTRS}
|
|
1607
|
+
attrs.update(extra_attrs)
|
|
1608
|
+
|
|
1609
|
+
if para_pr_id_ref is not None:
|
|
1610
|
+
attrs["paraPrIDRef"] = str(para_pr_id_ref)
|
|
1611
|
+
if style_id_ref is not None:
|
|
1612
|
+
attrs["styleIDRef"] = str(style_id_ref)
|
|
1613
|
+
|
|
1614
|
+
paragraph = ET.Element(f"{_HP}p", attrs)
|
|
1615
|
+
|
|
1616
|
+
if include_run:
|
|
1617
|
+
run_attrs = dict(run_attributes or {})
|
|
1618
|
+
if char_pr_id_ref is not None:
|
|
1619
|
+
run_attrs["charPrIDRef"] = str(char_pr_id_ref)
|
|
1620
|
+
elif "charPrIDRef" not in run_attrs:
|
|
1621
|
+
run_attrs["charPrIDRef"] = "0"
|
|
1622
|
+
|
|
1623
|
+
run = ET.SubElement(paragraph, f"{_HP}run", run_attrs)
|
|
1624
|
+
text_element = ET.SubElement(run, f"{_HP}t")
|
|
1625
|
+
text_element.text = text
|
|
1626
|
+
|
|
1627
|
+
self._element.append(paragraph)
|
|
1628
|
+
self._dirty = True
|
|
1629
|
+
return HwpxOxmlParagraph(paragraph, self)
|
|
1630
|
+
|
|
1631
|
+
def mark_dirty(self) -> None:
|
|
1632
|
+
self._dirty = True
|
|
1633
|
+
|
|
1634
|
+
@property
def dirty(self) -> bool:
    """Return the current value of the ``_dirty`` flag."""
    return self._dirty
|
|
1637
|
+
|
|
1638
|
+
def reset_dirty(self) -> None:
    """Clear the modification flag by setting ``_dirty`` back to ``False``."""
    self._dirty = False
|
|
1640
|
+
|
|
1641
|
+
def to_bytes(self) -> bytes:
    """Serialise this part's root element via ``_serialize_xml``."""
    root = self._element
    return _serialize_xml(root)
|
|
1643
|
+
|
|
1644
|
+
|
|
1645
|
+
class HwpxOxmlHeader:
    """Wrapper around the root element of a header XML part.

    Tracks the part name, the parsed element tree and a dirty flag that is
    raised whenever this wrapper changes the tree.
    """

    def __init__(self, part_name: str, element: ET.Element):
        self._element = element
        self._dirty = False
        self.part_name = part_name

    @property
    def element(self) -> ET.Element:
        """The root element backing this header part."""
        return self._element

    def _begin_num_element(self, create: bool = False) -> ET.Element | None:
        """Return the ``beginNum`` child, optionally creating it when absent."""
        found = self._element.find(f"{_HH}beginNum")
        if found is not None or not create:
            return found
        return ET.SubElement(self._element, f"{_HH}beginNum")

    def _memo_properties_element(self) -> ET.Element | None:
        """Return ``refList/memoProperties`` or ``None`` if either is missing."""
        ref_list = self._element.find(f"{_HH}refList")
        return None if ref_list is None else ref_list.find(f"{_HH}memoProperties")

    @property
    def begin_numbering(self) -> DocumentNumbering:
        """Starting counters read from the ``beginNum`` element.

        A default-constructed ``DocumentNumbering`` is returned when the
        element does not exist; otherwise each counter is read with
        ``_get_int_attr`` using ``1`` as its third argument.
        """
        node = self._begin_num_element()
        if node is None:
            return DocumentNumbering()

        # DocumentNumbering field name -> XML attribute name.
        attr_for_field = {
            "page": "page",
            "footnote": "footnote",
            "endnote": "endnote",
            "picture": "pic",
            "table": "tbl",
            "equation": "equation",
        }
        counters = {
            field: _get_int_attr(node, attr, 1)
            for field, attr in attr_for_field.items()
        }
        return DocumentNumbering(**counters)

    def set_begin_numbering(
        self,
        *,
        page: int | None = None,
        footnote: int | None = None,
        endnote: int | None = None,
        picture: int | None = None,
        table: int | None = None,
        equation: int | None = None,
    ) -> None:
        """Update the ``beginNum`` counters, creating the element if needed.

        Arguments left as ``None`` keep the value currently reported by
        :attr:`begin_numbering`.  Values are clamped to be non-negative
        before being written.  The header is marked dirty only when at least
        one attribute actually changes.
        """
        node = self._begin_num_element(create=True)
        if node is None:
            return

        current = self.begin_numbering
        # (XML attribute, requested value, value to keep when not requested)
        plan = (
            ("page", page, current.page),
            ("footnote", footnote, current.footnote),
            ("endnote", endnote, current.endnote),
            ("pic", picture, current.picture),
            ("tbl", table, current.table),
            ("equation", equation, current.equation),
        )

        modified = False
        for attr, requested, existing in plan:
            chosen = existing if requested is None else requested
            rendered = str(max(chosen, 0))
            if node.get(attr) == rendered:
                continue
            node.set(attr, rendered)
            modified = True

        if modified:
            self.mark_dirty()

    @property
    def memo_shapes(self) -> dict[str, MemoShape]:
        """Memo shapes declared under ``memoProperties``.

        Returns an empty dict when the header has no ``memoProperties``
        element; otherwise the result of ``MemoProperties.as_dict()`` built
        from its ``memoPr`` children.
        """
        props = self._memo_properties_element()
        if props is None:
            return {}

        shapes = []
        for memo_pr in props.findall(f"{_HH}memoPr"):
            shapes.append(memo_shape_from_attributes(memo_pr.attrib))

        item_count = parse_int(props.get("itemCnt"))
        # Everything except ``itemCnt`` is passed through as extra attributes.
        leftover = dict(props.attrib)
        leftover.pop("itemCnt", None)

        return MemoProperties(
            item_cnt=item_count,
            memo_shapes=shapes,
            attributes=leftover,
        ).as_dict()

    def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
        """Look up a memo shape by identifier.

        ``None`` and blank identifiers yield ``None``.  The stripped string
        form is tried first; if that misses, the identifier is re-rendered
        through ``int`` (so ``"007"`` also matches ``"7"``).  Identifiers
        that are not integers yield ``None``.
        """
        if memo_shape_id_ref is None:
            return None

        key = (
            memo_shape_id_ref.strip()
            if isinstance(memo_shape_id_ref, str)
            else str(memo_shape_id_ref)
        )
        if not key:
            return None

        available = self.memo_shapes
        direct_hit = available.get(key)
        if direct_hit is not None:
            return direct_hit

        try:
            canonical = str(int(key))
        except (TypeError, ValueError):
            return None
        return available.get(canonical)

    @property
    def dirty(self) -> bool:
        """Whether this header has been flagged as modified."""
        return self._dirty

    def mark_dirty(self) -> None:
        """Flag this header as modified."""
        self._dirty = True

    def reset_dirty(self) -> None:
        """Clear the modification flag."""
        self._dirty = False

    def to_bytes(self) -> bytes:
        """Serialise the root element via ``_serialize_xml``."""
        root = self._element
        return _serialize_xml(root)
|
|
1773
|
+
|
|
1774
|
+
|
|
1775
|
+
class HwpxOxmlDocument:
    """Bundle of the manifest, section parts and header parts of a document."""

    def __init__(
        self,
        manifest: ET.Element,
        sections: Sequence[HwpxOxmlSection],
        headers: Sequence[HwpxOxmlHeader],
    ):
        self._manifest = manifest
        self._sections = list(sections)
        self._headers = list(headers)
        # Built lazily by ``_ensure_char_property_cache``.
        self._char_property_cache: dict[str, RunStyle] | None = None

        # Attach only after every attribute exists on ``self``.
        for part in self._sections:
            part.attach_document(self)

    @classmethod
    def from_package(cls, package: "HwpxPackage") -> "HwpxOxmlDocument":
        """Create a document from the manifest, sections and headers of *package*.

        Raises:
            TypeError: if *package* is not an ``HwpxPackage``.
        """
        from hwpx.package import HwpxPackage  # Local import to avoid cycle during typing

        if not isinstance(package, HwpxPackage):
            raise TypeError("package must be an instance of HwpxPackage")

        def wrap_all(paths, wrapper):
            return [wrapper(path, package.get_xml(path)) for path in paths]

        manifest = package.get_xml(package.MANIFEST_PATH)
        section_paths = package.section_paths()
        header_paths = package.header_paths()

        sections = wrap_all(section_paths, HwpxOxmlSection)
        headers = wrap_all(header_paths, HwpxOxmlHeader)
        return cls(manifest, sections, headers)

    @property
    def manifest(self) -> ET.Element:
        """The manifest element supplied at construction."""
        return self._manifest

    @property
    def sections(self) -> List[HwpxOxmlSection]:
        """A shallow copy of the section list."""
        return self._sections[:]

    @property
    def headers(self) -> List[HwpxOxmlHeader]:
        """A shallow copy of the header list."""
        return self._headers[:]

    @staticmethod
    def _lookup_reference(table, key):
        """Find *key* in *table*, retrying with its canonical integer form."""
        hit = table.get(key)
        if hit is not None:
            return hit
        try:
            canonical = str(int(key))
        except (TypeError, ValueError):
            return None
        return table.get(canonical)

    def _ensure_char_property_cache(self) -> dict[str, RunStyle]:
        """Return the character-property cache, building it on first use."""
        cache = self._char_property_cache
        if cache is None:
            cache = {}
            for header in self._headers:
                cache.update(_char_properties_from_header(header.element))
            self._char_property_cache = cache
        return cache

    def invalidate_char_property_cache(self) -> None:
        """Drop the cache so the next lookup rebuilds it from the headers."""
        self._char_property_cache = None

    @property
    def char_properties(self) -> dict[str, RunStyle]:
        """A copy of the cached character-property mapping."""
        return dict(self._ensure_char_property_cache())

    def char_property(self, char_pr_id_ref: int | str | None) -> RunStyle | None:
        """Look up a character property by identifier.

        ``None`` and blank identifiers yield ``None``.  The stripped string
        form is tried first, then its canonical integer rendering.
        """
        if char_pr_id_ref is None:
            return None
        key = str(char_pr_id_ref).strip()
        if not key:
            return None
        return self._lookup_reference(self._ensure_char_property_cache(), key)

    @property
    def memo_shapes(self) -> dict[str, MemoShape]:
        """Memo shapes merged across all headers; later headers win on clashes."""
        merged: dict[str, MemoShape] = {}
        for header in self._headers:
            merged.update(header.memo_shapes)
        return merged

    def memo_shape(self, memo_shape_id_ref: int | str | None) -> MemoShape | None:
        """Look up a memo shape by identifier across all headers.

        ``None`` and blank identifiers yield ``None``.  The stripped string
        form is tried first, then its canonical integer rendering.
        """
        if memo_shape_id_ref is None:
            return None
        key = str(memo_shape_id_ref).strip()
        if not key:
            return None
        return self._lookup_reference(self.memo_shapes, key)

    @property
    def paragraphs(self) -> List[HwpxOxmlParagraph]:
        """Paragraphs of every section, concatenated in section order."""
        return [
            paragraph
            for section in self._sections
            for paragraph in section.paragraphs
        ]

    def add_paragraph(
        self,
        text: str = "",
        *,
        section: HwpxOxmlSection | None = None,
        section_index: int | None = None,
        para_pr_id_ref: str | int | None = None,
        style_id_ref: str | int | None = None,
        char_pr_id_ref: str | int | None = None,
        run_attributes: dict[str, str] | None = None,
        include_run: bool = True,
        **extra_attrs: str,
    ) -> HwpxOxmlParagraph:
        """Append a paragraph to a section and return what the section returns.

        The target is *section* when given, otherwise the section at
        *section_index*, otherwise the last section.  All remaining
        arguments are forwarded to the section's ``add_paragraph``.

        Raises:
            ValueError: if no target was specified and there are no sections.
        """
        target = section
        if target is None and section_index is not None:
            target = self._sections[section_index]
        if target is None:
            if not self._sections:
                raise ValueError("document does not contain any sections")
            target = self._sections[-1]

        forwarded = {
            "para_pr_id_ref": para_pr_id_ref,
            "style_id_ref": style_id_ref,
            "char_pr_id_ref": char_pr_id_ref,
            "run_attributes": run_attributes,
            "include_run": include_run,
        }
        return target.add_paragraph(text, **forwarded, **extra_attrs)

    def serialize(self) -> dict[str, bytes]:
        """Return a mapping of part names to updated XML payloads.

        Only dirty sections and headers are included.  If any header is
        dirty the character-property cache is invalidated as well.
        """

        def dirty_payloads(parts):
            return {part.part_name: part.to_bytes() for part in parts if part.dirty}

        section_updates = dirty_payloads(self._sections)
        header_updates = dirty_payloads(self._headers)
        if header_updates:
            self.invalidate_char_property_cache()
        return {**section_updates, **header_updates}

    def reset_dirty(self) -> None:
        """Mark all parts as clean after a successful save."""
        for part in (*self._sections, *self._headers):
            part.reset_dirty()
|