python-hwpx 2.8.3__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwpx/__init__.py +6 -1
- hwpx/data/Skeleton.hwpx +0 -0
- hwpx/document.py +33 -1
- hwpx/opc/package.py +1 -0
- hwpx/opc/relationships.py +1 -0
- hwpx/opc/xml_utils.py +1 -0
- hwpx/oxml/__init__.py +1 -0
- hwpx/oxml/body.py +23 -1
- hwpx/oxml/common.py +1 -0
- hwpx/oxml/document.py +150 -29
- hwpx/oxml/header.py +1 -0
- hwpx/oxml/header_part.py +1 -0
- hwpx/oxml/memo.py +1 -0
- hwpx/oxml/namespaces.py +1 -0
- hwpx/oxml/paragraph.py +1 -0
- hwpx/oxml/parser.py +1 -0
- hwpx/oxml/schema.py +1 -0
- hwpx/oxml/section.py +1 -0
- hwpx/oxml/table.py +1 -0
- hwpx/oxml/utils.py +1 -0
- hwpx/package.py +1 -0
- hwpx/templates.py +1 -0
- hwpx/tools/__init__.py +25 -0
- hwpx/tools/archive_cli.py +1 -0
- hwpx/tools/exporter.py +43 -146
- hwpx/tools/object_finder.py +1 -0
- hwpx/tools/package_validator.py +1 -0
- hwpx/tools/page_guard.py +1 -0
- hwpx/tools/table_navigation.py +458 -0
- hwpx/tools/template_analyzer.py +1 -0
- hwpx/tools/text_extract_cli.py +1 -0
- hwpx/tools/text_extractor.py +5 -1
- hwpx/tools/validator.py +1 -0
- {python_hwpx-2.8.3.dist-info → python_hwpx-2.9.1.dist-info}/METADATA +138 -80
- python_hwpx-2.9.1.dist-info/RECORD +43 -0
- python_hwpx-2.9.1.dist-info/licenses/LICENSE +178 -0
- python_hwpx-2.9.1.dist-info/licenses/NOTICE +14 -0
- python_hwpx-2.8.3.dist-info/RECORD +0 -41
- python_hwpx-2.8.3.dist-info/licenses/LICENSE +0 -32
- {python_hwpx-2.8.3.dist-info → python_hwpx-2.9.1.dist-info}/WHEEL +0 -0
- {python_hwpx-2.8.3.dist-info → python_hwpx-2.9.1.dist-info}/entry_points.txt +0 -0
- {python_hwpx-2.8.3.dist-info → python_hwpx-2.9.1.dist-info}/top_level.txt +0 -0
hwpx/__init__.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
1
2
|
"""High-level utilities for working with HWPX documents."""
|
|
2
3
|
|
|
3
4
|
from importlib.metadata import PackageNotFoundError, version as _metadata_version
|
|
@@ -10,8 +11,12 @@ def _resolve_version() -> str:
|
|
|
10
11
|
except PackageNotFoundError:
|
|
11
12
|
return "0+unknown"
|
|
12
13
|
|
|
14
|
+
def __getattr__(name: str) -> object:
|
|
15
|
+
"""Resolve dynamic module attributes."""
|
|
13
16
|
|
|
14
|
-
|
|
17
|
+
if name == "__version__":
|
|
18
|
+
return _resolve_version()
|
|
19
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
15
20
|
|
|
16
21
|
from .tools.text_extractor import (
|
|
17
22
|
DEFAULT_NAMESPACES,
|
hwpx/data/Skeleton.hwpx
CHANGED
|
Binary file
|
hwpx/document.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
1
2
|
"""High-level representation of an HWPX document."""
|
|
2
3
|
|
|
3
4
|
from __future__ import annotations
|
|
@@ -10,7 +11,7 @@ import logging
|
|
|
10
11
|
import uuid
|
|
11
12
|
|
|
12
13
|
from os import PathLike
|
|
13
|
-
from typing import Any, BinaryIO, Iterator, Sequence, overload
|
|
14
|
+
from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
|
|
14
15
|
|
|
15
16
|
from lxml import etree
|
|
16
17
|
|
|
@@ -53,6 +54,9 @@ _HH = f"{{{_HH_NS}}}"
|
|
|
53
54
|
|
|
54
55
|
logger = logging.getLogger(__name__)
|
|
55
56
|
|
|
57
|
+
if TYPE_CHECKING:
|
|
58
|
+
from .tools.table_navigation import TableFillResult, TableLabelSearchResult, TableMapResult
|
|
59
|
+
|
|
56
60
|
|
|
57
61
|
def _append_element(
|
|
58
62
|
parent: Any,
|
|
@@ -741,6 +745,34 @@ class HwpxDocument:
|
|
|
741
745
|
char_pr_id_ref=char_pr_id_ref,
|
|
742
746
|
)
|
|
743
747
|
|
|
748
|
+
def get_table_map(self) -> TableMapResult:
|
|
749
|
+
"""Return compact metadata for every table in document order."""
|
|
750
|
+
|
|
751
|
+
from .tools.table_navigation import get_table_map
|
|
752
|
+
|
|
753
|
+
return get_table_map(self)
|
|
754
|
+
|
|
755
|
+
def find_cell_by_label(
|
|
756
|
+
self,
|
|
757
|
+
label_text: str,
|
|
758
|
+
direction: str = "right",
|
|
759
|
+
) -> TableLabelSearchResult:
|
|
760
|
+
"""Return every label/target cell pair that matches *label_text*."""
|
|
761
|
+
|
|
762
|
+
from .tools.table_navigation import find_cell_by_label
|
|
763
|
+
|
|
764
|
+
return find_cell_by_label(self, label_text, direction=direction)
|
|
765
|
+
|
|
766
|
+
def fill_by_path(
|
|
767
|
+
self,
|
|
768
|
+
mappings: Mapping[str, str],
|
|
769
|
+
) -> TableFillResult:
|
|
770
|
+
"""Fill table cells using ``label > direction > ...`` navigation paths."""
|
|
771
|
+
|
|
772
|
+
from .tools.table_navigation import fill_by_path
|
|
773
|
+
|
|
774
|
+
return fill_by_path(self, mappings)
|
|
775
|
+
|
|
744
776
|
def add_shape(
|
|
745
777
|
self,
|
|
746
778
|
shape_type: str,
|
hwpx/opc/package.py
CHANGED
hwpx/opc/relationships.py
CHANGED
hwpx/opc/xml_utils.py
CHANGED
hwpx/oxml/__init__.py
CHANGED
hwpx/oxml/body.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
1
2
|
from __future__ import annotations
|
|
2
3
|
|
|
3
4
|
import logging
|
|
@@ -43,7 +44,7 @@ _TRACK_CHANGE_MARK_NAMES = {
|
|
|
43
44
|
}
|
|
44
45
|
|
|
45
46
|
InlineMark = Union[GenericElement, "TrackChangeMark"]
|
|
46
|
-
RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan"]
|
|
47
|
+
RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan", "Tab"]
|
|
47
48
|
ParagraphChild = Union["Run", GenericElement]
|
|
48
49
|
|
|
49
50
|
|
|
@@ -105,6 +106,12 @@ class InlineObject:
|
|
|
105
106
|
children: List[GenericElement] = field(default_factory=list)
|
|
106
107
|
|
|
107
108
|
|
|
109
|
+
@dataclass(slots=True)
|
|
110
|
+
class Tab:
|
|
111
|
+
tag: str
|
|
112
|
+
attributes: Dict[str, str] = field(default_factory=dict)
|
|
113
|
+
|
|
114
|
+
|
|
108
115
|
@dataclass(slots=True)
|
|
109
116
|
class Table:
|
|
110
117
|
tag: str
|
|
@@ -120,6 +127,7 @@ class Run:
|
|
|
120
127
|
controls: List[Control] = field(default_factory=list)
|
|
121
128
|
tables: List[Table] = field(default_factory=list)
|
|
122
129
|
inline_objects: List[InlineObject] = field(default_factory=list)
|
|
130
|
+
tabs: List[Tab] = field(default_factory=list)
|
|
123
131
|
text_spans: List[TextSpan] = field(default_factory=list)
|
|
124
132
|
other_children: List[GenericElement] = field(default_factory=list)
|
|
125
133
|
attributes: Dict[str, str] = field(default_factory=dict)
|
|
@@ -227,6 +235,10 @@ def parse_table_element(node: etree._Element) -> Table:
|
|
|
227
235
|
)
|
|
228
236
|
|
|
229
237
|
|
|
238
|
+
def parse_tab_element(node: etree._Element) -> Tab:
|
|
239
|
+
return Tab(tag=node.tag, attributes={key: value for key, value in node.attrib.items()})
|
|
240
|
+
|
|
241
|
+
|
|
230
242
|
def parse_run_element(node: etree._Element) -> Run:
|
|
231
243
|
attributes = {key: value for key, value in node.attrib.items()}
|
|
232
244
|
char_pr_id_ref = parse_int(attributes.pop("charPrIDRef", None))
|
|
@@ -247,6 +259,10 @@ def parse_run_element(node: etree._Element) -> Run:
|
|
|
247
259
|
span = parse_text_span(child)
|
|
248
260
|
run.text_spans.append(span)
|
|
249
261
|
run.content.append(span)
|
|
262
|
+
elif name == "tab":
|
|
263
|
+
tab = parse_tab_element(child)
|
|
264
|
+
run.tabs.append(tab)
|
|
265
|
+
run.content.append(tab)
|
|
250
266
|
elif name == "tbl":
|
|
251
267
|
table = parse_table_element(child)
|
|
252
268
|
run.tables.append(table)
|
|
@@ -342,6 +358,10 @@ def _text_span_to_xml(span: TextSpan) -> etree._Element:
|
|
|
342
358
|
return node
|
|
343
359
|
|
|
344
360
|
|
|
361
|
+
def _tab_to_xml(tab: Tab) -> etree._Element:
|
|
362
|
+
return etree.Element(_qualified_tag(tab.tag, "tab"), dict(tab.attributes))
|
|
363
|
+
|
|
364
|
+
|
|
345
365
|
def _control_to_xml(control: Control) -> etree._Element:
|
|
346
366
|
attrs = dict(control.attributes)
|
|
347
367
|
if control.control_type is not None:
|
|
@@ -376,6 +396,8 @@ def serialize_run(run: Run) -> etree._Element:
|
|
|
376
396
|
node.append(_text_span_to_xml(child))
|
|
377
397
|
elif isinstance(child, Control):
|
|
378
398
|
node.append(_control_to_xml(child))
|
|
399
|
+
elif isinstance(child, Tab):
|
|
400
|
+
node.append(_tab_to_xml(child))
|
|
379
401
|
elif isinstance(child, Table):
|
|
380
402
|
node.append(_table_to_xml(child))
|
|
381
403
|
elif isinstance(child, InlineObject):
|
hwpx/oxml/common.py
CHANGED
hwpx/oxml/document.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
1
2
|
"""Object model mapping for the XML parts of an HWPX document."""
|
|
2
3
|
|
|
3
4
|
from __future__ import annotations
|
|
@@ -108,17 +109,56 @@ def _serialize_xml(element: ET.Element) -> bytes:
|
|
|
108
109
|
|
|
109
110
|
def _paragraph_id() -> str:
|
|
110
111
|
"""Generate an identifier for a new paragraph element."""
|
|
111
|
-
return str(uuid4().int &
|
|
112
|
+
return str(uuid4().int & 0x7FFFFFFF)
|
|
112
113
|
|
|
113
114
|
|
|
114
115
|
def _object_id() -> str:
|
|
115
116
|
"""Generate an identifier suitable for table and shape objects."""
|
|
116
|
-
return str(uuid4().int &
|
|
117
|
+
return str(uuid4().int & 0x7FFFFFFF)
|
|
117
118
|
|
|
118
119
|
|
|
119
120
|
def _memo_id() -> str:
|
|
120
121
|
"""Generate a lightweight identifier for memo elements."""
|
|
121
|
-
return str(uuid4().int &
|
|
122
|
+
return str(uuid4().int & 0x7FFFFFFF)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _refresh_copied_paragraph_subtree_ids(paragraph: ET.Element) -> None:
|
|
126
|
+
"""Assign fresh local identifiers inside a copied paragraph subtree.
|
|
127
|
+
|
|
128
|
+
This is intentionally narrow: it refreshes paragraph ids for the copied
|
|
129
|
+
paragraph and any nested paragraphs (for example inside table cells), plus
|
|
130
|
+
common object identifiers used by tables/shapes/notes. Reference-style
|
|
131
|
+
attributes such as ``borderFillIDRef`` are left untouched.
|
|
132
|
+
"""
|
|
133
|
+
|
|
134
|
+
for node in paragraph.iter():
|
|
135
|
+
if node.tag == f"{_HP}p":
|
|
136
|
+
node.set("id", _paragraph_id())
|
|
137
|
+
continue
|
|
138
|
+
|
|
139
|
+
if "id" in node.attrib and node.tag in {
|
|
140
|
+
f"{_HP}tbl",
|
|
141
|
+
f"{_HP}pic",
|
|
142
|
+
f"{_HP}container",
|
|
143
|
+
f"{_HP}ole",
|
|
144
|
+
f"{_HP}equation",
|
|
145
|
+
f"{_HP}textart",
|
|
146
|
+
f"{_HP}video",
|
|
147
|
+
f"{_HP}header",
|
|
148
|
+
f"{_HP}footer",
|
|
149
|
+
}:
|
|
150
|
+
node.set("id", _object_id())
|
|
151
|
+
|
|
152
|
+
if "instId" in node.attrib:
|
|
153
|
+
node.set("instId", _object_id())
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _clone_paragraph_element(paragraph: ET.Element) -> ET.Element:
|
|
157
|
+
"""Return a deep-copied paragraph element with refreshed local ids."""
|
|
158
|
+
|
|
159
|
+
cloned = deepcopy(paragraph)
|
|
160
|
+
_refresh_copied_paragraph_subtree_ids(cloned)
|
|
161
|
+
return cloned
|
|
122
162
|
|
|
123
163
|
|
|
124
164
|
def _create_paragraph_element(
|
|
@@ -154,9 +194,7 @@ def _create_paragraph_element(
|
|
|
154
194
|
|
|
155
195
|
run = paragraph.makeelement(f"{_HP}run", run_attrs)
|
|
156
196
|
paragraph.append(run)
|
|
157
|
-
|
|
158
|
-
run.append(text_element)
|
|
159
|
-
text_element.text = text
|
|
197
|
+
_append_text_with_tabs(run, text)
|
|
160
198
|
return paragraph
|
|
161
199
|
|
|
162
200
|
|
|
@@ -192,6 +230,20 @@ def _append_child(
|
|
|
192
230
|
return child
|
|
193
231
|
|
|
194
232
|
|
|
233
|
+
def _is_tab_control_element(node: ET.Element) -> bool:
|
|
234
|
+
return node.tag == f"{_HP}ctrl" and (node.get("id") or "").lower() == "tab"
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _append_text_with_tabs(run: ET.Element, value: str) -> None:
|
|
238
|
+
segments = value.split("\t")
|
|
239
|
+
for index, segment in enumerate(segments):
|
|
240
|
+
text_element = run.makeelement(f"{_HP}t", {})
|
|
241
|
+
text_element.text = _sanitize_text(segment)
|
|
242
|
+
run.append(text_element)
|
|
243
|
+
if index < len(segments) - 1:
|
|
244
|
+
run.append(run.makeelement(f"{_HP}tab", {}))
|
|
245
|
+
|
|
246
|
+
|
|
195
247
|
def _normalize_length(value: str | None) -> str:
|
|
196
248
|
if value is None:
|
|
197
249
|
return ""
|
|
@@ -1979,17 +2031,21 @@ class HwpxOxmlTableCell:
|
|
|
1979
2031
|
def _ensure_text_element(self) -> ET.Element:
|
|
1980
2032
|
sublist = self.element.find(f"{_HP}subList")
|
|
1981
2033
|
if sublist is None:
|
|
1982
|
-
sublist =
|
|
2034
|
+
sublist = _append_child(
|
|
2035
|
+
self.element, f"{_HP}subList", _default_sublist_attributes()
|
|
2036
|
+
)
|
|
1983
2037
|
paragraph = sublist.find(f"{_HP}p")
|
|
1984
2038
|
if paragraph is None:
|
|
1985
|
-
paragraph =
|
|
2039
|
+
paragraph = _append_child(
|
|
2040
|
+
sublist, f"{_HP}p", _default_cell_paragraph_attributes()
|
|
2041
|
+
)
|
|
1986
2042
|
_clear_paragraph_layout_cache(paragraph)
|
|
1987
2043
|
run = paragraph.find(f"{_HP}run")
|
|
1988
2044
|
if run is None:
|
|
1989
|
-
run =
|
|
2045
|
+
run = _append_child(paragraph, f"{_HP}run", {"charPrIDRef": "0"})
|
|
1990
2046
|
text = run.find(f"{_HP}t")
|
|
1991
2047
|
if text is None:
|
|
1992
|
-
text =
|
|
2048
|
+
text = _append_child(run, f"{_HP}t")
|
|
1993
2049
|
return text
|
|
1994
2050
|
|
|
1995
2051
|
@property
|
|
@@ -2097,9 +2153,7 @@ class HwpxOxmlTableCell:
|
|
|
2097
2153
|
run_attrs["charPrIDRef"] = "0"
|
|
2098
2154
|
|
|
2099
2155
|
run = _append_child(paragraph, f"{_HP}run", run_attrs)
|
|
2100
|
-
|
|
2101
|
-
t.text = _sanitize_text(text)
|
|
2102
|
-
run.append(t)
|
|
2156
|
+
_append_text_with_tabs(run, text)
|
|
2103
2157
|
|
|
2104
2158
|
self.table.mark_dirty()
|
|
2105
2159
|
section = self.table.paragraph.section
|
|
@@ -2762,9 +2816,13 @@ class HwpxOxmlParagraph:
|
|
|
2762
2816
|
def text(self) -> str:
|
|
2763
2817
|
"""Return the concatenated textual content of this paragraph."""
|
|
2764
2818
|
texts: list[str] = []
|
|
2765
|
-
for
|
|
2766
|
-
|
|
2767
|
-
|
|
2819
|
+
for run in self._run_elements():
|
|
2820
|
+
for child in run:
|
|
2821
|
+
if child.tag == f"{_HP}t":
|
|
2822
|
+
if child.text:
|
|
2823
|
+
texts.append(child.text)
|
|
2824
|
+
elif child.tag == f"{_HP}tab" or _is_tab_control_element(child):
|
|
2825
|
+
texts.append("\t")
|
|
2768
2826
|
return "".join(texts)
|
|
2769
2827
|
|
|
2770
2828
|
@text.setter
|
|
@@ -2780,10 +2838,10 @@ class HwpxOxmlParagraph:
|
|
|
2780
2838
|
# Identify first run — its charPrIDRef will be kept.
|
|
2781
2839
|
first_run = self._ensure_run()
|
|
2782
2840
|
|
|
2783
|
-
# Remove
|
|
2841
|
+
# Remove existing text/tab nodes from all runs.
|
|
2784
2842
|
for run in runs:
|
|
2785
2843
|
for child in list(run):
|
|
2786
|
-
if child.tag == f"{_HP}t":
|
|
2844
|
+
if child.tag == f"{_HP}t" or child.tag == f"{_HP}tab" or _is_tab_control_element(child):
|
|
2787
2845
|
run.remove(child)
|
|
2788
2846
|
|
|
2789
2847
|
# Remove non-first runs that are now empty (only had text).
|
|
@@ -2794,10 +2852,8 @@ class HwpxOxmlParagraph:
|
|
|
2794
2852
|
if len(list(run)) == 0:
|
|
2795
2853
|
self.element.remove(run)
|
|
2796
2854
|
|
|
2797
|
-
# Write the new text into the first run
|
|
2798
|
-
|
|
2799
|
-
text_element.text = _sanitize_text(value)
|
|
2800
|
-
first_run.append(text_element)
|
|
2855
|
+
# Write the new text into the first run, preserving tabs as <hp:tab/>.
|
|
2856
|
+
_append_text_with_tabs(first_run, value)
|
|
2801
2857
|
_clear_paragraph_layout_cache(self.element)
|
|
2802
2858
|
self.section.mark_dirty()
|
|
2803
2859
|
|
|
@@ -3668,14 +3724,43 @@ class HwpxOxmlSection:
|
|
|
3668
3724
|
|
|
3669
3725
|
run = paragraph.makeelement(f"{_HP}run", run_attrs)
|
|
3670
3726
|
paragraph.append(run)
|
|
3671
|
-
|
|
3672
|
-
text_element.text = text
|
|
3673
|
-
run.append(text_element)
|
|
3727
|
+
_append_text_with_tabs(run, text)
|
|
3674
3728
|
|
|
3675
3729
|
self._element.append(paragraph)
|
|
3676
3730
|
self._dirty = True
|
|
3677
3731
|
return HwpxOxmlParagraph(paragraph, self)
|
|
3678
3732
|
|
|
3733
|
+
def insert_paragraphs(
|
|
3734
|
+
self,
|
|
3735
|
+
index: int,
|
|
3736
|
+
paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
|
|
3737
|
+
) -> list[HwpxOxmlParagraph]:
|
|
3738
|
+
"""Insert paragraph copies at *index* and return wrappers for them."""
|
|
3739
|
+
|
|
3740
|
+
existing = self.paragraphs
|
|
3741
|
+
if index < 0 or index > len(existing):
|
|
3742
|
+
raise IndexError(f"단락 인덱스 {index}이(가) 범위를 벗어났습니다 (총 {len(existing)}개)")
|
|
3743
|
+
|
|
3744
|
+
inserted: list[HwpxOxmlParagraph] = []
|
|
3745
|
+
for offset, paragraph in enumerate(paragraphs):
|
|
3746
|
+
source_element = paragraph.element if isinstance(paragraph, HwpxOxmlParagraph) else paragraph
|
|
3747
|
+
cloned = _clone_paragraph_element(source_element)
|
|
3748
|
+
self._element.insert(index + offset, cloned)
|
|
3749
|
+
inserted.append(HwpxOxmlParagraph(cloned, self))
|
|
3750
|
+
|
|
3751
|
+
if inserted:
|
|
3752
|
+
self._dirty = True
|
|
3753
|
+
return inserted
|
|
3754
|
+
|
|
3755
|
+
def copy_paragraph_range(self, start: int, end: int) -> list[ET.Element]:
|
|
3756
|
+
"""Return deep-copied paragraph elements for the inclusive range."""
|
|
3757
|
+
|
|
3758
|
+
paragraphs = self.paragraphs
|
|
3759
|
+
total = len(paragraphs)
|
|
3760
|
+
if start < 0 or end < 0 or start >= total or end >= total or start > end:
|
|
3761
|
+
raise IndexError(f"문단 범위 {start}..{end}이(가) 유효하지 않습니다 (총 {total}개)")
|
|
3762
|
+
return [_clone_paragraph_element(paragraphs[index].element) for index in range(start, end + 1)]
|
|
3763
|
+
|
|
3679
3764
|
def mark_dirty(self) -> None:
|
|
3680
3765
|
self._dirty = True
|
|
3681
3766
|
|
|
@@ -4455,9 +4540,9 @@ class HwpxOxmlDocument:
|
|
|
4455
4540
|
element.remove(child)
|
|
4456
4541
|
|
|
4457
4542
|
if target[0]:
|
|
4458
|
-
|
|
4543
|
+
_append_child(element, f"{_HH}bold")
|
|
4459
4544
|
if target[1]:
|
|
4460
|
-
|
|
4545
|
+
_append_child(element, f"{_HH}italic")
|
|
4461
4546
|
|
|
4462
4547
|
underline_attrs = dict(base_underline_attrs)
|
|
4463
4548
|
if target[2]:
|
|
@@ -4469,14 +4554,14 @@ class HwpxOxmlDocument:
|
|
|
4469
4554
|
underline_attrs["color"] = base_underline_attrs["color"]
|
|
4470
4555
|
if "color" not in underline_attrs:
|
|
4471
4556
|
underline_attrs["color"] = "#000000"
|
|
4472
|
-
|
|
4557
|
+
_append_child(element, f"{_HH}underline", underline_attrs)
|
|
4473
4558
|
else:
|
|
4474
4559
|
attrs = dict(base_underline_attrs)
|
|
4475
4560
|
attrs["type"] = "NONE"
|
|
4476
4561
|
attrs.setdefault("shape", base_underline_attrs.get("shape", "SOLID"))
|
|
4477
4562
|
if "color" in base_underline_attrs:
|
|
4478
4563
|
attrs["color"] = base_underline_attrs["color"]
|
|
4479
|
-
|
|
4564
|
+
_append_child(element, f"{_HH}underline", attrs)
|
|
4480
4565
|
|
|
4481
4566
|
element = header.ensure_char_property(
|
|
4482
4567
|
predicate=predicate,
|
|
@@ -4649,6 +4734,42 @@ class HwpxOxmlDocument:
|
|
|
4649
4734
|
else:
|
|
4650
4735
|
paragraph.remove()
|
|
4651
4736
|
|
|
4737
|
+
def copy_paragraph_range(
|
|
4738
|
+
self,
|
|
4739
|
+
start: int,
|
|
4740
|
+
end: int,
|
|
4741
|
+
*,
|
|
4742
|
+
section: HwpxOxmlSection | None = None,
|
|
4743
|
+
section_index: int | None = None,
|
|
4744
|
+
) -> list[ET.Element]:
|
|
4745
|
+
"""Return deep-copied paragraph elements for an inclusive range."""
|
|
4746
|
+
|
|
4747
|
+
if section is None and section_index is not None:
|
|
4748
|
+
section = self._sections[section_index]
|
|
4749
|
+
if section is None:
|
|
4750
|
+
if not self._sections:
|
|
4751
|
+
raise ValueError("document does not contain any sections")
|
|
4752
|
+
section = self._sections[-1]
|
|
4753
|
+
return section.copy_paragraph_range(start, end)
|
|
4754
|
+
|
|
4755
|
+
def insert_paragraphs(
|
|
4756
|
+
self,
|
|
4757
|
+
index: int,
|
|
4758
|
+
paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
|
|
4759
|
+
*,
|
|
4760
|
+
section: HwpxOxmlSection | None = None,
|
|
4761
|
+
section_index: int | None = None,
|
|
4762
|
+
) -> list[HwpxOxmlParagraph]:
|
|
4763
|
+
"""Insert copied paragraphs into the requested section."""
|
|
4764
|
+
|
|
4765
|
+
if section is None and section_index is not None:
|
|
4766
|
+
section = self._sections[section_index]
|
|
4767
|
+
if section is None:
|
|
4768
|
+
if not self._sections:
|
|
4769
|
+
raise ValueError("document does not contain any sections")
|
|
4770
|
+
section = self._sections[-1]
|
|
4771
|
+
return section.insert_paragraphs(index, paragraphs)
|
|
4772
|
+
|
|
4652
4773
|
# ------------------------------------------------------------------
|
|
4653
4774
|
# Section management
|
|
4654
4775
|
# ------------------------------------------------------------------
|
hwpx/oxml/header.py
CHANGED
hwpx/oxml/header_part.py
CHANGED
hwpx/oxml/memo.py
CHANGED
hwpx/oxml/namespaces.py
CHANGED
hwpx/oxml/paragraph.py
CHANGED
hwpx/oxml/parser.py
CHANGED
hwpx/oxml/schema.py
CHANGED
hwpx/oxml/section.py
CHANGED
hwpx/oxml/table.py
CHANGED
hwpx/oxml/utils.py
CHANGED
hwpx/package.py
CHANGED
hwpx/templates.py
CHANGED
hwpx/tools/__init__.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
1
2
|
"""Tooling helpers for inspecting HWPX archives."""
|
|
2
3
|
|
|
3
4
|
from .exporter import (
|
|
@@ -25,6 +26,19 @@ from .text_extractor import (
|
|
|
25
26
|
describe_element_path,
|
|
26
27
|
strip_namespace,
|
|
27
28
|
)
|
|
29
|
+
from .table_navigation import (
|
|
30
|
+
TableCellReference,
|
|
31
|
+
TableFillApplied,
|
|
32
|
+
TableFillFailed,
|
|
33
|
+
TableFillResult,
|
|
34
|
+
TableLabelMatch,
|
|
35
|
+
TableLabelSearchResult,
|
|
36
|
+
TableMapEntry,
|
|
37
|
+
TableMapResult,
|
|
38
|
+
fill_by_path,
|
|
39
|
+
find_cell_by_label,
|
|
40
|
+
get_table_map,
|
|
41
|
+
)
|
|
28
42
|
from .validator import (
|
|
29
43
|
DocumentSchemas,
|
|
30
44
|
ValidationIssue,
|
|
@@ -41,6 +55,17 @@ __all__ = [
|
|
|
41
55
|
"build_parent_map",
|
|
42
56
|
"describe_element_path",
|
|
43
57
|
"strip_namespace",
|
|
58
|
+
"TableCellReference",
|
|
59
|
+
"TableFillApplied",
|
|
60
|
+
"TableFillFailed",
|
|
61
|
+
"TableFillResult",
|
|
62
|
+
"TableLabelMatch",
|
|
63
|
+
"TableLabelSearchResult",
|
|
64
|
+
"TableMapEntry",
|
|
65
|
+
"TableMapResult",
|
|
66
|
+
"fill_by_path",
|
|
67
|
+
"find_cell_by_label",
|
|
68
|
+
"get_table_map",
|
|
44
69
|
"FoundElement",
|
|
45
70
|
"ObjectFinder",
|
|
46
71
|
"PackageValidationIssue",
|
hwpx/tools/archive_cli.py
CHANGED