python-hwpx 2.8.2__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
- hwpx/__init__.py +5 -1
- hwpx/document.py +32 -1
- hwpx/tools/__init__.py +24 -0
- hwpx/tools/table_navigation.py +457 -0
- {python_hwpx-2.8.2.dist-info → python_hwpx-2.9.0.dist-info}/METADATA +21 -39
- {python_hwpx-2.8.2.dist-info → python_hwpx-2.9.0.dist-info}/RECORD +10 -9
- {python_hwpx-2.8.2.dist-info → python_hwpx-2.9.0.dist-info}/WHEEL +1 -1
- {python_hwpx-2.8.2.dist-info → python_hwpx-2.9.0.dist-info}/entry_points.txt +0 -0
- {python_hwpx-2.8.2.dist-info → python_hwpx-2.9.0.dist-info}/licenses/LICENSE +0 -0
- {python_hwpx-2.8.2.dist-info → python_hwpx-2.9.0.dist-info}/top_level.txt +0 -0
hwpx/__init__.py
CHANGED
|
@@ -10,8 +10,12 @@ def _resolve_version() -> str:
|
|
|
10
10
|
except PackageNotFoundError:
|
|
11
11
|
return "0+unknown"
|
|
12
12
|
|
|
13
|
+
def __getattr__(name: str) -> object:
|
|
14
|
+
"""Resolve dynamic module attributes."""
|
|
13
15
|
|
|
14
|
-
|
|
16
|
+
if name == "__version__":
|
|
17
|
+
return _resolve_version()
|
|
18
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
15
19
|
|
|
16
20
|
from .tools.text_extractor import (
|
|
17
21
|
DEFAULT_NAMESPACES,
|
hwpx/document.py
CHANGED
|
@@ -10,7 +10,7 @@ import logging
|
|
|
10
10
|
import uuid
|
|
11
11
|
|
|
12
12
|
from os import PathLike
|
|
13
|
-
from typing import Any, BinaryIO, Iterator, Sequence, overload
|
|
13
|
+
from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
|
|
14
14
|
|
|
15
15
|
from lxml import etree
|
|
16
16
|
|
|
@@ -53,6 +53,9 @@ _HH = f"{{{_HH_NS}}}"
|
|
|
53
53
|
|
|
54
54
|
logger = logging.getLogger(__name__)
|
|
55
55
|
|
|
56
|
+
if TYPE_CHECKING:
|
|
57
|
+
from .tools.table_navigation import TableFillResult, TableLabelSearchResult, TableMapResult
|
|
58
|
+
|
|
56
59
|
|
|
57
60
|
def _append_element(
|
|
58
61
|
parent: Any,
|
|
@@ -741,6 +744,34 @@ class HwpxDocument:
|
|
|
741
744
|
char_pr_id_ref=char_pr_id_ref,
|
|
742
745
|
)
|
|
743
746
|
|
|
747
|
+
def get_table_map(self) -> TableMapResult:
|
|
748
|
+
"""Return compact metadata for every table in document order."""
|
|
749
|
+
|
|
750
|
+
from .tools.table_navigation import get_table_map
|
|
751
|
+
|
|
752
|
+
return get_table_map(self)
|
|
753
|
+
|
|
754
|
+
def find_cell_by_label(
|
|
755
|
+
self,
|
|
756
|
+
label_text: str,
|
|
757
|
+
direction: str = "right",
|
|
758
|
+
) -> TableLabelSearchResult:
|
|
759
|
+
"""Return every label/target cell pair that matches *label_text*."""
|
|
760
|
+
|
|
761
|
+
from .tools.table_navigation import find_cell_by_label
|
|
762
|
+
|
|
763
|
+
return find_cell_by_label(self, label_text, direction=direction)
|
|
764
|
+
|
|
765
|
+
def fill_by_path(
|
|
766
|
+
self,
|
|
767
|
+
mappings: Mapping[str, str],
|
|
768
|
+
) -> TableFillResult:
|
|
769
|
+
"""Fill table cells using ``label > direction > ...`` navigation paths."""
|
|
770
|
+
|
|
771
|
+
from .tools.table_navigation import fill_by_path
|
|
772
|
+
|
|
773
|
+
return fill_by_path(self, mappings)
|
|
774
|
+
|
|
744
775
|
def add_shape(
|
|
745
776
|
self,
|
|
746
777
|
shape_type: str,
|
hwpx/tools/__init__.py
CHANGED
|
@@ -25,6 +25,19 @@ from .text_extractor import (
|
|
|
25
25
|
describe_element_path,
|
|
26
26
|
strip_namespace,
|
|
27
27
|
)
|
|
28
|
+
from .table_navigation import (
|
|
29
|
+
TableCellReference,
|
|
30
|
+
TableFillApplied,
|
|
31
|
+
TableFillFailed,
|
|
32
|
+
TableFillResult,
|
|
33
|
+
TableLabelMatch,
|
|
34
|
+
TableLabelSearchResult,
|
|
35
|
+
TableMapEntry,
|
|
36
|
+
TableMapResult,
|
|
37
|
+
fill_by_path,
|
|
38
|
+
find_cell_by_label,
|
|
39
|
+
get_table_map,
|
|
40
|
+
)
|
|
28
41
|
from .validator import (
|
|
29
42
|
DocumentSchemas,
|
|
30
43
|
ValidationIssue,
|
|
@@ -41,6 +54,17 @@ __all__ = [
|
|
|
41
54
|
"build_parent_map",
|
|
42
55
|
"describe_element_path",
|
|
43
56
|
"strip_namespace",
|
|
57
|
+
"TableCellReference",
|
|
58
|
+
"TableFillApplied",
|
|
59
|
+
"TableFillFailed",
|
|
60
|
+
"TableFillResult",
|
|
61
|
+
"TableLabelMatch",
|
|
62
|
+
"TableLabelSearchResult",
|
|
63
|
+
"TableMapEntry",
|
|
64
|
+
"TableMapResult",
|
|
65
|
+
"fill_by_path",
|
|
66
|
+
"find_cell_by_label",
|
|
67
|
+
"get_table_map",
|
|
44
68
|
"FoundElement",
|
|
45
69
|
"ObjectFinder",
|
|
46
70
|
"PackageValidationIssue",
|
|
hwpx/tools/table_navigation.py
ADDED
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
"""Reusable helpers for HWPX table discovery and form-like navigation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
import re
|
|
7
|
+
from typing import TYPE_CHECKING, Literal, Mapping, TypedDict
|
|
8
|
+
|
|
9
|
+
from ..oxml import HwpxOxmlParagraph, HwpxOxmlTable
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from ..document import HwpxDocument
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"SearchDirection",
|
|
16
|
+
"PathDirection",
|
|
17
|
+
"TableCellReference",
|
|
18
|
+
"TableFillApplied",
|
|
19
|
+
"TableFillFailed",
|
|
20
|
+
"TableFillResult",
|
|
21
|
+
"TableLabelMatch",
|
|
22
|
+
"TableLabelSearchResult",
|
|
23
|
+
"TableMapEntry",
|
|
24
|
+
"TableMapResult",
|
|
25
|
+
"fill_by_path",
|
|
26
|
+
"find_cell_by_label",
|
|
27
|
+
"get_table_map",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
_HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
|
|
31
|
+
_HP = f"{{{_HP_NS}}}"
|
|
32
|
+
_WHITESPACE_RE = re.compile(r"\s+")
|
|
33
|
+
|
|
34
|
+
SearchDirection = Literal["right", "down"]
|
|
35
|
+
PathDirection = Literal["left", "right", "up", "down"]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TableMapEntry(TypedDict):
|
|
39
|
+
"""Compact metadata describing a table in document order."""
|
|
40
|
+
|
|
41
|
+
table_index: int
|
|
42
|
+
paragraph_index: int
|
|
43
|
+
rows: int
|
|
44
|
+
cols: int
|
|
45
|
+
header_text: str
|
|
46
|
+
first_row_preview: list[str]
|
|
47
|
+
is_empty: bool
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class TableMapResult(TypedDict):
|
|
51
|
+
"""Collection of table metadata entries."""
|
|
52
|
+
|
|
53
|
+
tables: list[TableMapEntry]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TableCellReference(TypedDict):
|
|
57
|
+
"""A logical table cell position and its current text."""
|
|
58
|
+
|
|
59
|
+
row: int
|
|
60
|
+
col: int
|
|
61
|
+
text: str
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class TableLabelMatch(TypedDict):
|
|
65
|
+
"""A label cell and the cell reached from it."""
|
|
66
|
+
|
|
67
|
+
table_index: int
|
|
68
|
+
label_cell: TableCellReference
|
|
69
|
+
target_cell: TableCellReference
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class TableLabelSearchResult(TypedDict):
|
|
73
|
+
"""Result payload returned by :func:`find_cell_by_label`."""
|
|
74
|
+
|
|
75
|
+
matches: list[TableLabelMatch]
|
|
76
|
+
count: int
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class TableFillApplied(TypedDict):
|
|
80
|
+
"""A successfully applied path-based fill operation."""
|
|
81
|
+
|
|
82
|
+
path: str
|
|
83
|
+
table_index: int
|
|
84
|
+
row: int
|
|
85
|
+
col: int
|
|
86
|
+
value: str
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class TableFillFailed(TypedDict):
|
|
90
|
+
"""A failed path-based fill operation and its reason."""
|
|
91
|
+
|
|
92
|
+
path: str
|
|
93
|
+
reason: str
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class TableFillResult(TypedDict):
|
|
97
|
+
"""Batch fill summary for :func:`fill_by_path`."""
|
|
98
|
+
|
|
99
|
+
applied: list[TableFillApplied]
|
|
100
|
+
failed: list[TableFillFailed]
|
|
101
|
+
applied_count: int
|
|
102
|
+
failed_count: int
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass(frozen=True, slots=True)
|
|
106
|
+
class _AnchoredTable:
|
|
107
|
+
table: HwpxOxmlTable
|
|
108
|
+
paragraph_index: int
|
|
109
|
+
header_text: str
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass(frozen=True, slots=True)
|
|
113
|
+
class _IndexedTable:
|
|
114
|
+
table_index: int
|
|
115
|
+
table: HwpxOxmlTable
|
|
116
|
+
paragraph_index: int
|
|
117
|
+
header_text: str
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass(frozen=True, slots=True)
|
|
121
|
+
class _LabelCandidate:
|
|
122
|
+
table_index: int
|
|
123
|
+
table: HwpxOxmlTable
|
|
124
|
+
row: int
|
|
125
|
+
col: int
|
|
126
|
+
text: str
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _collapse_whitespace(value: str) -> str:
|
|
130
|
+
return _WHITESPACE_RE.sub(" ", value).strip()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _normalize_label_text(value: str) -> str:
|
|
134
|
+
normalized = _collapse_whitespace(value).casefold()
|
|
135
|
+
while normalized.endswith((":", "：")):
|
|
136
|
+
normalized = normalized[:-1].rstrip()
|
|
137
|
+
return normalized
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _direct_paragraph_text(paragraph: HwpxOxmlParagraph) -> str:
|
|
141
|
+
parts: list[str] = []
|
|
142
|
+
for run in paragraph.element.findall(f"{_HP}run"):
|
|
143
|
+
for child in run:
|
|
144
|
+
if child.tag == f"{_HP}t" and child.text:
|
|
145
|
+
parts.append(child.text)
|
|
146
|
+
return _collapse_whitespace("".join(parts))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _collect_tables_from_table(
|
|
150
|
+
table: HwpxOxmlTable,
|
|
151
|
+
*,
|
|
152
|
+
anchor_paragraph_index: int,
|
|
153
|
+
inherited_header_text: str,
|
|
154
|
+
sink: list[_AnchoredTable],
|
|
155
|
+
) -> str:
|
|
156
|
+
last_header_text = inherited_header_text
|
|
157
|
+
for row in table.rows:
|
|
158
|
+
for cell in row.cells:
|
|
159
|
+
for paragraph in cell.paragraphs:
|
|
160
|
+
last_header_text = _collect_tables_from_paragraph(
|
|
161
|
+
paragraph,
|
|
162
|
+
anchor_paragraph_index=anchor_paragraph_index,
|
|
163
|
+
inherited_header_text=last_header_text,
|
|
164
|
+
sink=sink,
|
|
165
|
+
)
|
|
166
|
+
return last_header_text
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _collect_tables_from_paragraph(
|
|
170
|
+
paragraph: HwpxOxmlParagraph,
|
|
171
|
+
*,
|
|
172
|
+
anchor_paragraph_index: int,
|
|
173
|
+
inherited_header_text: str,
|
|
174
|
+
sink: list[_AnchoredTable],
|
|
175
|
+
) -> str:
|
|
176
|
+
paragraph_text_parts: list[str] = []
|
|
177
|
+
last_header_text = inherited_header_text
|
|
178
|
+
|
|
179
|
+
for run in paragraph.element.findall(f"{_HP}run"):
|
|
180
|
+
for child in run:
|
|
181
|
+
if child.tag == f"{_HP}t":
|
|
182
|
+
if child.text:
|
|
183
|
+
paragraph_text_parts.append(child.text)
|
|
184
|
+
continue
|
|
185
|
+
if child.tag != f"{_HP}tbl":
|
|
186
|
+
continue
|
|
187
|
+
|
|
188
|
+
paragraph_prefix_text = _collapse_whitespace("".join(paragraph_text_parts))
|
|
189
|
+
header_text = paragraph_prefix_text or last_header_text
|
|
190
|
+
table = HwpxOxmlTable(child, paragraph)
|
|
191
|
+
sink.append(
|
|
192
|
+
_AnchoredTable(
|
|
193
|
+
table=table,
|
|
194
|
+
paragraph_index=anchor_paragraph_index,
|
|
195
|
+
header_text=header_text,
|
|
196
|
+
)
|
|
197
|
+
)
|
|
198
|
+
last_header_text = _collect_tables_from_table(
|
|
199
|
+
table,
|
|
200
|
+
anchor_paragraph_index=anchor_paragraph_index,
|
|
201
|
+
inherited_header_text=header_text,
|
|
202
|
+
sink=sink,
|
|
203
|
+
)
|
|
204
|
+
|
|
205
|
+
paragraph_text = _collapse_whitespace("".join(paragraph_text_parts))
|
|
206
|
+
return paragraph_text or last_header_text
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _collect_document_tables(document: HwpxDocument) -> list[_IndexedTable]:
|
|
210
|
+
anchored_tables: list[_AnchoredTable] = []
|
|
211
|
+
last_top_level_text = ""
|
|
212
|
+
|
|
213
|
+
for paragraph_index, paragraph in enumerate(document.paragraphs):
|
|
214
|
+
_collect_tables_from_paragraph(
|
|
215
|
+
paragraph,
|
|
216
|
+
anchor_paragraph_index=paragraph_index,
|
|
217
|
+
inherited_header_text=last_top_level_text,
|
|
218
|
+
sink=anchored_tables,
|
|
219
|
+
)
|
|
220
|
+
paragraph_text = _direct_paragraph_text(paragraph)
|
|
221
|
+
if paragraph_text:
|
|
222
|
+
last_top_level_text = paragraph_text
|
|
223
|
+
|
|
224
|
+
return [
|
|
225
|
+
_IndexedTable(
|
|
226
|
+
table_index=table_index,
|
|
227
|
+
table=item.table,
|
|
228
|
+
paragraph_index=item.paragraph_index,
|
|
229
|
+
header_text=item.header_text,
|
|
230
|
+
)
|
|
231
|
+
for table_index, item in enumerate(anchored_tables)
|
|
232
|
+
]
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _cell_text(table: HwpxOxmlTable, row_index: int, col_index: int) -> str:
|
|
236
|
+
return table.cell(row_index, col_index).text
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _table_is_empty(table: HwpxOxmlTable) -> bool:
|
|
240
|
+
for row_index in range(table.row_count):
|
|
241
|
+
for col_index in range(table.column_count):
|
|
242
|
+
if _cell_text(table, row_index, col_index).strip():
|
|
243
|
+
return False
|
|
244
|
+
return True
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _first_row_preview(table: HwpxOxmlTable) -> list[str]:
|
|
248
|
+
if table.row_count == 0:
|
|
249
|
+
return []
|
|
250
|
+
return [_cell_text(table, 0, col_index) for col_index in range(table.column_count)]
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _direction_delta(direction: PathDirection) -> tuple[int, int]:
|
|
254
|
+
if direction == "right":
|
|
255
|
+
return (0, 1)
|
|
256
|
+
if direction == "left":
|
|
257
|
+
return (0, -1)
|
|
258
|
+
if direction == "down":
|
|
259
|
+
return (1, 0)
|
|
260
|
+
return (-1, 0)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _move(
|
|
264
|
+
table: HwpxOxmlTable,
|
|
265
|
+
row_index: int,
|
|
266
|
+
col_index: int,
|
|
267
|
+
direction: PathDirection,
|
|
268
|
+
) -> tuple[int, int] | None:
|
|
269
|
+
row_delta, col_delta = _direction_delta(direction)
|
|
270
|
+
target_row = row_index + row_delta
|
|
271
|
+
target_col = col_index + col_delta
|
|
272
|
+
if target_row < 0 or target_col < 0:
|
|
273
|
+
return None
|
|
274
|
+
if target_row >= table.row_count or target_col >= table.column_count:
|
|
275
|
+
return None
|
|
276
|
+
return (target_row, target_col)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _find_label_candidates(
|
|
280
|
+
tables: list[_IndexedTable],
|
|
281
|
+
label_text: str,
|
|
282
|
+
) -> list[_LabelCandidate]:
|
|
283
|
+
normalized_label = _normalize_label_text(label_text)
|
|
284
|
+
if not normalized_label:
|
|
285
|
+
raise ValueError("label_text must contain at least one non-whitespace character")
|
|
286
|
+
|
|
287
|
+
candidates: list[_LabelCandidate] = []
|
|
288
|
+
for table_ref in tables:
|
|
289
|
+
for row_index in range(table_ref.table.row_count):
|
|
290
|
+
for col_index in range(table_ref.table.column_count):
|
|
291
|
+
cell_text = _cell_text(table_ref.table, row_index, col_index)
|
|
292
|
+
if _normalize_label_text(cell_text) != normalized_label:
|
|
293
|
+
continue
|
|
294
|
+
candidates.append(
|
|
295
|
+
_LabelCandidate(
|
|
296
|
+
table_index=table_ref.table_index,
|
|
297
|
+
table=table_ref.table,
|
|
298
|
+
row=row_index,
|
|
299
|
+
col=col_index,
|
|
300
|
+
text=cell_text,
|
|
301
|
+
)
|
|
302
|
+
)
|
|
303
|
+
return candidates
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _cell_reference(
|
|
307
|
+
table: HwpxOxmlTable,
|
|
308
|
+
row_index: int,
|
|
309
|
+
col_index: int,
|
|
310
|
+
) -> TableCellReference:
|
|
311
|
+
return {
|
|
312
|
+
"row": row_index,
|
|
313
|
+
"col": col_index,
|
|
314
|
+
"text": _cell_text(table, row_index, col_index),
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _parse_path(path: str) -> tuple[str | None, list[str], str | None]:
|
|
319
|
+
tokens = [token.strip() for token in path.split(">")]
|
|
320
|
+
if not tokens or not tokens[0]:
|
|
321
|
+
return (None, [], "path must start with a label")
|
|
322
|
+
|
|
323
|
+
label_text = tokens[0]
|
|
324
|
+
raw_directions = [token for token in tokens[1:] if token]
|
|
325
|
+
if not raw_directions:
|
|
326
|
+
return (label_text, [], "path must include at least one direction")
|
|
327
|
+
return (label_text, raw_directions, None)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def get_table_map(document: HwpxDocument) -> TableMapResult:
|
|
331
|
+
"""Return compact metadata for every table in document order."""
|
|
332
|
+
|
|
333
|
+
tables: list[TableMapEntry] = []
|
|
334
|
+
for table_ref in _collect_document_tables(document):
|
|
335
|
+
tables.append(
|
|
336
|
+
{
|
|
337
|
+
"table_index": table_ref.table_index,
|
|
338
|
+
"paragraph_index": table_ref.paragraph_index,
|
|
339
|
+
"rows": table_ref.table.row_count,
|
|
340
|
+
"cols": table_ref.table.column_count,
|
|
341
|
+
"header_text": table_ref.header_text,
|
|
342
|
+
"first_row_preview": _first_row_preview(table_ref.table),
|
|
343
|
+
"is_empty": _table_is_empty(table_ref.table),
|
|
344
|
+
}
|
|
345
|
+
)
|
|
346
|
+
return {"tables": tables}
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def find_cell_by_label(
|
|
350
|
+
document: HwpxDocument,
|
|
351
|
+
label_text: str,
|
|
352
|
+
direction: SearchDirection = "right",
|
|
353
|
+
) -> TableLabelSearchResult:
|
|
354
|
+
"""Find label cells and return the adjacent target cells that remain in bounds."""
|
|
355
|
+
|
|
356
|
+
if direction not in {"right", "down"}:
|
|
357
|
+
raise ValueError("direction must be one of: right, down")
|
|
358
|
+
|
|
359
|
+
matches: list[TableLabelMatch] = []
|
|
360
|
+
for candidate in _find_label_candidates(_collect_document_tables(document), label_text):
|
|
361
|
+
target = _move(candidate.table, candidate.row, candidate.col, direction)
|
|
362
|
+
if target is None:
|
|
363
|
+
continue
|
|
364
|
+
target_row, target_col = target
|
|
365
|
+
matches.append(
|
|
366
|
+
{
|
|
367
|
+
"table_index": candidate.table_index,
|
|
368
|
+
"label_cell": {
|
|
369
|
+
"row": candidate.row,
|
|
370
|
+
"col": candidate.col,
|
|
371
|
+
"text": candidate.text,
|
|
372
|
+
},
|
|
373
|
+
"target_cell": _cell_reference(candidate.table, target_row, target_col),
|
|
374
|
+
}
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
return {
|
|
378
|
+
"matches": matches,
|
|
379
|
+
"count": len(matches),
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def fill_by_path(
|
|
384
|
+
document: HwpxDocument,
|
|
385
|
+
mappings: Mapping[str, str],
|
|
386
|
+
) -> TableFillResult:
|
|
387
|
+
"""Fill multiple table cells using label-based navigation paths."""
|
|
388
|
+
|
|
389
|
+
indexed_tables = _collect_document_tables(document)
|
|
390
|
+
applied: list[TableFillApplied] = []
|
|
391
|
+
failed: list[TableFillFailed] = []
|
|
392
|
+
|
|
393
|
+
for path, value in mappings.items():
|
|
394
|
+
label_text, raw_directions, path_error = _parse_path(path)
|
|
395
|
+
if path_error is not None or label_text is None:
|
|
396
|
+
failed.append({"path": path, "reason": path_error or "invalid path"})
|
|
397
|
+
continue
|
|
398
|
+
|
|
399
|
+
try:
|
|
400
|
+
candidates = _find_label_candidates(indexed_tables, label_text)
|
|
401
|
+
except ValueError as exc:
|
|
402
|
+
failed.append({"path": path, "reason": str(exc)})
|
|
403
|
+
continue
|
|
404
|
+
|
|
405
|
+
if not candidates:
|
|
406
|
+
failed.append({"path": path, "reason": "label not found"})
|
|
407
|
+
continue
|
|
408
|
+
if len(candidates) > 1:
|
|
409
|
+
failed.append({"path": path, "reason": "ambiguous label"})
|
|
410
|
+
continue
|
|
411
|
+
|
|
412
|
+
candidate = candidates[0]
|
|
413
|
+
current_row = candidate.row
|
|
414
|
+
current_col = candidate.col
|
|
415
|
+
navigation_failed = False
|
|
416
|
+
|
|
417
|
+
for raw_direction in raw_directions:
|
|
418
|
+
direction = raw_direction.casefold()
|
|
419
|
+
if direction not in {"left", "right", "up", "down"}:
|
|
420
|
+
failed.append(
|
|
421
|
+
{
|
|
422
|
+
"path": path,
|
|
423
|
+
"reason": f"unsupported direction: {raw_direction}",
|
|
424
|
+
}
|
|
425
|
+
)
|
|
426
|
+
navigation_failed = True
|
|
427
|
+
break
|
|
428
|
+
|
|
429
|
+
next_position = _move(candidate.table, current_row, current_col, direction)
|
|
430
|
+
if next_position is None:
|
|
431
|
+
failed.append({"path": path, "reason": "navigation out of bounds"})
|
|
432
|
+
navigation_failed = True
|
|
433
|
+
break
|
|
434
|
+
|
|
435
|
+
current_row, current_col = next_position
|
|
436
|
+
|
|
437
|
+
if navigation_failed:
|
|
438
|
+
continue
|
|
439
|
+
|
|
440
|
+
text_value = str(value)
|
|
441
|
+
candidate.table.set_cell_text(current_row, current_col, text_value, logical=True)
|
|
442
|
+
applied.append(
|
|
443
|
+
{
|
|
444
|
+
"path": path,
|
|
445
|
+
"table_index": candidate.table_index,
|
|
446
|
+
"row": current_row,
|
|
447
|
+
"col": current_col,
|
|
448
|
+
"value": text_value,
|
|
449
|
+
}
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
return {
|
|
453
|
+
"applied": applied,
|
|
454
|
+
"failed": failed,
|
|
455
|
+
"applied_count": len(applied),
|
|
456
|
+
"failed_count": len(failed),
|
|
457
|
+
}
|
|
{python_hwpx-2.8.2.dist-info → python_hwpx-2.9.0.dist-info}/METADATA
CHANGED
|
@@ -1,48 +1,15 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-hwpx
|
|
3
|
-
Version: 2.8.2
|
|
3
|
+
Version: 2.9.0
|
|
4
4
|
Summary: Hancom HWPX 패키지를 로드하고 편집하기 위한 Python 유틸리티 모음
|
|
5
5
|
Author: python-hwpx Maintainers
|
|
6
|
-
License:
|
|
7
|
-
|
|
8
|
-
Copyright (c) 2024 python-hwpx Maintainers
|
|
9
|
-
|
|
10
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
-
of this software and associated documentation files (the "Software"), to use,
|
|
12
|
-
copy, modify, merge, publish, distribute, and sublicense the Software only for
|
|
13
|
-
non-commercial purposes, subject to the following conditions:
|
|
14
|
-
|
|
15
|
-
1. Non-Commercial Use Only. The Software may be used, copied, modified,
|
|
16
|
-
merged, published, distributed, and sublicensed only for non-commercial
|
|
17
|
-
purposes. "Non-Commercial" means use that is not primarily intended for or
|
|
18
|
-
directed toward commercial advantage, monetary compensation, or any form of
|
|
19
|
-
direct or indirect commercial exploitation.
|
|
20
|
-
|
|
21
|
-
2. Attribution. The above copyright notice and this permission notice shall be
|
|
22
|
-
included in all copies or substantial portions of the Software.
|
|
23
|
-
|
|
24
|
-
3. No Warranty of Commercial Support. The maintainers are not obligated to
|
|
25
|
-
provide commercial support, maintenance, or updates.
|
|
26
|
-
|
|
27
|
-
THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
28
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
29
|
-
FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. IN NO EVENT SHALL THE
|
|
30
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
31
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
32
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
33
|
-
SOFTWARE.
|
|
34
|
-
|
|
35
|
-
If you require permissions to use this Software for commercial purposes,
|
|
36
|
-
please contact the copyright holders to negotiate an alternative licensing
|
|
37
|
-
arrangement.
|
|
38
|
-
|
|
6
|
+
License-Expression: LicenseRef-python-hwpx-NonCommercial
|
|
39
7
|
Project-URL: Homepage, https://github.com/airmang/python-hwpx
|
|
40
8
|
Project-URL: Documentation, https://github.com/airmang/python-hwpx/tree/main/docs
|
|
41
9
|
Project-URL: Issues, https://github.com/airmang/python-hwpx/issues
|
|
42
10
|
Keywords: hwp,hwpx,hancom,opc,xml
|
|
43
11
|
Classifier: Development Status :: 3 - Alpha
|
|
44
12
|
Classifier: Intended Audience :: Developers
|
|
45
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
46
13
|
Classifier: Programming Language :: Python :: 3
|
|
47
14
|
Classifier: Programming Language :: Python :: 3.10
|
|
48
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
@@ -58,6 +25,7 @@ Requires-Dist: build>=1.0; extra == "dev"
|
|
|
58
25
|
Requires-Dist: twine>=4.0; extra == "dev"
|
|
59
26
|
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
60
27
|
Provides-Extra: test
|
|
28
|
+
Requires-Dist: build>=1.0; extra == "test"
|
|
61
29
|
Requires-Dist: pytest>=7.4; extra == "test"
|
|
62
30
|
Requires-Dist: pytest-cov>=5.0; extra == "test"
|
|
63
31
|
Provides-Extra: typecheck
|
|
@@ -73,7 +41,7 @@ Dynamic: license-file
|
|
|
73
41
|
<p align="center">
|
|
74
42
|
<a href="https://pypi.org/project/python-hwpx/"><img src="https://img.shields.io/pypi/v/python-hwpx?color=blue&label=PyPI" alt="PyPI"></a>
|
|
75
43
|
<a href="https://pypi.org/project/python-hwpx/"><img src="https://img.shields.io/pypi/pyversions/python-hwpx" alt="Python"></a>
|
|
76
|
-
<a href="https://github.com/airmang/python-hwpx/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-
|
|
44
|
+
<a href="https://github.com/airmang/python-hwpx/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Custom%20Noncommercial-orange" alt="License: Custom Non-Commercial"></a>
|
|
77
45
|
<a href="https://airmang.github.io/python-hwpx/"><img src="https://img.shields.io/badge/docs-Sphinx-8CA1AF" alt="Docs"></a>
|
|
78
46
|
</p>
|
|
79
47
|
</p>
|
|
@@ -153,6 +121,7 @@ doc.save_to_path("결과물.hwpx")
|
|
|
153
121
|
| 📝 **단락** | 추가/삭제/편집/서식 | 텍스트 설정, 단락 삭제(`remove_paragraph`), 스타일 참조 |
|
|
154
122
|
| ✏️ **Run** | 텍스트 조각 | 추가, 교체, 볼드/이탤릭/밑줄/색상 서식 |
|
|
155
123
|
| 📊 **표(Table)** | 생성/편집/병합 | N×M 표 생성, 셀 텍스트, 셀 병합/분할, 중첩 테이블 |
|
|
124
|
+
| 🧭 **표 자동화** | 탐색/채우기 | 테이블 맵, 라벨 기반 셀 탐색, 경로 기반 배치 채우기 |
|
|
156
125
|
| 📑 **섹션** | 추가/삭제 | `add_section(after=)`, `remove_section()`, manifest 자동 관리 |
|
|
157
126
|
| 🖼️ **이미지** | 임베드/삭제 | 바이너리 데이터 관리, manifest 자동 등록 |
|
|
158
127
|
| ✏️ **도형** | 선/사각형/타원 | OWPML 명세 준수 도형 삽입 |
|
|
@@ -193,6 +162,17 @@ doc.set_footer_text("1 / 10", page_type="BOTH")
|
|
|
193
162
|
# 표 셀 병합·분할
|
|
194
163
|
table.merge_cells(0, 0, 1, 1) # (0,0)~(1,1) 병합
|
|
195
164
|
table.set_cell_text(0, 0, "병합된 셀", logical=True, split_merged=True)
|
|
165
|
+
|
|
166
|
+
# 양식형 표 자동 채우기
|
|
167
|
+
form = doc.add_table(2, 2)
|
|
168
|
+
form.cell(0, 0).text = "성명:"
|
|
169
|
+
form.cell(1, 0).text = "소속"
|
|
170
|
+
|
|
171
|
+
doc.find_cell_by_label("성명") # {"matches": [...], "count": 1}
|
|
172
|
+
doc.fill_by_path({
|
|
173
|
+
"성명 > right": "홍길동",
|
|
174
|
+
"소속 > right": "플랫폼팀",
|
|
175
|
+
})
|
|
196
176
|
```
|
|
197
177
|
|
|
198
178
|
### 🔍 텍스트 추출 & 검색
|
|
@@ -353,13 +333,15 @@ pytest
|
|
|
353
333
|
|
|
354
334
|
## License
|
|
355
335
|
|
|
356
|
-
[
|
|
336
|
+
[Custom Non-Commercial License](LICENSE) © python-hwpx Maintainers
|
|
337
|
+
|
|
338
|
+
Commercial use requires separate permission from the copyright holders.
|
|
357
339
|
|
|
358
340
|
<br>
|
|
359
341
|
|
|
360
|
-
##
|
|
342
|
+
## Maintainer
|
|
361
343
|
|
|
362
|
-
**고규현** — 광교고등학교 정보·컴퓨터 교사
|
|
344
|
+
Primary maintainer/contact: **고규현** — 광교고등학교 정보·컴퓨터 교사
|
|
363
345
|
|
|
364
346
|
- ✉️ [kokyuhyun@hotmail.com](mailto:kokyuhyun@hotmail.com)
|
|
365
347
|
- 🐙 [@airmang](https://github.com/airmang)
|
|
{python_hwpx-2.8.2.dist-info → python_hwpx-2.9.0.dist-info}/RECORD
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
hwpx/__init__.py,sha256=
|
|
2
|
-
hwpx/document.py,sha256=
|
|
1
|
+
hwpx/__init__.py,sha256=pReUJ2NBbrnHKZfAipQIxge5hCmgdUM1j7m-xxQyIkA,1046
|
|
2
|
+
hwpx/document.py,sha256=B9qHnM6ttQPAz8-f47p0WdxUdz2QYeMLIt9qpAqBsvY,49124
|
|
3
3
|
hwpx/package.py,sha256=YK4oYEPk7la2BZKZepoVHzrjGIPMDnDdPa02Hh-RTBw,1103
|
|
4
4
|
hwpx/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
hwpx/templates.py,sha256=kZ_gV0bP-DIvr5CJuzs-uGnt8XVncJCI3cGFq083uTg,1149
|
|
@@ -21,21 +21,22 @@ hwpx/oxml/schema.py,sha256=THswXdMNpAiSoLxpvUGbdbI66hW-SKuUqSw4vdkIYmA,1246
|
|
|
21
21
|
hwpx/oxml/section.py,sha256=WwxZ6PWPeMrj2L9mz4JlqFGXwd7E7qAuSBuM5dgRjZk,199
|
|
22
22
|
hwpx/oxml/table.py,sha256=pdO2TTAcbEC6Z4cnaOnB-bcmuZ1KVado7J3RiY_zOfE,193
|
|
23
23
|
hwpx/oxml/utils.py,sha256=to0yytS7vtLSvWl-dQyegT6MWClMK55b1Sp1uagEkI4,2591
|
|
24
|
-
hwpx/tools/__init__.py,sha256=
|
|
24
|
+
hwpx/tools/__init__.py,sha256=kKcFSwpWFHRmXLL6AzLeS39-hWE6aBXPkp5yTbh9QuM,1800
|
|
25
25
|
hwpx/tools/archive_cli.py,sha256=FauetaiLsU9dNtm7dswGrF9Q4f_UEse4_vwDk9qTyPE,11233
|
|
26
26
|
hwpx/tools/exporter.py,sha256=GcbNtV4rIWOJv5nBcgdX0yfkXQa-xQhfrCzXWgaNbTE,8862
|
|
27
27
|
hwpx/tools/object_finder.py,sha256=vbZ8FuIpGF-2vpbWDeZWi4UgZ2-3PK_ddQCs0oq1dRw,13440
|
|
28
28
|
hwpx/tools/package_validator.py,sha256=jUwAV1L2SzDpNGLoZZJU7-YaA70Ob4WgQGsy7x9MXV0,12560
|
|
29
29
|
hwpx/tools/page_guard.py,sha256=BFBIR2sB7-GjT1d0gWsGDD5PdB3xTvfUUvbRpmRgV1w,9663
|
|
30
|
+
hwpx/tools/table_navigation.py,sha256=lIduw2JgEsHHvrrUtMinRLN7-dP9JXR3gNuFXj28mjs,13482
|
|
30
31
|
hwpx/tools/template_analyzer.py,sha256=nYo3kvqXkkO4-NRizKWdyJcXfxN24-KMQPOZWbpwp5M,8459
|
|
31
32
|
hwpx/tools/text_extract_cli.py,sha256=pIBMIFuFX10IEegw7fQ3gtUbQyjNgbAUYkQWh2S3aQs,2150
|
|
32
33
|
hwpx/tools/text_extractor.py,sha256=LQOll7EZBP_QhRjiGofJAoMdZg7SUYVzeEr4JhMKYOg,25142
|
|
33
34
|
hwpx/tools/validator.py,sha256=KThqBQKKQfZkuLMGtzONbPkzy877-2FgT22FHPmt_gI,5979
|
|
34
35
|
hwpx/tools/_schemas/header.xsd,sha256=mJXuFMuHGT1JnFFaluUpYUglwjMCNlfbFCRVM26eHXE,664
|
|
35
36
|
hwpx/tools/_schemas/section.xsd,sha256=MgvavVHG05RDfUnVPxVU10H4FQOja5ON04_m9Uk_m7E,522
|
|
36
|
-
python_hwpx-2.
|
|
37
|
-
python_hwpx-2.
|
|
38
|
-
python_hwpx-2.
|
|
39
|
-
python_hwpx-2.
|
|
40
|
-
python_hwpx-2.
|
|
41
|
-
python_hwpx-2.
|
|
37
|
+
python_hwpx-2.9.0.dist-info/licenses/LICENSE,sha256=3F1-JUTcmjmxMpHGeB77ZzaSdhms3h8p1DBBa3lvV08,1609
|
|
38
|
+
python_hwpx-2.9.0.dist-info/METADATA,sha256=WB7KBgMf6DzWclgB9UnOKbOlOzxwphhHHCUDnlg60Mk,14084
|
|
39
|
+
python_hwpx-2.9.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
40
|
+
python_hwpx-2.9.0.dist-info/entry_points.txt,sha256=zKneV9VceQKwbJUo-mUUbwRmQjNyNSzrv44XuMhsaUU,368
|
|
41
|
+
python_hwpx-2.9.0.dist-info/top_level.txt,sha256=R1iToqDh80Nf2oQhRjTN0rbN2X6kyDUizIocZjkhuxc,5
|
|
42
|
+
python_hwpx-2.9.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|