python-hwpx 2.8.3__tar.gz → 2.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_hwpx-2.8.3/src/python_hwpx.egg-info → python_hwpx-2.9.0}/PKG-INFO +13 -1
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/README.md +12 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/pyproject.toml +1 -1
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/__init__.py +5 -1
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/document.py +32 -1
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/__init__.py +24 -0
- python_hwpx-2.9.0/src/hwpx/tools/table_navigation.py +457 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0/src/python_hwpx.egg-info}/PKG-INFO +13 -1
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/python_hwpx.egg-info/SOURCES.txt +2 -0
- python_hwpx-2.9.0/tests/test_table_navigation.py +183 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/LICENSE +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/setup.cfg +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/data/Skeleton.hwpx +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/opc/package.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/opc/relationships.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/opc/xml_utils.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/__init__.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/body.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/common.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/document.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/header.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/header_part.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/memo.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/namespaces.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/paragraph.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/parser.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/schema.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/section.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/table.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/oxml/utils.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/package.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/py.typed +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/templates.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/_schemas/header.xsd +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/_schemas/section.xsd +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/archive_cli.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/exporter.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/object_finder.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/package_validator.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/page_guard.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/template_analyzer.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/text_extract_cli.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/text_extractor.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/hwpx/tools/validator.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/python_hwpx.egg-info/dependency_links.txt +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/python_hwpx.egg-info/entry_points.txt +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/python_hwpx.egg-info/requires.txt +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/src/python_hwpx.egg-info/top_level.txt +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_coverage_targets.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_document_context_manager.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_document_formatting.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_document_save_api.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_gap_closure_tools.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_inline_models.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_integration_hwpx_compatibility.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_integration_roundtrip.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_memo_and_style_editing.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_new_features.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_opc_package.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_oxml_parsing.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_packaging_license_metadata.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_packaging_py_typed.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_paragraph_section_management.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_repr_snapshots.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_section_headers.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_split_merged_cell.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_tables_default_border.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_text_extractor_annotations.py +0 -0
- {python_hwpx-2.8.3 → python_hwpx-2.9.0}/tests/test_version_metadata.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-hwpx
|
|
3
|
-
Version: 2.8.3
|
|
3
|
+
Version: 2.9.0
|
|
4
4
|
Summary: Hancom HWPX 패키지를 로드하고 편집하기 위한 Python 유틸리티 모음
|
|
5
5
|
Author: python-hwpx Maintainers
|
|
6
6
|
License-Expression: LicenseRef-python-hwpx-NonCommercial
|
|
@@ -121,6 +121,7 @@ doc.save_to_path("결과물.hwpx")
|
|
|
121
121
|
| 📝 **단락** | 추가/삭제/편집/서식 | 텍스트 설정, 단락 삭제(`remove_paragraph`), 스타일 참조 |
|
|
122
122
|
| ✏️ **Run** | 텍스트 조각 | 추가, 교체, 볼드/이탤릭/밑줄/색상 서식 |
|
|
123
123
|
| 📊 **표(Table)** | 생성/편집/병합 | N×M 표 생성, 셀 텍스트, 셀 병합/분할, 중첩 테이블 |
|
|
124
|
+
| 🧭 **표 자동화** | 탐색/채우기 | 테이블 맵, 라벨 기반 셀 탐색, 경로 기반 배치 채우기 |
|
|
124
125
|
| 📑 **섹션** | 추가/삭제 | `add_section(after=)`, `remove_section()`, manifest 자동 관리 |
|
|
125
126
|
| 🖼️ **이미지** | 임베드/삭제 | 바이너리 데이터 관리, manifest 자동 등록 |
|
|
126
127
|
| ✏️ **도형** | 선/사각형/타원 | OWPML 명세 준수 도형 삽입 |
|
|
@@ -161,6 +162,17 @@ doc.set_footer_text("1 / 10", page_type="BOTH")
|
|
|
161
162
|
# 표 셀 병합·분할
|
|
162
163
|
table.merge_cells(0, 0, 1, 1) # (0,0)~(1,1) 병합
|
|
163
164
|
table.set_cell_text(0, 0, "병합된 셀", logical=True, split_merged=True)
|
|
165
|
+
|
|
166
|
+
# 양식형 표 자동 채우기
|
|
167
|
+
form = doc.add_table(2, 2)
|
|
168
|
+
form.cell(0, 0).text = "성명:"
|
|
169
|
+
form.cell(1, 0).text = "소속"
|
|
170
|
+
|
|
171
|
+
doc.find_cell_by_label("성명") # {"matches": [...], "count": 1}
|
|
172
|
+
doc.fill_by_path({
|
|
173
|
+
"성명 > right": "홍길동",
|
|
174
|
+
"소속 > right": "플랫폼팀",
|
|
175
|
+
})
|
|
164
176
|
```
|
|
165
177
|
|
|
166
178
|
### 🔍 텍스트 추출 & 검색
|
|
@@ -86,6 +86,7 @@ doc.save_to_path("결과물.hwpx")
|
|
|
86
86
|
| 📝 **단락** | 추가/삭제/편집/서식 | 텍스트 설정, 단락 삭제(`remove_paragraph`), 스타일 참조 |
|
|
87
87
|
| ✏️ **Run** | 텍스트 조각 | 추가, 교체, 볼드/이탤릭/밑줄/색상 서식 |
|
|
88
88
|
| 📊 **표(Table)** | 생성/편집/병합 | N×M 표 생성, 셀 텍스트, 셀 병합/분할, 중첩 테이블 |
|
|
89
|
+
| 🧭 **표 자동화** | 탐색/채우기 | 테이블 맵, 라벨 기반 셀 탐색, 경로 기반 배치 채우기 |
|
|
89
90
|
| 📑 **섹션** | 추가/삭제 | `add_section(after=)`, `remove_section()`, manifest 자동 관리 |
|
|
90
91
|
| 🖼️ **이미지** | 임베드/삭제 | 바이너리 데이터 관리, manifest 자동 등록 |
|
|
91
92
|
| ✏️ **도형** | 선/사각형/타원 | OWPML 명세 준수 도형 삽입 |
|
|
@@ -126,6 +127,17 @@ doc.set_footer_text("1 / 10", page_type="BOTH")
|
|
|
126
127
|
# 표 셀 병합·분할
|
|
127
128
|
table.merge_cells(0, 0, 1, 1) # (0,0)~(1,1) 병합
|
|
128
129
|
table.set_cell_text(0, 0, "병합된 셀", logical=True, split_merged=True)
|
|
130
|
+
|
|
131
|
+
# 양식형 표 자동 채우기
|
|
132
|
+
form = doc.add_table(2, 2)
|
|
133
|
+
form.cell(0, 0).text = "성명:"
|
|
134
|
+
form.cell(1, 0).text = "소속"
|
|
135
|
+
|
|
136
|
+
doc.find_cell_by_label("성명") # {"matches": [...], "count": 1}
|
|
137
|
+
doc.fill_by_path({
|
|
138
|
+
"성명 > right": "홍길동",
|
|
139
|
+
"소속 > right": "플랫폼팀",
|
|
140
|
+
})
|
|
129
141
|
```
|
|
130
142
|
|
|
131
143
|
### 🔍 텍스트 추출 & 검색
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "python-hwpx"
|
|
7
|
-
version = "2.8.3"
|
|
7
|
+
version = "2.9.0"
|
|
8
8
|
description = "Hancom HWPX 패키지를 로드하고 편집하기 위한 Python 유틸리티 모음"
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
license = "LicenseRef-python-hwpx-NonCommercial"
|
|
@@ -10,8 +10,12 @@ def _resolve_version() -> str:
|
|
|
10
10
|
except PackageNotFoundError:
|
|
11
11
|
return "0+unknown"
|
|
12
12
|
|
|
13
|
+
def __getattr__(name: str) -> object:
    """Resolve lazily computed module attributes (module-level ``__getattr__`` hook).

    ``__version__`` is computed on access by calling ``_resolve_version()``.
    Every other name is rejected with the standard ``AttributeError`` message.
    """
    if name != "__version__":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _resolve_version()
|
|
15
19
|
|
|
16
20
|
from .tools.text_extractor import (
|
|
17
21
|
DEFAULT_NAMESPACES,
|
|
@@ -10,7 +10,7 @@ import logging
|
|
|
10
10
|
import uuid
|
|
11
11
|
|
|
12
12
|
from os import PathLike
|
|
13
|
-
from typing import Any, BinaryIO, Iterator, Sequence, overload
|
|
13
|
+
from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
|
|
14
14
|
|
|
15
15
|
from lxml import etree
|
|
16
16
|
|
|
@@ -53,6 +53,9 @@ _HH = f"{{{_HH_NS}}}"
|
|
|
53
53
|
|
|
54
54
|
logger = logging.getLogger(__name__)
|
|
55
55
|
|
|
56
|
+
if TYPE_CHECKING:
|
|
57
|
+
from .tools.table_navigation import TableFillResult, TableLabelSearchResult, TableMapResult
|
|
58
|
+
|
|
56
59
|
|
|
57
60
|
def _append_element(
|
|
58
61
|
parent: Any,
|
|
@@ -741,6 +744,34 @@ class HwpxDocument:
|
|
|
741
744
|
char_pr_id_ref=char_pr_id_ref,
|
|
742
745
|
)
|
|
743
746
|
|
|
747
|
+
def get_table_map(self) -> TableMapResult:
    """Return compact metadata for every table in document order.

    Delegates to ``hwpx.tools.table_navigation.get_table_map``. The import is
    done inside the method rather than at module import time.
    """
    from .tools import table_navigation as _table_navigation

    return _table_navigation.get_table_map(self)
|
|
753
|
+
|
|
754
|
+
def find_cell_by_label(
    self,
    label_text: str,
    direction: str = "right",
) -> TableLabelSearchResult:
    """Return every label/target cell pair that matches *label_text*.

    Delegates to ``hwpx.tools.table_navigation.find_cell_by_label``, passing
    *direction* through unchanged. The import is done inside the method
    rather than at module import time.
    """
    from .tools import table_navigation as _table_navigation

    return _table_navigation.find_cell_by_label(
        self,
        label_text,
        direction=direction,
    )
|
|
764
|
+
|
|
765
|
+
def fill_by_path(
    self,
    mappings: Mapping[str, str],
) -> TableFillResult:
    """Fill table cells using ``label > direction > ...`` navigation paths.

    Delegates to ``hwpx.tools.table_navigation.fill_by_path``. The import is
    done inside the method rather than at module import time.
    """
    from .tools import table_navigation as _table_navigation

    return _table_navigation.fill_by_path(self, mappings)
|
|
774
|
+
|
|
744
775
|
def add_shape(
|
|
745
776
|
self,
|
|
746
777
|
shape_type: str,
|
|
@@ -25,6 +25,19 @@ from .text_extractor import (
|
|
|
25
25
|
describe_element_path,
|
|
26
26
|
strip_namespace,
|
|
27
27
|
)
|
|
28
|
+
from .table_navigation import (
|
|
29
|
+
TableCellReference,
|
|
30
|
+
TableFillApplied,
|
|
31
|
+
TableFillFailed,
|
|
32
|
+
TableFillResult,
|
|
33
|
+
TableLabelMatch,
|
|
34
|
+
TableLabelSearchResult,
|
|
35
|
+
TableMapEntry,
|
|
36
|
+
TableMapResult,
|
|
37
|
+
fill_by_path,
|
|
38
|
+
find_cell_by_label,
|
|
39
|
+
get_table_map,
|
|
40
|
+
)
|
|
28
41
|
from .validator import (
|
|
29
42
|
DocumentSchemas,
|
|
30
43
|
ValidationIssue,
|
|
@@ -41,6 +54,17 @@ __all__ = [
|
|
|
41
54
|
"build_parent_map",
|
|
42
55
|
"describe_element_path",
|
|
43
56
|
"strip_namespace",
|
|
57
|
+
"TableCellReference",
|
|
58
|
+
"TableFillApplied",
|
|
59
|
+
"TableFillFailed",
|
|
60
|
+
"TableFillResult",
|
|
61
|
+
"TableLabelMatch",
|
|
62
|
+
"TableLabelSearchResult",
|
|
63
|
+
"TableMapEntry",
|
|
64
|
+
"TableMapResult",
|
|
65
|
+
"fill_by_path",
|
|
66
|
+
"find_cell_by_label",
|
|
67
|
+
"get_table_map",
|
|
44
68
|
"FoundElement",
|
|
45
69
|
"ObjectFinder",
|
|
46
70
|
"PackageValidationIssue",
|
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
"""Reusable helpers for HWPX table discovery and form-like navigation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
import re
|
|
7
|
+
from typing import TYPE_CHECKING, Literal, Mapping, TypedDict
|
|
8
|
+
|
|
9
|
+
from ..oxml import HwpxOxmlParagraph, HwpxOxmlTable
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from ..document import HwpxDocument
|
|
13
|
+
|
|
14
|
+
# Names exported by ``from hwpx.tools.table_navigation import *``.
__all__ = [
    "SearchDirection",
    "PathDirection",
    "TableCellReference",
    "TableFillApplied",
    "TableFillFailed",
    "TableFillResult",
    "TableLabelMatch",
    "TableLabelSearchResult",
    "TableMapEntry",
    "TableMapResult",
    "fill_by_path",
    "find_cell_by_label",
    "get_table_map",
]

# XML namespace of the paragraph-level elements this module inspects
# (``run``, ``t`` and ``tbl``).
_HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
# Clark-notation prefix: ``f"{_HP}run"`` is the fully qualified ``run`` tag.
_HP = f"{{{_HP_NS}}}"
# Matches one or more consecutive whitespace characters.
_WHITESPACE_RE = re.compile(r"\s+")

# Directions accepted by ``find_cell_by_label``.
SearchDirection = Literal["right", "down"]
# Directions accepted as steps of a ``fill_by_path`` navigation path.
PathDirection = Literal["left", "right", "up", "down"]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TableMapEntry(TypedDict):
    """Summary of one table as reported by :func:`get_table_map`."""

    # Position of the table in the collected document order (0-based).
    table_index: int
    # Index of the top-level document paragraph the table was reached from;
    # nested tables report the same index as their outermost table.
    paragraph_index: int
    # Row and column counts as reported by the table object.
    rows: int
    cols: int
    # Text found ahead of the table (may be empty).
    header_text: str
    # Text of every cell in row 0; empty list when the table has no rows.
    first_row_preview: list[str]
    # True when no cell contains non-whitespace text.
    is_empty: bool
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class TableMapResult(TypedDict):
    """Payload returned by :func:`get_table_map`."""

    # One entry per table, in document order.
    tables: list[TableMapEntry]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TableCellReference(TypedDict):
    """Coordinates of a table cell together with the text read from it."""

    # 0-based row and column used to address the cell.
    row: int
    col: int
    # Cell text at the time the reference was built.
    text: str
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class TableLabelMatch(TypedDict):
    """One label hit: the matching cell and the neighbour reached from it."""

    # Index of the table containing both cells.
    table_index: int
    # Cell whose normalized text equals the requested label.
    label_cell: TableCellReference
    # Cell one step away from the label in the requested direction.
    target_cell: TableCellReference
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class TableLabelSearchResult(TypedDict):
    """Payload returned by :func:`find_cell_by_label`."""

    # Every label/target pair whose target stayed inside its table.
    matches: list[TableLabelMatch]
    # Always ``len(matches)``.
    count: int
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class TableFillApplied(TypedDict):
    """Record of one navigation path that was resolved and written."""

    # The path string exactly as given in the mappings.
    path: str
    # Table and cell coordinates that received the value.
    table_index: int
    row: int
    col: int
    # The text written, i.e. ``str()`` of the mapped value.
    value: str
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class TableFillFailed(TypedDict):
    """Record of one navigation path that could not be applied."""

    # The path string exactly as given in the mappings.
    path: str
    # Short human-readable explanation, e.g. "label not found".
    reason: str
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class TableFillResult(TypedDict):
    """Payload returned by :func:`fill_by_path` for a whole batch."""

    # Paths that were written, in the order they were processed.
    applied: list[TableFillApplied]
    # Paths that were rejected, each with its reason.
    failed: list[TableFillFailed]
    # Always ``len(applied)`` and ``len(failed)`` respectively.
    applied_count: int
    failed_count: int
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass(frozen=True, slots=True)
|
|
106
|
+
class _AnchoredTable:
|
|
107
|
+
table: HwpxOxmlTable
|
|
108
|
+
paragraph_index: int
|
|
109
|
+
header_text: str
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass(frozen=True, slots=True)
|
|
113
|
+
class _IndexedTable:
|
|
114
|
+
table_index: int
|
|
115
|
+
table: HwpxOxmlTable
|
|
116
|
+
paragraph_index: int
|
|
117
|
+
header_text: str
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass(frozen=True, slots=True)
|
|
121
|
+
class _LabelCandidate:
|
|
122
|
+
table_index: int
|
|
123
|
+
table: HwpxOxmlTable
|
|
124
|
+
row: int
|
|
125
|
+
col: int
|
|
126
|
+
text: str
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _collapse_whitespace(value: str) -> str:
    """Replace every whitespace run in *value* with one space and trim both ends."""
    squeezed = _WHITESPACE_RE.sub(" ", value)
    return squeezed.strip()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _normalize_label_text(value: str) -> str:
    """Return *value* in the canonical form used to compare label cells.

    Whitespace runs are collapsed, the text is case-folded, and trailing
    colons are dropped together with any spaces around them, so
    ``"Name :"`` and ``"name"`` compare equal. Both the ASCII colon and the
    fullwidth colon (U+FF1A) are treated as trailing punctuation.

    Returns an empty string when nothing but whitespace and colons remains.
    """
    normalized = _collapse_whitespace(value).casefold()
    # After collapsing, the only whitespace left is a single ASCII space, so
    # one rstrip over {":", fullwidth colon, " "} removes tails like "a : :".
    return normalized.rstrip(":\uff1a ")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _direct_paragraph_text(paragraph: HwpxOxmlParagraph) -> str:
    """Return the text held directly by *paragraph*'s runs, whitespace-collapsed.

    Only ``t`` children of the paragraph's immediate ``run`` children
    contribute; every other run child is ignored.
    """
    text_tag = f"{_HP}t"
    fragments = [
        node.text
        for run in paragraph.element.findall(f"{_HP}run")
        for node in run
        if node.tag == text_tag and node.text
    ]
    return _collapse_whitespace("".join(fragments))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _collect_tables_from_table(
    table: HwpxOxmlTable,
    *,
    anchor_paragraph_index: int,
    inherited_header_text: str,
    sink: list[_AnchoredTable],
) -> str:
    """Scan every cell paragraph of *table* for nested tables.

    Cells are visited row by row. Each paragraph is handed to
    ``_collect_tables_from_paragraph``, and the header text it returns is
    threaded into the next paragraph. Discovered tables are appended to
    *sink*.

    Returns the header text in effect after the last paragraph, or
    *inherited_header_text* when the table has no cell paragraphs.
    """
    header_text = inherited_header_text
    cell_paragraphs = (
        paragraph
        for row in table.rows
        for cell in row.cells
        for paragraph in cell.paragraphs
    )
    for paragraph in cell_paragraphs:
        header_text = _collect_tables_from_paragraph(
            paragraph,
            anchor_paragraph_index=anchor_paragraph_index,
            inherited_header_text=header_text,
            sink=sink,
        )
    return header_text
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _collect_tables_from_paragraph(
    paragraph: HwpxOxmlParagraph,
    *,
    anchor_paragraph_index: int,
    inherited_header_text: str,
    sink: list[_AnchoredTable],
) -> str:
    """Record every table inside *paragraph*, depth first, into *sink*.

    Run children are walked in order. Text nodes are accumulated. Each
    ``tbl`` node becomes an ``_AnchoredTable`` whose header is the paragraph
    text seen before it, or the inherited header when there is none. Its
    cells are then searched for nested tables.

    Returns the paragraph's own collapsed text when non-empty, otherwise the
    header text in effect after the last nested table.
    """
    text_tag = f"{_HP}t"
    table_tag = f"{_HP}tbl"
    seen_text: list[str] = []
    current_header = inherited_header_text

    for run in paragraph.element.findall(f"{_HP}run"):
        for node in run:
            if node.tag == text_tag:
                if node.text:
                    seen_text.append(node.text)
            elif node.tag == table_tag:
                # Only text that precedes the table within this paragraph
                # counts as its header.
                prefix = _collapse_whitespace("".join(seen_text))
                table_header = prefix or current_header
                nested = HwpxOxmlTable(node, paragraph)
                sink.append(
                    _AnchoredTable(
                        table=nested,
                        paragraph_index=anchor_paragraph_index,
                        header_text=table_header,
                    )
                )
                current_header = _collect_tables_from_table(
                    nested,
                    anchor_paragraph_index=anchor_paragraph_index,
                    inherited_header_text=table_header,
                    sink=sink,
                )

    return _collapse_whitespace("".join(seen_text)) or current_header
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _collect_document_tables(document: HwpxDocument) -> list[_IndexedTable]:
    """Return every table in *document*, nested ones included, in visit order.

    Each top-level paragraph is scanned with the most recent non-empty
    top-level paragraph text as its inherited header. Tables are then
    numbered from 0 in the order they were discovered.
    """
    collected: list[_AnchoredTable] = []
    preceding_text = ""

    for index, paragraph in enumerate(document.paragraphs):
        _collect_tables_from_paragraph(
            paragraph,
            anchor_paragraph_index=index,
            inherited_header_text=preceding_text,
            sink=collected,
        )
        # The header carried forward is this paragraph's direct text, not
        # the value returned by the scan above.
        own_text = _direct_paragraph_text(paragraph)
        if own_text:
            preceding_text = own_text

    indexed: list[_IndexedTable] = []
    for position, anchored in enumerate(collected):
        indexed.append(
            _IndexedTable(
                table_index=position,
                table=anchored.table,
                paragraph_index=anchored.paragraph_index,
                header_text=anchored.header_text,
            )
        )
    return indexed
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _cell_text(table: HwpxOxmlTable, row_index: int, col_index: int) -> str:
    """Return the ``text`` of the cell that *table* yields for the given position."""
    cell = table.cell(row_index, col_index)
    return cell.text
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _table_is_empty(table: HwpxOxmlTable) -> bool:
    """Return True when no cell of *table* holds non-whitespace text.

    A table with zero rows or zero columns counts as empty.
    """
    return not any(
        _cell_text(table, row_index, col_index).strip()
        for row_index in range(table.row_count)
        for col_index in range(table.column_count)
    )
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _first_row_preview(table: HwpxOxmlTable) -> list[str]:
    """Return the text of every cell in row 0, or ``[]`` for a table with no rows."""
    preview: list[str] = []
    if table.row_count != 0:
        for col_index in range(table.column_count):
            preview.append(_cell_text(table, 0, col_index))
    return preview
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _direction_delta(direction: PathDirection) -> tuple[int, int]:
    """Return the ``(row_delta, col_delta)`` step for one move in *direction*.

    Args:
        direction: One of ``"left"``, ``"right"``, ``"up"`` or ``"down"``.

    Raises:
        ValueError: If *direction* is not one of the four supported names.
            Unknown values used to be treated as ``"up"``, which hid caller
            mistakes.
    """
    deltas = {
        "right": (0, 1),
        "left": (0, -1),
        "down": (1, 0),
        "up": (-1, 0),
    }
    try:
        return deltas[direction]
    except KeyError:
        raise ValueError(f"unsupported direction: {direction!r}") from None
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _move(
    table: HwpxOxmlTable,
    row_index: int,
    col_index: int,
    direction: PathDirection,
) -> tuple[int, int] | None:
    """Return the position one step from ``(row_index, col_index)`` in *direction*.

    Returns ``None`` when the step would leave the grid described by
    ``table.row_count`` and ``table.column_count``.
    """
    step_row, step_col = _direction_delta(direction)
    row = row_index + step_row
    col = col_index + step_col
    inside = 0 <= row < table.row_count and 0 <= col < table.column_count
    return (row, col) if inside else None
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _find_label_candidates(
    tables: list[_IndexedTable],
    label_text: str,
) -> list[_LabelCandidate]:
    """Return every cell in *tables* whose normalized text equals the label.

    Both sides are compared after ``_normalize_label_text``. Tables are
    scanned in the given order, row by row.

    Raises:
        ValueError: If *label_text* normalizes to an empty string.
    """
    wanted = _normalize_label_text(label_text)
    if not wanted:
        raise ValueError("label_text must contain at least one non-whitespace character")

    found: list[_LabelCandidate] = []
    for entry in tables:
        grid = entry.table
        # NOTE(review): every (row, col) position is probed; if ``cell()``
        # maps several positions of a merged cell to the same cell, one label
        # would be reported more than once. Confirm against HwpxOxmlTable.
        for row_index in range(grid.row_count):
            for col_index in range(grid.column_count):
                text = _cell_text(grid, row_index, col_index)
                if _normalize_label_text(text) == wanted:
                    found.append(
                        _LabelCandidate(
                            table_index=entry.table_index,
                            table=grid,
                            row=row_index,
                            col=col_index,
                            text=text,
                        )
                    )
    return found
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _cell_reference(
    table: HwpxOxmlTable,
    row_index: int,
    col_index: int,
) -> TableCellReference:
    """Build a ``TableCellReference`` for the given position with its current text."""
    return TableCellReference(
        row=row_index,
        col=col_index,
        text=_cell_text(table, row_index, col_index),
    )
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _parse_path(path: str) -> tuple[str | None, list[str], str | None]:
    """Split a ``label > direction > ...`` path into its parts.

    Returns ``(label, directions, error)``. On success ``error`` is ``None``.
    When the label is missing, the result is ``(None, [], message)``. When no
    direction follows the label, the result is ``(label, [], message)``.
    Segments are stripped and empty direction segments are dropped.
    Directions are not validated here.
    """
    label_text, *rest = (segment.strip() for segment in path.split(">"))
    if not label_text:
        return (None, [], "path must start with a label")

    directions = [segment for segment in rest if segment]
    if directions:
        return (label_text, directions, None)
    return (label_text, [], "path must include at least one direction")
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def get_table_map(document: HwpxDocument) -> TableMapResult:
    """Return compact metadata for every table in document order.

    Nested tables are included. Each entry carries the table's index, its
    anchoring paragraph index, its dimensions, its header text, the text of
    its first row and whether all of its cells are blank.
    """
    entries: list[TableMapEntry] = [
        {
            "table_index": entry.table_index,
            "paragraph_index": entry.paragraph_index,
            "rows": entry.table.row_count,
            "cols": entry.table.column_count,
            "header_text": entry.header_text,
            "first_row_preview": _first_row_preview(entry.table),
            "is_empty": _table_is_empty(entry.table),
        }
        for entry in _collect_document_tables(document)
    ]
    return {"tables": entries}
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def find_cell_by_label(
    document: HwpxDocument,
    label_text: str,
    direction: SearchDirection = "right",
) -> TableLabelSearchResult:
    """Find label cells and return the adjacent target cells that remain in bounds.

    Every table in *document* is searched for cells whose normalized text
    equals *label_text*. Labels whose neighbour in *direction* would fall
    outside the table are left out of the result.

    Raises:
        ValueError: If *direction* is not ``"right"`` or ``"down"``, or if
            *label_text* normalizes to an empty string.
    """
    if direction not in {"right", "down"}:
        raise ValueError("direction must be one of: right, down")

    candidates = _find_label_candidates(_collect_document_tables(document), label_text)

    matches: list[TableLabelMatch] = []
    for candidate in candidates:
        target = _move(candidate.table, candidate.row, candidate.col, direction)
        if target is not None:
            target_row, target_col = target
            label_cell: TableCellReference = {
                "row": candidate.row,
                "col": candidate.col,
                "text": candidate.text,
            }
            matches.append(
                {
                    "table_index": candidate.table_index,
                    "label_cell": label_cell,
                    "target_cell": _cell_reference(candidate.table, target_row, target_col),
                }
            )

    return {"matches": matches, "count": len(matches)}
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def fill_by_path(
    document: HwpxDocument,
    mappings: Mapping[str, str],
) -> TableFillResult:
    """Fill multiple table cells using label-based navigation paths.

    Each key of *mappings* is a ``label > direction > ...`` path. The label
    must match exactly one cell across all tables. The directions
    (case-insensitive ``left``/``right``/``up``/``down``) are then walked from
    that cell, and the mapped value is written as text into the cell reached.
    A path that cannot be applied is reported in ``failed`` with a reason and
    does not stop the batch.
    """
    # Tables are collected once; cell text is still read live for each path.
    indexed_tables = _collect_document_tables(document)
    applied: list[TableFillApplied] = []
    failed: list[TableFillFailed] = []

    for path, value in mappings.items():
        label_text, raw_directions, path_error = _parse_path(path)
        if path_error is not None or label_text is None:
            failed.append({"path": path, "reason": path_error or "invalid path"})
            continue

        try:
            candidates = _find_label_candidates(indexed_tables, label_text)
        except ValueError as exc:
            failed.append({"path": path, "reason": str(exc)})
            continue

        if len(candidates) != 1:
            reason = "ambiguous label" if candidates else "label not found"
            failed.append({"path": path, "reason": reason})
            continue

        (candidate,) = candidates
        row, col = candidate.row, candidate.col

        for raw_direction in raw_directions:
            direction = raw_direction.casefold()
            if direction not in {"left", "right", "up", "down"}:
                failed.append(
                    {
                        "path": path,
                        "reason": f"unsupported direction: {raw_direction}",
                    }
                )
                break

            step = _move(candidate.table, row, col, direction)
            if step is None:
                failed.append({"path": path, "reason": "navigation out of bounds"})
                break

            row, col = step
        else:
            # Reached only when every step succeeded (no ``break`` above).
            text_value = str(value)
            candidate.table.set_cell_text(row, col, text_value, logical=True)
            applied.append(
                {
                    "path": path,
                    "table_index": candidate.table_index,
                    "row": row,
                    "col": col,
                    "value": text_value,
                }
            )

    return {
        "applied": applied,
        "failed": failed,
        "applied_count": len(applied),
        "failed_count": len(failed),
    }
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-hwpx
|
|
3
|
-
Version: 2.8.3
|
|
3
|
+
Version: 2.9.0
|
|
4
4
|
Summary: Hancom HWPX 패키지를 로드하고 편집하기 위한 Python 유틸리티 모음
|
|
5
5
|
Author: python-hwpx Maintainers
|
|
6
6
|
License-Expression: LicenseRef-python-hwpx-NonCommercial
|
|
@@ -121,6 +121,7 @@ doc.save_to_path("결과물.hwpx")
|
|
|
121
121
|
| 📝 **단락** | 추가/삭제/편집/서식 | 텍스트 설정, 단락 삭제(`remove_paragraph`), 스타일 참조 |
|
|
122
122
|
| ✏️ **Run** | 텍스트 조각 | 추가, 교체, 볼드/이탤릭/밑줄/색상 서식 |
|
|
123
123
|
| 📊 **표(Table)** | 생성/편집/병합 | N×M 표 생성, 셀 텍스트, 셀 병합/분할, 중첩 테이블 |
|
|
124
|
+
| 🧭 **표 자동화** | 탐색/채우기 | 테이블 맵, 라벨 기반 셀 탐색, 경로 기반 배치 채우기 |
|
|
124
125
|
| 📑 **섹션** | 추가/삭제 | `add_section(after=)`, `remove_section()`, manifest 자동 관리 |
|
|
125
126
|
| 🖼️ **이미지** | 임베드/삭제 | 바이너리 데이터 관리, manifest 자동 등록 |
|
|
126
127
|
| ✏️ **도형** | 선/사각형/타원 | OWPML 명세 준수 도형 삽입 |
|
|
@@ -161,6 +162,17 @@ doc.set_footer_text("1 / 10", page_type="BOTH")
|
|
|
161
162
|
# 표 셀 병합·분할
|
|
162
163
|
table.merge_cells(0, 0, 1, 1) # (0,0)~(1,1) 병합
|
|
163
164
|
table.set_cell_text(0, 0, "병합된 셀", logical=True, split_merged=True)
|
|
165
|
+
|
|
166
|
+
# 양식형 표 자동 채우기
|
|
167
|
+
form = doc.add_table(2, 2)
|
|
168
|
+
form.cell(0, 0).text = "성명:"
|
|
169
|
+
form.cell(1, 0).text = "소속"
|
|
170
|
+
|
|
171
|
+
doc.find_cell_by_label("성명") # {"matches": [...], "count": 1}
|
|
172
|
+
doc.fill_by_path({
|
|
173
|
+
"성명 > right": "홍길동",
|
|
174
|
+
"소속 > right": "플랫폼팀",
|
|
175
|
+
})
|
|
164
176
|
```
|
|
165
177
|
|
|
166
178
|
### 🔍 텍스트 추출 & 검색
|
|
@@ -30,6 +30,7 @@ src/hwpx/tools/exporter.py
|
|
|
30
30
|
src/hwpx/tools/object_finder.py
|
|
31
31
|
src/hwpx/tools/package_validator.py
|
|
32
32
|
src/hwpx/tools/page_guard.py
|
|
33
|
+
src/hwpx/tools/table_navigation.py
|
|
33
34
|
src/hwpx/tools/template_analyzer.py
|
|
34
35
|
src/hwpx/tools/text_extract_cli.py
|
|
35
36
|
src/hwpx/tools/text_extractor.py
|
|
@@ -60,6 +61,7 @@ tests/test_paragraph_section_management.py
|
|
|
60
61
|
tests/test_repr_snapshots.py
|
|
61
62
|
tests/test_section_headers.py
|
|
62
63
|
tests/test_split_merged_cell.py
|
|
64
|
+
tests/test_table_navigation.py
|
|
63
65
|
tests/test_tables_default_border.py
|
|
64
66
|
tests/test_text_extractor_annotations.py
|
|
65
67
|
tests/test_version_metadata.py
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from hwpx import HwpxDocument
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _paragraph_index(document: HwpxDocument, target) -> int:
    """Return the position of *target* within ``document.paragraphs``.

    Paragraphs are matched by identity of their ``element`` attribute.

    Raises:
        AssertionError: If no paragraph shares *target*'s element.
    """
    for position, candidate in enumerate(document.paragraphs):
        if candidate.element is not target.element:
            continue
        return position
    raise AssertionError("target paragraph was not found in document order")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_fill_by_path_handles_unique_labels_in_a_single_table() -> None:
    """Two distinct labels in one table are each resolved and filled to their right."""
    doc = HwpxDocument.new()
    doc.add_paragraph("1. 기본 현황")
    form = doc.add_table(2, 2)
    form.cell(0, 0).text = "성명:"
    form.cell(1, 0).text = "소속"

    requested = {
        "성명 > right": "홍길동",
        "소속 > right": "플랫폼팀",
    }
    outcome = doc.fill_by_path(requested)

    assert outcome["applied_count"] == 2
    assert outcome["failed_count"] == 0
    assert form.cell(0, 1).text == "홍길동"
    assert form.cell(1, 1).text == "플랫폼팀"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_find_cell_by_label_normalizes_trailing_colons() -> None:
|
|
34
|
+
document = HwpxDocument.new()
|
|
35
|
+
document.add_paragraph("기본 정보")
|
|
36
|
+
table = document.add_table(1, 2)
|
|
37
|
+
table.cell(0, 0).text = "성명:"
|
|
38
|
+
|
|
39
|
+
result = document.find_cell_by_label("성명")
|
|
40
|
+
|
|
41
|
+
assert result["count"] == 1
|
|
42
|
+
assert result["matches"][0]["table_index"] == 0
|
|
43
|
+
assert result["matches"][0]["label_cell"] == {
|
|
44
|
+
"row": 0,
|
|
45
|
+
"col": 0,
|
|
46
|
+
"text": "성명:",
|
|
47
|
+
}
|
|
48
|
+
assert result["matches"][0]["target_cell"] == {
|
|
49
|
+
"row": 0,
|
|
50
|
+
"col": 1,
|
|
51
|
+
"text": "",
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_multiple_tables_with_the_same_label_return_all_matches_and_make_fill_ambiguous() -> None:
|
|
56
|
+
document = HwpxDocument.new()
|
|
57
|
+
document.add_paragraph("1. 신청인")
|
|
58
|
+
first = document.add_table(1, 2)
|
|
59
|
+
first.cell(0, 0).text = "성명"
|
|
60
|
+
|
|
61
|
+
document.add_paragraph("2. 보호자")
|
|
62
|
+
second = document.add_table(1, 2)
|
|
63
|
+
second.cell(0, 0).text = "성명"
|
|
64
|
+
|
|
65
|
+
matches = document.find_cell_by_label("성명")
|
|
66
|
+
fill_result = document.fill_by_path({"성명 > right": "홍길동"})
|
|
67
|
+
|
|
68
|
+
assert matches["count"] == 2
|
|
69
|
+
assert [match["table_index"] for match in matches["matches"]] == [0, 1]
|
|
70
|
+
assert fill_result["applied_count"] == 0
|
|
71
|
+
assert fill_result["failed_count"] == 1
|
|
72
|
+
assert fill_result["failed"][0] == {
|
|
73
|
+
"path": "성명 > right",
|
|
74
|
+
"reason": "ambiguous label",
|
|
75
|
+
}
|
|
76
|
+
assert first.cell(0, 1).text == ""
|
|
77
|
+
assert second.cell(0, 1).text == ""
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def test_out_of_bounds_candidates_are_skipped_and_reported_for_batch_fill() -> None:
|
|
81
|
+
document = HwpxDocument.new()
|
|
82
|
+
document.add_paragraph("경계 값")
|
|
83
|
+
table = document.add_table(2, 2)
|
|
84
|
+
table.cell(0, 1).text = "마지막열"
|
|
85
|
+
table.cell(1, 0).text = "마지막행"
|
|
86
|
+
|
|
87
|
+
right_matches = document.find_cell_by_label("마지막열", direction="right")
|
|
88
|
+
down_matches = document.find_cell_by_label("마지막행", direction="down")
|
|
89
|
+
fill_result = document.fill_by_path(
|
|
90
|
+
{
|
|
91
|
+
"마지막열 > right": "실패",
|
|
92
|
+
"마지막행 > down": "실패",
|
|
93
|
+
}
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
assert right_matches["count"] == 0
|
|
97
|
+
assert down_matches["count"] == 0
|
|
98
|
+
assert fill_result["applied_count"] == 0
|
|
99
|
+
assert fill_result["failed_count"] == 2
|
|
100
|
+
assert fill_result["failed"] == [
|
|
101
|
+
{"path": "마지막열 > right", "reason": "navigation out of bounds"},
|
|
102
|
+
{"path": "마지막행 > down", "reason": "navigation out of bounds"},
|
|
103
|
+
]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_fill_by_path_supports_multi_step_navigation() -> None:
|
|
107
|
+
document = HwpxDocument.new()
|
|
108
|
+
document.add_paragraph("정산표")
|
|
109
|
+
table = document.add_table(3, 2)
|
|
110
|
+
table.cell(0, 0).text = "합계"
|
|
111
|
+
|
|
112
|
+
result = document.fill_by_path({"합계 > down > right": "100"})
|
|
113
|
+
|
|
114
|
+
assert result["applied"] == [
|
|
115
|
+
{
|
|
116
|
+
"path": "합계 > down > right",
|
|
117
|
+
"table_index": 0,
|
|
118
|
+
"row": 1,
|
|
119
|
+
"col": 1,
|
|
120
|
+
"value": "100",
|
|
121
|
+
}
|
|
122
|
+
]
|
|
123
|
+
assert result["failed"] == []
|
|
124
|
+
assert table.cell(1, 1).text == "100"
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def test_get_table_map_reports_stable_order_shape_and_header_text() -> None:
|
|
128
|
+
document = HwpxDocument.new()
|
|
129
|
+
document.add_paragraph("1. 기본 현황")
|
|
130
|
+
first = document.add_table(2, 4)
|
|
131
|
+
first.cell(0, 0).text = "성명"
|
|
132
|
+
first.cell(0, 1).text = "소속"
|
|
133
|
+
first.cell(0, 2).text = "직위"
|
|
134
|
+
first.cell(0, 3).text = "연락처"
|
|
135
|
+
first.cell(1, 0).text = "홍길동"
|
|
136
|
+
|
|
137
|
+
document.add_paragraph("2. 비고")
|
|
138
|
+
second = document.add_table(1, 2)
|
|
139
|
+
second.cell(0, 0).text = "항목"
|
|
140
|
+
second.cell(0, 1).text = "값"
|
|
141
|
+
|
|
142
|
+
result = document.get_table_map()
|
|
143
|
+
|
|
144
|
+
assert result["tables"] == [
|
|
145
|
+
{
|
|
146
|
+
"table_index": 0,
|
|
147
|
+
"paragraph_index": _paragraph_index(document, first.paragraph),
|
|
148
|
+
"rows": 2,
|
|
149
|
+
"cols": 4,
|
|
150
|
+
"header_text": "1. 기본 현황",
|
|
151
|
+
"first_row_preview": ["성명", "소속", "직위", "연락처"],
|
|
152
|
+
"is_empty": False,
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
"table_index": 1,
|
|
156
|
+
"paragraph_index": _paragraph_index(document, second.paragraph),
|
|
157
|
+
"rows": 1,
|
|
158
|
+
"cols": 2,
|
|
159
|
+
"header_text": "2. 비고",
|
|
160
|
+
"first_row_preview": ["항목", "값"],
|
|
161
|
+
"is_empty": False,
|
|
162
|
+
},
|
|
163
|
+
]
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def test_get_table_map_marks_tables_with_only_empty_strings_as_empty() -> None:
|
|
167
|
+
document = HwpxDocument.new()
|
|
168
|
+
document.add_paragraph("빈 표")
|
|
169
|
+
table = document.add_table(2, 2)
|
|
170
|
+
|
|
171
|
+
result = document.get_table_map()
|
|
172
|
+
|
|
173
|
+
assert result["tables"] == [
|
|
174
|
+
{
|
|
175
|
+
"table_index": 0,
|
|
176
|
+
"paragraph_index": _paragraph_index(document, table.paragraph),
|
|
177
|
+
"rows": 2,
|
|
178
|
+
"cols": 2,
|
|
179
|
+
"header_text": "빈 표",
|
|
180
|
+
"first_row_preview": ["", ""],
|
|
181
|
+
"is_empty": True,
|
|
182
|
+
}
|
|
183
|
+
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|