PyPI - python-hwpx - Versions diffs - 2.8.3__py3-none-any.whl → 2.9.1__py3-none-any.whl - Mend

python-hwpx 2.8.3__py3-none-any.whl → 2.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

hwpx/__init__.py +6 -1
hwpx/data/Skeleton.hwpx +0 -0
hwpx/document.py +33 -1
hwpx/opc/package.py +1 -0
hwpx/opc/relationships.py +1 -0
hwpx/opc/xml_utils.py +1 -0
hwpx/oxml/__init__.py +1 -0
hwpx/oxml/body.py +23 -1
hwpx/oxml/common.py +1 -0
hwpx/oxml/document.py +150 -29
hwpx/oxml/header.py +1 -0
hwpx/oxml/header_part.py +1 -0
hwpx/oxml/memo.py +1 -0
hwpx/oxml/namespaces.py +1 -0
hwpx/oxml/paragraph.py +1 -0
hwpx/oxml/parser.py +1 -0
hwpx/oxml/schema.py +1 -0
hwpx/oxml/section.py +1 -0
hwpx/oxml/table.py +1 -0
hwpx/oxml/utils.py +1 -0
hwpx/package.py +1 -0
hwpx/templates.py +1 -0
hwpx/tools/__init__.py +25 -0
hwpx/tools/archive_cli.py +1 -0
hwpx/tools/exporter.py +43 -146
hwpx/tools/object_finder.py +1 -0
hwpx/tools/package_validator.py +1 -0
hwpx/tools/page_guard.py +1 -0
hwpx/tools/table_navigation.py +458 -0
hwpx/tools/template_analyzer.py +1 -0
hwpx/tools/text_extract_cli.py +1 -0
hwpx/tools/text_extractor.py +5 -1
hwpx/tools/validator.py +1 -0
{python_hwpx-2.8.3.dist-info → python_hwpx-2.9.1.dist-info}/METADATA +138 -80
python_hwpx-2.9.1.dist-info/RECORD +43 -0
python_hwpx-2.9.1.dist-info/licenses/LICENSE +178 -0
python_hwpx-2.9.1.dist-info/licenses/NOTICE +14 -0
python_hwpx-2.8.3.dist-info/RECORD +0 -41
python_hwpx-2.8.3.dist-info/licenses/LICENSE +0 -32
{python_hwpx-2.8.3.dist-info → python_hwpx-2.9.1.dist-info}/WHEEL +0 -0
{python_hwpx-2.8.3.dist-info → python_hwpx-2.9.1.dist-info}/entry_points.txt +0 -0
{python_hwpx-2.8.3.dist-info → python_hwpx-2.9.1.dist-info}/top_level.txt +0 -0

hwpx/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """High-level utilities for working with HWPX documents."""
 from importlib.metadata import PackageNotFoundError, version as _metadata_version
@@ -10,8 +11,12 @@ def _resolve_version() -> str:
     except PackageNotFoundError:
         return "0+unknown"
+def __getattr__(name: str) -> object:
+    """Resolve dynamic module attributes."""
-__version__ = _resolve_version()
+    if name == "__version__":
+        return _resolve_version()
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 from .tools.text_extractor import (
     DEFAULT_NAMESPACES,

hwpx/data/Skeleton.hwpx CHANGED Viewed

Binary file

hwpx/document.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """High-level representation of an HWPX document."""
 from __future__ import annotations
@@ -10,7 +11,7 @@ import logging
 import uuid
 from os import PathLike
-from typing import Any, BinaryIO, Iterator, Sequence, overload
+from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
 from lxml import etree
@@ -53,6 +54,9 @@ _HH = f"{{{_HH_NS}}}"
 logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    from .tools.table_navigation import TableFillResult, TableLabelSearchResult, TableMapResult
 def _append_element(
     parent: Any,
@@ -741,6 +745,34 @@ class HwpxDocument:
             char_pr_id_ref=char_pr_id_ref,
         )
+    def get_table_map(self) -> TableMapResult:
+        """Return compact metadata for every table in document order."""
+        from .tools.table_navigation import get_table_map
+        return get_table_map(self)
+    def find_cell_by_label(
+        self,
+        label_text: str,
+        direction: str = "right",
+    ) -> TableLabelSearchResult:
+        """Return every label/target cell pair that matches *label_text*."""
+        from .tools.table_navigation import find_cell_by_label
+        return find_cell_by_label(self, label_text, direction=direction)
+    def fill_by_path(
+        self,
+        mappings: Mapping[str, str],
+    ) -> TableFillResult:
+        """Fill table cells using ``label > direction > ...`` navigation paths."""
+        from .tools.table_navigation import fill_by_path
+        return fill_by_path(self, mappings)
     def add_shape(
         self,
         shape_type: str,

hwpx/opc/package.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Utilities for reading and writing HWPX OPC packages."""
 from __future__ import annotations

hwpx/opc/relationships.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Helpers for resolving HWPX container and manifest relationships."""
 from __future__ import annotations

hwpx/opc/xml_utils.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """XML 파싱/직렬화를 위한 OPC 공통 유틸리티."""
 from __future__ import annotations

hwpx/oxml/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Open XML helpers for the HWPX document format."""

hwpx/oxml/body.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 import logging
@@ -43,7 +44,7 @@ _TRACK_CHANGE_MARK_NAMES = {
 }
 InlineMark = Union[GenericElement, "TrackChangeMark"]
-RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan"]
+RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan", "Tab"]
 ParagraphChild = Union["Run", GenericElement]
@@ -105,6 +106,12 @@ class InlineObject:
     children: List[GenericElement] = field(default_factory=list)
+@dataclass(slots=True)
+class Tab:
+    tag: str
+    attributes: Dict[str, str] = field(default_factory=dict)
 @dataclass(slots=True)
 class Table:
     tag: str
@@ -120,6 +127,7 @@ class Run:
     controls: List[Control] = field(default_factory=list)
     tables: List[Table] = field(default_factory=list)
     inline_objects: List[InlineObject] = field(default_factory=list)
+    tabs: List[Tab] = field(default_factory=list)
     text_spans: List[TextSpan] = field(default_factory=list)
     other_children: List[GenericElement] = field(default_factory=list)
     attributes: Dict[str, str] = field(default_factory=dict)
@@ -227,6 +235,10 @@ def parse_table_element(node: etree._Element) -> Table:
     )
+def parse_tab_element(node: etree._Element) -> Tab:
+    return Tab(tag=node.tag, attributes={key: value for key, value in node.attrib.items()})
 def parse_run_element(node: etree._Element) -> Run:
     attributes = {key: value for key, value in node.attrib.items()}
     char_pr_id_ref = parse_int(attributes.pop("charPrIDRef", None))
@@ -247,6 +259,10 @@ def parse_run_element(node: etree._Element) -> Run:
             span = parse_text_span(child)
             run.text_spans.append(span)
             run.content.append(span)
+        elif name == "tab":
+            tab = parse_tab_element(child)
+            run.tabs.append(tab)
+            run.content.append(tab)
         elif name == "tbl":
             table = parse_table_element(child)
             run.tables.append(table)
@@ -342,6 +358,10 @@ def _text_span_to_xml(span: TextSpan) -> etree._Element:
     return node
+def _tab_to_xml(tab: Tab) -> etree._Element:
+    return etree.Element(_qualified_tag(tab.tag, "tab"), dict(tab.attributes))
 def _control_to_xml(control: Control) -> etree._Element:
     attrs = dict(control.attributes)
     if control.control_type is not None:
@@ -376,6 +396,8 @@ def serialize_run(run: Run) -> etree._Element:
             node.append(_text_span_to_xml(child))
         elif isinstance(child, Control):
             node.append(_control_to_xml(child))
+        elif isinstance(child, Tab):
+            node.append(_tab_to_xml(child))
         elif isinstance(child, Table):
             node.append(_table_to_xml(child))
         elif isinstance(child, InlineObject):

hwpx/oxml/common.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 import logging

hwpx/oxml/document.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Object model mapping for the XML parts of an HWPX document."""
 from __future__ import annotations
@@ -108,17 +109,56 @@ def _serialize_xml(element: ET.Element) -> bytes:
 def _paragraph_id() -> str:
     """Generate an identifier for a new paragraph element."""
-    return str(uuid4().int & 0xFFFFFFFF)
+    return str(uuid4().int & 0x7FFFFFFF)
 def _object_id() -> str:
     """Generate an identifier suitable for table and shape objects."""
-    return str(uuid4().int & 0xFFFFFFFF)
+    return str(uuid4().int & 0x7FFFFFFF)
 def _memo_id() -> str:
     """Generate a lightweight identifier for memo elements."""
-    return str(uuid4().int & 0xFFFFFFFF)
+    return str(uuid4().int & 0x7FFFFFFF)
+def _refresh_copied_paragraph_subtree_ids(paragraph: ET.Element) -> None:
+    """Assign fresh local identifiers inside a copied paragraph subtree.
+    This is intentionally narrow: it refreshes paragraph ids for the copied
+    paragraph and any nested paragraphs (for example inside table cells), plus
+    common object identifiers used by tables/shapes/notes. Reference-style
+    attributes such as ``borderFillIDRef`` are left untouched.
+    """
+    for node in paragraph.iter():
+        if node.tag == f"{_HP}p":
+            node.set("id", _paragraph_id())
+            continue
+        if "id" in node.attrib and node.tag in {
+            f"{_HP}tbl",
+            f"{_HP}pic",
+            f"{_HP}container",
+            f"{_HP}ole",
+            f"{_HP}equation",
+            f"{_HP}textart",
+            f"{_HP}video",
+            f"{_HP}header",
+            f"{_HP}footer",
+        }:
+            node.set("id", _object_id())
+        if "instId" in node.attrib:
+            node.set("instId", _object_id())
+def _clone_paragraph_element(paragraph: ET.Element) -> ET.Element:
+    """Return a deep-copied paragraph element with refreshed local ids."""
+    cloned = deepcopy(paragraph)
+    _refresh_copied_paragraph_subtree_ids(cloned)
+    return cloned
 def _create_paragraph_element(
@@ -154,9 +194,7 @@ def _create_paragraph_element(
     run = paragraph.makeelement(f"{_HP}run", run_attrs)
     paragraph.append(run)
-    text_element = run.makeelement(f"{_HP}t", {})
-    run.append(text_element)
-    text_element.text = text
+    _append_text_with_tabs(run, text)
     return paragraph
@@ -192,6 +230,20 @@ def _append_child(
     return child
+def _is_tab_control_element(node: ET.Element) -> bool:
+    return node.tag == f"{_HP}ctrl" and (node.get("id") or "").lower() == "tab"
+def _append_text_with_tabs(run: ET.Element, value: str) -> None:
+    segments = value.split("\t")
+    for index, segment in enumerate(segments):
+        text_element = run.makeelement(f"{_HP}t", {})
+        text_element.text = _sanitize_text(segment)
+        run.append(text_element)
+        if index < len(segments) - 1:
+            run.append(run.makeelement(f"{_HP}tab", {}))
 def _normalize_length(value: str | None) -> str:
     if value is None:
         return ""
@@ -1979,17 +2031,21 @@ class HwpxOxmlTableCell:
     def _ensure_text_element(self) -> ET.Element:
         sublist = self.element.find(f"{_HP}subList")
         if sublist is None:
-            sublist = ET.SubElement(self.element, f"{_HP}subList", _default_sublist_attributes())
+            sublist = _append_child(
+                self.element, f"{_HP}subList", _default_sublist_attributes()
+            )
         paragraph = sublist.find(f"{_HP}p")
         if paragraph is None:
-            paragraph = ET.SubElement(sublist, f"{_HP}p", _default_cell_paragraph_attributes())
+            paragraph = _append_child(
+                sublist, f"{_HP}p", _default_cell_paragraph_attributes()
+            )
         _clear_paragraph_layout_cache(paragraph)
         run = paragraph.find(f"{_HP}run")
         if run is None:
-            run = ET.SubElement(paragraph, f"{_HP}run", {"charPrIDRef": "0"})
+            run = _append_child(paragraph, f"{_HP}run", {"charPrIDRef": "0"})
         text = run.find(f"{_HP}t")
         if text is None:
-            text = ET.SubElement(run, f"{_HP}t")
+            text = _append_child(run, f"{_HP}t")
         return text
     @property
@@ -2097,9 +2153,7 @@ class HwpxOxmlTableCell:
             run_attrs["charPrIDRef"] = "0"
         run = _append_child(paragraph, f"{_HP}run", run_attrs)
-        t = run.makeelement(f"{_HP}t", {})
-        t.text = _sanitize_text(text)
-        run.append(t)
+        _append_text_with_tabs(run, text)
         self.table.mark_dirty()
         section = self.table.paragraph.section
@@ -2762,9 +2816,13 @@ class HwpxOxmlParagraph:
     def text(self) -> str:
         """Return the concatenated textual content of this paragraph."""
         texts: list[str] = []
-        for text_element in self.element.findall(f".//{_HP}t"):
-            if text_element.text:
-                texts.append(text_element.text)
+        for run in self._run_elements():
+            for child in run:
+                if child.tag == f"{_HP}t":
+                    if child.text:
+                        texts.append(child.text)
+                elif child.tag == f"{_HP}tab" or _is_tab_control_element(child):
+                    texts.append("\t")
         return "".join(texts)
     @text.setter
@@ -2780,10 +2838,10 @@ class HwpxOxmlParagraph:
         # Identify first run — its charPrIDRef will be kept.
         first_run = self._ensure_run()
-        # Remove <hp:t> from ALL runs.
+        # Remove existing text/tab nodes from all runs.
         for run in runs:
             for child in list(run):
-                if child.tag == f"{_HP}t":
+                if child.tag == f"{_HP}t" or child.tag == f"{_HP}tab" or _is_tab_control_element(child):
                     run.remove(child)
         # Remove non-first runs that are now empty (only had text).
@@ -2794,10 +2852,8 @@ class HwpxOxmlParagraph:
             if len(list(run)) == 0:
                 self.element.remove(run)
-        # Write the new text into the first run.
-        text_element = first_run.makeelement(f"{_HP}t", {})
-        text_element.text = _sanitize_text(value)
-        first_run.append(text_element)
+        # Write the new text into the first run, preserving tabs as <hp:tab/>.
+        _append_text_with_tabs(first_run, value)
         _clear_paragraph_layout_cache(self.element)
         self.section.mark_dirty()
@@ -3668,14 +3724,43 @@ class HwpxOxmlSection:
             run = paragraph.makeelement(f"{_HP}run", run_attrs)
             paragraph.append(run)
-            text_element = run.makeelement(f"{_HP}t", {})
-            text_element.text = text
-            run.append(text_element)
+            _append_text_with_tabs(run, text)
         self._element.append(paragraph)
         self._dirty = True
         return HwpxOxmlParagraph(paragraph, self)
+    def insert_paragraphs(
+        self,
+        index: int,
+        paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
+    ) -> list[HwpxOxmlParagraph]:
+        """Insert paragraph copies at *index* and return wrappers for them."""
+        existing = self.paragraphs
+        if index < 0 or index > len(existing):
+            raise IndexError(f"단락 인덱스 {index}이(가) 범위를 벗어났습니다 (총 {len(existing)}개)")
+        inserted: list[HwpxOxmlParagraph] = []
+        for offset, paragraph in enumerate(paragraphs):
+            source_element = paragraph.element if isinstance(paragraph, HwpxOxmlParagraph) else paragraph
+            cloned = _clone_paragraph_element(source_element)
+            self._element.insert(index + offset, cloned)
+            inserted.append(HwpxOxmlParagraph(cloned, self))
+        if inserted:
+            self._dirty = True
+        return inserted
+    def copy_paragraph_range(self, start: int, end: int) -> list[ET.Element]:
+        """Return deep-copied paragraph elements for the inclusive range."""
+        paragraphs = self.paragraphs
+        total = len(paragraphs)
+        if start < 0 or end < 0 or start >= total or end >= total or start > end:
+            raise IndexError(f"문단 범위 {start}..{end}이(가) 유효하지 않습니다 (총 {total}개)")
+        return [_clone_paragraph_element(paragraphs[index].element) for index in range(start, end + 1)]
     def mark_dirty(self) -> None:
         self._dirty = True
@@ -4455,9 +4540,9 @@ class HwpxOxmlDocument:
                 element.remove(child)
             if target[0]:
-                ET.SubElement(element, f"{_HH}bold")
+                _append_child(element, f"{_HH}bold")
             if target[1]:
-                ET.SubElement(element, f"{_HH}italic")
+                _append_child(element, f"{_HH}italic")
             underline_attrs = dict(base_underline_attrs)
             if target[2]:
@@ -4469,14 +4554,14 @@ class HwpxOxmlDocument:
                     underline_attrs["color"] = base_underline_attrs["color"]
                 if "color" not in underline_attrs:
                     underline_attrs["color"] = "#000000"
-                ET.SubElement(element, f"{_HH}underline", underline_attrs)
+                _append_child(element, f"{_HH}underline", underline_attrs)
             else:
                 attrs = dict(base_underline_attrs)
                 attrs["type"] = "NONE"
                 attrs.setdefault("shape", base_underline_attrs.get("shape", "SOLID"))
                 if "color" in base_underline_attrs:
                     attrs["color"] = base_underline_attrs["color"]
-                ET.SubElement(element, f"{_HH}underline", attrs)
+                _append_child(element, f"{_HH}underline", attrs)
         element = header.ensure_char_property(
             predicate=predicate,
@@ -4649,6 +4734,42 @@ class HwpxOxmlDocument:
         else:
             paragraph.remove()
+    def copy_paragraph_range(
+        self,
+        start: int,
+        end: int,
+        *,
+        section: HwpxOxmlSection | None = None,
+        section_index: int | None = None,
+    ) -> list[ET.Element]:
+        """Return deep-copied paragraph elements for an inclusive range."""
+        if section is None and section_index is not None:
+            section = self._sections[section_index]
+        if section is None:
+            if not self._sections:
+                raise ValueError("document does not contain any sections")
+            section = self._sections[-1]
+        return section.copy_paragraph_range(start, end)
+    def insert_paragraphs(
+        self,
+        index: int,
+        paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
+        *,
+        section: HwpxOxmlSection | None = None,
+        section_index: int | None = None,
+    ) -> list[HwpxOxmlParagraph]:
+        """Insert copied paragraphs into the requested section."""
+        if section is None and section_index is not None:
+            section = self._sections[section_index]
+        if section is None:
+            if not self._sections:
+                raise ValueError("document does not contain any sections")
+            section = self._sections[-1]
+        return section.insert_paragraphs(index, paragraphs)
     # ------------------------------------------------------------------
     # Section management
     # ------------------------------------------------------------------

hwpx/oxml/header.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 import logging

hwpx/oxml/header_part.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Header-part OpenXML wrappers."""
 from __future__ import annotations

hwpx/oxml/memo.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Memo-related OpenXML wrappers."""
 from __future__ import annotations

hwpx/oxml/namespaces.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Shared namespace constants for the HWPML/OWPML XML schemas.
 All modules that need HWPML namespace URIs should import from here

hwpx/oxml/paragraph.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Paragraph-related OpenXML wrappers."""
 from __future__ import annotations

hwpx/oxml/parser.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 import logging

hwpx/oxml/schema.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 import logging

hwpx/oxml/section.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Section-related OpenXML wrappers."""
 from __future__ import annotations

hwpx/oxml/table.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Table-related OpenXML wrappers."""
 from __future__ import annotations

hwpx/oxml/utils.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 import logging

hwpx/package.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """하위 호환을 위한 패키지 모듈.
 신규 코드는 :mod:`hwpx.opc.package` 를 직접 사용하세요.

hwpx/templates.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Embedded templates and sample payloads for HWPX documents."""
 from __future__ import annotations

hwpx/tools/__init__.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 """Tooling helpers for inspecting HWPX archives."""
 from .exporter import (
@@ -25,6 +26,19 @@ from .text_extractor import (
     describe_element_path,
     strip_namespace,
 )
+from .table_navigation import (
+    TableCellReference,
+    TableFillApplied,
+    TableFillFailed,
+    TableFillResult,
+    TableLabelMatch,
+    TableLabelSearchResult,
+    TableMapEntry,
+    TableMapResult,
+    fill_by_path,
+    find_cell_by_label,
+    get_table_map,
+)
 from .validator import (
     DocumentSchemas,
     ValidationIssue,
@@ -41,6 +55,17 @@ __all__ = [
     "build_parent_map",
     "describe_element_path",
     "strip_namespace",
+    "TableCellReference",
+    "TableFillApplied",
+    "TableFillFailed",
+    "TableFillResult",
+    "TableLabelMatch",
+    "TableLabelSearchResult",
+    "TableMapEntry",
+    "TableMapResult",
+    "fill_by_path",
+    "find_cell_by_label",
+    "get_table_map",
     "FoundElement",
     "ObjectFinder",
     "PackageValidationIssue",

hwpx/tools/archive_cli.py CHANGED Viewed

@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 import argparse

python-hwpx 2.8.3__py3-none-any.whl → 2.9.1__py3-none-any.whl

python-hwpx 2.8.3__py3-none-any.whl → 2.9.1__py3-none-any.whl