PyPI - python-hwpx - Versions diffs - 2.10.0__py3-none-any.whl → 2.10.2__py3-none-any.whl - Mend

python-hwpx 2.10.0__py3-none-any.whl → 2.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

hwpx/authoring.py +785 -27
hwpx/builder/core.py +267 -59
hwpx/builder/report.py +32 -0
hwpx/document.py +14 -8
hwpx/opc/package.py +5 -1
hwpx/oxml/body.py +336 -27
hwpx/oxml/document.py +212 -39
hwpx/oxml/namespaces.py +142 -11
hwpx/oxml/parser.py +7 -0
hwpx/tools/generic_inventory.py +156 -0
hwpx/tools/id_integrity.py +275 -0
hwpx/tools/markdown_export.py +488 -0
hwpx/tools/report_parser.py +135 -0
hwpx/tools/report_utils.py +132 -0
hwpx/tools/roundtrip_diff.py +50 -0
hwpx/tools/table_cleanup.py +61 -0
hwpx/tools/table_navigation.py +77 -1
hwpx/tools/text_extractor.py +33 -25
{python_hwpx-2.10.0.dist-info → python_hwpx-2.10.2.dist-info}/METADATA +49 -1
{python_hwpx-2.10.0.dist-info → python_hwpx-2.10.2.dist-info}/RECORD +25 -18
{python_hwpx-2.10.0.dist-info → python_hwpx-2.10.2.dist-info}/licenses/NOTICE +8 -0
{python_hwpx-2.10.0.dist-info → python_hwpx-2.10.2.dist-info}/WHEEL +0 -0
{python_hwpx-2.10.0.dist-info → python_hwpx-2.10.2.dist-info}/entry_points.txt +0 -0
{python_hwpx-2.10.0.dist-info → python_hwpx-2.10.2.dist-info}/licenses/LICENSE +0 -0
{python_hwpx-2.10.0.dist-info → python_hwpx-2.10.2.dist-info}/top_level.txt +0 -0

hwpx/authoring.py CHANGED Viewed

@@ -4,14 +4,45 @@
 from __future__ import annotations
 import re
+from ast import literal_eval as _literal
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Mapping
+from .builder import (
+    Bullet as BuilderBullet,
+    Document as BuilderDocument,
+    Footer as BuilderFooter,
+    Header as BuilderHeader,
+    Heading as BuilderHeading,
+    Image as BuilderImage,
+    Margins as BuilderMargins,
+    Metadata as BuilderMetadata,
+    NumberedList as BuilderNumberedList,
+    PageBreak as BuilderPageBreak,
+    PageNumber as BuilderPageNumber,
+    PageSize as BuilderPageSize,
+    Paragraph as BuilderParagraph,
+    Run as BuilderRun,
+    Section as BuilderSection,
+    Table as BuilderTable,
+)
+from .builder.core import Toc as BuilderToc
 from .document import HwpxDocument
 from .tools.package_validator import validate_package
+from .tools.table_cleanup import normalize_cell_text
+from .tools.report_utils import (
+    calculate_age,
+    calculate_ratios,
+    format_delta,
+    format_delta_percent,
+    format_krw_hangul,
+    format_number_commas,
+    normalize_korean_date,
+)
 DOCUMENT_PLAN_SCHEMA_VERSION = "hwpx.document_plan.v1"
+DOCUMENT_PLAN_V2_SCHEMA_VERSION = "hwpx.document_plan.v2"
 AUTHORING_REPORT_VERSION = "hwpx-authoring-quality-v1"
 OPERATING_PLAN_QUALITY_VERSION = "operating-plan-quality-v1"
 DEFAULT_STYLE_PRESET = "standard_korean_business"
@@ -28,11 +59,14 @@ _SUPPORTED_BLOCK_TYPES = frozenset(
 _SUPPORTED_STYLE_TOKENS = frozenset(
     {"body", "title", "subtitle", "heading", "bullet", "table_header", "table_cell"}
 )
+_SUPPORTED_TABLE_PROFILES = frozenset({"government"})
 _BOOLEAN_QUALITY_GATES = frozenset(
     {"validatePackage", "validateDocument", "reopen", "visualReviewRequired"}
 )
 _INTEGER_QUALITY_GATES = frozenset({"minNonEmptyParagraphs", "minTableCount"})
 _LIST_QUALITY_GATES = frozenset({"requiredText"})
+_COMPUTED_FIELD_RE = re.compile(r"\{\{\s*(.*?)\s*\}\}")
+_COMPUTED_CALL_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_]*)\s*\((.*)\)$", re.DOTALL)
 @dataclass(slots=True)
@@ -54,6 +88,7 @@ class DocumentPlan:
     style_preset: str = DEFAULT_STYLE_PRESET
     quality_gates: dict[str, Any] = field(default_factory=dict)
     schema_version: str = DOCUMENT_PLAN_SCHEMA_VERSION
+    builder_document: BuilderDocument | None = None
     def to_dict(self) -> dict[str, Any]:
         """Return a JSON-serializable representation of this plan."""
@@ -182,6 +217,143 @@ def _plan_validation_report(
     )
+_COMPUTED_FUNCTIONS = {
+    "krw_hangul": format_krw_hangul,
+    "commas": format_number_commas,
+    "age": calculate_age,
+    "delta": format_delta,
+    "delta_percent": format_delta_percent,
+    "ratio": calculate_ratios,
+    "date": normalize_korean_date,
+}
+class _ComputedFieldError(ValueError):
+    def __init__(self, code: str, message: str) -> None:
+        super().__init__(message)
+        self.code = code
+def replace_computed_fields(text: str) -> str:
+    """Replace safe ``{{ function(args) }}`` report utility placeholders."""
+    def replacement(match: re.Match[str]) -> str:
+        return _evaluate_computed_field(match.group(1))
+    result = _COMPUTED_FIELD_RE.sub(replacement, text)
+    if "{{" in result or "}}" in result:
+        raise _ComputedFieldError(
+            "invalid_computed_field",
+            "computed field marker is malformed or unresolved",
+        )
+    return result
+def _evaluate_computed_field(expression: str) -> str:
+    match = _COMPUTED_CALL_RE.match(expression.strip())
+    if not match:
+        raise _ComputedFieldError(
+            "invalid_computed_field",
+            f"computed field must be a function call: {expression!r}",
+        )
+    function_name, raw_args = match.groups()
+    function = _COMPUTED_FUNCTIONS.get(function_name)
+    if function is None:
+        raise _ComputedFieldError(
+            "unknown_computed_field",
+            f"unknown computed field function: {function_name}",
+        )
+    args = [_parse_computed_arg(arg) for arg in _split_computed_args(raw_args)]
+    try:
+        return str(function(*args))
+    except Exception as exc:
+        raise _ComputedFieldError(
+            "invalid_computed_field",
+            f"computed field failed: {expression!r}",
+        ) from exc
+def _split_computed_args(raw_args: str) -> list[str]:
+    if not raw_args.strip():
+        return []
+    args: list[str] = []
+    start = 0
+    quote: str | None = None
+    escaped = False
+    for index, char in enumerate(raw_args):
+        if escaped:
+            escaped = False
+            continue
+        if char == "\\" and quote:
+            escaped = True
+            continue
+        if quote:
+            if char == quote:
+                quote = None
+            continue
+        if char in {"'", '"'}:
+            quote = char
+            continue
+        if char == ",":
+            args.append(raw_args[start:index].strip())
+            start = index + 1
+    if quote:
+        raise _ComputedFieldError("invalid_computed_field", "unterminated string argument")
+    args.append(raw_args[start:].strip())
+    return args
+def _parse_computed_arg(token: str) -> object:
+    if not token:
+        raise _ComputedFieldError("invalid_computed_field", "empty computed field argument")
+    if token[0] in {"'", '"'}:
+        try:
+            value = _literal(token)
+        except (SyntaxError, ValueError) as exc:
+            raise _ComputedFieldError("invalid_computed_field", "invalid string argument") from exc
+        if not isinstance(value, str):
+            raise _ComputedFieldError("invalid_computed_field", "only string literals are supported")
+        return value
+    if re.fullmatch(r"[+-]?\d+", token):
+        return int(token)
+    if re.fullmatch(r"[+-]?\d+\.\d+", token):
+        return float(token)
+    raise _ComputedFieldError(
+        "invalid_computed_field",
+        f"unsupported computed field argument: {token!r}",
+    )
+def _computed_field_issues(text: Any, *, path: str) -> list[PlanValidationIssue]:
+    value = str(text or "")
+    if "{{" not in value and "}}" not in value:
+        return []
+    issues: list[PlanValidationIssue] = []
+    for match in _COMPUTED_FIELD_RE.finditer(value):
+        try:
+            _evaluate_computed_field(match.group(1))
+        except _ComputedFieldError as exc:
+            issues.append(
+                _plan_issue(
+                    exc.code,
+                    path,
+                    str(exc),
+                    suggestion="Use a supported computed function such as krw_hangul, commas, delta, ratio, or date.",
+                )
+            )
+    residue = _COMPUTED_FIELD_RE.sub("", value)
+    if "{{" in residue or "}}" in residue:
+        issues.append(
+            _plan_issue(
+                "invalid_computed_field",
+                path,
+                "computed field marker is malformed or unresolved",
+                suggestion="Use balanced computed field delimiters such as {{ commas(1234) }}.",
+            )
+        )
+    return issues
 def _report_plan_issues(report: PlanValidationReport) -> tuple[PlanValidationIssue, ...]:
     if report.issues:
         return report.issues
@@ -235,15 +407,20 @@ def validate_document_plan(plan: Mapping[str, Any]) -> PlanValidationReport:
         )
     schema_version = str(plan.get("schemaVersion") or "").strip()
-    if schema_version != DOCUMENT_PLAN_SCHEMA_VERSION:
+    if schema_version not in {DOCUMENT_PLAN_SCHEMA_VERSION, DOCUMENT_PLAN_V2_SCHEMA_VERSION}:
         issues.append(
             _plan_issue(
                 "invalid_schema_version",
                 "schemaVersion",
-                f"schemaVersion must be {DOCUMENT_PLAN_SCHEMA_VERSION!r}",
+                (
+                    f"schemaVersion must be {DOCUMENT_PLAN_SCHEMA_VERSION!r} "
+                    f"or {DOCUMENT_PLAN_V2_SCHEMA_VERSION!r}"
+                ),
                 suggestion=f"Set schemaVersion to {DOCUMENT_PLAN_SCHEMA_VERSION!r}.",
             )
         )
+    elif schema_version == DOCUMENT_PLAN_V2_SCHEMA_VERSION:
+        return _validate_document_plan_v2(plan, schema_version=schema_version)
     title = str(plan.get("title") or "").strip()
     if not title:
@@ -314,6 +491,20 @@ def normalize_document_plan(plan: Mapping[str, Any] | DocumentPlan) -> DocumentP
     if not report.ok:
         raise ValueError("; ".join(report.errors))
+    schema_version = str(plan.get("schemaVersion") or "").strip()
+    if schema_version == DOCUMENT_PLAN_V2_SCHEMA_VERSION:
+        return DocumentPlan(
+            schema_version=DOCUMENT_PLAN_V2_SCHEMA_VERSION,
+            title="",
+            subtitle="",
+            metadata={},
+            blocks=[],
+            style_preset=str(plan.get("stylePreset") or DEFAULT_STYLE_PRESET).strip()
+            or DEFAULT_STYLE_PRESET,
+            quality_gates=dict(_default_quality_gates() | dict(plan.get("qualityGates") or {})),
+            builder_document=_normalize_v2_builder_document(plan),
+        )
     blocks = [
         _normalize_block(raw_block, index=index)
         for index, raw_block in enumerate(plan.get("blocks") or [])
@@ -330,6 +521,165 @@ def normalize_document_plan(plan: Mapping[str, Any] | DocumentPlan) -> DocumentP
     )
+def _validate_document_plan_v2(
+    plan: Mapping[str, Any],
+    *,
+    schema_version: str,
+) -> PlanValidationReport:
+    issues: list[PlanValidationIssue] = []
+    sections = plan.get("sections")
+    if not isinstance(sections, list) or not sections:
+        issues.append(
+            _plan_issue(
+                "missing_sections",
+                "sections",
+                "sections must be a non-empty list",
+                suggestion="Add at least one section with a blocks array.",
+            )
+        )
+        return _plan_validation_report(issues, schema_version=schema_version)
+    for section_index, raw_section in enumerate(sections):
+        section_path = f"sections[{section_index}]"
+        if not isinstance(raw_section, Mapping):
+            issues.append(
+                _plan_issue(
+                    "section_not_object",
+                    section_path,
+                    f"{section_path} must be a mapping",
+                    suggestion="Use an object with optional header/footer and a blocks array.",
+                )
+            )
+            continue
+        blocks = raw_section.get("blocks", raw_section.get("children"))
+        if not isinstance(blocks, list) or not blocks:
+            issues.append(
+                _plan_issue(
+                    "missing_section_blocks",
+                    f"{section_path}.blocks",
+                    f"{section_path}.blocks must be a non-empty list",
+                    suggestion="Add builder blocks such as heading, paragraph, table, image, or page_break.",
+                )
+            )
+            continue
+        for block_index, raw_block in enumerate(blocks):
+            issues.extend(
+                _validate_v2_block(
+                    raw_block,
+                    path=f"{section_path}.blocks[{block_index}]",
+                )
+            )
+    metadata = plan.get("metadata", {})
+    if metadata is not None and not isinstance(metadata, Mapping):
+        issues.append(
+            _plan_issue(
+                "invalid_metadata",
+                "metadata",
+                "metadata must be a mapping when provided",
+                suggestion="Use an object with title, author, and organization fields or omit metadata.",
+            )
+        )
+    return _plan_validation_report(issues, schema_version=schema_version)
+def _validate_v2_block(raw_block: Any, *, path: str) -> list[PlanValidationIssue]:
+    if not isinstance(raw_block, Mapping):
+        return [
+            _plan_issue(
+                "block_not_object",
+                path,
+                f"{path} must be a mapping",
+                suggestion="Replace this block with a JSON object containing a supported builder type.",
+            )
+        ]
+    block_type = str(raw_block.get("type") or "").strip()
+    supported = {
+        "heading",
+        "paragraph",
+        "bullets",
+        "bullet",
+        "numbered_list",
+        "numberedList",
+        "table",
+        "image",
+        "toc",
+        "page_break",
+        "pageBreak",
+    }
+    if block_type not in supported:
+        return [
+            _plan_issue(
+                "unsupported_block_type",
+                f"{path}.type",
+                f"{path}.type is unsupported: {block_type!r}",
+                suggestion="Use a public builder block type.",
+            )
+        ]
+    if block_type in {"heading", "image"}:
+        text_key = "text" if block_type == "heading" else "path"
+        if not str(raw_block.get(text_key) or "").strip():
+            return [
+                _plan_issue(
+                    "missing_text",
+                    f"{path}.{text_key}",
+                    f"{path}.{text_key} is required",
+                    suggestion=f"Add non-empty {text_key}.",
+                )
+            ]
+    if block_type in {"bullets", "bullet", "numbered_list", "numberedList"}:
+        if not _string_list(raw_block.get("items")):
+            return [
+                _plan_issue(
+                    "missing_list_items",
+                    f"{path}.items",
+                    f"{path}.items must be a non-empty list",
+                    suggestion="Add one or more list items.",
+                )
+            ]
+    if block_type == "table":
+        header = raw_block.get("header")
+        rows = raw_block.get("rows")
+        if not isinstance(header, list) and not isinstance(rows, list):
+            return [
+                _plan_issue(
+                    "missing_table_content",
+                    path,
+                    f"{path} must define header or rows",
+                    suggestion="Add a header array or rows array.",
+                )
+            ]
+    issues: list[PlanValidationIssue] = []
+    if block_type == "heading":
+        issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
+    elif block_type == "paragraph":
+        issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
+        for child_index, child in enumerate(raw_block.get("children") or []):
+            if isinstance(child, Mapping):
+                issues.extend(
+                    _computed_field_issues(
+                        child.get("text"),
+                        path=f"{path}.children[{child_index}].text",
+                    )
+                )
+    elif block_type in {"bullets", "bullet", "numbered_list", "numberedList"}:
+        for item_index, item in enumerate(_string_list(raw_block.get("items"))):
+            issues.extend(_computed_field_issues(item, path=f"{path}.items[{item_index}]"))
+    elif block_type == "table":
+        for header_index, header_value in enumerate(raw_block.get("header") or []):
+            issues.extend(_computed_field_issues(header_value, path=f"{path}.header[{header_index}]"))
+        for row_index, row in enumerate(raw_block.get("rows") or []):
+            if isinstance(row, (list, tuple)):
+                for col_index, value in enumerate(row):
+                    issues.extend(_computed_field_issues(value, path=f"{path}.rows[{row_index}][{col_index}]"))
+    elif block_type == "toc":
+        issues.extend(_computed_field_issues(raw_block.get("title"), path=f"{path}.title"))
+        for entry_index, entry in enumerate(raw_block.get("entries") or []):
+            if isinstance(entry, Mapping):
+                issues.extend(_computed_field_issues(entry.get("text"), path=f"{path}.entries[{entry_index}].text"))
+    return issues
 def create_document_from_plan(
     plan: Mapping[str, Any] | DocumentPlan,
     *,
@@ -338,6 +688,8 @@ def create_document_from_plan(
     """Create a formatted HWPX document from a declarative document plan."""
     normalized = normalize_document_plan(plan)
+    if normalized.builder_document is not None:
+        return normalized.builder_document.lower()
     style_preset = (
         preset
         if isinstance(preset, DocumentStylePreset)
@@ -345,6 +697,7 @@ def create_document_from_plan(
     )
     document = HwpxDocument.new()
     tokens = style_preset.ensure_tokens(document)
+    builder_document = _lower_plan_to_builder_document(normalized)
     if normalized.title:
         document.add_paragraph(
@@ -367,7 +720,7 @@ def create_document_from_plan(
         )
         _add_key_value_table(document, normalized.metadata, tokens)
-    for block in normalized.blocks:
+    for block in builder_document.sections[0].children:
         _render_block(document, block, tokens)
     return document
@@ -564,8 +917,10 @@ def _validate_block(raw_block: Any, *, index: int) -> list[PlanValidationIssue]:
     if block_type == "heading":
         issues.extend(_validate_heading_block(raw_block, path=path))
+        issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
     elif block_type == "paragraph":
         issues.extend(_validate_paragraph_block(raw_block, path=path))
+        issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
     elif block_type == "bullets":
         items = _string_list(raw_block.get("items") or raw_block.get("bullets"))
         if not items:
@@ -577,12 +932,48 @@ def _validate_block(raw_block: Any, *, index: int) -> list[PlanValidationIssue]:
                     suggestion="Add a non-empty items array, or use a paragraph block instead.",
                 )
             )
+        for item_index, item in enumerate(items):
+            issues.extend(_computed_field_issues(item, path=f"{path}.items[{item_index}]"))
     elif block_type == "table":
         column_keys, column_issues = _validate_table_columns(raw_block.get("columns"), path=path)
         issues.extend(column_issues)
         issues.extend(_validate_table_rows(raw_block.get("rows"), column_keys, path=path))
+        issues.extend(_computed_field_issues(raw_block.get("caption"), path=f"{path}.caption"))
+        issues.extend(_computed_field_issues(raw_block.get("unit"), path=f"{path}.unit"))
+        table_profile = str(raw_block.get("tableProfile") or "").strip()
+        if table_profile and table_profile not in _SUPPORTED_TABLE_PROFILES:
+            issues.append(
+                _plan_issue(
+                    "unknown_table_profile",
+                    f"{path}.tableProfile",
+                    f"{path}.tableProfile is unknown: {table_profile!r}",
+                    severity="warning",
+                    suggestion="Use tableProfile='government' or omit tableProfile.",
+                )
+            )
+        for column_index, column in enumerate(raw_block.get("columns") or []):
+            if isinstance(column, Mapping):
+                issues.extend(
+                    _computed_field_issues(
+                        column.get("label"),
+                        path=f"{path}.columns[{column_index}].label",
+                    )
+                )
+        for row_index, row in enumerate(raw_block.get("rows") or []):
+            if isinstance(row, Mapping):
+                for key, value in row.items():
+                    if isinstance(value, Mapping):
+                        value = value.get("text", value.get("value"))
+                    issues.extend(
+                        _computed_field_issues(
+                            value,
+                            path=f"{path}.rows[{row_index}].{key}",
+                        )
+                    )
     elif block_type == "memo":
         issues.extend(_validate_required_text_fields(raw_block, path=path, fields=("text", "memo")))
+        issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
+        issues.extend(_computed_field_issues(raw_block.get("memo"), path=f"{path}.memo"))
     return issues
@@ -852,13 +1243,13 @@ def _normalize_block(raw_block: Any, *, index: int) -> DocumentBlock:
         if level < 1 or level > 3:
             raise ValueError(f"blocks[{index}].level must be between 1 and 3")
         text = _required_text(raw_block, "text", index)
-        return DocumentBlock("heading", {"level": level, "text": text})
+        return DocumentBlock("heading", {"level": level, "text": replace_computed_fields(text)})
     if block_type == "paragraph":
         return DocumentBlock(
             "paragraph",
             {
-                "text": _required_text(raw_block, "text", index),
+                "text": replace_computed_fields(_required_text(raw_block, "text", index)),
                 "style": str(raw_block.get("style") or "body").strip() or "body",
             },
         )
@@ -867,29 +1258,304 @@ def _normalize_block(raw_block: Any, *, index: int) -> DocumentBlock:
         items = _string_list(raw_block.get("items") or raw_block.get("bullets"))
         if not items:
             raise ValueError(f"blocks[{index}].items must be a non-empty list")
-        return DocumentBlock("bullets", {"items": items})
+        return DocumentBlock(
+            "bullets",
+            {"items": [replace_computed_fields(item) for item in items]},
+        )
     if block_type == "table":
         columns = _normalize_columns(raw_block.get("columns"), index=index)
         rows = _normalize_rows(raw_block.get("rows"), columns, index=index)
-        caption = str(raw_block.get("caption") or "").strip()
-        return DocumentBlock(
-            "table",
-            {"caption": caption, "columns": columns, "rows": rows},
-        )
+        caption = replace_computed_fields(normalize_cell_text(raw_block.get("caption")))
+        unit = replace_computed_fields(normalize_cell_text(raw_block.get("unit")))
+        table_profile = str(raw_block.get("tableProfile") or "").strip()
+        columns = [
+            {**column, "label": replace_computed_fields(normalize_cell_text(column["label"]))}
+            for column in columns
+        ]
+        rows = [
+            {key: replace_computed_fields(value) for key, value in row.items()}
+            for row in rows
+        ]
+        data: dict[str, Any] = {"caption": caption, "columns": columns, "rows": rows}
+        if unit:
+            data["unit"] = unit
+        if table_profile:
+            data["tableProfile"] = table_profile
+        return DocumentBlock("table", data)
     if block_type == "memo":
         return DocumentBlock(
             "memo",
             {
-                "text": _required_text(raw_block, "text", index),
-                "memo": _required_text(raw_block, "memo", index),
+                "text": replace_computed_fields(_required_text(raw_block, "text", index)),
+                "memo": replace_computed_fields(_required_text(raw_block, "memo", index)),
             },
         )
     return DocumentBlock("page_break", {})
+def _normalize_v2_builder_document(plan: Mapping[str, Any]) -> BuilderDocument:
+    metadata = plan.get("metadata") or {}
+    builder_metadata = None
+    if isinstance(metadata, Mapping):
+        title = str(metadata.get("title") or plan.get("title") or "").strip()
+        author = str(metadata.get("author") or "").strip()
+        organization = str(metadata.get("organization") or "").strip()
+        if title or author or organization:
+            builder_metadata = BuilderMetadata(
+                title=title,
+                author=author,
+                organization=organization,
+            )
+    return BuilderDocument(
+        sections=tuple(
+            _normalize_v2_section(raw_section, index=index)
+            for index, raw_section in enumerate(plan.get("sections") or [])
+        ),
+        metadata=builder_metadata,
+        visual_review_required=_optional_bool(plan.get("visualReviewRequired")),
+        preset=str(plan.get("preset") or plan.get("stylePreset") or DEFAULT_STYLE_PRESET).strip()
+        or DEFAULT_STYLE_PRESET,
+    )
+def _normalize_v2_section(raw_section: Any, *, index: int) -> BuilderSection:
+    if not isinstance(raw_section, Mapping):
+        raise TypeError(f"sections[{index}] must be a mapping")
+    raw_blocks = raw_section.get("blocks", raw_section.get("children"))
+    return BuilderSection(
+        children=tuple(
+            _normalize_v2_block(raw_block, path=f"sections[{index}].blocks[{block_index}]")
+            for block_index, raw_block in enumerate(raw_blocks or [])
+        ),
+        page=_normalize_v2_page(raw_section.get("page")),
+        margins=_normalize_v2_margins(raw_section.get("margins")),
+        header=_normalize_v2_header_footer(raw_section.get("header"), kind="header"),
+        footer=_normalize_v2_header_footer(raw_section.get("footer"), kind="footer"),
+    )
+def _normalize_v2_page(value: Any) -> BuilderPageSize | None:
+    if not isinstance(value, Mapping):
+        return None
+    preset = str(value.get("preset") or "").strip().upper()
+    if preset == "A4":
+        return BuilderPageSize.A4
+    width = _float_value(value.get("widthMm", value.get("width_mm")), default=210)
+    height = _float_value(value.get("heightMm", value.get("height_mm")), default=297)
+    orientation = str(value.get("orientation") or "PORTRAIT").strip() or "PORTRAIT"
+    return BuilderPageSize(width_mm=width, height_mm=height, orientation=orientation)
+def _normalize_v2_margins(value: Any) -> BuilderMargins | None:
+    if not isinstance(value, Mapping):
+        return None
+    return BuilderMargins(
+        top_mm=_float_value(value.get("topMm", value.get("top_mm")), default=20),
+        right_mm=_float_value(value.get("rightMm", value.get("right_mm")), default=20),
+        bottom_mm=_float_value(value.get("bottomMm", value.get("bottom_mm")), default=20),
+        left_mm=_float_value(value.get("leftMm", value.get("left_mm")), default=20),
+        header_mm=_float_value(value.get("headerMm", value.get("header_mm")), default=10),
+        footer_mm=_float_value(value.get("footerMm", value.get("footer_mm")), default=10),
+        gutter_mm=_float_value(value.get("gutterMm", value.get("gutter_mm")), default=0),
+    )
+def _normalize_v2_header_footer(value: Any, *, kind: str) -> BuilderHeader | BuilderFooter | None:
+    if not isinstance(value, Mapping):
+        return None
+    children = tuple(_normalize_v2_header_footer_child(child) for child in value.get("children") or [])
+    if kind == "header":
+        return BuilderHeader(children=children)
+    return BuilderFooter(children=children)
+def _normalize_v2_header_footer_child(value: Any) -> BuilderParagraph | BuilderPageNumber:
+    if not isinstance(value, Mapping):
+        raise TypeError("header/footer children must be mappings")
+    child_type = str(value.get("type") or "paragraph").strip()
+    if child_type == "page_number":
+        return BuilderPageNumber(format=str(value.get("format") or "page"))
+    if child_type != "paragraph":
+        raise ValueError(f"unsupported header/footer child type: {child_type!r}")
+    children = tuple(_normalize_v2_paragraph_child(child) for child in value.get("children") or [])
+    return BuilderParagraph(
+        text=replace_computed_fields(str(value.get("text") or "")),
+        children=children,
+        align=_optional_str(value.get("align")),
+    )
+def _normalize_v2_paragraph_child(value: Any) -> BuilderRun | BuilderPageNumber:
+    if not isinstance(value, Mapping):
+        raise TypeError("paragraph children must be mappings")
+    child_type = str(value.get("type") or "run").strip()
+    if child_type == "page_number":
+        return BuilderPageNumber(format=str(value.get("format") or "page"))
+    if child_type != "run":
+        raise ValueError(f"unsupported paragraph child type: {child_type!r}")
+    return BuilderRun(
+        text=replace_computed_fields(str(value.get("text") or "")),
+        bold=bool(value.get("bold", False)),
+        italic=bool(value.get("italic", False)),
+        underline=bool(value.get("underline", False)),
+        color=_optional_str(value.get("color")),
+        font=_optional_str(value.get("font")),
+        size=_optional_number(value.get("size")),
+        highlight=_optional_str(value.get("highlight")),
+        strike=bool(value.get("strike", False)),
+    )
+def _normalize_v2_block(raw_block: Any, *, path: str) -> Any:
+    """Translate one raw v2 plan block into the matching builder node.
+
+    The ``type`` entry of ``raw_block`` selects the node kind: ``heading``,
+    ``paragraph``, ``bullets``/``bullet``, ``numbered_list``/``numberedList``,
+    ``table``, ``image``, ``toc`` or ``page_break``/``pageBreak``.  Text
+    values are passed through ``replace_computed_fields`` before they are
+    stored on the node.  ``path`` is only used to build error messages.
+
+    Raises:
+        TypeError: if ``raw_block`` is not a mapping.
+        ValueError: if the block type is not one of the kinds listed above.
+    """
+    if not isinstance(raw_block, Mapping):
+        raise TypeError(f"{path} must be a mapping")
+    lookup = raw_block.get
+    def either(camel_key: str, snake_key: str) -> Any:
+        # The camelCase spelling wins whenever that key is present; the
+        # snake_case spelling is only the fallback default.
+        return lookup(camel_key, lookup(snake_key))
+    def computed(raw_text: Any) -> str:
+        return replace_computed_fields(str(raw_text))
+    kind = str(lookup("type") or "").strip()
+    if kind == "heading":
+        heading_level = _int_value(lookup("level", 1), default=1)
+        return BuilderHeading(level=heading_level, text=computed(lookup("text") or ""))
+    if kind == "paragraph":
+        # Children that do not normalize to a run are dropped.
+        runs: list[Any] = []
+        for raw_child in lookup("children") or []:
+            normalized_child = _normalize_v2_paragraph_child(raw_child)
+            if isinstance(normalized_child, BuilderRun):
+                runs.append(normalized_child)
+        return BuilderParagraph(
+            text=computed(lookup("text") or ""),
+            children=tuple(runs),
+            align=_optional_str(lookup("align")),
+            style=_optional_str(lookup("style")),
+        )
+    if kind in ("bullets", "bullet"):
+        bullet_items = tuple(map(replace_computed_fields, _string_list(lookup("items"))))
+        return BuilderBullet(
+            items=bullet_items,
+            level=_int_value(lookup("level", 0), default=0),
+            style=_optional_str(lookup("style")),
+        )
+    if kind in ("numbered_list", "numberedList"):
+        numbered_items = tuple(map(replace_computed_fields, _string_list(lookup("items"))))
+        return BuilderNumberedList(
+            items=numbered_items,
+            level=_int_value(lookup("level", 0), default=0),
+        )
+    if kind == "table":
+        header_cells = tuple(map(computed, lookup("header") or ()))
+        body_rows = tuple(
+            tuple(map(computed, raw_row))
+            for raw_row in lookup("rows") or ()
+        )
+        merge_specs = tuple(map(str, lookup("merges") or ()))
+        shading = _optional_str(either("headerShading", "header_shading"))
+        # A width that cannot be read as a number is stored as 0.
+        widths = tuple(
+            _optional_number(raw_width) or 0
+            for raw_width in either("columnWidths", "column_widths") or ()
+        )
+        return BuilderTable(
+            header=header_cells,
+            rows=body_rows,
+            merges=merge_specs,
+            header_shading=shading,
+            column_widths=widths,
+        )
+    if kind == "image":
+        raw_caption = lookup("caption")
+        return BuilderImage(
+            path=str(lookup("path") or ""),
+            width_mm=_optional_number(either("widthMm", "width_mm")),
+            align=_optional_str(lookup("align")),
+            caption=None if raw_caption is None else computed(raw_caption),
+            image_format=_optional_str(either("imageFormat", "image_format")),
+        )
+    if kind == "toc":
+        toc_title = computed(lookup("title") or "목차")
+        # Entries that are not mappings are ignored.
+        toc_entries: list[Any] = []
+        for raw_entry in lookup("entries") or ():
+            if isinstance(raw_entry, Mapping):
+                toc_entries.append({**raw_entry, "text": computed(raw_entry.get("text") or "")})
+        return BuilderToc(title=toc_title, entries=tuple(toc_entries))
+    if kind in ("page_break", "pageBreak"):
+        return BuilderPageBreak()
+    raise ValueError(f"{path}.type is unsupported: {kind!r}")
+def _lower_plan_to_builder_document(plan: DocumentPlan) -> BuilderDocument:
+    """Lower a normalized document plan to a builder document.
+
+    A plan that already carries a builder document gets that document back
+    unchanged.  Otherwise every plan block is lowered with
+    ``_block_to_builder_nodes`` and the resulting nodes are placed, in
+    order, into a single section.
+
+    v1 authoring keeps its historical title, metadata, style-token, and memo
+    rendering contracts, so only the body blocks are lowered here;
+    ``create_document_from_plan`` supplies the document-level framing.
+    """
+    prebuilt = plan.builder_document
+    if prebuilt is not None:
+        return prebuilt
+    lowered_nodes = tuple(
+        node
+        for plan_block in plan.blocks
+        for node in _block_to_builder_nodes(plan_block)
+    )
+    return BuilderDocument(sections=(BuilderSection(children=lowered_nodes),))
+def _block_to_builder_nodes(block: DocumentBlock) -> tuple[Any, ...]:
+    """Lower one v1 plan block into a tuple of builder nodes.
+
+    Most block types produce exactly one node.  A ``table`` block may
+    produce up to three: an optional caption paragraph (style ``heading``),
+    the table itself, and an optional unit paragraph (style ``body``).  A
+    ``memo`` block is passed through unchanged rather than lowered.
+
+    Raises:
+        ValueError: if ``block.type`` is not a supported block type.
+    """
+    kind = block.type
+    if kind == "page_break":
+        return (BuilderPageBreak(),)
+    if kind == "memo":
+        # Memo blocks stay as plan blocks; they are returned as-is.
+        return (block,)
+    if kind == "heading":
+        heading = BuilderHeading(
+            level=int(block.data["level"]),
+            text=str(block.data["text"]),
+        )
+        return (heading,)
+    if kind == "paragraph":
+        paragraph = BuilderParagraph(
+            text=str(block.data["text"]),
+            style=str(block.data.get("style") or "body"),
+        )
+        return (paragraph,)
+    if kind == "bullets":
+        bullet_items = tuple(map(str, block.data["items"]))
+        return (BuilderBullet(items=bullet_items),)
+    if kind == "table":
+        payload = block.data
+        column_specs = list(payload["columns"])
+        row_records = list(payload["rows"])
+        lowered: list[Any] = []
+        caption_text = str(payload.get("caption") or "").strip()
+        if caption_text:
+            lowered.append(BuilderParagraph(text=caption_text, style="heading"))
+        header_labels = tuple(str(spec["label"]) for spec in column_specs)
+        # Each body row is read by column key; a missing key yields "".
+        body_rows = tuple(
+            tuple(str(record.get(spec["key"], "")) for spec in column_specs)
+            for record in row_records
+        )
+        lowered.append(
+            BuilderTable(
+                header=header_labels,
+                rows=body_rows,
+                column_widths=tuple(_plan_table_column_widths(column_specs)),
+            ),
+        )
+        unit_text = str(payload.get("unit") or "").strip()
+        if unit_text:
+            lowered.append(BuilderParagraph(text=unit_text, style="body"))
+        return tuple(lowered)
+    raise ValueError(f"unsupported block type: {kind!r}")
+def _plan_table_column_widths(columns: list[dict[str, Any]]) -> list[int]:
+    """Split ``_DEFAULT_TABLE_WIDTH`` across ``columns`` by ``widthWeight``.
+
+    A missing weight counts as 1 and any weight below 1 is raised to 1.
+    Each width is the rounded proportional share; the last column absorbs
+    the rounding drift so the widths add up to ``_DEFAULT_TABLE_WIDTH``.
+    An empty ``columns`` list yields an empty list.
+    """
+    weights = [max(int(column.get("widthWeight", 1)), 1) for column in columns]
+    # Every weight is at least 1, so the sum is positive unless there are
+    # no columns at all.
+    if not weights:
+        return []
+    weight_sum = sum(weights)
+    widths = [round(_DEFAULT_TABLE_WIDTH * weight / weight_sum) for weight in weights]
+    widths[-1] += _DEFAULT_TABLE_WIDTH - sum(widths)
+    return widths
 def _normalize_columns(value: Any, *, index: int) -> list[dict[str, Any]]:
     if not isinstance(value, list) or not value:
         raise ValueError(f"blocks[{index}].columns must be a non-empty list")
@@ -904,7 +1570,7 @@ def _normalize_columns(value: Any, *, index: int) -> list[dict[str, Any]]:
         if key in seen:
             raise ValueError(f"blocks[{index}].columns contains duplicate key: {key!r}")
         seen.add(key)
-        label = str(raw_column.get("label") or key).strip()
+        label = normalize_cell_text(raw_column.get("label") or key)
         width_weight = _int_value(raw_column.get("widthWeight", 1), default=1)
         columns.append(
             {
@@ -929,42 +1595,51 @@ def _normalize_rows(
     for row_index, raw_row in enumerate(value):
         if not isinstance(raw_row, Mapping):
             raise ValueError(f"blocks[{index}].rows[{row_index}] must be a mapping")
-        rows.append({key: str(raw_row.get(key, "")) for key in column_keys})
+        rows.append({key: _normalize_table_cell_value(raw_row.get(key, "")) for key in column_keys})
     return rows
+def _normalize_table_cell_value(value: Any) -> str:
+    """Return the text stored in a table cell for a raw plan value.
+
+    A mapping is read as a rich cell: its text comes from the ``text`` key,
+    or from ``value`` when ``text`` is absent.  When the mapping sets a
+    truthy ``preserveWhitespace``, the text is stringified verbatim and is
+    not passed through ``normalize_cell_text``.  Every other value is
+    handed to ``normalize_cell_text``.
+    """
+    if not isinstance(value, Mapping):
+        return normalize_cell_text(value)
+    text = value.get("text", value.get("value", ""))
+    if value.get("preserveWhitespace", False):
+        # Only None means "no text".  The previous ``str(text or "")``
+        # also erased legitimate falsy cell values such as the number 0.
+        return "" if text is None else str(text)
+    return normalize_cell_text(text)
 def _render_block(
     document: HwpxDocument,
-    block: DocumentBlock,
+    block: Any,
     tokens: Mapping[str, str],
 ) -> None:
-    if block.type == "heading":
+    if isinstance(block, BuilderHeading):
         document.add_paragraph(
-            str(block.data["text"]),
+            block.text,
             char_pr_id_ref=tokens["heading"],
             inherit_style=False,
         )
         return
-    if block.type == "paragraph":
-        style = str(block.data.get("style") or "body")
+    if isinstance(block, BuilderParagraph):
+        style = str(block.style or "body")
         document.add_paragraph(
-            str(block.data["text"]),
+            block.text,
             char_pr_id_ref=tokens.get(style, tokens["body"]),
             inherit_style=False,
         )
         return
-    if block.type == "bullets":
-        for item in block.data["items"]:
+    if isinstance(block, BuilderBullet):
+        for item in block.items:
             document.add_paragraph(
                 f"• {item}",
                 char_pr_id_ref=tokens["bullet"],
                 inherit_style=False,
             )
         return
-    if block.type == "table":
-        _add_plan_table(document, block.data, tokens)
+    if isinstance(block, BuilderTable):
+        _add_builder_table(document, block, tokens)
         return
-    if block.type == "memo":
+    if isinstance(block, DocumentBlock) and block.type == "memo":
         paragraph = document.add_paragraph(
             str(block.data["text"]),
             char_pr_id_ref=tokens["body"],
@@ -972,8 +1647,10 @@ def _render_block(
         )
         document.add_memo_with_anchor(str(block.data["memo"]), paragraph=paragraph)
         return
-    if block.type == "page_break":
+    if isinstance(block, BuilderPageBreak):
         document.add_paragraph("", pageBreak="1", inherit_style=False)
+        return
+    raise ValueError(f"unsupported builder block: {type(block).__name__}")
 def _add_key_value_table(
@@ -1040,6 +1717,46 @@ def _add_plan_table(
             )
+def _add_builder_table(
+    document: HwpxDocument,
+    table_node: BuilderTable,
+    tokens: Mapping[str, str],
+) -> None:
+    """Render a builder table node into ``document``.
+
+    The table is created with ``_DEFAULT_TABLE_WIDTH`` and as many columns
+    as the widest row.  Header cells are written with the ``table_header``
+    token and body cells with the ``table_cell`` token.  When the node
+    carries column widths, they are applied to the leading cells of every
+    row.
+
+    Raises:
+        ValueError: if the node has neither a header nor any body row.
+    """
+    header = list(table_node.header)
+    body_rows = [list(row) for row in table_node.rows]
+    # The header only counts as a table row when it has cells.  Adding it
+    # unconditionally gave header-less tables a blank trailing row (body
+    # rows start at index 0 in that case) and made the emptiness check
+    # below unreachable.
+    rows = ([header] if header else []) + body_rows
+    if not rows:
+        raise ValueError("table must contain a header or at least one row")
+    column_count = max(len(row) for row in rows)
+    table = document.add_table(
+        len(rows),
+        column_count,
+        width=_DEFAULT_TABLE_WIDTH,
+        char_pr_id_ref=tokens["table_cell"],
+    )
+    column_widths = table_node.column_widths
+    if column_widths:
+        for table_row in table.rows:
+            # zip stops at the shorter side, so extra cells keep their size.
+            for cell, width in zip(table_row.cells, column_widths):
+                cell.set_size(width=int(width))
+    for col_index, label in enumerate(header):
+        _set_table_cell_text(
+            table,
+            0,
+            col_index,
+            str(label),
+            char_pr_id_ref=tokens["table_header"],
+        )
+    row_offset = 1 if header else 0
+    for row_index, row in enumerate(body_rows, start=row_offset):
+        for col_index, value in enumerate(row):
+            _set_table_cell_text(
+                table,
+                row_index,
+                col_index,
+                str(value),
+                char_pr_id_ref=tokens["table_cell"],
+            )
 def _set_table_cell_text(
     table: Any,
     row_index: int,
@@ -1545,6 +2262,7 @@ def _table_block_text(block: Mapping[str, Any]) -> str:
     for row in block.get("rows", []):
         if isinstance(row, Mapping):
             parts.extend(str(value) for value in row.values())
+    parts.append(str(block.get("unit") or ""))
     return "\n".join(parts)
@@ -1608,10 +2326,17 @@ def _document_table_blocks(document: HwpxDocument) -> list[Mapping[str, Any]]:
         text = str(getattr(paragraph, "text", "") or "").strip()
         if text:
+            if _looks_like_unit_text(text):
+                previous_text = ""
+                continue
             previous_text = text
     return blocks
+def _looks_like_unit_text(text: str) -> bool:
+    """Report whether ``text`` starts with the "단위" label followed by a colon."""
+    # Both the ASCII colon and the full-width colon are accepted.
+    unit_prefixes = ("단위:", "단위：")
+    return any(text.startswith(prefix) for prefix in unit_prefixes)
 def _looks_like_table_header_row(text_rows: list[list[str]]) -> bool:
     if not text_rows:
         return False
@@ -1864,10 +2589,43 @@ def _int_value(value: Any, *, default: int) -> int:
         return default
+def _float_value(value: Any, *, default: float) -> float:
+    """Convert ``value`` to ``float``, returning ``default`` when it cannot be converted."""
+    try:
+        return float(value)
+    # float() raises OverflowError for integers outside the float range;
+    # that case falls back to the default like any other unusable value.
+    except (TypeError, ValueError, OverflowError):
+        return default
+def _optional_number(value: Any) -> int | float | None:
+    """Read ``value`` as a finite number, or return ``None`` when it is not one.
+
+    ``None``, the empty string, values that cannot be converted, and
+    non-finite results (nan, +/-inf) all yield ``None``.  Whole numbers are
+    returned as ``int``; everything else is returned as ``float``.
+    """
+    if value in (None, ""):
+        return None
+    if isinstance(value, int):
+        # Integers (bools included) keep their exact value: a float
+        # round-trip loses precision above 2**53 and raises OverflowError
+        # for values outside the float range.
+        return int(value)
+    try:
+        number = float(value)
+    except (TypeError, ValueError, OverflowError):
+        return None
+    if number.is_integer():
+        return int(number)
+    # is_integer() is False for nan and +/-inf; letting them through would
+    # break later int() conversions of the returned value.
+    if number != number or number in (float("inf"), float("-inf")):
+        return None
+    return number
+def _optional_bool(value: Any) -> bool | None:
+    """Return ``None`` for ``None``; otherwise the truthiness of ``value``."""
+    return None if value is None else bool(value)
+def _optional_str(value: Any) -> str | None:
+    """Stringify ``value``; ``None`` and values whose string form is empty yield ``None``."""
+    if value is None:
+        return None
+    # An empty string is falsy, so ``or`` turns it into None.
+    return str(value) or None
 __all__ = [
     "AUTHORING_REPORT_VERSION",
     "DEFAULT_STYLE_PRESET",
     "DOCUMENT_PLAN_SCHEMA_VERSION",
+    "DOCUMENT_PLAN_V2_SCHEMA_VERSION",
     "DocumentBlock",
     "DocumentPlan",
     "DocumentStylePreset",

python-hwpx 2.10.0__py3-none-any.whl → 2.10.2__py3-none-any.whl

python-hwpx 2.10.0py3-none-any.whl → 2.10.2py3-none-any.whl