python-hwpx 2.10.0__py3-none-any.whl → 2.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hwpx/authoring.py CHANGED
@@ -4,14 +4,45 @@
4
4
  from __future__ import annotations
5
5
 
6
6
  import re
7
+ from ast import literal_eval as _literal
7
8
  from dataclasses import dataclass, field
8
9
  from pathlib import Path
9
10
  from typing import Any, Mapping
10
11
 
12
+ from .builder import (
13
+ Bullet as BuilderBullet,
14
+ Document as BuilderDocument,
15
+ Footer as BuilderFooter,
16
+ Header as BuilderHeader,
17
+ Heading as BuilderHeading,
18
+ Image as BuilderImage,
19
+ Margins as BuilderMargins,
20
+ Metadata as BuilderMetadata,
21
+ NumberedList as BuilderNumberedList,
22
+ PageBreak as BuilderPageBreak,
23
+ PageNumber as BuilderPageNumber,
24
+ PageSize as BuilderPageSize,
25
+ Paragraph as BuilderParagraph,
26
+ Run as BuilderRun,
27
+ Section as BuilderSection,
28
+ Table as BuilderTable,
29
+ )
30
+ from .builder.core import Toc as BuilderToc
11
31
  from .document import HwpxDocument
12
32
  from .tools.package_validator import validate_package
33
+ from .tools.table_cleanup import normalize_cell_text
34
+ from .tools.report_utils import (
35
+ calculate_age,
36
+ calculate_ratios,
37
+ format_delta,
38
+ format_delta_percent,
39
+ format_krw_hangul,
40
+ format_number_commas,
41
+ normalize_korean_date,
42
+ )
13
43
 
14
44
  DOCUMENT_PLAN_SCHEMA_VERSION = "hwpx.document_plan.v1"
45
+ DOCUMENT_PLAN_V2_SCHEMA_VERSION = "hwpx.document_plan.v2"
15
46
  AUTHORING_REPORT_VERSION = "hwpx-authoring-quality-v1"
16
47
  OPERATING_PLAN_QUALITY_VERSION = "operating-plan-quality-v1"
17
48
  DEFAULT_STYLE_PRESET = "standard_korean_business"
@@ -28,11 +59,14 @@ _SUPPORTED_BLOCK_TYPES = frozenset(
28
59
  _SUPPORTED_STYLE_TOKENS = frozenset(
29
60
  {"body", "title", "subtitle", "heading", "bullet", "table_header", "table_cell"}
30
61
  )
62
+ _SUPPORTED_TABLE_PROFILES = frozenset({"government"})
31
63
  _BOOLEAN_QUALITY_GATES = frozenset(
32
64
  {"validatePackage", "validateDocument", "reopen", "visualReviewRequired"}
33
65
  )
34
66
  _INTEGER_QUALITY_GATES = frozenset({"minNonEmptyParagraphs", "minTableCount"})
35
67
  _LIST_QUALITY_GATES = frozenset({"requiredText"})
68
+ _COMPUTED_FIELD_RE = re.compile(r"\{\{\s*(.*?)\s*\}\}")
69
+ _COMPUTED_CALL_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_]*)\s*\((.*)\)$", re.DOTALL)
36
70
 
37
71
 
38
72
  @dataclass(slots=True)
@@ -54,6 +88,7 @@ class DocumentPlan:
54
88
  style_preset: str = DEFAULT_STYLE_PRESET
55
89
  quality_gates: dict[str, Any] = field(default_factory=dict)
56
90
  schema_version: str = DOCUMENT_PLAN_SCHEMA_VERSION
91
+ builder_document: BuilderDocument | None = None
57
92
 
58
93
  def to_dict(self) -> dict[str, Any]:
59
94
  """Return a JSON-serializable representation of this plan."""
@@ -182,6 +217,143 @@ def _plan_validation_report(
182
217
  )
183
218
 
184
219
 
220
# Registry mapping the function names usable inside ``{{ name(args) }}``
# placeholders to the report-utility callables that implement them.
_COMPUTED_FUNCTIONS = dict(
    krw_hangul=format_krw_hangul,
    commas=format_number_commas,
    age=calculate_age,
    delta=format_delta,
    delta_percent=format_delta_percent,
    ratio=calculate_ratios,
    date=normalize_korean_date,
)
229
+
230
+
231
+ class _ComputedFieldError(ValueError):
232
+ def __init__(self, code: str, message: str) -> None:
233
+ super().__init__(message)
234
+ self.code = code
235
+
236
+
237
def replace_computed_fields(text: str) -> str:
    """Expand every ``{{ function(args) }}`` placeholder found in *text*.

    The inner expression of each placeholder is passed to
    ``_evaluate_computed_field`` and its string result is spliced in.

    Raises:
        _ComputedFieldError: if the expanded text still contains ``{{`` or
            ``}}``, or if evaluating a placeholder fails.
    """
    expanded = _COMPUTED_FIELD_RE.sub(
        lambda found: _evaluate_computed_field(found.group(1)),
        text,
    )
    has_leftover_marker = any(marker in expanded for marker in ("{{", "}}"))
    if not has_leftover_marker:
        return expanded
    raise _ComputedFieldError(
        "invalid_computed_field",
        "computed field marker is malformed or unresolved",
    )
250
+
251
+
252
def _evaluate_computed_field(expression: str) -> str:
    """Evaluate one placeholder expression of the form ``name(arg, ...)``.

    The name is looked up in ``_COMPUTED_FUNCTIONS``, the argument text is
    split and parsed into literals, and the call result is returned as ``str``.

    Raises:
        _ComputedFieldError: when the expression is not a call, the function
            name is not registered, an argument cannot be parsed, or the
            function itself raises.
    """
    parsed = _COMPUTED_CALL_RE.match(expression.strip())
    if parsed is None:
        raise _ComputedFieldError(
            "invalid_computed_field",
            f"computed field must be a function call: {expression!r}",
        )

    name = parsed.group(1)
    argument_text = parsed.group(2)
    handler = _COMPUTED_FUNCTIONS.get(name)
    if handler is None:
        raise _ComputedFieldError(
            "unknown_computed_field",
            f"unknown computed field function: {name}",
        )

    # Argument parsing errors propagate as-is; only failures of the call
    # itself are wrapped below.
    arguments = [
        _parse_computed_arg(token)
        for token in _split_computed_args(argument_text)
    ]
    try:
        outcome = handler(*arguments)
        return str(outcome)
    except Exception as exc:
        raise _ComputedFieldError(
            "invalid_computed_field",
            f"computed field failed: {expression!r}",
        ) from exc
274
+
275
+
276
+ def _split_computed_args(raw_args: str) -> list[str]:
277
+ if not raw_args.strip():
278
+ return []
279
+ args: list[str] = []
280
+ start = 0
281
+ quote: str | None = None
282
+ escaped = False
283
+ for index, char in enumerate(raw_args):
284
+ if escaped:
285
+ escaped = False
286
+ continue
287
+ if char == "\\" and quote:
288
+ escaped = True
289
+ continue
290
+ if quote:
291
+ if char == quote:
292
+ quote = None
293
+ continue
294
+ if char in {"'", '"'}:
295
+ quote = char
296
+ continue
297
+ if char == ",":
298
+ args.append(raw_args[start:index].strip())
299
+ start = index + 1
300
+ if quote:
301
+ raise _ComputedFieldError("invalid_computed_field", "unterminated string argument")
302
+ args.append(raw_args[start:].strip())
303
+ return args
304
+
305
+
306
+ def _parse_computed_arg(token: str) -> object:
307
+ if not token:
308
+ raise _ComputedFieldError("invalid_computed_field", "empty computed field argument")
309
+ if token[0] in {"'", '"'}:
310
+ try:
311
+ value = _literal(token)
312
+ except (SyntaxError, ValueError) as exc:
313
+ raise _ComputedFieldError("invalid_computed_field", "invalid string argument") from exc
314
+ if not isinstance(value, str):
315
+ raise _ComputedFieldError("invalid_computed_field", "only string literals are supported")
316
+ return value
317
+ if re.fullmatch(r"[+-]?\d+", token):
318
+ return int(token)
319
+ if re.fullmatch(r"[+-]?\d+\.\d+", token):
320
+ return float(token)
321
+ raise _ComputedFieldError(
322
+ "invalid_computed_field",
323
+ f"unsupported computed field argument: {token!r}",
324
+ )
325
+
326
+
327
+ def _computed_field_issues(text: Any, *, path: str) -> list[PlanValidationIssue]:
328
+ value = str(text or "")
329
+ if "{{" not in value and "}}" not in value:
330
+ return []
331
+ issues: list[PlanValidationIssue] = []
332
+ for match in _COMPUTED_FIELD_RE.finditer(value):
333
+ try:
334
+ _evaluate_computed_field(match.group(1))
335
+ except _ComputedFieldError as exc:
336
+ issues.append(
337
+ _plan_issue(
338
+ exc.code,
339
+ path,
340
+ str(exc),
341
+ suggestion="Use a supported computed function such as krw_hangul, commas, delta, ratio, or date.",
342
+ )
343
+ )
344
+ residue = _COMPUTED_FIELD_RE.sub("", value)
345
+ if "{{" in residue or "}}" in residue:
346
+ issues.append(
347
+ _plan_issue(
348
+ "invalid_computed_field",
349
+ path,
350
+ "computed field marker is malformed or unresolved",
351
+ suggestion="Use balanced computed field delimiters such as {{ commas(1234) }}.",
352
+ )
353
+ )
354
+ return issues
355
+
356
+
185
357
  def _report_plan_issues(report: PlanValidationReport) -> tuple[PlanValidationIssue, ...]:
186
358
  if report.issues:
187
359
  return report.issues
@@ -235,15 +407,20 @@ def validate_document_plan(plan: Mapping[str, Any]) -> PlanValidationReport:
235
407
  )
236
408
 
237
409
  schema_version = str(plan.get("schemaVersion") or "").strip()
238
- if schema_version != DOCUMENT_PLAN_SCHEMA_VERSION:
410
+ if schema_version not in {DOCUMENT_PLAN_SCHEMA_VERSION, DOCUMENT_PLAN_V2_SCHEMA_VERSION}:
239
411
  issues.append(
240
412
  _plan_issue(
241
413
  "invalid_schema_version",
242
414
  "schemaVersion",
243
- f"schemaVersion must be {DOCUMENT_PLAN_SCHEMA_VERSION!r}",
415
+ (
416
+ f"schemaVersion must be {DOCUMENT_PLAN_SCHEMA_VERSION!r} "
417
+ f"or {DOCUMENT_PLAN_V2_SCHEMA_VERSION!r}"
418
+ ),
244
419
  suggestion=f"Set schemaVersion to {DOCUMENT_PLAN_SCHEMA_VERSION!r}.",
245
420
  )
246
421
  )
422
+ elif schema_version == DOCUMENT_PLAN_V2_SCHEMA_VERSION:
423
+ return _validate_document_plan_v2(plan, schema_version=schema_version)
247
424
 
248
425
  title = str(plan.get("title") or "").strip()
249
426
  if not title:
@@ -314,6 +491,20 @@ def normalize_document_plan(plan: Mapping[str, Any] | DocumentPlan) -> DocumentP
314
491
  if not report.ok:
315
492
  raise ValueError("; ".join(report.errors))
316
493
 
494
+ schema_version = str(plan.get("schemaVersion") or "").strip()
495
+ if schema_version == DOCUMENT_PLAN_V2_SCHEMA_VERSION:
496
+ return DocumentPlan(
497
+ schema_version=DOCUMENT_PLAN_V2_SCHEMA_VERSION,
498
+ title="",
499
+ subtitle="",
500
+ metadata={},
501
+ blocks=[],
502
+ style_preset=str(plan.get("stylePreset") or DEFAULT_STYLE_PRESET).strip()
503
+ or DEFAULT_STYLE_PRESET,
504
+ quality_gates=dict(_default_quality_gates() | dict(plan.get("qualityGates") or {})),
505
+ builder_document=_normalize_v2_builder_document(plan),
506
+ )
507
+
317
508
  blocks = [
318
509
  _normalize_block(raw_block, index=index)
319
510
  for index, raw_block in enumerate(plan.get("blocks") or [])
@@ -330,6 +521,165 @@ def normalize_document_plan(plan: Mapping[str, Any] | DocumentPlan) -> DocumentP
330
521
  )
331
522
 
332
523
 
524
def _validate_document_plan_v2(
    plan: Mapping[str, Any],
    *,
    schema_version: str,
) -> PlanValidationReport:
    """Validate a v2 plan: its ``sections`` list, each section, and ``metadata``.

    A missing or empty ``sections`` list ends validation immediately with a
    single issue. Otherwise every section is checked, each block of a valid
    section is validated, and finally ``metadata`` must be a mapping or
    ``None`` when present.
    """
    found: list[PlanValidationIssue] = []
    sections = plan.get("sections")
    if not (isinstance(sections, list) and sections):
        found.append(
            _plan_issue(
                "missing_sections",
                "sections",
                "sections must be a non-empty list",
                suggestion="Add at least one section with a blocks array.",
            )
        )
        return _plan_validation_report(found, schema_version=schema_version)

    for position, section in enumerate(sections):
        where = f"sections[{position}]"
        if not isinstance(section, Mapping):
            found.append(
                _plan_issue(
                    "section_not_object",
                    where,
                    f"{where} must be a mapping",
                    suggestion="Use an object with optional header/footer and a blocks array.",
                )
            )
            continue

        # "children" is only consulted when the "blocks" key is absent.
        section_blocks = section.get("blocks", section.get("children"))
        if isinstance(section_blocks, list) and section_blocks:
            for block_position, block in enumerate(section_blocks):
                found.extend(
                    _validate_v2_block(
                        block,
                        path=f"{where}.blocks[{block_position}]",
                    )
                )
        else:
            found.append(
                _plan_issue(
                    "missing_section_blocks",
                    f"{where}.blocks",
                    f"{where}.blocks must be a non-empty list",
                    suggestion="Add builder blocks such as heading, paragraph, table, image, or page_break.",
                )
            )

    metadata = plan.get("metadata", {})
    if not (metadata is None or isinstance(metadata, Mapping)):
        found.append(
            _plan_issue(
                "invalid_metadata",
                "metadata",
                "metadata must be a mapping when provided",
                suggestion="Use an object with title, author, and organization fields or omit metadata.",
            )
        )
    return _plan_validation_report(found, schema_version=schema_version)
584
+
585
+
586
def _validate_v2_block(raw_block: Any, *, path: str) -> list[PlanValidationIssue]:
    """Validate one v2 section block located at *path*.

    Structural problems (block is not a mapping, unsupported ``type``,
    missing heading text or image path, empty list ``items``, table without
    a ``header`` or ``rows`` list) return a single issue straight away.
    Otherwise the block's text-bearing fields are scanned for computed-field
    placeholder issues.
    """
    if not isinstance(raw_block, Mapping):
        return [
            _plan_issue(
                "block_not_object",
                path,
                f"{path} must be a mapping",
                suggestion="Replace this block with a JSON object containing a supported builder type.",
            )
        ]

    list_kinds = ("bullets", "bullet", "numbered_list", "numberedList")
    known_kinds = (
        "heading",
        "paragraph",
        *list_kinds,
        "table",
        "image",
        "toc",
        "page_break",
        "pageBreak",
    )
    kind = str(raw_block.get("type") or "").strip()
    if kind not in known_kinds:
        return [
            _plan_issue(
                "unsupported_block_type",
                f"{path}.type",
                f"{path}.type is unsupported: {kind!r}",
                suggestion="Use a public builder block type.",
            )
        ]

    # --- structural requirements -------------------------------------
    if kind == "heading" or kind == "image":
        required_key = "text" if kind == "heading" else "path"
        if not str(raw_block.get(required_key) or "").strip():
            return [
                _plan_issue(
                    "missing_text",
                    f"{path}.{required_key}",
                    f"{path}.{required_key} is required",
                    suggestion=f"Add non-empty {required_key}.",
                )
            ]
    if kind in list_kinds and not _string_list(raw_block.get("items")):
        return [
            _plan_issue(
                "missing_list_items",
                f"{path}.items",
                f"{path}.items must be a non-empty list",
                suggestion="Add one or more list items.",
            )
        ]
    if kind == "table":
        table_header = raw_block.get("header")
        table_rows = raw_block.get("rows")
        if not (isinstance(table_header, list) or isinstance(table_rows, list)):
            return [
                _plan_issue(
                    "missing_table_content",
                    path,
                    f"{path} must define header or rows",
                    suggestion="Add a header array or rows array.",
                )
            ]

    # --- computed-field placeholders ---------------------------------
    found: list[PlanValidationIssue] = []
    if kind in ("heading", "paragraph"):
        found += _computed_field_issues(raw_block.get("text"), path=f"{path}.text")
        if kind == "paragraph":
            for child_position, child in enumerate(raw_block.get("children") or []):
                if not isinstance(child, Mapping):
                    continue
                found += _computed_field_issues(
                    child.get("text"),
                    path=f"{path}.children[{child_position}].text",
                )
    elif kind in list_kinds:
        for item_position, item in enumerate(_string_list(raw_block.get("items"))):
            found += _computed_field_issues(item, path=f"{path}.items[{item_position}]")
    elif kind == "table":
        for header_position, header_cell in enumerate(raw_block.get("header") or []):
            found += _computed_field_issues(
                header_cell,
                path=f"{path}.header[{header_position}]",
            )
        for row_position, row in enumerate(raw_block.get("rows") or []):
            if not isinstance(row, (list, tuple)):
                continue
            for column_position, cell in enumerate(row):
                found += _computed_field_issues(
                    cell,
                    path=f"{path}.rows[{row_position}][{column_position}]",
                )
    elif kind == "toc":
        found += _computed_field_issues(raw_block.get("title"), path=f"{path}.title")
        for entry_position, entry in enumerate(raw_block.get("entries") or []):
            if isinstance(entry, Mapping):
                found += _computed_field_issues(
                    entry.get("text"),
                    path=f"{path}.entries[{entry_position}].text",
                )
    return found
681
+
682
+
333
683
  def create_document_from_plan(
334
684
  plan: Mapping[str, Any] | DocumentPlan,
335
685
  *,
@@ -338,6 +688,8 @@ def create_document_from_plan(
338
688
  """Create a formatted HWPX document from a declarative document plan."""
339
689
 
340
690
  normalized = normalize_document_plan(plan)
691
+ if normalized.builder_document is not None:
692
+ return normalized.builder_document.lower()
341
693
  style_preset = (
342
694
  preset
343
695
  if isinstance(preset, DocumentStylePreset)
@@ -345,6 +697,7 @@ def create_document_from_plan(
345
697
  )
346
698
  document = HwpxDocument.new()
347
699
  tokens = style_preset.ensure_tokens(document)
700
+ builder_document = _lower_plan_to_builder_document(normalized)
348
701
 
349
702
  if normalized.title:
350
703
  document.add_paragraph(
@@ -367,7 +720,7 @@ def create_document_from_plan(
367
720
  )
368
721
  _add_key_value_table(document, normalized.metadata, tokens)
369
722
 
370
- for block in normalized.blocks:
723
+ for block in builder_document.sections[0].children:
371
724
  _render_block(document, block, tokens)
372
725
 
373
726
  return document
@@ -564,8 +917,10 @@ def _validate_block(raw_block: Any, *, index: int) -> list[PlanValidationIssue]:
564
917
 
565
918
  if block_type == "heading":
566
919
  issues.extend(_validate_heading_block(raw_block, path=path))
920
+ issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
567
921
  elif block_type == "paragraph":
568
922
  issues.extend(_validate_paragraph_block(raw_block, path=path))
923
+ issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
569
924
  elif block_type == "bullets":
570
925
  items = _string_list(raw_block.get("items") or raw_block.get("bullets"))
571
926
  if not items:
@@ -577,12 +932,48 @@ def _validate_block(raw_block: Any, *, index: int) -> list[PlanValidationIssue]:
577
932
  suggestion="Add a non-empty items array, or use a paragraph block instead.",
578
933
  )
579
934
  )
935
+ for item_index, item in enumerate(items):
936
+ issues.extend(_computed_field_issues(item, path=f"{path}.items[{item_index}]"))
580
937
  elif block_type == "table":
581
938
  column_keys, column_issues = _validate_table_columns(raw_block.get("columns"), path=path)
582
939
  issues.extend(column_issues)
583
940
  issues.extend(_validate_table_rows(raw_block.get("rows"), column_keys, path=path))
941
+ issues.extend(_computed_field_issues(raw_block.get("caption"), path=f"{path}.caption"))
942
+ issues.extend(_computed_field_issues(raw_block.get("unit"), path=f"{path}.unit"))
943
+ table_profile = str(raw_block.get("tableProfile") or "").strip()
944
+ if table_profile and table_profile not in _SUPPORTED_TABLE_PROFILES:
945
+ issues.append(
946
+ _plan_issue(
947
+ "unknown_table_profile",
948
+ f"{path}.tableProfile",
949
+ f"{path}.tableProfile is unknown: {table_profile!r}",
950
+ severity="warning",
951
+ suggestion="Use tableProfile='government' or omit tableProfile.",
952
+ )
953
+ )
954
+ for column_index, column in enumerate(raw_block.get("columns") or []):
955
+ if isinstance(column, Mapping):
956
+ issues.extend(
957
+ _computed_field_issues(
958
+ column.get("label"),
959
+ path=f"{path}.columns[{column_index}].label",
960
+ )
961
+ )
962
+ for row_index, row in enumerate(raw_block.get("rows") or []):
963
+ if isinstance(row, Mapping):
964
+ for key, value in row.items():
965
+ if isinstance(value, Mapping):
966
+ value = value.get("text", value.get("value"))
967
+ issues.extend(
968
+ _computed_field_issues(
969
+ value,
970
+ path=f"{path}.rows[{row_index}].{key}",
971
+ )
972
+ )
584
973
  elif block_type == "memo":
585
974
  issues.extend(_validate_required_text_fields(raw_block, path=path, fields=("text", "memo")))
975
+ issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
976
+ issues.extend(_computed_field_issues(raw_block.get("memo"), path=f"{path}.memo"))
586
977
 
587
978
  return issues
588
979
 
@@ -852,13 +1243,13 @@ def _normalize_block(raw_block: Any, *, index: int) -> DocumentBlock:
852
1243
  if level < 1 or level > 3:
853
1244
  raise ValueError(f"blocks[{index}].level must be between 1 and 3")
854
1245
  text = _required_text(raw_block, "text", index)
855
- return DocumentBlock("heading", {"level": level, "text": text})
1246
+ return DocumentBlock("heading", {"level": level, "text": replace_computed_fields(text)})
856
1247
 
857
1248
  if block_type == "paragraph":
858
1249
  return DocumentBlock(
859
1250
  "paragraph",
860
1251
  {
861
- "text": _required_text(raw_block, "text", index),
1252
+ "text": replace_computed_fields(_required_text(raw_block, "text", index)),
862
1253
  "style": str(raw_block.get("style") or "body").strip() or "body",
863
1254
  },
864
1255
  )
@@ -867,29 +1258,304 @@ def _normalize_block(raw_block: Any, *, index: int) -> DocumentBlock:
867
1258
  items = _string_list(raw_block.get("items") or raw_block.get("bullets"))
868
1259
  if not items:
869
1260
  raise ValueError(f"blocks[{index}].items must be a non-empty list")
870
- return DocumentBlock("bullets", {"items": items})
1261
+ return DocumentBlock(
1262
+ "bullets",
1263
+ {"items": [replace_computed_fields(item) for item in items]},
1264
+ )
871
1265
 
872
1266
  if block_type == "table":
873
1267
  columns = _normalize_columns(raw_block.get("columns"), index=index)
874
1268
  rows = _normalize_rows(raw_block.get("rows"), columns, index=index)
875
- caption = str(raw_block.get("caption") or "").strip()
876
- return DocumentBlock(
877
- "table",
878
- {"caption": caption, "columns": columns, "rows": rows},
879
- )
1269
+ caption = replace_computed_fields(normalize_cell_text(raw_block.get("caption")))
1270
+ unit = replace_computed_fields(normalize_cell_text(raw_block.get("unit")))
1271
+ table_profile = str(raw_block.get("tableProfile") or "").strip()
1272
+ columns = [
1273
+ {**column, "label": replace_computed_fields(normalize_cell_text(column["label"]))}
1274
+ for column in columns
1275
+ ]
1276
+ rows = [
1277
+ {key: replace_computed_fields(value) for key, value in row.items()}
1278
+ for row in rows
1279
+ ]
1280
+ data: dict[str, Any] = {"caption": caption, "columns": columns, "rows": rows}
1281
+ if unit:
1282
+ data["unit"] = unit
1283
+ if table_profile:
1284
+ data["tableProfile"] = table_profile
1285
+ return DocumentBlock("table", data)
880
1286
 
881
1287
  if block_type == "memo":
882
1288
  return DocumentBlock(
883
1289
  "memo",
884
1290
  {
885
- "text": _required_text(raw_block, "text", index),
886
- "memo": _required_text(raw_block, "memo", index),
1291
+ "text": replace_computed_fields(_required_text(raw_block, "text", index)),
1292
+ "memo": replace_computed_fields(_required_text(raw_block, "memo", index)),
887
1293
  },
888
1294
  )
889
1295
 
890
1296
  return DocumentBlock("page_break", {})
891
1297
 
892
1298
 
1299
def _normalize_v2_builder_document(plan: Mapping[str, Any]) -> BuilderDocument:
    """Build a ``BuilderDocument`` from a v2 plan mapping.

    Metadata is attached only when at least one of title, author or
    organization is non-empty; the title falls back to the plan-level
    ``title``. The preset is read from ``preset``, then ``stylePreset``,
    then ``DEFAULT_STYLE_PRESET``.
    """
    raw_metadata = plan.get("metadata") or {}
    document_metadata = None
    if isinstance(raw_metadata, Mapping):
        fields = {
            "title": str(raw_metadata.get("title") or plan.get("title") or "").strip(),
            "author": str(raw_metadata.get("author") or "").strip(),
            "organization": str(raw_metadata.get("organization") or "").strip(),
        }
        if any(fields.values()):
            document_metadata = BuilderMetadata(**fields)

    section_nodes = tuple(
        _normalize_v2_section(section, index=position)
        for position, section in enumerate(plan.get("sections") or [])
    )
    review_flag = _optional_bool(plan.get("visualReviewRequired"))
    preset_name = (
        str(plan.get("preset") or plan.get("stylePreset") or DEFAULT_STYLE_PRESET).strip()
        or DEFAULT_STYLE_PRESET
    )
    return BuilderDocument(
        sections=section_nodes,
        metadata=document_metadata,
        visual_review_required=review_flag,
        preset=preset_name,
    )
1322
+
1323
+
1324
+ def _normalize_v2_section(raw_section: Any, *, index: int) -> BuilderSection:
1325
+ if not isinstance(raw_section, Mapping):
1326
+ raise TypeError(f"sections[{index}] must be a mapping")
1327
+ raw_blocks = raw_section.get("blocks", raw_section.get("children"))
1328
+ return BuilderSection(
1329
+ children=tuple(
1330
+ _normalize_v2_block(raw_block, path=f"sections[{index}].blocks[{block_index}]")
1331
+ for block_index, raw_block in enumerate(raw_blocks or [])
1332
+ ),
1333
+ page=_normalize_v2_page(raw_section.get("page")),
1334
+ margins=_normalize_v2_margins(raw_section.get("margins")),
1335
+ header=_normalize_v2_header_footer(raw_section.get("header"), kind="header"),
1336
+ footer=_normalize_v2_header_footer(raw_section.get("footer"), kind="footer"),
1337
+ )
1338
+
1339
+
1340
+ def _normalize_v2_page(value: Any) -> BuilderPageSize | None:
1341
+ if not isinstance(value, Mapping):
1342
+ return None
1343
+ preset = str(value.get("preset") or "").strip().upper()
1344
+ if preset == "A4":
1345
+ return BuilderPageSize.A4
1346
+ width = _float_value(value.get("widthMm", value.get("width_mm")), default=210)
1347
+ height = _float_value(value.get("heightMm", value.get("height_mm")), default=297)
1348
+ orientation = str(value.get("orientation") or "PORTRAIT").strip() or "PORTRAIT"
1349
+ return BuilderPageSize(width_mm=width, height_mm=height, orientation=orientation)
1350
+
1351
+
1352
+ def _normalize_v2_margins(value: Any) -> BuilderMargins | None:
1353
+ if not isinstance(value, Mapping):
1354
+ return None
1355
+ return BuilderMargins(
1356
+ top_mm=_float_value(value.get("topMm", value.get("top_mm")), default=20),
1357
+ right_mm=_float_value(value.get("rightMm", value.get("right_mm")), default=20),
1358
+ bottom_mm=_float_value(value.get("bottomMm", value.get("bottom_mm")), default=20),
1359
+ left_mm=_float_value(value.get("leftMm", value.get("left_mm")), default=20),
1360
+ header_mm=_float_value(value.get("headerMm", value.get("header_mm")), default=10),
1361
+ footer_mm=_float_value(value.get("footerMm", value.get("footer_mm")), default=10),
1362
+ gutter_mm=_float_value(value.get("gutterMm", value.get("gutter_mm")), default=0),
1363
+ )
1364
+
1365
+
1366
+ def _normalize_v2_header_footer(value: Any, *, kind: str) -> BuilderHeader | BuilderFooter | None:
1367
+ if not isinstance(value, Mapping):
1368
+ return None
1369
+ children = tuple(_normalize_v2_header_footer_child(child) for child in value.get("children") or [])
1370
+ if kind == "header":
1371
+ return BuilderHeader(children=children)
1372
+ return BuilderFooter(children=children)
1373
+
1374
+
1375
+ def _normalize_v2_header_footer_child(value: Any) -> BuilderParagraph | BuilderPageNumber:
1376
+ if not isinstance(value, Mapping):
1377
+ raise TypeError("header/footer children must be mappings")
1378
+ child_type = str(value.get("type") or "paragraph").strip()
1379
+ if child_type == "page_number":
1380
+ return BuilderPageNumber(format=str(value.get("format") or "page"))
1381
+ if child_type != "paragraph":
1382
+ raise ValueError(f"unsupported header/footer child type: {child_type!r}")
1383
+ children = tuple(_normalize_v2_paragraph_child(child) for child in value.get("children") or [])
1384
+ return BuilderParagraph(
1385
+ text=replace_computed_fields(str(value.get("text") or "")),
1386
+ children=children,
1387
+ align=_optional_str(value.get("align")),
1388
+ )
1389
+
1390
+
1391
+ def _normalize_v2_paragraph_child(value: Any) -> BuilderRun | BuilderPageNumber:
1392
+ if not isinstance(value, Mapping):
1393
+ raise TypeError("paragraph children must be mappings")
1394
+ child_type = str(value.get("type") or "run").strip()
1395
+ if child_type == "page_number":
1396
+ return BuilderPageNumber(format=str(value.get("format") or "page"))
1397
+ if child_type != "run":
1398
+ raise ValueError(f"unsupported paragraph child type: {child_type!r}")
1399
+ return BuilderRun(
1400
+ text=replace_computed_fields(str(value.get("text") or "")),
1401
+ bold=bool(value.get("bold", False)),
1402
+ italic=bool(value.get("italic", False)),
1403
+ underline=bool(value.get("underline", False)),
1404
+ color=_optional_str(value.get("color")),
1405
+ font=_optional_str(value.get("font")),
1406
+ size=_optional_number(value.get("size")),
1407
+ highlight=_optional_str(value.get("highlight")),
1408
+ strike=bool(value.get("strike", False)),
1409
+ )
1410
+
1411
+
1412
+ def _normalize_v2_block(raw_block: Any, *, path: str) -> Any:
1413
+ if not isinstance(raw_block, Mapping):
1414
+ raise TypeError(f"{path} must be a mapping")
1415
+ block_type = str(raw_block.get("type") or "").strip()
1416
+ if block_type == "heading":
1417
+ return BuilderHeading(
1418
+ level=_int_value(raw_block.get("level", 1), default=1),
1419
+ text=replace_computed_fields(str(raw_block.get("text") or "")),
1420
+ )
1421
+ if block_type == "paragraph":
1422
+ children = tuple(
1423
+ child
1424
+ for child in (_normalize_v2_paragraph_child(child) for child in raw_block.get("children") or [])
1425
+ if isinstance(child, BuilderRun)
1426
+ )
1427
+ return BuilderParagraph(
1428
+ text=replace_computed_fields(str(raw_block.get("text") or "")),
1429
+ children=children,
1430
+ align=_optional_str(raw_block.get("align")),
1431
+ style=_optional_str(raw_block.get("style")),
1432
+ )
1433
+ if block_type in {"bullets", "bullet"}:
1434
+ return BuilderBullet(
1435
+ items=tuple(replace_computed_fields(item) for item in _string_list(raw_block.get("items"))),
1436
+ level=_int_value(raw_block.get("level", 0), default=0),
1437
+ style=_optional_str(raw_block.get("style")),
1438
+ )
1439
+ if block_type in {"numbered_list", "numberedList"}:
1440
+ return BuilderNumberedList(
1441
+ items=tuple(replace_computed_fields(item) for item in _string_list(raw_block.get("items"))),
1442
+ level=_int_value(raw_block.get("level", 0), default=0),
1443
+ )
1444
+ if block_type == "table":
1445
+ return BuilderTable(
1446
+ header=tuple(replace_computed_fields(str(item)) for item in raw_block.get("header") or ()),
1447
+ rows=tuple(
1448
+ tuple(replace_computed_fields(str(cell)) for cell in row)
1449
+ for row in raw_block.get("rows") or ()
1450
+ ),
1451
+ merges=tuple(str(item) for item in raw_block.get("merges") or ()),
1452
+ header_shading=_optional_str(raw_block.get("headerShading", raw_block.get("header_shading"))),
1453
+ column_widths=tuple(
1454
+ _optional_number(item) or 0
1455
+ for item in raw_block.get("columnWidths", raw_block.get("column_widths")) or ()
1456
+ ),
1457
+ )
1458
+ if block_type == "image":
1459
+ return BuilderImage(
1460
+ path=str(raw_block.get("path") or ""),
1461
+ width_mm=_optional_number(raw_block.get("widthMm", raw_block.get("width_mm"))),
1462
+ align=_optional_str(raw_block.get("align")),
1463
+ caption=(
1464
+ replace_computed_fields(str(raw_block.get("caption")))
1465
+ if raw_block.get("caption") is not None
1466
+ else None
1467
+ ),
1468
+ image_format=_optional_str(raw_block.get("imageFormat", raw_block.get("image_format"))),
1469
+ )
1470
+ if block_type == "toc":
1471
+ return BuilderToc(
1472
+ title=replace_computed_fields(str(raw_block.get("title") or "목차")),
1473
+ entries=tuple(
1474
+ {**entry, "text": replace_computed_fields(str(entry.get("text") or ""))}
1475
+ for entry in raw_block.get("entries") or ()
1476
+ if isinstance(entry, Mapping)
1477
+ ),
1478
+ )
1479
+ if block_type in {"page_break", "pageBreak"}:
1480
+ return BuilderPageBreak()
1481
+ raise ValueError(f"{path}.type is unsupported: {block_type!r}")
1482
+
1483
+
1484
def _lower_plan_to_builder_document(plan: DocumentPlan) -> BuilderDocument:
    """Lower a normalized document plan to builder nodes.

    A plan that already carries a ``builder_document`` is returned through
    unchanged. Otherwise each plan block is lowered with
    ``_block_to_builder_nodes`` and the resulting nodes are placed, in
    order, into a single section. Title and metadata framing are not
    produced here.
    """
    existing = plan.builder_document
    if existing is not None:
        return existing

    lowered = [
        node
        for block in plan.blocks
        for node in _block_to_builder_nodes(block)
    ]
    body_section = BuilderSection(children=tuple(lowered))
    return BuilderDocument(sections=(body_section,))
1499
+
1500
+
1501
def _block_to_builder_nodes(block: DocumentBlock) -> tuple[Any, ...]:
    """Lower one plan block into a tuple of builder nodes.

    A table may expand to up to three nodes: an optional caption paragraph
    (style "heading"), the table itself, and an optional unit paragraph
    (style "body"). A memo block is passed through as-is.

    Raises:
        ValueError: if ``block.type`` is not one of the handled types.
    """
    kind = block.type
    data = block.data

    if kind == "heading":
        heading = BuilderHeading(level=int(data["level"]), text=str(data["text"]))
        return (heading,)

    if kind == "paragraph":
        paragraph = BuilderParagraph(
            text=str(data["text"]),
            style=str(data.get("style") or "body"),
        )
        return (paragraph,)

    if kind == "bullets":
        return (BuilderBullet(items=tuple(str(item) for item in data["items"])),)

    if kind == "table":
        columns = list(data["columns"])
        rows = list(data["rows"])
        lowered: list[Any] = []

        caption_text = str(data.get("caption") or "").strip()
        if caption_text:
            lowered.append(BuilderParagraph(text=caption_text, style="heading"))

        header_labels = tuple(str(column["label"]) for column in columns)
        body_cells = tuple(
            tuple(str(row.get(column["key"], "")) for column in columns)
            for row in rows
        )
        lowered.append(
            BuilderTable(
                header=header_labels,
                rows=body_cells,
                column_widths=tuple(_plan_table_column_widths(columns)),
            )
        )

        unit_text = str(data.get("unit") or "").strip()
        if unit_text:
            lowered.append(BuilderParagraph(text=unit_text, style="body"))
        return tuple(lowered)

    if kind == "memo":
        # Memo blocks have no builder equivalent here; keep the plan block.
        return (block,)

    if kind == "page_break":
        return (BuilderPageBreak(),)

    raise ValueError(f"unsupported block type: {kind!r}")
1544
+
1545
+
1546
+ def _plan_table_column_widths(columns: list[dict[str, Any]]) -> list[int]:
1547
+ total = sum(max(int(column.get("widthWeight", 1)), 1) for column in columns)
1548
+ if total <= 0:
1549
+ return []
1550
+ widths = [
1551
+ round(_DEFAULT_TABLE_WIDTH * max(int(column.get("widthWeight", 1)), 1) / total)
1552
+ for column in columns
1553
+ ]
1554
+ if widths:
1555
+ widths[-1] += _DEFAULT_TABLE_WIDTH - sum(widths)
1556
+ return widths
1557
+
1558
+
893
1559
  def _normalize_columns(value: Any, *, index: int) -> list[dict[str, Any]]:
894
1560
  if not isinstance(value, list) or not value:
895
1561
  raise ValueError(f"blocks[{index}].columns must be a non-empty list")
@@ -904,7 +1570,7 @@ def _normalize_columns(value: Any, *, index: int) -> list[dict[str, Any]]:
904
1570
  if key in seen:
905
1571
  raise ValueError(f"blocks[{index}].columns contains duplicate key: {key!r}")
906
1572
  seen.add(key)
907
- label = str(raw_column.get("label") or key).strip()
1573
+ label = normalize_cell_text(raw_column.get("label") or key)
908
1574
  width_weight = _int_value(raw_column.get("widthWeight", 1), default=1)
909
1575
  columns.append(
910
1576
  {
@@ -929,42 +1595,51 @@ def _normalize_rows(
929
1595
  for row_index, raw_row in enumerate(value):
930
1596
  if not isinstance(raw_row, Mapping):
931
1597
  raise ValueError(f"blocks[{index}].rows[{row_index}] must be a mapping")
932
- rows.append({key: str(raw_row.get(key, "")) for key in column_keys})
1598
+ rows.append({key: _normalize_table_cell_value(raw_row.get(key, "")) for key in column_keys})
933
1599
  return rows
934
1600
 
935
1601
 
1602
def _normalize_table_cell_value(value: Any) -> str:
    """Return the text for one table cell of a document plan.

    ``value`` is either a plain cell value or a mapping that carries the
    text under ``"text"`` (falling back to ``"value"``, then to ``""``) and
    an optional ``"preserveWhitespace"`` flag.

    Unless that flag is truthy, the text is passed through
    ``normalize_cell_text``. With the flag set, the text is returned
    verbatim via ``str``.
    """
    if not isinstance(value, Mapping):
        return normalize_cell_text(value)
    text = value.get("text", value.get("value", ""))
    if value.get("preserveWhitespace", False):
        # Only None means "no text". A plain truthiness test would also blank
        # out legitimate falsy cell values such as 0 or False.
        return "" if text is None else str(text)
    return normalize_cell_text(text)
1609
+
1610
+
936
1611
  def _render_block(
937
1612
  document: HwpxDocument,
938
- block: DocumentBlock,
1613
+ block: Any,
939
1614
  tokens: Mapping[str, str],
940
1615
  ) -> None:
941
- if block.type == "heading":
1616
+ if isinstance(block, BuilderHeading):
942
1617
  document.add_paragraph(
943
- str(block.data["text"]),
1618
+ block.text,
944
1619
  char_pr_id_ref=tokens["heading"],
945
1620
  inherit_style=False,
946
1621
  )
947
1622
  return
948
- if block.type == "paragraph":
949
- style = str(block.data.get("style") or "body")
1623
+ if isinstance(block, BuilderParagraph):
1624
+ style = str(block.style or "body")
950
1625
  document.add_paragraph(
951
- str(block.data["text"]),
1626
+ block.text,
952
1627
  char_pr_id_ref=tokens.get(style, tokens["body"]),
953
1628
  inherit_style=False,
954
1629
  )
955
1630
  return
956
- if block.type == "bullets":
957
- for item in block.data["items"]:
1631
+ if isinstance(block, BuilderBullet):
1632
+ for item in block.items:
958
1633
  document.add_paragraph(
959
1634
  f"• {item}",
960
1635
  char_pr_id_ref=tokens["bullet"],
961
1636
  inherit_style=False,
962
1637
  )
963
1638
  return
964
- if block.type == "table":
965
- _add_plan_table(document, block.data, tokens)
1639
+ if isinstance(block, BuilderTable):
1640
+ _add_builder_table(document, block, tokens)
966
1641
  return
967
- if block.type == "memo":
1642
+ if isinstance(block, DocumentBlock) and block.type == "memo":
968
1643
  paragraph = document.add_paragraph(
969
1644
  str(block.data["text"]),
970
1645
  char_pr_id_ref=tokens["body"],
@@ -972,8 +1647,10 @@ def _render_block(
972
1647
  )
973
1648
  document.add_memo_with_anchor(str(block.data["memo"]), paragraph=paragraph)
974
1649
  return
975
- if block.type == "page_break":
1650
+ if isinstance(block, BuilderPageBreak):
976
1651
  document.add_paragraph("", pageBreak="1", inherit_style=False)
1652
+ return
1653
+ raise ValueError(f"unsupported builder block: {type(block).__name__}")
977
1654
 
978
1655
 
979
1656
  def _add_key_value_table(
@@ -1040,6 +1717,46 @@ def _add_plan_table(
1040
1717
  )
1041
1718
 
1042
1719
 
1720
def _add_builder_table(
    document: HwpxDocument,
    table_node: BuilderTable,
    tokens: Mapping[str, str],
) -> None:
    """Render a builder table node into ``document``.

    A non-empty header fills row 0 using the ``table_header`` token, and the
    body rows follow using the ``table_cell`` token. When the node carries
    ``column_widths``, every cell whose column has a width entry is resized
    to that width.

    Args:
        document: Target document; the table is created with ``add_table``.
        table_node: Node providing ``header``, ``rows`` and ``column_widths``.
        tokens: Character-property ids; ``table_cell`` and ``table_header``
            are read.

    Raises:
        ValueError: If the node has neither a header nor any body row.
    """
    header = list(table_node.header)
    body = [list(row) for row in table_node.rows]
    # Reserve a header row only when a header exists. Otherwise body rows
    # start at index 0 and an always-reserved row would be left blank at the
    # end of the table.
    grid = ([header] if header else []) + body
    if not grid:
        raise ValueError("table must contain a header or at least one row")

    column_count = max(len(row) for row in grid)
    table = document.add_table(
        len(grid),
        column_count,
        width=_DEFAULT_TABLE_WIDTH,
        char_pr_id_ref=tokens["table_cell"],
    )

    widths = table_node.column_widths
    if widths:
        for table_row in table.rows:
            # zip stops at the shorter side, so columns without a width
            # entry keep the size they were created with.
            for cell, width in zip(table_row.cells, widths):
                cell.set_size(width=int(width))

    for col_index, label in enumerate(header):
        _set_table_cell_text(
            table,
            0,
            col_index,
            str(label),
            char_pr_id_ref=tokens["table_header"],
        )

    row_offset = 1 if header else 0
    for row_index, row in enumerate(body, start=row_offset):
        for col_index, value in enumerate(row):
            _set_table_cell_text(
                table,
                row_index,
                col_index,
                str(value),
                char_pr_id_ref=tokens["table_cell"],
            )
1758
+
1759
+
1043
1760
  def _set_table_cell_text(
1044
1761
  table: Any,
1045
1762
  row_index: int,
@@ -1545,6 +2262,7 @@ def _table_block_text(block: Mapping[str, Any]) -> str:
1545
2262
  for row in block.get("rows", []):
1546
2263
  if isinstance(row, Mapping):
1547
2264
  parts.extend(str(value) for value in row.values())
2265
+ parts.append(str(block.get("unit") or ""))
1548
2266
  return "\n".join(parts)
1549
2267
 
1550
2268
 
@@ -1608,10 +2326,17 @@ def _document_table_blocks(document: HwpxDocument) -> list[Mapping[str, Any]]:
1608
2326
 
1609
2327
  text = str(getattr(paragraph, "text", "") or "").strip()
1610
2328
  if text:
2329
+ if _looks_like_unit_text(text):
2330
+ previous_text = ""
2331
+ continue
1611
2332
  previous_text = text
1612
2333
  return blocks
1613
2334
 
1614
2335
 
2336
def _looks_like_unit_text(text: str) -> bool:
    """Return True when ``text`` starts with the unit label "단위" and a colon.

    Both the ASCII colon and the full-width colon (U+FF1A) are accepted. The
    full-width form is written as an escape so the two prefixes cannot
    collapse into the same string when the file is normalized.
    """
    return text.startswith(("단위:", "단위\uff1a"))
2338
+
2339
+
1615
2340
  def _looks_like_table_header_row(text_rows: list[list[str]]) -> bool:
1616
2341
  if not text_rows:
1617
2342
  return False
@@ -1864,10 +2589,43 @@ def _int_value(value: Any, *, default: int) -> int:
1864
2589
  return default
1865
2590
 
1866
2591
 
2592
def _float_value(value: Any, *, default: float) -> float:
    """Coerce ``value`` to ``float``, returning ``default`` when that fails.

    Args:
        value: Anything ``float()`` accepts.
        default: Returned when ``value`` cannot be converted.
    """
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers integers too large for a float (e.g. 10**400),
        # which would otherwise escape the fall-back-to-default contract.
        return default
2597
+
2598
+
2599
+ def _optional_number(value: Any) -> int | float | None:
2600
+ if value in (None, ""):
2601
+ return None
2602
+ try:
2603
+ number = float(value)
2604
+ except (TypeError, ValueError):
2605
+ return None
2606
+ if number.is_integer():
2607
+ return int(number)
2608
+ return number
2609
+
2610
+
2611
+ def _optional_bool(value: Any) -> bool | None:
2612
+ if value is None:
2613
+ return None
2614
+ return bool(value)
2615
+
2616
+
2617
+ def _optional_str(value: Any) -> str | None:
2618
+ if value is None:
2619
+ return None
2620
+ text = str(value)
2621
+ return text if text else None
2622
+
2623
+
1867
2624
  __all__ = [
1868
2625
  "AUTHORING_REPORT_VERSION",
1869
2626
  "DEFAULT_STYLE_PRESET",
1870
2627
  "DOCUMENT_PLAN_SCHEMA_VERSION",
2628
+ "DOCUMENT_PLAN_V2_SCHEMA_VERSION",
1871
2629
  "DocumentBlock",
1872
2630
  "DocumentPlan",
1873
2631
  "DocumentStylePreset",