python-hwpx 2.10.0__py3-none-any.whl → 2.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwpx/authoring.py +785 -27
- hwpx/builder/core.py +267 -59
- hwpx/builder/report.py +32 -0
- hwpx/document.py +6 -8
- hwpx/opc/package.py +5 -1
- hwpx/oxml/body.py +336 -27
- hwpx/oxml/document.py +65 -37
- hwpx/oxml/namespaces.py +142 -11
- hwpx/oxml/parser.py +7 -0
- hwpx/tools/generic_inventory.py +156 -0
- hwpx/tools/id_integrity.py +275 -0
- hwpx/tools/report_parser.py +135 -0
- hwpx/tools/report_utils.py +132 -0
- hwpx/tools/roundtrip_diff.py +50 -0
- hwpx/tools/table_cleanup.py +61 -0
- hwpx/tools/text_extractor.py +33 -25
- {python_hwpx-2.10.0.dist-info → python_hwpx-2.10.1.dist-info}/METADATA +1 -1
- {python_hwpx-2.10.0.dist-info → python_hwpx-2.10.1.dist-info}/RECORD +23 -17
- {python_hwpx-2.10.0.dist-info → python_hwpx-2.10.1.dist-info}/licenses/NOTICE +8 -0
- {python_hwpx-2.10.0.dist-info → python_hwpx-2.10.1.dist-info}/WHEEL +0 -0
- {python_hwpx-2.10.0.dist-info → python_hwpx-2.10.1.dist-info}/entry_points.txt +0 -0
- {python_hwpx-2.10.0.dist-info → python_hwpx-2.10.1.dist-info}/licenses/LICENSE +0 -0
- {python_hwpx-2.10.0.dist-info → python_hwpx-2.10.1.dist-info}/top_level.txt +0 -0
hwpx/authoring.py
CHANGED
|
@@ -4,14 +4,45 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import re
|
|
7
|
+
from ast import literal_eval as _literal
|
|
7
8
|
from dataclasses import dataclass, field
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
from typing import Any, Mapping
|
|
10
11
|
|
|
12
|
+
from .builder import (
|
|
13
|
+
Bullet as BuilderBullet,
|
|
14
|
+
Document as BuilderDocument,
|
|
15
|
+
Footer as BuilderFooter,
|
|
16
|
+
Header as BuilderHeader,
|
|
17
|
+
Heading as BuilderHeading,
|
|
18
|
+
Image as BuilderImage,
|
|
19
|
+
Margins as BuilderMargins,
|
|
20
|
+
Metadata as BuilderMetadata,
|
|
21
|
+
NumberedList as BuilderNumberedList,
|
|
22
|
+
PageBreak as BuilderPageBreak,
|
|
23
|
+
PageNumber as BuilderPageNumber,
|
|
24
|
+
PageSize as BuilderPageSize,
|
|
25
|
+
Paragraph as BuilderParagraph,
|
|
26
|
+
Run as BuilderRun,
|
|
27
|
+
Section as BuilderSection,
|
|
28
|
+
Table as BuilderTable,
|
|
29
|
+
)
|
|
30
|
+
from .builder.core import Toc as BuilderToc
|
|
11
31
|
from .document import HwpxDocument
|
|
12
32
|
from .tools.package_validator import validate_package
|
|
33
|
+
from .tools.table_cleanup import normalize_cell_text
|
|
34
|
+
from .tools.report_utils import (
|
|
35
|
+
calculate_age,
|
|
36
|
+
calculate_ratios,
|
|
37
|
+
format_delta,
|
|
38
|
+
format_delta_percent,
|
|
39
|
+
format_krw_hangul,
|
|
40
|
+
format_number_commas,
|
|
41
|
+
normalize_korean_date,
|
|
42
|
+
)
|
|
13
43
|
|
|
14
44
|
DOCUMENT_PLAN_SCHEMA_VERSION = "hwpx.document_plan.v1"
|
|
45
|
+
DOCUMENT_PLAN_V2_SCHEMA_VERSION = "hwpx.document_plan.v2"
|
|
15
46
|
AUTHORING_REPORT_VERSION = "hwpx-authoring-quality-v1"
|
|
16
47
|
OPERATING_PLAN_QUALITY_VERSION = "operating-plan-quality-v1"
|
|
17
48
|
DEFAULT_STYLE_PRESET = "standard_korean_business"
|
|
@@ -28,11 +59,14 @@ _SUPPORTED_BLOCK_TYPES = frozenset(
|
|
|
28
59
|
_SUPPORTED_STYLE_TOKENS = frozenset(
|
|
29
60
|
{"body", "title", "subtitle", "heading", "bullet", "table_header", "table_cell"}
|
|
30
61
|
)
|
|
62
|
+
_SUPPORTED_TABLE_PROFILES = frozenset({"government"})
|
|
31
63
|
_BOOLEAN_QUALITY_GATES = frozenset(
|
|
32
64
|
{"validatePackage", "validateDocument", "reopen", "visualReviewRequired"}
|
|
33
65
|
)
|
|
34
66
|
_INTEGER_QUALITY_GATES = frozenset({"minNonEmptyParagraphs", "minTableCount"})
|
|
35
67
|
_LIST_QUALITY_GATES = frozenset({"requiredText"})
|
|
68
|
+
_COMPUTED_FIELD_RE = re.compile(r"\{\{\s*(.*?)\s*\}\}")
|
|
69
|
+
_COMPUTED_CALL_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_]*)\s*\((.*)\)$", re.DOTALL)
|
|
36
70
|
|
|
37
71
|
|
|
38
72
|
@dataclass(slots=True)
|
|
@@ -54,6 +88,7 @@ class DocumentPlan:
|
|
|
54
88
|
style_preset: str = DEFAULT_STYLE_PRESET
|
|
55
89
|
quality_gates: dict[str, Any] = field(default_factory=dict)
|
|
56
90
|
schema_version: str = DOCUMENT_PLAN_SCHEMA_VERSION
|
|
91
|
+
builder_document: BuilderDocument | None = None
|
|
57
92
|
|
|
58
93
|
def to_dict(self) -> dict[str, Any]:
|
|
59
94
|
"""Return a JSON-serializable representation of this plan."""
|
|
@@ -182,6 +217,143 @@ def _plan_validation_report(
|
|
|
182
217
|
)
|
|
183
218
|
|
|
184
219
|
|
|
220
|
+
# Allow-list of helpers that may be called from ``{{ name(args) }}``
# placeholders. A placeholder naming anything outside this table is rejected
# by ``_evaluate_computed_field`` as an unknown computed field function.
_COMPUTED_FUNCTIONS = dict(
    krw_hangul=format_krw_hangul,
    commas=format_number_commas,
    age=calculate_age,
    delta=format_delta,
    delta_percent=format_delta_percent,
    ratio=calculate_ratios,
    date=normalize_korean_date,
)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
class _ComputedFieldError(ValueError):
|
|
232
|
+
def __init__(self, code: str, message: str) -> None:
|
|
233
|
+
super().__init__(message)
|
|
234
|
+
self.code = code
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def replace_computed_fields(text: str) -> str:
    """Expand every ``{{ function(args) }}`` placeholder found in *text*.

    Each placeholder body is handed to ``_evaluate_computed_field`` and the
    returned string is substituted in place.

    Raises:
        _ComputedFieldError: If a placeholder cannot be evaluated, or if a
            ``{{`` or ``}}`` marker is still present after substitution.
    """
    rendered = _COMPUTED_FIELD_RE.sub(
        lambda found: _evaluate_computed_field(found.group(1)),
        text,
    )
    # Any delimiter left over means an unbalanced or otherwise unmatched marker.
    if "{{" not in rendered and "}}" not in rendered:
        return rendered
    raise _ComputedFieldError(
        "invalid_computed_field",
        "computed field marker is malformed or unresolved",
    )
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _evaluate_computed_field(expression: str) -> str:
    """Evaluate one placeholder body such as ``commas(1234)``.

    The expression must be a single call to a name registered in
    ``_COMPUTED_FUNCTIONS``; its arguments are split and parsed as literals
    before the function is invoked. The result is returned as ``str``.

    Raises:
        _ComputedFieldError: If the expression is not a call, names an
            unregistered function, has an unparsable argument, or the
            function itself raises.
    """
    parsed = _COMPUTED_CALL_RE.match(expression.strip())
    if parsed is None:
        raise _ComputedFieldError(
            "invalid_computed_field",
            f"computed field must be a function call: {expression!r}",
        )

    function_name = parsed.group(1)
    argument_text = parsed.group(2)
    handler = _COMPUTED_FUNCTIONS.get(function_name)
    if handler is None:
        raise _ComputedFieldError(
            "unknown_computed_field",
            f"unknown computed field function: {function_name}",
        )

    # Argument parsing raises _ComputedFieldError on its own, so it stays
    # outside the broad handler below.
    values = list(map(_parse_computed_arg, _split_computed_args(argument_text)))
    try:
        outcome = handler(*values)
        return str(outcome)
    except Exception as exc:
        # Any failure inside the helper is reported as an invalid field,
        # keeping the original exception as the cause.
        raise _ComputedFieldError(
            "invalid_computed_field",
            f"computed field failed: {expression!r}",
        ) from exc
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _split_computed_args(raw_args: str) -> list[str]:
|
|
277
|
+
if not raw_args.strip():
|
|
278
|
+
return []
|
|
279
|
+
args: list[str] = []
|
|
280
|
+
start = 0
|
|
281
|
+
quote: str | None = None
|
|
282
|
+
escaped = False
|
|
283
|
+
for index, char in enumerate(raw_args):
|
|
284
|
+
if escaped:
|
|
285
|
+
escaped = False
|
|
286
|
+
continue
|
|
287
|
+
if char == "\\" and quote:
|
|
288
|
+
escaped = True
|
|
289
|
+
continue
|
|
290
|
+
if quote:
|
|
291
|
+
if char == quote:
|
|
292
|
+
quote = None
|
|
293
|
+
continue
|
|
294
|
+
if char in {"'", '"'}:
|
|
295
|
+
quote = char
|
|
296
|
+
continue
|
|
297
|
+
if char == ",":
|
|
298
|
+
args.append(raw_args[start:index].strip())
|
|
299
|
+
start = index + 1
|
|
300
|
+
if quote:
|
|
301
|
+
raise _ComputedFieldError("invalid_computed_field", "unterminated string argument")
|
|
302
|
+
args.append(raw_args[start:].strip())
|
|
303
|
+
return args
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _parse_computed_arg(token: str) -> object:
|
|
307
|
+
if not token:
|
|
308
|
+
raise _ComputedFieldError("invalid_computed_field", "empty computed field argument")
|
|
309
|
+
if token[0] in {"'", '"'}:
|
|
310
|
+
try:
|
|
311
|
+
value = _literal(token)
|
|
312
|
+
except (SyntaxError, ValueError) as exc:
|
|
313
|
+
raise _ComputedFieldError("invalid_computed_field", "invalid string argument") from exc
|
|
314
|
+
if not isinstance(value, str):
|
|
315
|
+
raise _ComputedFieldError("invalid_computed_field", "only string literals are supported")
|
|
316
|
+
return value
|
|
317
|
+
if re.fullmatch(r"[+-]?\d+", token):
|
|
318
|
+
return int(token)
|
|
319
|
+
if re.fullmatch(r"[+-]?\d+\.\d+", token):
|
|
320
|
+
return float(token)
|
|
321
|
+
raise _ComputedFieldError(
|
|
322
|
+
"invalid_computed_field",
|
|
323
|
+
f"unsupported computed field argument: {token!r}",
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _computed_field_issues(text: Any, *, path: str) -> list[PlanValidationIssue]:
    """Report computed-field problems found in one plan text value.

    Falsy values are treated as empty text. Each placeholder is evaluated and
    every failure becomes an issue at *path*; one further issue is added when
    stray ``{{`` or ``}}`` markers remain once all placeholders are removed.
    """
    content = str(text or "")
    if not ("{{" in content or "}}" in content):
        return []

    found: list[PlanValidationIssue] = []
    for expression in (hit.group(1) for hit in _COMPUTED_FIELD_RE.finditer(content)):
        try:
            _evaluate_computed_field(expression)
        except _ComputedFieldError as error:
            found.append(
                _plan_issue(
                    error.code,
                    path,
                    str(error),
                    suggestion="Use a supported computed function such as krw_hangul, commas, delta, ratio, or date.",
                )
            )

    # Whatever delimiters survive placeholder removal are unbalanced markers.
    leftover = _COMPUTED_FIELD_RE.sub("", content)
    if "{{" in leftover or "}}" in leftover:
        found.append(
            _plan_issue(
                "invalid_computed_field",
                path,
                "computed field marker is malformed or unresolved",
                suggestion="Use balanced computed field delimiters such as {{ commas(1234) }}.",
            )
        )
    return found
|
|
355
|
+
|
|
356
|
+
|
|
185
357
|
def _report_plan_issues(report: PlanValidationReport) -> tuple[PlanValidationIssue, ...]:
|
|
186
358
|
if report.issues:
|
|
187
359
|
return report.issues
|
|
@@ -235,15 +407,20 @@ def validate_document_plan(plan: Mapping[str, Any]) -> PlanValidationReport:
|
|
|
235
407
|
)
|
|
236
408
|
|
|
237
409
|
schema_version = str(plan.get("schemaVersion") or "").strip()
|
|
238
|
-
if schema_version
|
|
410
|
+
if schema_version not in {DOCUMENT_PLAN_SCHEMA_VERSION, DOCUMENT_PLAN_V2_SCHEMA_VERSION}:
|
|
239
411
|
issues.append(
|
|
240
412
|
_plan_issue(
|
|
241
413
|
"invalid_schema_version",
|
|
242
414
|
"schemaVersion",
|
|
243
|
-
|
|
415
|
+
(
|
|
416
|
+
f"schemaVersion must be {DOCUMENT_PLAN_SCHEMA_VERSION!r} "
|
|
417
|
+
f"or {DOCUMENT_PLAN_V2_SCHEMA_VERSION!r}"
|
|
418
|
+
),
|
|
244
419
|
suggestion=f"Set schemaVersion to {DOCUMENT_PLAN_SCHEMA_VERSION!r}.",
|
|
245
420
|
)
|
|
246
421
|
)
|
|
422
|
+
elif schema_version == DOCUMENT_PLAN_V2_SCHEMA_VERSION:
|
|
423
|
+
return _validate_document_plan_v2(plan, schema_version=schema_version)
|
|
247
424
|
|
|
248
425
|
title = str(plan.get("title") or "").strip()
|
|
249
426
|
if not title:
|
|
@@ -314,6 +491,20 @@ def normalize_document_plan(plan: Mapping[str, Any] | DocumentPlan) -> DocumentP
|
|
|
314
491
|
if not report.ok:
|
|
315
492
|
raise ValueError("; ".join(report.errors))
|
|
316
493
|
|
|
494
|
+
schema_version = str(plan.get("schemaVersion") or "").strip()
|
|
495
|
+
if schema_version == DOCUMENT_PLAN_V2_SCHEMA_VERSION:
|
|
496
|
+
return DocumentPlan(
|
|
497
|
+
schema_version=DOCUMENT_PLAN_V2_SCHEMA_VERSION,
|
|
498
|
+
title="",
|
|
499
|
+
subtitle="",
|
|
500
|
+
metadata={},
|
|
501
|
+
blocks=[],
|
|
502
|
+
style_preset=str(plan.get("stylePreset") or DEFAULT_STYLE_PRESET).strip()
|
|
503
|
+
or DEFAULT_STYLE_PRESET,
|
|
504
|
+
quality_gates=dict(_default_quality_gates() | dict(plan.get("qualityGates") or {})),
|
|
505
|
+
builder_document=_normalize_v2_builder_document(plan),
|
|
506
|
+
)
|
|
507
|
+
|
|
317
508
|
blocks = [
|
|
318
509
|
_normalize_block(raw_block, index=index)
|
|
319
510
|
for index, raw_block in enumerate(plan.get("blocks") or [])
|
|
@@ -330,6 +521,165 @@ def normalize_document_plan(plan: Mapping[str, Any] | DocumentPlan) -> DocumentP
|
|
|
330
521
|
)
|
|
331
522
|
|
|
332
523
|
|
|
524
|
+
def _validate_document_plan_v2(
    plan: Mapping[str, Any],
    *,
    schema_version: str,
) -> PlanValidationReport:
    """Validate a v2 document plan and return the resulting report.

    Checks that ``sections`` is a non-empty list of mappings, that each
    section has a non-empty ``blocks`` (or ``children``) list whose entries
    pass ``_validate_v2_block``, that section ``header``/``footer``
    definitions can be normalized, and that ``metadata`` is a mapping when
    provided.

    Args:
        plan: The raw plan mapping.
        schema_version: Schema version string recorded on the report.

    Returns:
        A report built from all collected issues. When ``sections`` is
        missing or empty the report contains only that issue.
    """
    issues: list[PlanValidationIssue] = []
    sections = plan.get("sections")
    if not isinstance(sections, list) or not sections:
        issues.append(
            _plan_issue(
                "missing_sections",
                "sections",
                "sections must be a non-empty list",
                suggestion="Add at least one section with a blocks array.",
            )
        )
        return _plan_validation_report(issues, schema_version=schema_version)

    for section_index, raw_section in enumerate(sections):
        section_path = f"sections[{section_index}]"
        if not isinstance(raw_section, Mapping):
            issues.append(
                _plan_issue(
                    "section_not_object",
                    section_path,
                    f"{section_path} must be a mapping",
                    suggestion="Use an object with optional header/footer and a blocks array.",
                )
            )
            continue

        # Header/footer children are normalized with helpers that raise
        # TypeError/ValueError on bad input, so report those problems here
        # instead of letting a "valid" plan fail later with a raw exception.
        for kind in ("header", "footer"):
            issues.extend(
                _validate_v2_header_footer(
                    raw_section.get(kind),
                    path=f"{section_path}.{kind}",
                )
            )

        blocks = raw_section.get("blocks", raw_section.get("children"))
        if not isinstance(blocks, list) or not blocks:
            issues.append(
                _plan_issue(
                    "missing_section_blocks",
                    f"{section_path}.blocks",
                    f"{section_path}.blocks must be a non-empty list",
                    suggestion="Add builder blocks such as heading, paragraph, table, image, or page_break.",
                )
            )
            continue
        for block_index, raw_block in enumerate(blocks):
            issues.extend(
                _validate_v2_block(
                    raw_block,
                    path=f"{section_path}.blocks[{block_index}]",
                )
            )

    metadata = plan.get("metadata", {})
    if metadata is not None and not isinstance(metadata, Mapping):
        issues.append(
            _plan_issue(
                "invalid_metadata",
                "metadata",
                "metadata must be a mapping when provided",
                suggestion="Use an object with title, author, and organization fields or omit metadata.",
            )
        )
    return _plan_validation_report(issues, schema_version=schema_version)


def _validate_v2_header_footer(value: Any, *, path: str) -> list[PlanValidationIssue]:
    """Collect issues for one section ``header`` or ``footer`` definition.

    Mirrors the checks that ``_normalize_v2_header_footer_child`` and
    ``_normalize_v2_paragraph_child`` enforce by raising, and also validates
    computed-field placeholders in paragraph and run text.
    """
    if not isinstance(value, Mapping):
        # Normalization ignores non-mapping header/footer values entirely.
        return []

    children = value.get("children") or []
    if not isinstance(children, (list, tuple)):
        return [
            _plan_issue(
                "invalid_header_footer_children",
                f"{path}.children",
                f"{path}.children must be a list",
                suggestion="Use an array of paragraph or page_number objects.",
            )
        ]

    issues: list[PlanValidationIssue] = []
    for child_index, child in enumerate(children):
        child_path = f"{path}.children[{child_index}]"
        if not isinstance(child, Mapping):
            issues.append(
                _plan_issue(
                    "header_footer_child_not_object",
                    child_path,
                    f"{child_path} must be a mapping",
                    suggestion="Use a paragraph or page_number object.",
                )
            )
            continue
        # A missing type defaults to "paragraph", matching normalization.
        child_type = str(child.get("type") or "paragraph").strip()
        if child_type == "page_number":
            continue
        if child_type != "paragraph":
            issues.append(
                _plan_issue(
                    "unsupported_header_footer_child_type",
                    f"{child_path}.type",
                    f"{child_path}.type is unsupported: {child_type!r}",
                    suggestion="Use type 'paragraph' or 'page_number'.",
                )
            )
            continue

        issues.extend(_computed_field_issues(child.get("text"), path=f"{child_path}.text"))
        runs = child.get("children") or []
        if not isinstance(runs, (list, tuple)):
            issues.append(
                _plan_issue(
                    "invalid_paragraph_children",
                    f"{child_path}.children",
                    f"{child_path}.children must be a list",
                    suggestion="Use an array of run or page_number objects.",
                )
            )
            continue
        for run_index, run in enumerate(runs):
            run_path = f"{child_path}.children[{run_index}]"
            if not isinstance(run, Mapping):
                issues.append(
                    _plan_issue(
                        "paragraph_child_not_object",
                        run_path,
                        f"{run_path} must be a mapping",
                        suggestion="Use a run or page_number object.",
                    )
                )
                continue
            # A missing type defaults to "run", matching normalization.
            run_type = str(run.get("type") or "run").strip()
            if run_type == "page_number":
                continue
            if run_type != "run":
                issues.append(
                    _plan_issue(
                        "unsupported_paragraph_child_type",
                        f"{run_path}.type",
                        f"{run_path}.type is unsupported: {run_type!r}",
                        suggestion="Use type 'run' or 'page_number'.",
                    )
                )
                continue
            issues.extend(_computed_field_issues(run.get("text"), path=f"{run_path}.text"))
    return issues
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
def _validate_v2_block(raw_block: Any, *, path: str) -> list[PlanValidationIssue]:
    """Collect validation issues for a single v2 builder block.

    Args:
        raw_block: One entry taken from a section's block list.
        path: Plan path of the block, used in issue paths and messages.

    Returns:
        The issues found. Structural problems (non-mapping block, unsupported
        type, missing required content) return immediately with one issue;
        otherwise text fields are checked for computed-field problems and
        paragraph children are checked for shape and type.
    """
    if not isinstance(raw_block, Mapping):
        return [
            _plan_issue(
                "block_not_object",
                path,
                f"{path} must be a mapping",
                suggestion="Replace this block with a JSON object containing a supported builder type.",
            )
        ]
    block_type = str(raw_block.get("type") or "").strip()
    supported = {
        "heading",
        "paragraph",
        "bullets",
        "bullet",
        "numbered_list",
        "numberedList",
        "table",
        "image",
        "toc",
        "page_break",
        "pageBreak",
    }
    if block_type not in supported:
        return [
            _plan_issue(
                "unsupported_block_type",
                f"{path}.type",
                f"{path}.type is unsupported: {block_type!r}",
                suggestion="Use a public builder block type.",
            )
        ]
    if block_type in {"heading", "image"}:
        # Headings require text; images require a path.
        text_key = "text" if block_type == "heading" else "path"
        if not str(raw_block.get(text_key) or "").strip():
            return [
                _plan_issue(
                    "missing_text",
                    f"{path}.{text_key}",
                    f"{path}.{text_key} is required",
                    suggestion=f"Add non-empty {text_key}.",
                )
            ]
    if block_type in {"bullets", "bullet", "numbered_list", "numberedList"}:
        if not _string_list(raw_block.get("items")):
            return [
                _plan_issue(
                    "missing_list_items",
                    f"{path}.items",
                    f"{path}.items must be a non-empty list",
                    suggestion="Add one or more list items.",
                )
            ]
    if block_type == "table":
        header = raw_block.get("header")
        rows = raw_block.get("rows")
        if not isinstance(header, list) and not isinstance(rows, list):
            return [
                _plan_issue(
                    "missing_table_content",
                    path,
                    f"{path} must define header or rows",
                    suggestion="Add a header array or rows array.",
                )
            ]

    issues: list[PlanValidationIssue] = []
    if block_type == "heading":
        issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
    elif block_type == "paragraph":
        issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
        issues.extend(
            _validate_v2_paragraph_children(
                raw_block.get("children"),
                path=f"{path}.children",
            )
        )
    elif block_type in {"bullets", "bullet", "numbered_list", "numberedList"}:
        for item_index, item in enumerate(_string_list(raw_block.get("items"))):
            issues.extend(_computed_field_issues(item, path=f"{path}.items[{item_index}]"))
    elif block_type == "table":
        for header_index, header_value in enumerate(raw_block.get("header") or []):
            issues.extend(_computed_field_issues(header_value, path=f"{path}.header[{header_index}]"))
        for row_index, row in enumerate(raw_block.get("rows") or []):
            if isinstance(row, (list, tuple)):
                for col_index, value in enumerate(row):
                    issues.extend(_computed_field_issues(value, path=f"{path}.rows[{row_index}][{col_index}]"))
    elif block_type == "toc":
        issues.extend(_computed_field_issues(raw_block.get("title"), path=f"{path}.title"))
        for entry_index, entry in enumerate(raw_block.get("entries") or []):
            if isinstance(entry, Mapping):
                issues.extend(_computed_field_issues(entry.get("text"), path=f"{path}.entries[{entry_index}].text"))
    return issues


def _validate_v2_paragraph_children(children: Any, *, path: str) -> list[PlanValidationIssue]:
    """Collect issues for the ``children`` value of a v2 paragraph block.

    ``_normalize_v2_paragraph_child`` raises ``TypeError`` for non-mapping
    children and ``ValueError`` for child types other than ``run`` and
    ``page_number``. Those cases are reported here so they become plan issues
    rather than exceptions during normalization. A non-list value is reported
    as well, instead of being iterated.
    """
    if not children:
        return []
    if not isinstance(children, (list, tuple)):
        return [
            _plan_issue(
                "invalid_paragraph_children",
                path,
                f"{path} must be a list",
                suggestion="Use an array of run objects or omit children.",
            )
        ]

    issues: list[PlanValidationIssue] = []
    for child_index, child in enumerate(children):
        child_path = f"{path}[{child_index}]"
        if not isinstance(child, Mapping):
            issues.append(
                _plan_issue(
                    "paragraph_child_not_object",
                    child_path,
                    f"{child_path} must be a mapping",
                    suggestion="Use a run or page_number object.",
                )
            )
            continue
        # A missing type defaults to "run", matching normalization.
        child_type = str(child.get("type") or "run").strip()
        if child_type not in {"run", "page_number"}:
            issues.append(
                _plan_issue(
                    "unsupported_paragraph_child_type",
                    f"{child_path}.type",
                    f"{child_path}.type is unsupported: {child_type!r}",
                    suggestion="Use type 'run' or 'page_number'.",
                )
            )
        # Text is checked for every mapping child, whatever its type.
        issues.extend(_computed_field_issues(child.get("text"), path=f"{child_path}.text"))
    return issues
|
|
681
|
+
|
|
682
|
+
|
|
333
683
|
def create_document_from_plan(
|
|
334
684
|
plan: Mapping[str, Any] | DocumentPlan,
|
|
335
685
|
*,
|
|
@@ -338,6 +688,8 @@ def create_document_from_plan(
|
|
|
338
688
|
"""Create a formatted HWPX document from a declarative document plan."""
|
|
339
689
|
|
|
340
690
|
normalized = normalize_document_plan(plan)
|
|
691
|
+
if normalized.builder_document is not None:
|
|
692
|
+
return normalized.builder_document.lower()
|
|
341
693
|
style_preset = (
|
|
342
694
|
preset
|
|
343
695
|
if isinstance(preset, DocumentStylePreset)
|
|
@@ -345,6 +697,7 @@ def create_document_from_plan(
|
|
|
345
697
|
)
|
|
346
698
|
document = HwpxDocument.new()
|
|
347
699
|
tokens = style_preset.ensure_tokens(document)
|
|
700
|
+
builder_document = _lower_plan_to_builder_document(normalized)
|
|
348
701
|
|
|
349
702
|
if normalized.title:
|
|
350
703
|
document.add_paragraph(
|
|
@@ -367,7 +720,7 @@ def create_document_from_plan(
|
|
|
367
720
|
)
|
|
368
721
|
_add_key_value_table(document, normalized.metadata, tokens)
|
|
369
722
|
|
|
370
|
-
for block in
|
|
723
|
+
for block in builder_document.sections[0].children:
|
|
371
724
|
_render_block(document, block, tokens)
|
|
372
725
|
|
|
373
726
|
return document
|
|
@@ -564,8 +917,10 @@ def _validate_block(raw_block: Any, *, index: int) -> list[PlanValidationIssue]:
|
|
|
564
917
|
|
|
565
918
|
if block_type == "heading":
|
|
566
919
|
issues.extend(_validate_heading_block(raw_block, path=path))
|
|
920
|
+
issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
|
|
567
921
|
elif block_type == "paragraph":
|
|
568
922
|
issues.extend(_validate_paragraph_block(raw_block, path=path))
|
|
923
|
+
issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
|
|
569
924
|
elif block_type == "bullets":
|
|
570
925
|
items = _string_list(raw_block.get("items") or raw_block.get("bullets"))
|
|
571
926
|
if not items:
|
|
@@ -577,12 +932,48 @@ def _validate_block(raw_block: Any, *, index: int) -> list[PlanValidationIssue]:
|
|
|
577
932
|
suggestion="Add a non-empty items array, or use a paragraph block instead.",
|
|
578
933
|
)
|
|
579
934
|
)
|
|
935
|
+
for item_index, item in enumerate(items):
|
|
936
|
+
issues.extend(_computed_field_issues(item, path=f"{path}.items[{item_index}]"))
|
|
580
937
|
elif block_type == "table":
|
|
581
938
|
column_keys, column_issues = _validate_table_columns(raw_block.get("columns"), path=path)
|
|
582
939
|
issues.extend(column_issues)
|
|
583
940
|
issues.extend(_validate_table_rows(raw_block.get("rows"), column_keys, path=path))
|
|
941
|
+
issues.extend(_computed_field_issues(raw_block.get("caption"), path=f"{path}.caption"))
|
|
942
|
+
issues.extend(_computed_field_issues(raw_block.get("unit"), path=f"{path}.unit"))
|
|
943
|
+
table_profile = str(raw_block.get("tableProfile") or "").strip()
|
|
944
|
+
if table_profile and table_profile not in _SUPPORTED_TABLE_PROFILES:
|
|
945
|
+
issues.append(
|
|
946
|
+
_plan_issue(
|
|
947
|
+
"unknown_table_profile",
|
|
948
|
+
f"{path}.tableProfile",
|
|
949
|
+
f"{path}.tableProfile is unknown: {table_profile!r}",
|
|
950
|
+
severity="warning",
|
|
951
|
+
suggestion="Use tableProfile='government' or omit tableProfile.",
|
|
952
|
+
)
|
|
953
|
+
)
|
|
954
|
+
for column_index, column in enumerate(raw_block.get("columns") or []):
|
|
955
|
+
if isinstance(column, Mapping):
|
|
956
|
+
issues.extend(
|
|
957
|
+
_computed_field_issues(
|
|
958
|
+
column.get("label"),
|
|
959
|
+
path=f"{path}.columns[{column_index}].label",
|
|
960
|
+
)
|
|
961
|
+
)
|
|
962
|
+
for row_index, row in enumerate(raw_block.get("rows") or []):
|
|
963
|
+
if isinstance(row, Mapping):
|
|
964
|
+
for key, value in row.items():
|
|
965
|
+
if isinstance(value, Mapping):
|
|
966
|
+
value = value.get("text", value.get("value"))
|
|
967
|
+
issues.extend(
|
|
968
|
+
_computed_field_issues(
|
|
969
|
+
value,
|
|
970
|
+
path=f"{path}.rows[{row_index}].{key}",
|
|
971
|
+
)
|
|
972
|
+
)
|
|
584
973
|
elif block_type == "memo":
|
|
585
974
|
issues.extend(_validate_required_text_fields(raw_block, path=path, fields=("text", "memo")))
|
|
975
|
+
issues.extend(_computed_field_issues(raw_block.get("text"), path=f"{path}.text"))
|
|
976
|
+
issues.extend(_computed_field_issues(raw_block.get("memo"), path=f"{path}.memo"))
|
|
586
977
|
|
|
587
978
|
return issues
|
|
588
979
|
|
|
@@ -852,13 +1243,13 @@ def _normalize_block(raw_block: Any, *, index: int) -> DocumentBlock:
|
|
|
852
1243
|
if level < 1 or level > 3:
|
|
853
1244
|
raise ValueError(f"blocks[{index}].level must be between 1 and 3")
|
|
854
1245
|
text = _required_text(raw_block, "text", index)
|
|
855
|
-
return DocumentBlock("heading", {"level": level, "text": text})
|
|
1246
|
+
return DocumentBlock("heading", {"level": level, "text": replace_computed_fields(text)})
|
|
856
1247
|
|
|
857
1248
|
if block_type == "paragraph":
|
|
858
1249
|
return DocumentBlock(
|
|
859
1250
|
"paragraph",
|
|
860
1251
|
{
|
|
861
|
-
"text": _required_text(raw_block, "text", index),
|
|
1252
|
+
"text": replace_computed_fields(_required_text(raw_block, "text", index)),
|
|
862
1253
|
"style": str(raw_block.get("style") or "body").strip() or "body",
|
|
863
1254
|
},
|
|
864
1255
|
)
|
|
@@ -867,29 +1258,304 @@ def _normalize_block(raw_block: Any, *, index: int) -> DocumentBlock:
|
|
|
867
1258
|
items = _string_list(raw_block.get("items") or raw_block.get("bullets"))
|
|
868
1259
|
if not items:
|
|
869
1260
|
raise ValueError(f"blocks[{index}].items must be a non-empty list")
|
|
870
|
-
return DocumentBlock(
|
|
1261
|
+
return DocumentBlock(
|
|
1262
|
+
"bullets",
|
|
1263
|
+
{"items": [replace_computed_fields(item) for item in items]},
|
|
1264
|
+
)
|
|
871
1265
|
|
|
872
1266
|
if block_type == "table":
|
|
873
1267
|
columns = _normalize_columns(raw_block.get("columns"), index=index)
|
|
874
1268
|
rows = _normalize_rows(raw_block.get("rows"), columns, index=index)
|
|
875
|
-
caption =
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
1269
|
+
caption = replace_computed_fields(normalize_cell_text(raw_block.get("caption")))
|
|
1270
|
+
unit = replace_computed_fields(normalize_cell_text(raw_block.get("unit")))
|
|
1271
|
+
table_profile = str(raw_block.get("tableProfile") or "").strip()
|
|
1272
|
+
columns = [
|
|
1273
|
+
{**column, "label": replace_computed_fields(normalize_cell_text(column["label"]))}
|
|
1274
|
+
for column in columns
|
|
1275
|
+
]
|
|
1276
|
+
rows = [
|
|
1277
|
+
{key: replace_computed_fields(value) for key, value in row.items()}
|
|
1278
|
+
for row in rows
|
|
1279
|
+
]
|
|
1280
|
+
data: dict[str, Any] = {"caption": caption, "columns": columns, "rows": rows}
|
|
1281
|
+
if unit:
|
|
1282
|
+
data["unit"] = unit
|
|
1283
|
+
if table_profile:
|
|
1284
|
+
data["tableProfile"] = table_profile
|
|
1285
|
+
return DocumentBlock("table", data)
|
|
880
1286
|
|
|
881
1287
|
if block_type == "memo":
|
|
882
1288
|
return DocumentBlock(
|
|
883
1289
|
"memo",
|
|
884
1290
|
{
|
|
885
|
-
"text": _required_text(raw_block, "text", index),
|
|
886
|
-
"memo": _required_text(raw_block, "memo", index),
|
|
1291
|
+
"text": replace_computed_fields(_required_text(raw_block, "text", index)),
|
|
1292
|
+
"memo": replace_computed_fields(_required_text(raw_block, "memo", index)),
|
|
887
1293
|
},
|
|
888
1294
|
)
|
|
889
1295
|
|
|
890
1296
|
return DocumentBlock("page_break", {})
|
|
891
1297
|
|
|
892
1298
|
|
|
1299
|
+
def _normalize_v2_builder_document(plan: Mapping[str, Any]) -> BuilderDocument:
    """Build the builder document described by a v2 plan mapping.

    Metadata is attached only when at least one of title, author, or
    organization is non-empty after trimming; the plan-level ``title`` is the
    fallback for a missing metadata title.
    """
    raw_metadata = plan.get("metadata") or {}
    document_metadata = None
    if isinstance(raw_metadata, Mapping):
        fields = {
            "title": str(raw_metadata.get("title") or plan.get("title") or "").strip(),
            "author": str(raw_metadata.get("author") or "").strip(),
            "organization": str(raw_metadata.get("organization") or "").strip(),
        }
        if any(fields.values()):
            document_metadata = BuilderMetadata(**fields)

    normalized_sections = tuple(
        _normalize_v2_section(section, index=position)
        for position, section in enumerate(plan.get("sections") or [])
    )
    review_flag = _optional_bool(plan.get("visualReviewRequired"))
    # ``preset`` wins over ``stylePreset``; a blank result falls back to the default.
    preset_name = str(
        plan.get("preset") or plan.get("stylePreset") or DEFAULT_STYLE_PRESET
    ).strip()
    return BuilderDocument(
        sections=normalized_sections,
        metadata=document_metadata,
        visual_review_required=review_flag,
        preset=preset_name or DEFAULT_STYLE_PRESET,
    )
|
|
1322
|
+
|
|
1323
|
+
|
|
1324
|
+
def _normalize_v2_section(raw_section: Any, *, index: int) -> BuilderSection:
    """Convert one raw v2 section mapping into a ``BuilderSection``.

    Blocks are read from ``blocks`` (or ``children`` when ``blocks`` is
    absent); page, margins, header and footer go through their normalizers.

    Raises:
        TypeError: If *raw_section* is not a mapping.
    """
    if not isinstance(raw_section, Mapping):
        raise TypeError(f"sections[{index}] must be a mapping")

    block_source = raw_section.get("blocks", raw_section.get("children")) or []
    normalized_blocks = tuple(
        _normalize_v2_block(entry, path=f"sections[{index}].blocks[{position}]")
        for position, entry in enumerate(block_source)
    )
    page = _normalize_v2_page(raw_section.get("page"))
    margins = _normalize_v2_margins(raw_section.get("margins"))
    header = _normalize_v2_header_footer(raw_section.get("header"), kind="header")
    footer = _normalize_v2_header_footer(raw_section.get("footer"), kind="footer")
    return BuilderSection(
        children=normalized_blocks,
        page=page,
        margins=margins,
        header=header,
        footer=footer,
    )
|
|
1338
|
+
|
|
1339
|
+
|
|
1340
|
+
def _normalize_v2_page(value: Any) -> BuilderPageSize | None:
    """Return the page size described by *value*, or ``None`` if not a mapping.

    A ``preset`` of ``A4`` (compared after trimming and upper-casing) selects
    ``BuilderPageSize.A4``. Otherwise width, height and orientation are read
    with defaults of 210, 297 and ``PORTRAIT``.
    """
    if isinstance(value, Mapping):
        preset_name = str(value.get("preset") or "").strip().upper()
        if preset_name == "A4":
            return BuilderPageSize.A4
        # Both camelCase and snake_case keys are accepted; camelCase wins.
        return BuilderPageSize(
            width_mm=_float_value(value.get("widthMm", value.get("width_mm")), default=210),
            height_mm=_float_value(value.get("heightMm", value.get("height_mm")), default=297),
            orientation=str(value.get("orientation") or "PORTRAIT").strip() or "PORTRAIT",
        )
    return None
|
|
1350
|
+
|
|
1351
|
+
|
|
1352
|
+
def _normalize_v2_margins(value: Any) -> BuilderMargins | None:
    """Return the margins described by *value*, or ``None`` if not a mapping.

    Each margin is looked up under its camelCase key first and its snake_case
    key second, then passed through ``_float_value`` with the listed default.
    """
    if not isinstance(value, Mapping):
        return None

    # (builder field, camelCase key, snake_case key, default)
    margin_spec = (
        ("top_mm", "topMm", "top_mm", 20),
        ("right_mm", "rightMm", "right_mm", 20),
        ("bottom_mm", "bottomMm", "bottom_mm", 20),
        ("left_mm", "leftMm", "left_mm", 20),
        ("header_mm", "headerMm", "header_mm", 10),
        ("footer_mm", "footerMm", "footer_mm", 10),
        ("gutter_mm", "gutterMm", "gutter_mm", 0),
    )
    resolved = {}
    for field_name, camel_key, snake_key, fallback in margin_spec:
        resolved[field_name] = _float_value(
            value.get(camel_key, value.get(snake_key)),
            default=fallback,
        )
    return BuilderMargins(**resolved)
|
|
1364
|
+
|
|
1365
|
+
|
|
1366
|
+
def _normalize_v2_header_footer(value: Any, *, kind: str) -> BuilderHeader | BuilderFooter | None:
    """Build a header or footer from *value*, or ``None`` if not a mapping.

    A *kind* of ``"header"`` yields a ``BuilderHeader``; any other value
    yields a ``BuilderFooter``. Children are normalized one by one.
    """
    if not isinstance(value, Mapping):
        return None
    normalized_children = tuple(
        map(_normalize_v2_header_footer_child, value.get("children") or [])
    )
    factory = BuilderHeader if kind == "header" else BuilderFooter
    return factory(children=normalized_children)
|
|
1373
|
+
|
|
1374
|
+
|
|
1375
|
+
def _normalize_v2_header_footer_child(value: Any) -> BuilderParagraph | BuilderPageNumber:
    """Convert one header/footer child mapping into a builder node.

    The child ``type`` defaults to ``paragraph``. ``page_number`` children
    become a ``BuilderPageNumber``; paragraphs have their text expanded
    through ``replace_computed_fields`` and their own children normalized.

    Raises:
        TypeError: If *value* is not a mapping.
        ValueError: If the child type is neither ``paragraph`` nor
            ``page_number``.
    """
    if not isinstance(value, Mapping):
        raise TypeError("header/footer children must be mappings")

    kind_name = str(value.get("type") or "paragraph").strip()
    if kind_name == "page_number":
        return BuilderPageNumber(format=str(value.get("format") or "page"))
    if kind_name == "paragraph":
        runs = tuple(map(_normalize_v2_paragraph_child, value.get("children") or []))
        return BuilderParagraph(
            text=replace_computed_fields(str(value.get("text") or "")),
            children=runs,
            align=_optional_str(value.get("align")),
        )
    raise ValueError(f"unsupported header/footer child type: {kind_name!r}")
|
|
1389
|
+
|
|
1390
|
+
|
|
1391
|
+
def _normalize_v2_paragraph_child(value: Any) -> BuilderRun | BuilderPageNumber:
    """Convert one paragraph child mapping into a run or page-number node.

    The child ``type`` defaults to ``run``. Run text is expanded through
    ``replace_computed_fields``; boolean flags are coerced with ``bool`` and
    default to ``False``.

    Raises:
        TypeError: If *value* is not a mapping.
        ValueError: If the child type is neither ``run`` nor ``page_number``.
    """
    if not isinstance(value, Mapping):
        raise TypeError("paragraph children must be mappings")

    kind_name = str(value.get("type") or "run").strip()
    if kind_name == "page_number":
        return BuilderPageNumber(format=str(value.get("format") or "page"))
    if kind_name == "run":
        return BuilderRun(
            text=replace_computed_fields(str(value.get("text") or "")),
            bold=bool(value.get("bold", False)),
            italic=bool(value.get("italic", False)),
            underline=bool(value.get("underline", False)),
            color=_optional_str(value.get("color")),
            font=_optional_str(value.get("font")),
            size=_optional_number(value.get("size")),
            highlight=_optional_str(value.get("highlight")),
            strike=bool(value.get("strike", False)),
        )
    raise ValueError(f"unsupported paragraph child type: {kind_name!r}")
|
|
1410
|
+
|
|
1411
|
+
|
|
1412
|
+
def _normalize_v2_block(raw_block: Any, *, path: str) -> Any:
|
|
1413
|
+
if not isinstance(raw_block, Mapping):
|
|
1414
|
+
raise TypeError(f"{path} must be a mapping")
|
|
1415
|
+
block_type = str(raw_block.get("type") or "").strip()
|
|
1416
|
+
if block_type == "heading":
|
|
1417
|
+
return BuilderHeading(
|
|
1418
|
+
level=_int_value(raw_block.get("level", 1), default=1),
|
|
1419
|
+
text=replace_computed_fields(str(raw_block.get("text") or "")),
|
|
1420
|
+
)
|
|
1421
|
+
if block_type == "paragraph":
|
|
1422
|
+
children = tuple(
|
|
1423
|
+
child
|
|
1424
|
+
for child in (_normalize_v2_paragraph_child(child) for child in raw_block.get("children") or [])
|
|
1425
|
+
if isinstance(child, BuilderRun)
|
|
1426
|
+
)
|
|
1427
|
+
return BuilderParagraph(
|
|
1428
|
+
text=replace_computed_fields(str(raw_block.get("text") or "")),
|
|
1429
|
+
children=children,
|
|
1430
|
+
align=_optional_str(raw_block.get("align")),
|
|
1431
|
+
style=_optional_str(raw_block.get("style")),
|
|
1432
|
+
)
|
|
1433
|
+
if block_type in {"bullets", "bullet"}:
|
|
1434
|
+
return BuilderBullet(
|
|
1435
|
+
items=tuple(replace_computed_fields(item) for item in _string_list(raw_block.get("items"))),
|
|
1436
|
+
level=_int_value(raw_block.get("level", 0), default=0),
|
|
1437
|
+
style=_optional_str(raw_block.get("style")),
|
|
1438
|
+
)
|
|
1439
|
+
if block_type in {"numbered_list", "numberedList"}:
|
|
1440
|
+
return BuilderNumberedList(
|
|
1441
|
+
items=tuple(replace_computed_fields(item) for item in _string_list(raw_block.get("items"))),
|
|
1442
|
+
level=_int_value(raw_block.get("level", 0), default=0),
|
|
1443
|
+
)
|
|
1444
|
+
if block_type == "table":
|
|
1445
|
+
return BuilderTable(
|
|
1446
|
+
header=tuple(replace_computed_fields(str(item)) for item in raw_block.get("header") or ()),
|
|
1447
|
+
rows=tuple(
|
|
1448
|
+
tuple(replace_computed_fields(str(cell)) for cell in row)
|
|
1449
|
+
for row in raw_block.get("rows") or ()
|
|
1450
|
+
),
|
|
1451
|
+
merges=tuple(str(item) for item in raw_block.get("merges") or ()),
|
|
1452
|
+
header_shading=_optional_str(raw_block.get("headerShading", raw_block.get("header_shading"))),
|
|
1453
|
+
column_widths=tuple(
|
|
1454
|
+
_optional_number(item) or 0
|
|
1455
|
+
for item in raw_block.get("columnWidths", raw_block.get("column_widths")) or ()
|
|
1456
|
+
),
|
|
1457
|
+
)
|
|
1458
|
+
if block_type == "image":
|
|
1459
|
+
return BuilderImage(
|
|
1460
|
+
path=str(raw_block.get("path") or ""),
|
|
1461
|
+
width_mm=_optional_number(raw_block.get("widthMm", raw_block.get("width_mm"))),
|
|
1462
|
+
align=_optional_str(raw_block.get("align")),
|
|
1463
|
+
caption=(
|
|
1464
|
+
replace_computed_fields(str(raw_block.get("caption")))
|
|
1465
|
+
if raw_block.get("caption") is not None
|
|
1466
|
+
else None
|
|
1467
|
+
),
|
|
1468
|
+
image_format=_optional_str(raw_block.get("imageFormat", raw_block.get("image_format"))),
|
|
1469
|
+
)
|
|
1470
|
+
if block_type == "toc":
|
|
1471
|
+
return BuilderToc(
|
|
1472
|
+
title=replace_computed_fields(str(raw_block.get("title") or "목차")),
|
|
1473
|
+
entries=tuple(
|
|
1474
|
+
{**entry, "text": replace_computed_fields(str(entry.get("text") or ""))}
|
|
1475
|
+
for entry in raw_block.get("entries") or ()
|
|
1476
|
+
if isinstance(entry, Mapping)
|
|
1477
|
+
),
|
|
1478
|
+
)
|
|
1479
|
+
if block_type in {"page_break", "pageBreak"}:
|
|
1480
|
+
return BuilderPageBreak()
|
|
1481
|
+
raise ValueError(f"{path}.type is unsupported: {block_type!r}")
|
|
1482
|
+
|
|
1483
|
+
|
|
1484
|
+
def _lower_plan_to_builder_document(plan: DocumentPlan) -> BuilderDocument:
|
|
1485
|
+
"""Lower a normalized document plan to builder nodes.
|
|
1486
|
+
|
|
1487
|
+
v1 authoring keeps its historical title, metadata, style-token, and memo
|
|
1488
|
+
rendering contracts, so this helper lowers the body blocks into public
|
|
1489
|
+
builder nodes while ``create_document_from_plan`` supplies the existing
|
|
1490
|
+
document-level framing.
|
|
1491
|
+
"""
|
|
1492
|
+
|
|
1493
|
+
if plan.builder_document is not None:
|
|
1494
|
+
return plan.builder_document
|
|
1495
|
+
children: list[Any] = []
|
|
1496
|
+
for block in plan.blocks:
|
|
1497
|
+
children.extend(_block_to_builder_nodes(block))
|
|
1498
|
+
return BuilderDocument(sections=(BuilderSection(children=tuple(children)),))
|
|
1499
|
+
|
|
1500
|
+
|
|
1501
|
+
def _block_to_builder_nodes(block: DocumentBlock) -> tuple[Any, ...]:
|
|
1502
|
+
if block.type == "heading":
|
|
1503
|
+
return (
|
|
1504
|
+
BuilderHeading(
|
|
1505
|
+
level=int(block.data["level"]),
|
|
1506
|
+
text=str(block.data["text"]),
|
|
1507
|
+
),
|
|
1508
|
+
)
|
|
1509
|
+
if block.type == "paragraph":
|
|
1510
|
+
return (
|
|
1511
|
+
BuilderParagraph(
|
|
1512
|
+
text=str(block.data["text"]),
|
|
1513
|
+
style=str(block.data.get("style") or "body"),
|
|
1514
|
+
),
|
|
1515
|
+
)
|
|
1516
|
+
if block.type == "bullets":
|
|
1517
|
+
return (BuilderBullet(items=tuple(str(item) for item in block.data["items"])),)
|
|
1518
|
+
if block.type == "table":
|
|
1519
|
+
columns = list(block.data["columns"])
|
|
1520
|
+
rows = list(block.data["rows"])
|
|
1521
|
+
nodes: list[Any] = []
|
|
1522
|
+
caption = str(block.data.get("caption") or "").strip()
|
|
1523
|
+
if caption:
|
|
1524
|
+
nodes.append(BuilderParagraph(text=caption, style="heading"))
|
|
1525
|
+
nodes.append(
|
|
1526
|
+
BuilderTable(
|
|
1527
|
+
header=tuple(str(column["label"]) for column in columns),
|
|
1528
|
+
rows=tuple(
|
|
1529
|
+
tuple(str(row.get(column["key"], "")) for column in columns)
|
|
1530
|
+
for row in rows
|
|
1531
|
+
),
|
|
1532
|
+
column_widths=tuple(_plan_table_column_widths(columns)),
|
|
1533
|
+
),
|
|
1534
|
+
)
|
|
1535
|
+
unit = str(block.data.get("unit") or "").strip()
|
|
1536
|
+
if unit:
|
|
1537
|
+
nodes.append(BuilderParagraph(text=unit, style="body"))
|
|
1538
|
+
return tuple(nodes)
|
|
1539
|
+
if block.type == "memo":
|
|
1540
|
+
return (block,)
|
|
1541
|
+
if block.type == "page_break":
|
|
1542
|
+
return (BuilderPageBreak(),)
|
|
1543
|
+
raise ValueError(f"unsupported block type: {block.type!r}")
|
|
1544
|
+
|
|
1545
|
+
|
|
1546
|
+
def _plan_table_column_widths(columns: list[dict[str, Any]]) -> list[int]:
|
|
1547
|
+
total = sum(max(int(column.get("widthWeight", 1)), 1) for column in columns)
|
|
1548
|
+
if total <= 0:
|
|
1549
|
+
return []
|
|
1550
|
+
widths = [
|
|
1551
|
+
round(_DEFAULT_TABLE_WIDTH * max(int(column.get("widthWeight", 1)), 1) / total)
|
|
1552
|
+
for column in columns
|
|
1553
|
+
]
|
|
1554
|
+
if widths:
|
|
1555
|
+
widths[-1] += _DEFAULT_TABLE_WIDTH - sum(widths)
|
|
1556
|
+
return widths
|
|
1557
|
+
|
|
1558
|
+
|
|
893
1559
|
def _normalize_columns(value: Any, *, index: int) -> list[dict[str, Any]]:
|
|
894
1560
|
if not isinstance(value, list) or not value:
|
|
895
1561
|
raise ValueError(f"blocks[{index}].columns must be a non-empty list")
|
|
@@ -904,7 +1570,7 @@ def _normalize_columns(value: Any, *, index: int) -> list[dict[str, Any]]:
|
|
|
904
1570
|
if key in seen:
|
|
905
1571
|
raise ValueError(f"blocks[{index}].columns contains duplicate key: {key!r}")
|
|
906
1572
|
seen.add(key)
|
|
907
|
-
label =
|
|
1573
|
+
label = normalize_cell_text(raw_column.get("label") or key)
|
|
908
1574
|
width_weight = _int_value(raw_column.get("widthWeight", 1), default=1)
|
|
909
1575
|
columns.append(
|
|
910
1576
|
{
|
|
@@ -929,42 +1595,51 @@ def _normalize_rows(
|
|
|
929
1595
|
for row_index, raw_row in enumerate(value):
|
|
930
1596
|
if not isinstance(raw_row, Mapping):
|
|
931
1597
|
raise ValueError(f"blocks[{index}].rows[{row_index}] must be a mapping")
|
|
932
|
-
rows.append({key:
|
|
1598
|
+
rows.append({key: _normalize_table_cell_value(raw_row.get(key, "")) for key in column_keys})
|
|
933
1599
|
return rows
|
|
934
1600
|
|
|
935
1601
|
|
|
1602
|
+
def _normalize_table_cell_value(value: Any) -> str:
|
|
1603
|
+
if isinstance(value, Mapping):
|
|
1604
|
+
text = value.get("text", value.get("value", ""))
|
|
1605
|
+
if bool(value.get("preserveWhitespace", False)):
|
|
1606
|
+
return str(text or "")
|
|
1607
|
+
return normalize_cell_text(text)
|
|
1608
|
+
return normalize_cell_text(value)
|
|
1609
|
+
|
|
1610
|
+
|
|
936
1611
|
def _render_block(
|
|
937
1612
|
document: HwpxDocument,
|
|
938
|
-
block:
|
|
1613
|
+
block: Any,
|
|
939
1614
|
tokens: Mapping[str, str],
|
|
940
1615
|
) -> None:
|
|
941
|
-
if block
|
|
1616
|
+
if isinstance(block, BuilderHeading):
|
|
942
1617
|
document.add_paragraph(
|
|
943
|
-
|
|
1618
|
+
block.text,
|
|
944
1619
|
char_pr_id_ref=tokens["heading"],
|
|
945
1620
|
inherit_style=False,
|
|
946
1621
|
)
|
|
947
1622
|
return
|
|
948
|
-
if block
|
|
949
|
-
style = str(block.
|
|
1623
|
+
if isinstance(block, BuilderParagraph):
|
|
1624
|
+
style = str(block.style or "body")
|
|
950
1625
|
document.add_paragraph(
|
|
951
|
-
|
|
1626
|
+
block.text,
|
|
952
1627
|
char_pr_id_ref=tokens.get(style, tokens["body"]),
|
|
953
1628
|
inherit_style=False,
|
|
954
1629
|
)
|
|
955
1630
|
return
|
|
956
|
-
if block
|
|
957
|
-
for item in block.
|
|
1631
|
+
if isinstance(block, BuilderBullet):
|
|
1632
|
+
for item in block.items:
|
|
958
1633
|
document.add_paragraph(
|
|
959
1634
|
f"• {item}",
|
|
960
1635
|
char_pr_id_ref=tokens["bullet"],
|
|
961
1636
|
inherit_style=False,
|
|
962
1637
|
)
|
|
963
1638
|
return
|
|
964
|
-
if block
|
|
965
|
-
|
|
1639
|
+
if isinstance(block, BuilderTable):
|
|
1640
|
+
_add_builder_table(document, block, tokens)
|
|
966
1641
|
return
|
|
967
|
-
if block.type == "memo":
|
|
1642
|
+
if isinstance(block, DocumentBlock) and block.type == "memo":
|
|
968
1643
|
paragraph = document.add_paragraph(
|
|
969
1644
|
str(block.data["text"]),
|
|
970
1645
|
char_pr_id_ref=tokens["body"],
|
|
@@ -972,8 +1647,10 @@ def _render_block(
|
|
|
972
1647
|
)
|
|
973
1648
|
document.add_memo_with_anchor(str(block.data["memo"]), paragraph=paragraph)
|
|
974
1649
|
return
|
|
975
|
-
if block
|
|
1650
|
+
if isinstance(block, BuilderPageBreak):
|
|
976
1651
|
document.add_paragraph("", pageBreak="1", inherit_style=False)
|
|
1652
|
+
return
|
|
1653
|
+
raise ValueError(f"unsupported builder block: {type(block).__name__}")
|
|
977
1654
|
|
|
978
1655
|
|
|
979
1656
|
def _add_key_value_table(
|
|
@@ -1040,6 +1717,46 @@ def _add_plan_table(
|
|
|
1040
1717
|
)
|
|
1041
1718
|
|
|
1042
1719
|
|
|
1720
|
+
def _add_builder_table(
|
|
1721
|
+
document: HwpxDocument,
|
|
1722
|
+
table_node: BuilderTable,
|
|
1723
|
+
tokens: Mapping[str, str],
|
|
1724
|
+
) -> None:
|
|
1725
|
+
rows = [list(table_node.header), *(list(row) for row in table_node.rows)]
|
|
1726
|
+
if not rows:
|
|
1727
|
+
raise ValueError("table must contain a header or at least one row")
|
|
1728
|
+
column_count = max(len(row) for row in rows)
|
|
1729
|
+
table = document.add_table(
|
|
1730
|
+
len(rows),
|
|
1731
|
+
column_count,
|
|
1732
|
+
width=_DEFAULT_TABLE_WIDTH,
|
|
1733
|
+
char_pr_id_ref=tokens["table_cell"],
|
|
1734
|
+
)
|
|
1735
|
+
if table_node.column_widths:
|
|
1736
|
+
for row in table.rows:
|
|
1737
|
+
for col_index, cell in enumerate(row.cells):
|
|
1738
|
+
if col_index < len(table_node.column_widths):
|
|
1739
|
+
cell.set_size(width=int(table_node.column_widths[col_index]))
|
|
1740
|
+
for col_index, label in enumerate(table_node.header):
|
|
1741
|
+
_set_table_cell_text(
|
|
1742
|
+
table,
|
|
1743
|
+
0,
|
|
1744
|
+
col_index,
|
|
1745
|
+
str(label),
|
|
1746
|
+
char_pr_id_ref=tokens["table_header"],
|
|
1747
|
+
)
|
|
1748
|
+
row_offset = 1 if table_node.header else 0
|
|
1749
|
+
for row_index, row in enumerate(table_node.rows, start=row_offset):
|
|
1750
|
+
for col_index, value in enumerate(row):
|
|
1751
|
+
_set_table_cell_text(
|
|
1752
|
+
table,
|
|
1753
|
+
row_index,
|
|
1754
|
+
col_index,
|
|
1755
|
+
str(value),
|
|
1756
|
+
char_pr_id_ref=tokens["table_cell"],
|
|
1757
|
+
)
|
|
1758
|
+
|
|
1759
|
+
|
|
1043
1760
|
def _set_table_cell_text(
|
|
1044
1761
|
table: Any,
|
|
1045
1762
|
row_index: int,
|
|
@@ -1545,6 +2262,7 @@ def _table_block_text(block: Mapping[str, Any]) -> str:
|
|
|
1545
2262
|
for row in block.get("rows", []):
|
|
1546
2263
|
if isinstance(row, Mapping):
|
|
1547
2264
|
parts.extend(str(value) for value in row.values())
|
|
2265
|
+
parts.append(str(block.get("unit") or ""))
|
|
1548
2266
|
return "\n".join(parts)
|
|
1549
2267
|
|
|
1550
2268
|
|
|
@@ -1608,10 +2326,17 @@ def _document_table_blocks(document: HwpxDocument) -> list[Mapping[str, Any]]:
|
|
|
1608
2326
|
|
|
1609
2327
|
text = str(getattr(paragraph, "text", "") or "").strip()
|
|
1610
2328
|
if text:
|
|
2329
|
+
if _looks_like_unit_text(text):
|
|
2330
|
+
previous_text = ""
|
|
2331
|
+
continue
|
|
1611
2332
|
previous_text = text
|
|
1612
2333
|
return blocks
|
|
1613
2334
|
|
|
1614
2335
|
|
|
2336
|
+
def _looks_like_unit_text(text: str) -> bool:
|
|
2337
|
+
return text.startswith(("단위:", "단위:"))
|
|
2338
|
+
|
|
2339
|
+
|
|
1615
2340
|
def _looks_like_table_header_row(text_rows: list[list[str]]) -> bool:
|
|
1616
2341
|
if not text_rows:
|
|
1617
2342
|
return False
|
|
@@ -1864,10 +2589,43 @@ def _int_value(value: Any, *, default: int) -> int:
|
|
|
1864
2589
|
return default
|
|
1865
2590
|
|
|
1866
2591
|
|
|
2592
|
+
def _float_value(value: Any, *, default: float) -> float:
|
|
2593
|
+
try:
|
|
2594
|
+
return float(value)
|
|
2595
|
+
except (TypeError, ValueError):
|
|
2596
|
+
return default
|
|
2597
|
+
|
|
2598
|
+
|
|
2599
|
+
def _optional_number(value: Any) -> int | float | None:
|
|
2600
|
+
if value in (None, ""):
|
|
2601
|
+
return None
|
|
2602
|
+
try:
|
|
2603
|
+
number = float(value)
|
|
2604
|
+
except (TypeError, ValueError):
|
|
2605
|
+
return None
|
|
2606
|
+
if number.is_integer():
|
|
2607
|
+
return int(number)
|
|
2608
|
+
return number
|
|
2609
|
+
|
|
2610
|
+
|
|
2611
|
+
def _optional_bool(value: Any) -> bool | None:
|
|
2612
|
+
if value is None:
|
|
2613
|
+
return None
|
|
2614
|
+
return bool(value)
|
|
2615
|
+
|
|
2616
|
+
|
|
2617
|
+
def _optional_str(value: Any) -> str | None:
|
|
2618
|
+
if value is None:
|
|
2619
|
+
return None
|
|
2620
|
+
text = str(value)
|
|
2621
|
+
return text if text else None
|
|
2622
|
+
|
|
2623
|
+
|
|
1867
2624
|
__all__ = [
|
|
1868
2625
|
"AUTHORING_REPORT_VERSION",
|
|
1869
2626
|
"DEFAULT_STYLE_PRESET",
|
|
1870
2627
|
"DOCUMENT_PLAN_SCHEMA_VERSION",
|
|
2628
|
+
"DOCUMENT_PLAN_V2_SCHEMA_VERSION",
|
|
1871
2629
|
"DocumentBlock",
|
|
1872
2630
|
"DocumentPlan",
|
|
1873
2631
|
"DocumentStylePreset",
|