python-hwpx 2.15.0__py3-none-any.whl → 2.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hwpx/authoring.py +200 -2
- hwpx/design/profiles/home_notice/fragments/body.xml +1 -0
- hwpx/design/profiles/home_notice/fragments/heading.xml +1 -0
- hwpx/design/profiles/home_notice/fragments/title.xml +1 -0
- hwpx/design/profiles/home_notice/profile.json +24 -0
- hwpx/design/profiles/home_notice/template.hwpx +0 -0
- hwpx/tools/official_lint.py +111 -6
- {python_hwpx-2.15.0.dist-info → python_hwpx-2.16.0.dist-info}/METADATA +1 -1
- {python_hwpx-2.15.0.dist-info → python_hwpx-2.16.0.dist-info}/RECORD +14 -9
- {python_hwpx-2.15.0.dist-info → python_hwpx-2.16.0.dist-info}/WHEEL +0 -0
- {python_hwpx-2.15.0.dist-info → python_hwpx-2.16.0.dist-info}/entry_points.txt +0 -0
- {python_hwpx-2.15.0.dist-info → python_hwpx-2.16.0.dist-info}/licenses/LICENSE +0 -0
- {python_hwpx-2.15.0.dist-info → python_hwpx-2.16.0.dist-info}/licenses/NOTICE +0 -0
- {python_hwpx-2.15.0.dist-info → python_hwpx-2.16.0.dist-info}/top_level.txt +0 -0
hwpx/authoring.py
CHANGED
|
@@ -874,6 +874,138 @@ def _validate_v2_block(raw_block: Any, *, path: str) -> list[PlanValidationIssue
|
|
|
874
874
|
return issues
|
|
875
875
|
|
|
876
876
|
|
|
877
|
+
# --- M3 (S-057) document-type -> design profile routing ------------------------
|
|
878
|
+
# Maps a plan's document_type (Korean label or profile id) to a committed
|
|
879
|
+
# hwpx.design profile. When it resolves, create_document_from_plan composes from
|
|
880
|
+
# the harvested, Hancom-opens-clean profile skeleton instead of the from-scratch
|
|
881
|
+
# builder. Unknown types keep the legacy from-scratch path (regression-safe).
|
|
882
|
+
_DOCTYPE_TO_PROFILE = {
|
|
883
|
+
"공문": "official_notice",
|
|
884
|
+
"공문서": "official_notice",
|
|
885
|
+
"official_notice": "official_notice",
|
|
886
|
+
"보고서": "report",
|
|
887
|
+
"report": "report",
|
|
888
|
+
"government_report": "report",
|
|
889
|
+
"가정통신문": "home_notice",
|
|
890
|
+
"home_notice": "home_notice",
|
|
891
|
+
}
|
|
892
|
+
_DOCTYPE_METADATA_KEYS = (
|
|
893
|
+
"document_type",
|
|
894
|
+
"문서 유형",
|
|
895
|
+
"문서유형",
|
|
896
|
+
"문서 종류",
|
|
897
|
+
"문서종류",
|
|
898
|
+
"documentType",
|
|
899
|
+
)
|
|
900
|
+
# 결문 (closing block) fields in their canonical render order.
|
|
901
|
+
_GYEOLMUN_FIELDS = (
|
|
902
|
+
("issuer", "발신명의"),
|
|
903
|
+
("productionNumber", "생산등록번호"),
|
|
904
|
+
("enforcementDate", "시행일"),
|
|
905
|
+
("disclosure", "공개구분"),
|
|
906
|
+
)
|
|
907
|
+
|
|
908
|
+
|
|
909
|
+
def _plan_document_type(plan: Any) -> str:
    """Return the plan's document-type label, or ``""`` when none is set.

    Every alias in ``_DOCTYPE_METADATA_KEYS`` is tried in order. For a given
    alias the plan's ``metadata`` mapping is consulted before the plan's top
    level. Falsy values and values that are blank after stripping are skipped,
    so an empty placeholder cannot hide a usable value stored under the same
    alias at top level or under a later alias.

    Args:
        plan: Raw plan object. Anything that is not a mapping yields ``""``.

    Returns:
        The first usable value, converted with ``str`` and stripped.
    """
    if not isinstance(plan, Mapping):
        return ""
    metadata = plan.get("metadata")
    if not isinstance(metadata, Mapping):
        metadata = {}
    for key in _DOCTYPE_METADATA_KEYS:
        # Same precedence as before for each alias: metadata first, then top level.
        for container in (metadata, plan):
            value = container.get(key)
            if not value:
                continue
            text = str(value).strip()
            if text:
                return text
    return ""
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
def _resolve_design_profile(plan: Any) -> str | None:
    """Map the plan's document type to an available design profile id.

    Returns ``None`` when the plan declares no document type, when the type has
    no entry in ``_DOCTYPE_TO_PROFILE``, or when the mapped profile id is not
    listed by ``hwpx.design.available_profiles()``.

    Args:
        plan: Raw plan object; forwarded to ``_plan_document_type``.

    Returns:
        The profile id to compose from, or ``None`` to signal "no profile".
    """
    doc_type = _plan_document_type(plan)
    profile_id = _DOCTYPE_TO_PROFILE.get(doc_type) if doc_type else None
    if not profile_id:
        # Unmapped types return before hwpx.design is imported, so callers that
        # fall back to another path never depend on that import succeeding.
        return None

    from hwpx import design as _design

    return profile_id if profile_id in _design.available_profiles() else None
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
def _bridge_to_design_plan(plan: Mapping[str, Any], profile_id: str):
    """Translate a document_plan mapping into a ``hwpx.design.plan.DocumentPlan``.

    Block translation (entries of ``plan["blocks"]`` that are not mappings are
    ignored; a missing ``type`` is treated as ``"paragraph"``):

    * ``heading`` -> paragraph with role ``heading`` when the level is <= 1,
      otherwise ``subheading``. A missing or non-numeric level counts as 1.
    * ``paragraph`` -> paragraph with role ``body``.
    * ``bullets`` -> one ``body`` paragraph per item (``str(item)``).
    * ``table`` -> table block with role ``info``. Columns come from
      ``columns`` (or ``header``) and may be plain labels or ``{key, label}``
      mappings, in any mix. Rows may be mappings (looked up by column key) or
      lists/tuples (taken positionally). Rows of any other shape are dropped
      and ``None`` cells become ``""``.
    * Any other type (for example page_break or memo) has no design role and
      is skipped.

    Closing-block (gyeolmun) values found in ``plan["gyeolmun"]`` are appended
    as trailing ``body`` paragraphs in ``_GYEOLMUN_FIELDS`` order, each
    rendered as ``"<label> <value>"``. Empty values are omitted.

    Args:
        plan: The raw document_plan mapping.
        profile_id: Design profile id stored on the resulting plan.

    Returns:
        A ``DocumentPlan`` built with ``profile``, ``title`` and ``blocks``.
    """
    from hwpx.design.plan import Block as _Block, DocumentPlan as _DesignPlan

    def _cell(value: Any) -> str:
        # A missing/None cell must render as empty text, not the word "None".
        return "" if value is None else str(value)

    blocks: list = []
    for raw in plan.get("blocks") or []:
        if not isinstance(raw, Mapping):
            continue
        block_type = str(raw.get("type") or "paragraph")

        if block_type == "heading":
            try:
                level = int(raw.get("level") or 1)
            except (TypeError, ValueError):
                # The plan has not been validated at this point; treat an
                # unparsable level as a top-level heading instead of aborting.
                level = 1
            role = "heading" if level <= 1 else "subheading"
            blocks.append(_Block(type="paragraph", role=role, text=str(raw.get("text") or "")))

        elif block_type == "paragraph":
            blocks.append(_Block(type="paragraph", role="body", text=str(raw.get("text") or "")))

        elif block_type == "bullets":
            blocks.extend(
                _Block(type="paragraph", role="body", text=str(item))
                for item in raw.get("items") or []
            )

        elif block_type == "table":
            # Resolve every column on its own so mapping and plain-label
            # columns can be mixed; a plain label doubles as its lookup key.
            keys: list[str] = []
            columns: list[str] = []
            for col in raw.get("columns") or raw.get("header") or []:
                if isinstance(col, Mapping):
                    keys.append(str(col.get("key") or col.get("label") or ""))
                    columns.append(str(col.get("label") or col.get("key") or ""))
                else:
                    keys.append(str(col))
                    columns.append(str(col))

            rows: list[list[str]] = []
            for row in raw.get("rows") or []:
                if isinstance(row, Mapping):
                    # Emit the row's values in column order (never its keys).
                    rows.append([_cell(row.get(key)) for key in keys])
                elif isinstance(row, (list, tuple)):
                    rows.append([_cell(cell) for cell in row])
                # Other shapes (e.g. a bare string) are not rows and are dropped.
            blocks.append(_Block(type="table", role="info", columns=columns, rows=rows))

        # page_break / memo and unknown types: no design role -> skipped

    gyeolmun = plan.get("gyeolmun")
    if isinstance(gyeolmun, Mapping):
        for key, label in _GYEOLMUN_FIELDS:
            value = gyeolmun.get(key)
            if value:
                blocks.append(_Block(type="paragraph", role="body", text=f"{label} {value}"))

    return _DesignPlan(profile=profile_id, title=str(plan.get("title") or ""), blocks=blocks)
|
|
986
|
+
|
|
987
|
+
|
|
988
|
+
def _korean_proofing_status(plan: Any, normalized_plan: "DocumentPlan | None") -> str:
    """Report the Korean proofing status; this function never returns "passed".

    The status is read from the ``korean_proofing`` (or, failing that,
    ``korean_proofing_status``) metadata entry. Metadata comes from the raw
    plan when it is a mapping carrying a ``metadata`` mapping, otherwise from
    ``normalized_plan.metadata`` when a normalized plan is given.

    Returns:
        ``"llm_proofed_not_oracle_verified"`` when the entry, stripped and
        lower-cased, is one of the recognised LLM self-proof markers;
        ``"unverified"`` in every other case.
    """
    llm_markers = {"llm", "llm_proofed", "llm-proofed", "llm_proofed_not_oracle_verified"}

    plan_metadata = plan.get("metadata") if isinstance(plan, Mapping) else None
    source: Mapping[str, Any]
    if isinstance(plan_metadata, Mapping):
        source = plan_metadata
    elif normalized_plan is not None:
        source = normalized_plan.metadata
    else:
        source = {}

    declared = source.get("korean_proofing") or source.get("korean_proofing_status") or ""
    marker = str(declared).strip().lower()
    if marker not in llm_markers:
        return "unverified"
    return "llm_proofed_not_oracle_verified"
|
|
1007
|
+
|
|
1008
|
+
|
|
877
1009
|
def create_document_from_plan(
|
|
878
1010
|
plan: Mapping[str, Any] | DocumentPlan,
|
|
879
1011
|
*,
|
|
@@ -881,6 +1013,19 @@ def create_document_from_plan(
|
|
|
881
1013
|
) -> HwpxDocument:
|
|
882
1014
|
"""Create a formatted HWPX document from a declarative document plan."""
|
|
883
1015
|
|
|
1016
|
+
if isinstance(plan, Mapping):
|
|
1017
|
+
profile_id = _resolve_design_profile(plan)
|
|
1018
|
+
if profile_id is not None:
|
|
1019
|
+
from hwpx import design as _design
|
|
1020
|
+
|
|
1021
|
+
design_plan = _bridge_to_design_plan(plan, profile_id)
|
|
1022
|
+
data, result = _design.compose_bytes(design_plan, production=True)
|
|
1023
|
+
if not result.ok:
|
|
1024
|
+
raise ValueError(
|
|
1025
|
+
f"profile compose failed for {profile_id!r}: {result.errors}"
|
|
1026
|
+
)
|
|
1027
|
+
return HwpxDocument.open(data)
|
|
1028
|
+
|
|
884
1029
|
normalized = normalize_document_plan(plan)
|
|
885
1030
|
if normalized.builder_document is not None:
|
|
886
1031
|
return normalized.builder_document.lower()
|
|
@@ -952,8 +1097,15 @@ def inspect_document_authoring_quality(
|
|
|
952
1097
|
*,
|
|
953
1098
|
plan: Mapping[str, Any] | DocumentPlan | None = None,
|
|
954
1099
|
quality_profile: str | Mapping[str, Any] | None = None,
|
|
1100
|
+
verify_render: bool = False,
|
|
955
1101
|
) -> dict[str, Any]:
|
|
956
|
-
"""Return deterministic structural quality evidence for generated HWPX.
|
|
1102
|
+
"""Return deterministic structural quality evidence for generated HWPX.
|
|
1103
|
+
|
|
1104
|
+
When *verify_render* is true AND a Mac Hancom oracle is reachable, the
|
|
1105
|
+
document is rendered and ``render_checked``/``visual_complete`` become real
|
|
1106
|
+
receipts. Otherwise ``render_checked`` is ``False`` and ``visual_complete``
|
|
1107
|
+
is ``"unverified"`` — never a silent true (Constitution V).
|
|
1108
|
+
"""
|
|
957
1109
|
|
|
958
1110
|
normalized_plan: DocumentPlan | None = None
|
|
959
1111
|
plan_validation: dict[str, Any] | None = None
|
|
@@ -981,6 +1133,32 @@ def inspect_document_authoring_quality(
|
|
|
981
1133
|
package_report = validate_package(path if path is not None else package_payload)
|
|
982
1134
|
document_report = document.validate()
|
|
983
1135
|
reopened = _can_reopen(path, package_payload)
|
|
1136
|
+
render_checked = False
|
|
1137
|
+
visual_complete: Any = "unverified"
|
|
1138
|
+
if verify_render:
|
|
1139
|
+
from hwpx.visual import oracle as _oracle
|
|
1140
|
+
|
|
1141
|
+
_mac = _oracle.MacHancomOracle()
|
|
1142
|
+
if _mac.available():
|
|
1143
|
+
import tempfile as _tf
|
|
1144
|
+
|
|
1145
|
+
with _tf.TemporaryDirectory() as _tmp:
|
|
1146
|
+
_hwpx = Path(_tmp) / "render_check.hwpx"
|
|
1147
|
+
_hwpx.write_bytes(package_payload)
|
|
1148
|
+
_pdf = Path(_tmp) / "render_check.pdf"
|
|
1149
|
+
_rendered = _mac.render_pdf(str(_hwpx), str(_pdf))
|
|
1150
|
+
if _rendered and Path(_rendered).exists():
|
|
1151
|
+
try:
|
|
1152
|
+
import fitz as _fitz
|
|
1153
|
+
|
|
1154
|
+
_doc = _fitz.open(_rendered)
|
|
1155
|
+
_has_text = any(pg.get_text().strip() for pg in _doc)
|
|
1156
|
+
_doc.close()
|
|
1157
|
+
render_checked = bool(_has_text)
|
|
1158
|
+
visual_complete = render_checked
|
|
1159
|
+
except Exception:
|
|
1160
|
+
render_checked = False
|
|
1161
|
+
visual_complete = "unverified"
|
|
984
1162
|
non_empty_texts = [
|
|
985
1163
|
(paragraph.text or "").strip()
|
|
986
1164
|
for paragraph in document.paragraphs
|
|
@@ -1034,11 +1212,29 @@ def inspect_document_authoring_quality(
|
|
|
1034
1212
|
and not profiles["operating_plan"].get("pass", False)
|
|
1035
1213
|
):
|
|
1036
1214
|
gaps.append("operating plan quality failed")
|
|
1215
|
+
|
|
1216
|
+
document_type = ""
|
|
1217
|
+
if isinstance(plan, Mapping):
|
|
1218
|
+
document_type = _plan_document_type(plan)
|
|
1219
|
+
elif normalized_plan is not None:
|
|
1220
|
+
document_type = str(normalized_plan.metadata.get("document_type", "") or "")
|
|
1221
|
+
gongmun_structure: dict[str, Any] | None = None
|
|
1222
|
+
if _DOCTYPE_TO_PROFILE.get(document_type.strip()) == "official_notice":
|
|
1223
|
+
from hwpx.tools.official_lint import (
|
|
1224
|
+
inspect_official_document_style as _gongmun_lint,
|
|
1225
|
+
)
|
|
1226
|
+
|
|
1227
|
+
gongmun_structure = _gongmun_lint(document, document_type="공문")
|
|
1228
|
+
if not gongmun_structure.get("structure_pass", True):
|
|
1229
|
+
gaps.append("공문 structure gate failed")
|
|
1230
|
+
korean_proofing_status = _korean_proofing_status(plan, normalized_plan)
|
|
1037
1231
|
return {
|
|
1038
1232
|
"report_version": AUTHORING_REPORT_VERSION,
|
|
1039
1233
|
"schemaVersion": DOCUMENT_PLAN_SCHEMA_VERSION,
|
|
1040
1234
|
"plan_validation": plan_validation,
|
|
1041
1235
|
"pass": not gaps,
|
|
1236
|
+
"korean_proofing_status": korean_proofing_status,
|
|
1237
|
+
"gongmun_structure": gongmun_structure,
|
|
1042
1238
|
"block_counts": _block_counts(normalized_plan),
|
|
1043
1239
|
"document": {
|
|
1044
1240
|
"paragraph_count": len(document.paragraphs),
|
|
@@ -1046,6 +1242,8 @@ def inspect_document_authoring_quality(
|
|
|
1046
1242
|
"table_count": table_count,
|
|
1047
1243
|
"page_break_count": page_break_count,
|
|
1048
1244
|
},
|
|
1245
|
+
"render_checked": render_checked,
|
|
1246
|
+
"visual_complete": visual_complete,
|
|
1049
1247
|
"validation": {
|
|
1050
1248
|
"reopened": reopened,
|
|
1051
1249
|
"validate_package": {
|
|
@@ -1064,7 +1262,7 @@ def inspect_document_authoring_quality(
|
|
|
1064
1262
|
"style_token_usage": style_usage,
|
|
1065
1263
|
"recovery": recovery,
|
|
1066
1264
|
"profiles": profiles,
|
|
1067
|
-
"visual_review_required": bool(gates.get("visualReviewRequired", True)),
|
|
1265
|
+
"visual_review_required": bool(gates.get("visualReviewRequired", True)) and not render_checked,
|
|
1068
1266
|
"gaps": gaps,
|
|
1069
1267
|
}
|
|
1070
1268
|
finally:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
<hp:p xmlns:hp="http://www.hancom.co.kr/hwpml/2011/paragraph" id="2147483648" paraPrIDRef="1" styleIDRef="0" pageBreak="0" columnBreak="0" merged="0"><hp:run charPrIDRef="26"><hp:t>{{body}}</hp:t></hp:run></hp:p>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
<hp:p xmlns:hp="http://www.hancom.co.kr/hwpml/2011/paragraph" id="2147483648" paraPrIDRef="1" styleIDRef="0" pageBreak="0" columnBreak="0" merged="0"><hp:run charPrIDRef="25"><hp:t>{{heading}}</hp:t></hp:run></hp:p>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
<hp:p xmlns:hp="http://www.hancom.co.kr/hwpml/2011/paragraph" id="2147483648" paraPrIDRef="25" styleIDRef="0" pageBreak="0" columnBreak="0" merged="0"><hp:run charPrIDRef="17"><hp:t>{{title}}</hp:t></hp:run></hp:p>
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": "hwpx.design.profile.v1",
|
|
3
|
+
"id": "home_notice",
|
|
4
|
+
"template": "template.hwpx",
|
|
5
|
+
"fragments": {
|
|
6
|
+
"title": "fragments/title.xml",
|
|
7
|
+
"heading": "fragments/heading.xml",
|
|
8
|
+
"body": "fragments/body.xml"
|
|
9
|
+
},
|
|
10
|
+
"page": {
|
|
11
|
+
"width": 59528,
|
|
12
|
+
"height": 84186,
|
|
13
|
+
"orientation": "WIDELY",
|
|
14
|
+
"margins": {
|
|
15
|
+
"left": 4251,
|
|
16
|
+
"right": 4251,
|
|
17
|
+
"top": 1417,
|
|
18
|
+
"bottom": 0
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"char_pr_count": 54,
|
|
22
|
+
"style_coverage_threshold": 0.98,
|
|
23
|
+
"source_basename": "ganghwa_records.hwpx"
|
|
24
|
+
}
|
|
Binary file
|
hwpx/tools/official_lint.py
CHANGED
|
@@ -36,28 +36,48 @@ _ATTACHMENT_RE = re.compile(r"^\s*(?:붙임|첨부)\s+(?:\d+\.\s*)?.+\s+\d+\s*
|
|
|
36
36
|
_SPACE_BEFORE_PUNCTUATION_RE = re.compile(r"\s+[:??]")
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
def inspect_official_document_style(
|
|
40
|
-
|
|
39
|
+
def inspect_official_document_style(
|
|
40
|
+
source: Any, *, document_type: Any = None
|
|
41
|
+
) -> dict[str, Any]:
|
|
42
|
+
"""Inspect official-document conventions in text, plans, or HWPX files.
|
|
43
|
+
|
|
44
|
+
When *document_type* resolves to a 공문 (official outgoing document) the
|
|
45
|
+
structural spine — 두문(수신)·결문(발신명의·시행·공개구분)·끝. — is enforced at
|
|
46
|
+
ERROR severity (the hard-gate, ``structure_pass``). Without *document_type*
|
|
47
|
+
the behaviour is unchanged (backward compatible).
|
|
48
|
+
"""
|
|
41
49
|
|
|
42
50
|
paragraphs = _paragraphs_from_source(source)
|
|
51
|
+
is_gongmun = _is_gongmun(document_type)
|
|
43
52
|
violations: list[dict[str, Any]] = []
|
|
44
53
|
violations.extend(_inspect_marker_hierarchy(paragraphs))
|
|
45
|
-
|
|
54
|
+
if not is_gongmun:
|
|
55
|
+
# A 시행문 places its 결문(발신명의·시행) AFTER the 끝. marker, so the strict
|
|
56
|
+
# "끝. must be the final paragraph" rule does not apply to 공문; the
|
|
57
|
+
# structure gate enforces 끝. presence instead.
|
|
58
|
+
violations.extend(_inspect_end_marker(paragraphs))
|
|
46
59
|
violations.extend(_inspect_attachment_notation(paragraphs))
|
|
47
60
|
violations.extend(_inspect_dates(paragraphs))
|
|
48
61
|
violations.extend(_inspect_amounts(paragraphs))
|
|
49
62
|
violations.extend(_inspect_spacing(paragraphs))
|
|
63
|
+
if is_gongmun:
|
|
64
|
+
violations.extend(_inspect_gongmun_structure(paragraphs))
|
|
50
65
|
|
|
51
66
|
violation_count = len(violations)
|
|
67
|
+
error_count = sum(1 for v in violations if v.get("severity") == "error")
|
|
52
68
|
ok = violation_count == 0
|
|
69
|
+
rules = list(_RULES_CHECKED) + (list(_GONGMUN_STRUCTURE_RULES) if is_gongmun else [])
|
|
53
70
|
return {
|
|
54
71
|
"report_version": OFFICIAL_DOCUMENT_STYLE_REPORT_VERSION,
|
|
55
72
|
"pass": ok,
|
|
73
|
+
"structure_pass": error_count == 0,
|
|
74
|
+
"document_type": str(document_type) if document_type else None,
|
|
56
75
|
"score": max(0.0, round(1.0 - (violation_count / 10), 2)),
|
|
57
76
|
"summary": {
|
|
58
77
|
"paragraph_count": len(paragraphs),
|
|
59
78
|
"violation_count": violation_count,
|
|
60
|
-
"
|
|
79
|
+
"error_count": error_count,
|
|
80
|
+
"rules_checked": rules,
|
|
61
81
|
},
|
|
62
82
|
"violations": violations,
|
|
63
83
|
"repair_hints": [
|
|
@@ -71,9 +91,94 @@ def inspect_official_document_style(source: Any) -> dict[str, Any]:
|
|
|
71
91
|
}
|
|
72
92
|
|
|
73
93
|
|
|
94
|
+
_GONGMUN_DOCTYPES = {"공문", "공문서", "official_notice", "시행문"}
|
|
95
|
+
_GONGMUN_STRUCTURE_RULES = (
|
|
96
|
+
"missing-susin",
|
|
97
|
+
"missing-balsinmyeongui",
|
|
98
|
+
"missing-sihaeng",
|
|
99
|
+
"missing-disclosure",
|
|
100
|
+
"missing-end-marker",
|
|
101
|
+
)
|
|
102
|
+
_ISSUER_SUFFIX_RE = re.compile(r"(장|관|감)$")
|
|
103
|
+
_DISCLOSURE_RE = re.compile(r"(부분공개|비공개|공개)")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _is_gongmun(document_type: Any) -> bool:
    """Tell whether *document_type* names an official notice (gongmun).

    A falsy value is never a match; otherwise the value is converted with
    ``str``, stripped, and looked up in ``_GONGMUN_DOCTYPES``.
    """
    if not document_type:
        return False
    label = str(document_type).strip()
    return label in _GONGMUN_DOCTYPES
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _norm_spaces(text: str) -> str:
    """Return *text* with every whitespace character removed."""
    # Splitting on runs of whitespace and re-joining drops all of them.
    return "".join(text.split())
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _inspect_gongmun_structure(paragraphs: Sequence[str]) -> list[dict[str, Any]]:
    """Run the error-severity structure checks for an official notice (gongmun).

    All paragraph texts are stripped, have their whitespace removed and are
    concatenated; the checks are plain substring/regex tests on that text:

    * recipient label present ("수신"),
    * enforcement info present ("시행"),
    * a disclosure classification present (``_DISCLOSURE_RE``),
    * the end marker present ("끝."),
    * issuer present: either the literal label "발신명의", or a single
      whitespace-free paragraph of at least 3 characters that matches
      ``_ISSUER_SUFFIX_RE`` and does not contain the recipient label.

    Args:
        paragraphs: Paragraph texts. Empty, blank and ``None`` entries are
            ignored rather than raising.

    Returns:
        One violation dict per missing element, in the order listed above.
        Each is built by ``_violation`` with ``severity="error"``,
        ``paragraph_index=0`` and empty ``text`` (the finding concerns the
        whole document, not one paragraph).
    """
    # `t and ...` guards against None entries, which have no .strip().
    stripped = [t.strip() for t in paragraphs if t and t.strip()]
    norm = [_norm_spaces(t) for t in stripped]
    full_norm = "".join(norm)

    # The suffix regex is end-anchored on whitespace-free text, so a matching
    # line can never end with ")"; no separate check for that is needed.
    has_issuer = "발신명의" in full_norm or any(
        len(t) >= 3 and "수신" not in t and _ISSUER_SUFFIX_RE.search(t)
        for t in norm
    )

    # (rule id, element present?, message, suggestion) in reporting order.
    checks = (
        (
            "missing-susin",
            "수신" in full_norm,
            "공문 두문에 수신(수신자)이 없습니다",
            "두문에 '수신 <수신자>'를 추가하세요.",
        ),
        (
            "missing-sihaeng",
            "시행" in full_norm,
            "공문 결문에 시행 정보가 없습니다",
            "결문에 '시행 <처리과-일련번호> (<시행일자>)'를 추가하세요.",
        ),
        (
            "missing-disclosure",
            bool(_DISCLOSURE_RE.search(full_norm)),
            "공문 결문에 공개구분이 없습니다",
            "결문에 공개구분(공개/부분공개/비공개)을 추가하세요.",
        ),
        (
            "missing-end-marker",
            "끝." in full_norm,
            "공문 본문에 끝 표시(끝.)가 없습니다",
            "본문/붙임 마지막에 '끝.'을 두세요.",
        ),
        (
            "missing-balsinmyeongui",
            bool(has_issuer),
            "공문 결문에 발신명의(기관장 명의)가 없습니다",
            "결문에 발신명의(예: ○○교육지원청교육장)를 추가하세요.",
        ),
    )
    return [
        _violation(
            rule=rule,
            paragraph_index=0,
            text="",
            message=message,
            suggestion=suggestion,
            severity="error",
        )
        for rule, present, message, suggestion in checks
        if not present
    ]
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _document_paragraph_texts(paragraphs: Any) -> list[str]:
    """Collect paragraph text depth-first, descending into table cells.

    For each paragraph its own text is emitted first, followed by the text of
    every paragraph found in the cells of its tables (row by row, cell by
    cell, recursively), so text held inside tables is included as well.

    Args:
        paragraphs: Iterable of paragraph-like objects exposing ``text`` and,
            optionally, ``tables`` whose items expose ``rows`` -> ``cells`` ->
            ``paragraphs``.

    Returns:
        A flat list of strings. A paragraph whose ``text`` is ``None`` or empty
        contributes ``""`` so callers can safely use ``str`` methods on every
        entry.
    """
    texts: list[str] = []
    for paragraph in paragraphs:
        # Keep one entry per paragraph, but never hand None to the inspectors.
        texts.append(paragraph.text or "")
        # A missing or None `tables` attribute simply means "no tables".
        for table in getattr(paragraph, "tables", None) or ():
            for row in table.rows:
                for cell in row.cells:
                    texts.extend(_document_paragraph_texts(cell.paragraphs))
    return texts
|
|
177
|
+
|
|
178
|
+
|
|
74
179
|
def _paragraphs_from_source(source: Any) -> list[str]:
|
|
75
180
|
if isinstance(source, HwpxDocument):
|
|
76
|
-
return
|
|
181
|
+
return _document_paragraph_texts(source.paragraphs)
|
|
77
182
|
if isinstance(source, Path):
|
|
78
183
|
return _paragraphs_from_path(source)
|
|
79
184
|
if isinstance(source, str):
|
|
@@ -96,7 +201,7 @@ def _paragraphs_from_source(source: Any) -> list[str]:
|
|
|
96
201
|
def _paragraphs_from_path(path: Path) -> list[str]:
    """Open the HWPX file at *path* and return its flattened paragraph texts.

    The texts come from ``_document_paragraph_texts``. The document is closed
    before this function returns, including when extraction raises.
    """
    opened = HwpxDocument.open(path)
    try:
        texts = _document_paragraph_texts(opened.paragraphs)
    finally:
        opened.close()
    return texts
|
|
102
207
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
hwpx/__init__.py,sha256=ikzGacbkMN7PhNFVb5h3UhtpfSZ1Smr_cKu0OdSaOes,5052
|
|
2
|
-
hwpx/authoring.py,sha256=
|
|
2
|
+
hwpx/authoring.py,sha256=_AUGOzwmiHHuO08HZhGCEd6eCHCZV0yXeBaYKMsJJ34,125020
|
|
3
3
|
hwpx/document.py,sha256=zck0NXTj4k_RwBPi72iWgerG67v66TYLofLR8duNXXY,103182
|
|
4
4
|
hwpx/form_fill.py,sha256=VUIU53Qa9Ho2aP72biDvJwnDW7ngdAzu3PSd5A7d1JM,9908
|
|
5
5
|
hwpx/package.py,sha256=0rKjGCJbPQvrVBIy07Jpjsu3fI7HhbqFCGWTiTDsJpo,1141
|
|
@@ -34,6 +34,11 @@ hwpx/design/profiles/application_form/fragments/body.xml,sha256=a93VW8s0Eh2Pvcbf
|
|
|
34
34
|
hwpx/design/profiles/application_form/fragments/heading.xml,sha256=p9e0IX6L0SYlAhNAGC252pELqoU8Zv-MWE9jz9XLmt0,207
|
|
35
35
|
hwpx/design/profiles/application_form/fragments/info_table.xml,sha256=rLlCeWTEIAwK3v-5KU1CGR2dq4zdECG8HEugfG4ecd4,5727
|
|
36
36
|
hwpx/design/profiles/application_form/fragments/title.xml,sha256=wL2RusOksJ8Z0MBkbOfiQ5krmSkcvo1sMdM1pP8yasI,216
|
|
37
|
+
hwpx/design/profiles/home_notice/profile.json,sha256=u-Hz3AYZqqf313cv2qtHbo79AhoEb8KQgb2ROLzZOps,524
|
|
38
|
+
hwpx/design/profiles/home_notice/template.hwpx,sha256=TZeOV77NrhB9kVdUasXweKSSlQylqbI3zQTd-UDekfQ,9507
|
|
39
|
+
hwpx/design/profiles/home_notice/fragments/body.xml,sha256=A8M2S1fe7QIdyl5KK0tg6HjbksBD5hioOl8tOM25if0,212
|
|
40
|
+
hwpx/design/profiles/home_notice/fragments/heading.xml,sha256=FWPCjuAbsR7j6qYVd1-8c0sxyo6JDY5vkM-LV3ZWzEw,215
|
|
41
|
+
hwpx/design/profiles/home_notice/fragments/title.xml,sha256=6BybxZE8h4h370iB-ZJzM7fKqw1M_Y_J_TGuTzXEIRM,214
|
|
37
42
|
hwpx/design/profiles/official_notice/profile.json,sha256=Q9WQBnH9ukgPhMCyxluDbxPfqNmxyz_6tO9hADxyGnk,654
|
|
38
43
|
hwpx/design/profiles/official_notice/template.hwpx,sha256=y84XEouqC9SMVW7hMucsiYBp9zg2OdzlIgVy0i81Nr8,10343
|
|
39
44
|
hwpx/design/profiles/official_notice/fragments/body.xml,sha256=nEWh0GQfbtOKpu3NkjULZ72t7va7rNHxU4QG0LN6OLg,213
|
|
@@ -107,7 +112,7 @@ hwpx/tools/layout_preview.py,sha256=XdKxNL34zVBvFDQv5LFErz1ESuVhkW8jHdyQ1EGbSBU,
|
|
|
107
112
|
hwpx/tools/mail_merge.py,sha256=EXdx7gcG4YrrIb-yFDghWIHpaiFKlZ3h_s2XY7ZMRN4,17932
|
|
108
113
|
hwpx/tools/markdown_export.py,sha256=FejutCpQHbycO185uljcSwfZuwXMTbGEgXtf5e-a4_k,19139
|
|
109
114
|
hwpx/tools/object_finder.py,sha256=7i6XI1-r7-ar_IzSZQ82hfOcxVzJFK2XjMDB8oxcmMA,13478
|
|
110
|
-
hwpx/tools/official_lint.py,sha256=
|
|
115
|
+
hwpx/tools/official_lint.py,sha256=9iszI4CAAEMGdPCOhmQ6XonO1jc1XE2BJXMHgv9hIvY,18600
|
|
111
116
|
hwpx/tools/package_reconcile.py,sha256=y1Hl7hbPh4YaV59LTdDLzQwgn4g1qEnFmSjmajnrEbA,2416
|
|
112
117
|
hwpx/tools/package_validator.py,sha256=AA5wy6YgwlU6BTq1p2qCbCVCM8lmIBLhPANKCfaPb-s,29369
|
|
113
118
|
hwpx/tools/page_guard.py,sha256=nDAVPcvrnuyDxVTA_j22wiYD7CXAD6XlzsMzaz3h_q8,9701
|
|
@@ -140,10 +145,10 @@ hwpx/visual/diff.py,sha256=0X5T9IgwRZU3td-7vnPrlowovtGud7P_ymq0KVehlKk,5677
|
|
|
140
145
|
hwpx/visual/masks.py,sha256=oXhgynAb4uKjJtZ2BGHHdAjyvWGqSFlZFQ-iJxzHiuo,1832
|
|
141
146
|
hwpx/visual/oracle.py,sha256=QXAyc0xVIjLPWUHM2rBjsustQTmwfoW2Xkb9iX9ai2E,21786
|
|
142
147
|
hwpx/visual/report.py,sha256=2RhXN1KBYOZTim9FNpeUhaaDHR7oFxI6Z2DLUkDIiwE,1717
|
|
143
|
-
python_hwpx-2.
|
|
144
|
-
python_hwpx-2.
|
|
145
|
-
python_hwpx-2.
|
|
146
|
-
python_hwpx-2.
|
|
147
|
-
python_hwpx-2.
|
|
148
|
-
python_hwpx-2.
|
|
149
|
-
python_hwpx-2.
|
|
148
|
+
python_hwpx-2.16.0.dist-info/licenses/LICENSE,sha256=_ubz4wv-BkkT3l3gu-QuH7JGeVjuRYGZoZK95eNsCHU,9688
|
|
149
|
+
python_hwpx-2.16.0.dist-info/licenses/NOTICE,sha256=k48h6EaGQE8Y1c0dS9sIOOcz4YqkbcImWClF7pBOgsg,2473
|
|
150
|
+
python_hwpx-2.16.0.dist-info/METADATA,sha256=drWhBw2ZwdxyyKj2OkCcY_rUZWYNyFlBr-cIoC1vaNs,19982
|
|
151
|
+
python_hwpx-2.16.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
152
|
+
python_hwpx-2.16.0.dist-info/entry_points.txt,sha256=4U6WXYWHxEiWp2VRHo97fvOYNh7ebu6roonk7chxKcY,453
|
|
153
|
+
python_hwpx-2.16.0.dist-info/top_level.txt,sha256=R1iToqDh80Nf2oQhRjTN0rbN2X6kyDUizIocZjkhuxc,5
|
|
154
|
+
python_hwpx-2.16.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|