@panda-agent/panda-cli 0.1.28 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/panda-cli-ink.bundle.mjs +267 -258
- package/package.json +6 -4
- package/skills/.gitkeep +0 -0
- package/skills/README.md +13 -0
- package/skills/docx/.skill-metadata.yaml +173 -0
- package/skills/docx/LICENSE.txt +30 -0
- package/skills/docx/SKILL.md +589 -0
- package/skills/docx/scripts/__init__.py +1 -0
- package/skills/docx/scripts/accept_changes.py +206 -0
- package/skills/docx/scripts/comment.py +442 -0
- package/skills/docx/scripts/office/helpers/__init__.py +1 -0
- package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
- package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
- package/skills/docx/scripts/office/pack.py +167 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/docx/scripts/office/soffice.py +194 -0
- package/skills/docx/scripts/office/unpack.py +145 -0
- package/skills/docx/scripts/office/validate.py +114 -0
- package/skills/docx/scripts/office/validators/__init__.py +16 -0
- package/skills/docx/scripts/office/validators/base.py +733 -0
- package/skills/docx/scripts/office/validators/docx.py +354 -0
- package/skills/docx/scripts/office/validators/pptx.py +230 -0
- package/skills/docx/scripts/office/validators/redlining.py +212 -0
- package/skills/docx/scripts/templates/comments.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/skills/docx/scripts/templates/people.xml +3 -0
- package/skills/frontend-design/LICENSE.txt +177 -0
- package/skills/frontend-design/SKILL.md +42 -0
- package/skills/pdf/.skill-metadata.yaml +273 -0
- package/skills/pdf/LICENSE.txt +30 -0
- package/skills/pdf/SKILL.md +324 -0
- package/skills/pdf/advanced-reference.md +609 -0
- package/skills/pdf/form-filling-guide.md +318 -0
- package/skills/pdf/forms.md +294 -0
- package/skills/pdf/reference.md +612 -0
- package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
- package/skills/pdf/scripts/check_fillable_fields.py +64 -0
- package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
- package/skills/pdf/scripts/create_validation_image.py +125 -0
- package/skills/pdf/scripts/extract_form_field_info.py +220 -0
- package/skills/pdf/scripts/extract_form_structure.py +202 -0
- package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
- package/skills/pptx-generator/SKILL.md +204 -0
- package/skills/pptx-generator/assets/styles/business.json +8 -0
- package/skills/pptx-generator/assets/styles/minimal.json +8 -0
- package/skills/pptx-generator/assets/styles/modern.json +8 -0
- package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
- package/skills/pptx-generator/references/collaboration_guide.md +381 -0
- package/skills/pptx-generator/references/json_format_spec.md +215 -0
- package/skills/pptx-generator/references/layout_guide.md +290 -0
- package/skills/pptx-generator/scripts/json_validator.py +194 -0
- package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
- package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
- package/skills/skill-creator/LICENSE.txt +202 -0
- package/skills/skill-creator/SKILL.md +479 -0
- package/skills/skill-creator/agents/analyzer.md +274 -0
- package/skills/skill-creator/agents/comparator.md +202 -0
- package/skills/skill-creator/agents/grader.md +223 -0
- package/skills/skill-creator/assets/eval_review.html +146 -0
- package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
- package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
- package/skills/skill-creator/references/schemas.md +430 -0
- package/skills/skill-creator/scripts/__init__.py +0 -0
- package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/skills/skill-creator/scripts/generate_report.py +326 -0
- package/skills/skill-creator/scripts/improve_description.py +248 -0
- package/skills/skill-creator/scripts/package_skill.py +136 -0
- package/skills/skill-creator/scripts/quick_validate.py +103 -0
- package/skills/skill-creator/scripts/run_eval.py +310 -0
- package/skills/skill-creator/scripts/run_loop.py +332 -0
- package/skills/skill-creator/scripts/utils.py +47 -0
- package/skills/xlsx/.skill-metadata.yaml +185 -0
- package/skills/xlsx/LICENSE.txt +30 -0
- package/skills/xlsx/SKILL.md +233 -0
- package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
- package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
- package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
- package/skills/xlsx/scripts/office/pack.py +162 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/xlsx/scripts/office/soffice.py +185 -0
- package/skills/xlsx/scripts/office/unpack.py +146 -0
- package/skills/xlsx/scripts/office/validate.py +108 -0
- package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
- package/skills/xlsx/scripts/office/validators/base.py +800 -0
- package/skills/xlsx/scripts/office/validators/docx.py +383 -0
- package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
- package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
- package/skills/xlsx/scripts/recalc.py +296 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""Validate geometric constraints of form field bounding regions.
|
|
2
|
+
|
|
3
|
+
Detects overlapping bounding boxes and entry regions too small for their
|
|
4
|
+
specified font size.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python check_bounding_boxes.py <fields.json>
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import json
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Dict, List, TextIO, Tuple
|
|
15
|
+
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
# Constants
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
EXIT_SUCCESS: int = 0
|
|
21
|
+
EXIT_FAILURE: int = 1
|
|
22
|
+
|
|
23
|
+
MAX_REPORTED_ISSUES: int = 20
|
|
24
|
+
|
|
25
|
+
BoundingBox = Tuple[float, float, float, float]
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Data structures
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class FieldRegion:
    """One rectangle (a label box or an entry box) plus the field it belongs to."""

    # Fixed attribute set: instances carry no per-object __dict__.
    __slots__ = ("bounds", "kind", "parent_field")

    def __init__(
        self,
        bounds: List[float],
        kind: str,
        parent_field: Dict[str, Any],
    ) -> None:
        """Record the rectangle, its role, and the owning field.

        Args:
            bounds: Four coordinates; indices 0/2 are compared on one axis
                and 1/3 on the other by the geometry helpers in this file.
            kind: Role tag for the rectangle (callers in this file pass
                "label" or "entry").
            parent_field: The field dictionary this rectangle was read from;
                stored by reference, not copied.
        """
        self.bounds, self.kind, self.parent_field = bounds, kind, parent_field
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Geometry helpers
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _do_overlap(box_a: List[float], box_b: List[float]) -> bool:
    """Return True when two axis-aligned rectangles share interior area.

    Each box is indexed as [0], [1] (low corner) and [2], [3] (high corner).
    Boxes that merely touch along an edge are treated as not overlapping.
    """
    # For each axis, (low index, high index) into the 4-element box.
    axes = ((0, 2), (1, 3))
    # Both axes are always evaluated, in x-then-y order.
    disjoint = [
        box_a[low] >= box_b[high] or box_b[low] >= box_a[high]
        for low, high in axes
    ]
    return not any(disjoint)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _height_too_small(region: FieldRegion) -> bool:
    """Tell whether a region is shorter than the font size of its entry text.

    A field without an ``entry_text`` key is never flagged. When
    ``entry_text`` has no ``font_size``, 14 is used.
    """
    try:
        text_spec = region.parent_field["entry_text"]
    except KeyError:
        # No text configured for this field, so there is nothing to fit.
        return False
    required: float = text_spec.get("font_size", 14)
    available: float = region.bounds[3] - region.bounds[1]
    return available < required
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# Core validation
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def validate_field_geometry(input_stream: TextIO) -> List[str]:
    """Check every label/entry rectangle described by a fields JSON stream.

    The stream must hold a JSON object with a ``form_fields`` list. Each field
    contributes its ``label_bounding_box`` and ``entry_bounding_box``. Regions
    whose fields share a ``page_number`` are compared pairwise for overlap,
    and every entry region is checked against the font size of its text.

    Returns:
        The diagnostic lines: a "Read N fields" header, one "FAILURE" line
        per problem, and a closing "SUCCESS" line when nothing was flagged.
        Checking stops early, with an "Aborting" line appended, as soon as
        the list (header included) reaches ``MAX_REPORTED_ISSUES`` entries.
    """
    stop_notice = "Aborting further checks; fix bounding boxes and try again"
    same_field_tpl = (
        "FAILURE: intersection between label and entry "
        "bounding boxes for `{}` ({}, {})"
    )
    cross_field_tpl = (
        "FAILURE: intersection between {} bounding box for "
        "`{}` ({}) and {} bounding box for `{}` ({})"
    )
    short_entry_tpl = (
        "FAILURE: entry bounding box height ({}) for `{}` is too short "
        "for the text content (font size: {}). Increase the box height "
        "or decrease the font size."
    )

    payload: Dict[str, Any] = json.load(input_stream)
    fields: List[Dict[str, Any]] = payload["form_fields"]
    report: List[str] = ["Read %d fields" % len(fields)]

    # Flatten to two regions per field, label first and entry second.
    regions: List[FieldRegion] = [
        FieldRegion(field[box_key], role, field)
        for field in fields
        for role, box_key in (
            ("label", "label_bounding_box"),
            ("entry", "entry_bounding_box"),
        )
    ]

    all_clear: bool = True

    for position, current in enumerate(regions):
        # Compare only against later regions so each pair is seen once.
        for other in regions[position + 1:]:
            same_page = (
                current.parent_field["page_number"]
                == other.parent_field["page_number"]
            )
            if not same_page or not _do_overlap(current.bounds, other.bounds):
                continue

            all_clear = False
            if current.parent_field is other.parent_field:
                # A field's own label and entry boxes collide.
                report.append(
                    same_field_tpl.format(
                        current.parent_field["description"],
                        current.bounds,
                        other.bounds,
                    )
                )
            else:
                report.append(
                    cross_field_tpl.format(
                        current.kind,
                        current.parent_field["description"],
                        current.bounds,
                        other.kind,
                        other.parent_field["description"],
                        other.bounds,
                    )
                )
            if len(report) >= MAX_REPORTED_ISSUES:
                report.append(stop_notice)
                return report

        # Only entry boxes hold text, so only they get the height check.
        if current.kind == "entry" and _height_too_small(current):
            all_clear = False
            box_height: float = current.bounds[3] - current.bounds[1]
            text_size = current.parent_field["entry_text"].get("font_size", 14)
            report.append(
                short_entry_tpl.format(
                    box_height, current.parent_field["description"], text_size
                )
            )
            if len(report) >= MAX_REPORTED_ISSUES:
                report.append(stop_notice)
                return report

    if all_clear:
        report.append("SUCCESS: All bounding boxes are valid")
    return report
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# ---------------------------------------------------------------------------
|
|
160
|
+
# CLI
|
|
161
|
+
# ---------------------------------------------------------------------------
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the bounding-box checker.

    The parser takes one positional argument, ``fields_json``, converted to
    a ``Path``.
    """
    summary = (
        "Validate bounding box geometry in a fields.json specification. "
        "Detects overlaps and insufficient entry heights."
    )
    cli = argparse.ArgumentParser(description=summary)
    cli.add_argument(
        "fields_json",
        help="Path to the fields.json file to validate.",
        type=Path,
    )
    return cli
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def main() -> None:
    """Entry point: parse arguments and run geometry validation.

    Prints each diagnostic line returned by ``validate_field_geometry`` to
    stdout. If the given path does not exist or is a directory, an error is
    written to stderr and the process exits with ``EXIT_FAILURE``.
    """
    args = build_parser().parse_args()
    fields_json: Path = args.fields_json

    if not fields_json.exists():
        print("ERROR: File not found: {}".format(fields_json), file=sys.stderr)
        sys.exit(EXIT_FAILURE)
    # A directory passes exists() but would crash in open() with a traceback.
    if fields_json.is_dir():
        print("ERROR: Not a file: {}".format(fields_json), file=sys.stderr)
        sys.exit(EXIT_FAILURE)

    with open(fields_json, "r", encoding="utf-8") as handle:
        results = validate_field_geometry(handle)

    for line in results:
        print(line)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
if __name__ == "__main__":
|
|
198
|
+
main()
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Detect whether a PDF document contains interactive form fields.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python check_fillable_fields.py <pdf_path>
|
|
5
|
+
|
|
6
|
+
Outputs a human-readable message indicating whether the document
|
|
7
|
+
has native fillable form widgets.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
import pypdf
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
EXIT_SUCCESS: int = 0
|
|
18
|
+
EXIT_FAILURE: int = 1
|
|
19
|
+
|
|
20
|
+
MSG_HAS_FIELDS: str = "This PDF has fillable form fields"
|
|
21
|
+
MSG_NO_FIELDS: str = (
|
|
22
|
+
"This PDF does not have fillable form fields; "
|
|
23
|
+
"you will need to visually determine where to enter data"
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def check_fillable(pdf_path: Path) -> bool:
    """Report whether the PDF at *pdf_path* exposes any form fields.

    The result is the truthiness of ``PdfReader.get_fields()`` for the file.
    """
    fields = pypdf.PdfReader(str(pdf_path)).get_fields()
    return True if fields else False
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the fillable-field detector.

    The parser takes one positional argument, ``pdf_path``, converted to a
    ``Path``.
    """
    summary = "Detect whether a PDF contains interactive form fields."
    cli = argparse.ArgumentParser(description=summary)
    cli.add_argument(
        "pdf_path",
        help="Path to the PDF document to inspect.",
        type=Path,
    )
    return cli
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def main() -> None:
    """Entry point: parse arguments, inspect the PDF, print the verdict.

    Prints ``MSG_HAS_FIELDS`` or ``MSG_NO_FIELDS`` to stdout depending on
    ``check_fillable``. If the given path does not exist or is a directory,
    an error is written to stderr and the process exits with
    ``EXIT_FAILURE``.
    """
    args = build_parser().parse_args()
    pdf_path: Path = args.pdf_path

    if not pdf_path.exists():
        print("ERROR: File not found: {}".format(pdf_path), file=sys.stderr)
        sys.exit(EXIT_FAILURE)
    # A directory passes exists() but cannot be read as a PDF; fail cleanly
    # instead of surfacing a traceback from the reader.
    if pdf_path.is_dir():
        print("ERROR: Not a file: {}".format(pdf_path), file=sys.stderr)
        sys.exit(EXIT_FAILURE)

    print(MSG_HAS_FIELDS if check_fillable(pdf_path) else MSG_NO_FIELDS)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
if __name__ == "__main__":
|
|
64
|
+
main()
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Render each page of a PDF document as a PNG image file.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python convert_pdf_to_images.py <input.pdf> <output_directory>
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import argparse
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List
|
|
11
|
+
|
|
12
|
+
import pdf2image
|
|
13
|
+
from PIL import Image
|
|
14
|
+
|
|
15
|
+
# ---------------------------------------------------------------------------
|
|
16
|
+
# Constants
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
EXIT_SUCCESS: int = 0
|
|
20
|
+
EXIT_FAILURE: int = 1
|
|
21
|
+
|
|
22
|
+
RENDER_DPI: int = 200
|
|
23
|
+
DEFAULT_MAX_DIMENSION: int = 1000
|
|
24
|
+
|
|
25
|
+
OUTPUT_FORMAT: str = "png"
|
|
26
|
+
FILENAME_TEMPLATE: str = "page_{}.png"
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Core logic
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def render_pages_to_png(
    source_pdf: Path,
    dest_folder: Path,
    dimension_cap: int = DEFAULT_MAX_DIMENSION,
) -> None:
    """Render every page of *source_pdf* to a PNG file inside *dest_folder*.

    Pages are rasterized at ``RENDER_DPI``. A page whose width or height
    exceeds *dimension_cap* is scaled down, keeping its aspect ratio, so the
    longer side fits the cap. Files are named with ``FILENAME_TEMPLATE``
    using 1-based page numbers, and one progress line is printed per page.

    Args:
        source_pdf: Path of the PDF to render.
        dest_folder: Existing directory that receives the PNG files.
        dimension_cap: Largest allowed width/height, in pixels.
    """
    rendered: List[Image.Image] = pdf2image.convert_from_path(
        str(source_pdf), dpi=RENDER_DPI
    )

    for page_number, img in enumerate(rendered, start=1):
        w, h = img.size
        if w > dimension_cap or h > dimension_cap:
            ratio: float = min(dimension_cap / w, dimension_cap / h)
            # int() truncates, so the short side can reach 0 for a small cap
            # or an extreme aspect ratio; keep each side at least 1 px so
            # resize() is never asked for an empty image.
            resized_w: int = max(1, int(w * ratio))
            resized_h: int = max(1, int(h * ratio))
            img = img.resize((resized_w, resized_h))

        out_path: Path = dest_folder / FILENAME_TEMPLATE.format(page_number)
        img.save(str(out_path))
        print(
            "Saved page %d as %s (size: %s)"
            % (page_number, out_path, str(img.size))
        )

    print("Converted %d pages to PNG images" % len(rendered))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# ---------------------------------------------------------------------------
|
|
63
|
+
# CLI
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the PDF-to-PNG renderer.

    The parser takes two positional arguments, ``input_pdf`` and
    ``output_directory``, both converted to ``Path``.
    """
    cli = argparse.ArgumentParser(
        description="Render PDF pages as PNG image files."
    )
    # Positional arguments, registered in command-line order.
    positionals = (
        ("input_pdf", "Path to the source PDF document."),
        ("output_directory", "Directory to write PNG page images into."),
    )
    for dest, help_text in positionals:
        cli.add_argument(dest, type=Path, help=help_text)
    return cli
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def main() -> None:
    """Entry point: parse arguments, create the output directory, render pages.

    If the input path does not exist or is a directory, an error is written
    to stderr and the process exits with ``EXIT_FAILURE``. Otherwise the
    output directory is created (parents included) when missing, and every
    page is rendered into it.
    """
    args = build_parser().parse_args()
    input_pdf: Path = args.input_pdf
    output_dir: Path = args.output_directory

    if not input_pdf.exists():
        print("ERROR: File not found: {}".format(input_pdf), file=sys.stderr)
        sys.exit(EXIT_FAILURE)
    # A directory passes exists() but cannot be rendered; fail cleanly before
    # creating the output directory or invoking the converter.
    if input_pdf.is_dir():
        print("ERROR: Not a file: {}".format(input_pdf), file=sys.stderr)
        sys.exit(EXIT_FAILURE)

    output_dir.mkdir(parents=True, exist_ok=True)
    render_pages_to_png(input_pdf, output_dir)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
if __name__ == "__main__":
|
|
102
|
+
main()
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Overlay colored rectangles on a page image to visualize field bounding boxes.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
python create_validation_image.py <page_number> <fields.json> <input_image> <output_image>
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import argparse
|
|
8
|
+
import json
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, List
|
|
12
|
+
|
|
13
|
+
from PIL import Image as PILImage
|
|
14
|
+
from PIL import ImageDraw as PILDraw
|
|
15
|
+
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
# Constants
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
EXIT_SUCCESS: int = 0
|
|
21
|
+
EXIT_FAILURE: int = 1
|
|
22
|
+
|
|
23
|
+
ENTRY_OUTLINE_COLOR: str = "red"
|
|
24
|
+
LABEL_OUTLINE_COLOR: str = "blue"
|
|
25
|
+
OUTLINE_WIDTH: int = 2
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Core logic
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def generate_overlay(
    target_page: int,
    fields_path: Path,
    src_image: Path,
    dst_image: Path,
) -> None:
    """Outline the bounding boxes of one page's fields on a page image.

    Reads the JSON spec at *fields_path*, selects the ``form_fields`` whose
    ``page_number`` equals *target_page*, and draws each field's entry box
    in ``ENTRY_OUTLINE_COLOR`` and label box in ``LABEL_OUTLINE_COLOR`` on
    top of *src_image*. The annotated image is written to *dst_image* and a
    one-line summary is printed.

    Args:
        target_page: Page number to match against each field's ``page_number``.
        fields_path: Path of the fields JSON specification.
        src_image: Path of the page image to draw on.
        dst_image: Path the annotated image is saved to.
    """
    with open(fields_path, "r", encoding="utf-8") as fh:
        spec: Dict[str, Any] = json.load(fh)

    matching: List[Dict[str, Any]] = [
        f for f in spec["form_fields"] if f["page_number"] == target_page
    ]

    # Use the image as a context manager so its file handle is released even
    # if drawing or saving raises.
    with PILImage.open(str(src_image)) as canvas:
        pen = PILDraw.Draw(canvas)
        for fld in matching:
            pen.rectangle(
                fld["entry_bounding_box"],
                outline=ENTRY_OUTLINE_COLOR,
                width=OUTLINE_WIDTH,
            )
            pen.rectangle(
                fld["label_bounding_box"],
                outline=LABEL_OUTLINE_COLOR,
                width=OUTLINE_WIDTH,
            )
        canvas.save(str(dst_image))

    # Every matching field contributes exactly two rectangles.
    box_count: int = 2 * len(matching)
    print(
        "Created validation image at {} with {} bounding boxes".format(
            dst_image, box_count
        )
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ---------------------------------------------------------------------------
|
|
73
|
+
# CLI
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the validation-image generator.

    The parser takes four positional arguments in order: ``page_number``
    (int), then ``fields_json``, ``input_image`` and ``output_image`` (each
    converted to ``Path``).
    """
    cli = argparse.ArgumentParser(
        description="Visualize field bounding boxes by overlaying colored rectangles on a page image."
    )
    # (dest, converter, help) for each positional, in command-line order.
    positionals = (
        ("page_number", int, "1-indexed page number to visualize."),
        ("fields_json", Path, "Path to the fields.json specification file."),
        ("input_image", Path, "Path to the source page image."),
        ("output_image", Path, "Destination path for the annotated image."),
    )
    for dest, converter, help_text in positionals:
        cli.add_argument(dest, type=converter, help=help_text)
    return cli
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def main() -> None:
    """Entry point: parse arguments and generate the validation overlay.

    The fields JSON path is checked first, then the input image path. If
    either does not exist or is a directory, an error is written to stderr
    and the process exits with ``EXIT_FAILURE``.
    """
    args = build_parser().parse_args()
    fields_json: Path = args.fields_json
    input_image: Path = args.input_image

    for required in (fields_json, input_image):
        if not required.exists():
            print("ERROR: File not found: {}".format(required), file=sys.stderr)
            sys.exit(EXIT_FAILURE)
        # A directory passes exists() but would crash when opened as a file.
        if required.is_dir():
            print("ERROR: Not a file: {}".format(required), file=sys.stderr)
            sys.exit(EXIT_FAILURE)

    generate_overlay(args.page_number, fields_json, input_image, args.output_image)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
if __name__ == "__main__":
|
|
125
|
+
main()
|