@panda-agent/panda-cli 0.1.29 → 0.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/pandacli.mjs +6 -1
- package/bundled-preset-skills/.gitkeep +0 -0
- package/bundled-preset-skills/README.md +17 -0
- package/bundled-preset-skills/docx/.skill-metadata.yaml +173 -0
- package/bundled-preset-skills/docx/LICENSE.txt +30 -0
- package/bundled-preset-skills/docx/SKILL.md +589 -0
- package/bundled-preset-skills/docx/scripts/__init__.py +1 -0
- package/bundled-preset-skills/docx/scripts/accept_changes.py +206 -0
- package/bundled-preset-skills/docx/scripts/comment.py +442 -0
- package/bundled-preset-skills/docx/scripts/office/helpers/__init__.py +1 -0
- package/bundled-preset-skills/docx/scripts/office/helpers/merge_runs.py +190 -0
- package/bundled-preset-skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
- package/bundled-preset-skills/docx/scripts/office/pack.py +167 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bundled-preset-skills/docx/scripts/office/soffice.py +194 -0
- package/bundled-preset-skills/docx/scripts/office/unpack.py +145 -0
- package/bundled-preset-skills/docx/scripts/office/validate.py +114 -0
- package/bundled-preset-skills/docx/scripts/office/validators/__init__.py +16 -0
- package/bundled-preset-skills/docx/scripts/office/validators/base.py +733 -0
- package/bundled-preset-skills/docx/scripts/office/validators/docx.py +354 -0
- package/bundled-preset-skills/docx/scripts/office/validators/pptx.py +230 -0
- package/bundled-preset-skills/docx/scripts/office/validators/redlining.py +212 -0
- package/bundled-preset-skills/docx/scripts/templates/comments.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/people.xml +3 -0
- package/bundled-preset-skills/frontend-design/LICENSE.txt +177 -0
- package/bundled-preset-skills/frontend-design/SKILL.md +42 -0
- package/bundled-preset-skills/pdf/.skill-metadata.yaml +273 -0
- package/bundled-preset-skills/pdf/LICENSE.txt +30 -0
- package/bundled-preset-skills/pdf/SKILL.md +324 -0
- package/bundled-preset-skills/pdf/advanced-reference.md +609 -0
- package/bundled-preset-skills/pdf/form-filling-guide.md +318 -0
- package/bundled-preset-skills/pdf/forms.md +294 -0
- package/bundled-preset-skills/pdf/reference.md +612 -0
- package/bundled-preset-skills/pdf/scripts/check_bounding_boxes.py +198 -0
- package/bundled-preset-skills/pdf/scripts/check_fillable_fields.py +64 -0
- package/bundled-preset-skills/pdf/scripts/convert_pdf_to_images.py +102 -0
- package/bundled-preset-skills/pdf/scripts/create_validation_image.py +125 -0
- package/bundled-preset-skills/pdf/scripts/extract_form_field_info.py +220 -0
- package/bundled-preset-skills/pdf/scripts/extract_form_structure.py +202 -0
- package/bundled-preset-skills/pdf/scripts/fill_fillable_fields.py +205 -0
- package/bundled-preset-skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
- package/bundled-preset-skills/pptx-generator/SKILL.md +204 -0
- package/bundled-preset-skills/pptx-generator/assets/styles/business.json +8 -0
- package/bundled-preset-skills/pptx-generator/assets/styles/minimal.json +8 -0
- package/bundled-preset-skills/pptx-generator/assets/styles/modern.json +8 -0
- package/bundled-preset-skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
- package/bundled-preset-skills/pptx-generator/references/collaboration_guide.md +381 -0
- package/bundled-preset-skills/pptx-generator/references/json_format_spec.md +215 -0
- package/bundled-preset-skills/pptx-generator/references/layout_guide.md +290 -0
- package/bundled-preset-skills/pptx-generator/scripts/json_validator.py +194 -0
- package/bundled-preset-skills/pptx-generator/scripts/pptx_builder.py +340 -0
- package/bundled-preset-skills/pptx-generator/scripts/pptx_validator.py +162 -0
- package/bundled-preset-skills/skill-creator/LICENSE.txt +202 -0
- package/bundled-preset-skills/skill-creator/SKILL.md +479 -0
- package/bundled-preset-skills/skill-creator/agents/analyzer.md +274 -0
- package/bundled-preset-skills/skill-creator/agents/comparator.md +202 -0
- package/bundled-preset-skills/skill-creator/agents/grader.md +223 -0
- package/bundled-preset-skills/skill-creator/assets/eval_review.html +146 -0
- package/bundled-preset-skills/skill-creator/eval-viewer/generate_review.py +471 -0
- package/bundled-preset-skills/skill-creator/eval-viewer/viewer.html +1325 -0
- package/bundled-preset-skills/skill-creator/references/schemas.md +430 -0
- package/bundled-preset-skills/skill-creator/scripts/__init__.py +0 -0
- package/bundled-preset-skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/bundled-preset-skills/skill-creator/scripts/generate_report.py +326 -0
- package/bundled-preset-skills/skill-creator/scripts/improve_description.py +248 -0
- package/bundled-preset-skills/skill-creator/scripts/package_skill.py +136 -0
- package/bundled-preset-skills/skill-creator/scripts/quick_validate.py +103 -0
- package/bundled-preset-skills/skill-creator/scripts/run_eval.py +310 -0
- package/bundled-preset-skills/skill-creator/scripts/run_loop.py +332 -0
- package/bundled-preset-skills/skill-creator/scripts/utils.py +47 -0
- package/bundled-preset-skills/xlsx/.skill-metadata.yaml +185 -0
- package/bundled-preset-skills/xlsx/LICENSE.txt +30 -0
- package/bundled-preset-skills/xlsx/SKILL.md +233 -0
- package/bundled-preset-skills/xlsx/scripts/office/helpers/__init__.py +1 -0
- package/bundled-preset-skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
- package/bundled-preset-skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
- package/bundled-preset-skills/xlsx/scripts/office/pack.py +162 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bundled-preset-skills/xlsx/scripts/office/soffice.py +185 -0
- package/bundled-preset-skills/xlsx/scripts/office/unpack.py +146 -0
- package/bundled-preset-skills/xlsx/scripts/office/validate.py +108 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/__init__.py +13 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/base.py +800 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/docx.py +383 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/pptx.py +250 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/redlining.py +229 -0
- package/bundled-preset-skills/xlsx/scripts/recalc.py +296 -0
- package/dist/panda-cli-ink.bundle.mjs +276 -342
- package/package.json +6 -4
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Introspect fillable PDF form fields and serialize their metadata to JSON.
|
|
2
|
+
|
|
3
|
+
Supports text inputs, checkboxes, radio button groups, and dropdown choices.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python extract_form_field_info.py <input.pdf> <output.json>
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Optional, Set
|
|
14
|
+
|
|
15
|
+
import pypdf
|
|
16
|
+
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
# Constants
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
EXIT_SUCCESS: int = 0
|
|
22
|
+
EXIT_FAILURE: int = 1
|
|
23
|
+
|
|
24
|
+
FIELD_TYPE_TEXT: str = "text"
|
|
25
|
+
FIELD_TYPE_CHECKBOX: str = "checkbox"
|
|
26
|
+
FIELD_TYPE_RADIO: str = "radio_group"
|
|
27
|
+
FIELD_TYPE_CHOICE: str = "choice"
|
|
28
|
+
|
|
29
|
+
PDF_FT_TEXT: str = "/Tx"
|
|
30
|
+
PDF_FT_BUTTON: str = "/Btn"
|
|
31
|
+
PDF_FT_CHOICE: str = "/Ch"
|
|
32
|
+
|
|
33
|
+
OFF_STATE: str = "/Off"
|
|
34
|
+
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
# Internal helpers
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _resolve_qualified_name(annot_obj: Any) -> Optional[str]:
|
|
41
|
+
"""Walk the /Parent chain to assemble a dot-separated field identifier."""
|
|
42
|
+
parts: List[str] = []
|
|
43
|
+
node = annot_obj
|
|
44
|
+
while node is not None:
|
|
45
|
+
name_component = node.get("/T")
|
|
46
|
+
if name_component:
|
|
47
|
+
parts.append(name_component)
|
|
48
|
+
node = node.get("/Parent")
|
|
49
|
+
return ".".join(reversed(parts)) if parts else None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _build_field_descriptor(raw_field: Any, identifier: str) -> Dict[str, Any]:
    """Construct a typed descriptor dict from a raw PDF field object.

    The descriptor always carries ``field_id`` and ``type``. Depending on the
    field's ``/FT`` entry it may carry more:

    * text field: nothing extra.
    * button field: reported as a checkbox. When ``/_States_`` holds exactly
      two entries, ``checked_value`` / ``unchecked_value`` are filled in; if
      neither entry is the off state, a warning is printed and the two
      entries are used in the order given.
    * choice field: ``choice_options``, a list of ``{"value", "text"}`` dicts
      built from ``/_States_``.
    * anything else: ``type`` is ``"unknown (<code>)"``.

    Args:
        raw_field: Field object exposing ``.get(key[, default])``.
        identifier: Field id stored under ``field_id``.

    Returns:
        The descriptor dict.
    """
    descriptor: Dict[str, Any] = {"field_id": identifier}
    field_type_code = raw_field.get("/FT")

    if field_type_code == PDF_FT_TEXT:
        descriptor["type"] = FIELD_TYPE_TEXT

    elif field_type_code == PDF_FT_BUTTON:
        descriptor["type"] = FIELD_TYPE_CHECKBOX
        available_states = raw_field.get("/_States_", [])
        if len(available_states) == 2:
            if OFF_STATE in available_states:
                # Whichever entry is not the off state is the "on" value.
                on_val = (
                    available_states[0]
                    if available_states[0] != OFF_STATE
                    else available_states[1]
                )
                descriptor["checked_value"] = on_val
                descriptor["unchecked_value"] = OFF_STATE
            else:
                # The message previously contained a stray "$" before the
                # field id (left over from template-literal syntax).
                print(
                    "Unexpected state values for checkbox `%s`. "
                    "Its checked and unchecked values may not be correct; "
                    "if you're trying to check it, visually verify the results."
                    % identifier
                )
                descriptor["checked_value"] = available_states[0]
                descriptor["unchecked_value"] = available_states[1]

    elif field_type_code == PDF_FT_CHOICE:
        descriptor["type"] = FIELD_TYPE_CHOICE
        choice_options: List[Dict[str, Any]] = []
        for opt in raw_field.get("/_States_", []):
            if isinstance(opt, str):
                # A plain-string option serves as both export value and
                # display text; indexing it would pick single characters.
                choice_options.append({"value": opt, "text": opt})
            else:
                choice_options.append({"value": opt[0], "text": opt[1]})
        descriptor["choice_options"] = choice_options

    else:
        descriptor["type"] = "unknown (%s)" % field_type_code

    return descriptor
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _ordering_key(item: Dict[str, Any]) -> List[Any]:
|
|
97
|
+
"""Produce a sort key: page number, then top-to-bottom left-to-right."""
|
|
98
|
+
if "radio_options" in item:
|
|
99
|
+
rect = item["radio_options"][0]["rect"] or [0, 0, 0, 0]
|
|
100
|
+
else:
|
|
101
|
+
rect = item.get("rect") or [0, 0, 0, 0]
|
|
102
|
+
return [item.get("page"), [-rect[1], rect[0]]]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
# Public API
|
|
107
|
+
# ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def get_field_info(pdf_reader: pypdf.PdfReader) -> List[Dict[str, Any]]:
    """Collect descriptors for the form fields of *pdf_reader*.

    Fields without ``/Kids`` get a descriptor from
    ``_build_field_descriptor``. Fields with ``/Kids`` are only remembered
    when their ``/FT`` is the button type; they are treated as radio-group
    candidates. Every page's ``/Annots`` list is then scanned to attach a
    1-based ``page`` and a ``rect`` to plain fields, and to gather one
    ``radio_options`` entry per radio annotation that has exactly one
    non-off appearance state.

    Plain fields that never matched an annotation are reported on stdout and
    left out of the result.

    Args:
        pdf_reader: Reader for the document to inspect.

    Returns:
        Located descriptors and radio groups, sorted with ``_ordering_key``.
        An empty list when the document reports no fields.
    """
    all_fields = pdf_reader.get_fields()
    if not all_fields:
        return []

    plain_fields: Dict[str, Dict[str, Any]] = {}
    radio_parent_ids: Set[str] = set()
    for name, field in all_fields.items():
        if not field.get("/Kids"):
            plain_fields[name] = _build_field_descriptor(field, name)
        elif field.get("/FT") == PDF_FT_BUTTON:
            radio_parent_ids.add(name)

    groups: Dict[str, Dict[str, Any]] = {}
    for page_no, page in enumerate(pdf_reader.pages, start=1):
        for widget in page.get("/Annots", []):
            full_name = _resolve_qualified_name(widget)

            if full_name in plain_fields:
                entry = plain_fields[full_name]
                entry["page"] = page_no
                entry["rect"] = widget.get("/Rect")
                continue

            if full_name not in radio_parent_ids:
                continue

            try:
                on_states = [
                    state for state in widget["/AP"]["/N"] if state != OFF_STATE
                ]
            except KeyError:
                # No normal appearance dictionary: nothing to record.
                continue
            if len(on_states) != 1:
                continue

            # The group's page is that of the first option encountered.
            group = groups.setdefault(
                full_name,
                {
                    "field_id": full_name,
                    "type": FIELD_TYPE_RADIO,
                    "page": page_no,
                    "radio_options": [],
                },
            )
            group["radio_options"].append({
                "value": on_states[0],
                "rect": widget.get("/Rect"),
            })

    placed: List[Dict[str, Any]] = []
    for entry in plain_fields.values():
        if "page" in entry:
            placed.append(entry)
        else:
            print(
                "Unable to determine location for field id: %s, ignoring"
                % entry.get("field_id")
            )

    return sorted(placed + list(groups.values()), key=_ordering_key)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def serialize_field_info(pdf_file: Path, output_json: Path) -> None:
    """Extract field info from *pdf_file* and save it to *output_json*.

    The result of ``get_field_info`` is written as UTF-8 JSON with a
    two-space indent, and a one-line summary is printed to stdout.

    Args:
        pdf_file: PDF to read.
        output_json: File that receives the JSON document.
    """
    field_records = get_field_info(pypdf.PdfReader(str(pdf_file)))
    with open(output_json, "w", encoding="utf-8") as sink:
        json.dump(field_records, sink, indent=2)
    print("Wrote %d fields to %s" % (len(field_records), output_json))
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# ---------------------------------------------------------------------------
|
|
184
|
+
# CLI
|
|
185
|
+
# ---------------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the field-extraction script.

    Returns:
        A parser with two positional ``Path`` arguments, ``input_pdf`` and
        ``output_json``.
    """
    cli = argparse.ArgumentParser(
        description="Extract fillable form field metadata from a PDF to JSON."
    )
    positional_args = (
        ("input_pdf", "Path to the source PDF with form fields."),
        ("output_json", "Destination path for the JSON output."),
    )
    for arg_name, help_text in positional_args:
        cli.add_argument(arg_name, type=Path, help=help_text)
    return cli
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def main() -> None:
    """Run the command-line workflow: parse arguments, then extract.

    Exits with ``EXIT_FAILURE`` after printing an error to stderr when the
    input path does not exist.
    """
    args = build_parser().parse_args()
    source: Path = args.input_pdf

    if source.exists():
        serialize_field_info(source, args.output_json)
        return

    print("ERROR: File not found: {}".format(source), file=sys.stderr)
    sys.exit(EXIT_FAILURE)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""Analyze non-fillable PDF layout to discover text elements, ruling lines,
|
|
2
|
+
and checkbox-like rectangles.
|
|
3
|
+
|
|
4
|
+
Produces a JSON manifest for downstream coordinate-based form filling.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python extract_form_structure.py <input.pdf> <output.json>
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import json
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Dict, List
|
|
15
|
+
|
|
16
|
+
import pdfplumber
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# Constants
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
EXIT_SUCCESS: int = 0
|
|
23
|
+
EXIT_FAILURE: int = 1
|
|
24
|
+
|
|
25
|
+
# Constraints for identifying checkbox-shaped rectangles
|
|
26
|
+
CHECKBOX_MIN_SIZE: float = 5.0
|
|
27
|
+
CHECKBOX_MAX_SIZE: float = 15.0
|
|
28
|
+
CHECKBOX_ASPECT_TOLERANCE: float = 2.0
|
|
29
|
+
|
|
30
|
+
# Minimum fraction of page width for a line to be considered "spanning"
|
|
31
|
+
SPANNING_LINE_RATIO: float = 0.5
|
|
32
|
+
|
|
33
|
+
COORDINATE_PRECISION: int = 1
|
|
34
|
+
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
# Geometry helpers
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _is_checkbox_rect(rect_obj: Dict[str, Any]) -> bool:
    """Decide whether a rectangle has checkbox-like dimensions.

    Both the width (``x1 - x0``) and the height (``bottom - top``) must lie
    within ``CHECKBOX_MIN_SIZE``..``CHECKBOX_MAX_SIZE`` inclusive, and they
    must differ by less than ``CHECKBOX_ASPECT_TOLERANCE``.

    Args:
        rect_obj: Mapping with ``x0``, ``x1``, ``top`` and ``bottom`` entries.

    Returns:
        True when both conditions hold.
    """
    width = float(rect_obj["x1"]) - float(rect_obj["x0"])
    height = float(rect_obj["bottom"]) - float(rect_obj["top"])

    for side in (width, height):
        if not (CHECKBOX_MIN_SIZE <= side <= CHECKBOX_MAX_SIZE):
            return False

    return abs(width - height) < CHECKBOX_ASPECT_TOLERANCE
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _is_spanning_line(line_obj: Dict[str, Any], page_width: float) -> bool:
    """Tell whether a line's horizontal extent is large relative to the page.

    Args:
        line_obj: Mapping with ``x0`` and ``x1`` entries.
        page_width: Width of the page the line belongs to.

    Returns:
        True when ``|x1 - x0|`` exceeds ``page_width * SPANNING_LINE_RATIO``.
    """
    right = float(line_obj["x1"])
    left = float(line_obj["x0"])
    threshold = page_width * SPANNING_LINE_RATIO
    return abs(right - left) > threshold
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Core analysis
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def analyze_pdf_layout(pdf_path: Path) -> Dict[str, Any]:
    """Scan every page of a PDF and gather its layout elements.

    For each page this records the page size, every word returned by
    ``extract_words()``, every line accepted by ``_is_spanning_line`` and
    every rectangle accepted by ``_is_checkbox_rect``. Coordinates are
    rounded to ``COORDINATE_PRECISION`` decimals. Row boundaries are derived
    afterwards by ``_compute_row_boundaries``.

    Args:
        pdf_path: PDF file to open with pdfplumber.

    Returns:
        Dict with the keys ``pages``, ``labels``, ``lines``, ``checkboxes``
        and ``row_boundaries``, each holding a list of dicts.
    """

    def rounded(value: Any) -> float:
        return round(float(value), COORDINATE_PRECISION)

    pages: List[Dict[str, Any]] = []
    labels: List[Dict[str, Any]] = []
    lines: List[Dict[str, Any]] = []
    checkboxes: List[Dict[str, Any]] = []

    with pdfplumber.open(str(pdf_path)) as doc:
        for pg_num, pg in enumerate(doc.pages, start=1):
            pages.append({
                "page_number": pg_num,
                "width": float(pg.width),
                "height": float(pg.height),
            })

            # Word-level text elements.
            labels.extend(
                {
                    "page": pg_num,
                    "text": word["text"],
                    "x0": rounded(word["x0"]),
                    "top": rounded(word["top"]),
                    "x1": rounded(word["x1"]),
                    "bottom": rounded(word["bottom"]),
                }
                for word in pg.extract_words()
            )

            # Lines wide enough to count as spanning rules.
            lines.extend(
                {
                    "page": pg_num,
                    "y": rounded(ln["top"]),
                    "x0": rounded(ln["x0"]),
                    "x1": rounded(ln["x1"]),
                }
                for ln in pg.lines
                if _is_spanning_line(ln, pg.width)
            )

            # Small, roughly square rectangles.
            for rect in pg.rects:
                if not _is_checkbox_rect(rect):
                    continue
                left, right = float(rect["x0"]), float(rect["x1"])
                upper, lower = float(rect["top"]), float(rect["bottom"])
                checkboxes.append({
                    "page": pg_num,
                    "x0": rounded(left),
                    "top": rounded(upper),
                    "x1": rounded(right),
                    "bottom": rounded(lower),
                    "center_x": rounded((left + right) / 2),
                    "center_y": rounded((upper + lower) / 2),
                })

    result: Dict[str, Any] = {
        "pages": pages,
        "labels": labels,
        "lines": lines,
        "checkboxes": checkboxes,
        "row_boundaries": [],
    }
    _compute_row_boundaries(result)
    return result
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _compute_row_boundaries(result: Dict[str, Any]) -> None:
    """Append row intervals to ``result["row_boundaries"]`` in place.

    The ``y`` values of ``result["lines"]`` are grouped by page. Within each
    page the distinct values are sorted, and every pair of neighbouring
    values yields one entry with ``row_top``, ``row_bottom`` and the rounded
    ``row_height``.

    Args:
        result: Layout dict holding ``lines`` and ``row_boundaries`` lists.
    """
    ys_by_page: Dict[int, List[float]] = {}
    for entry in result["lines"]:
        page = entry["page"]
        if page not in ys_by_page:
            ys_by_page[page] = []
        ys_by_page[page].append(entry["y"])

    for page, ys in ys_by_page.items():
        distinct = sorted(set(ys))
        # Pair each value with its successor.
        for upper, lower in zip(distinct, distinct[1:]):
            result["row_boundaries"].append({
                "page": page,
                "row_top": upper,
                "row_bottom": lower,
                "row_height": round(lower - upper, COORDINATE_PRECISION),
            })
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ---------------------------------------------------------------------------
|
|
149
|
+
# CLI
|
|
150
|
+
# ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the layout-analysis script.

    Returns:
        A parser with two positional ``Path`` arguments, ``input_pdf`` and
        ``output_json``.
    """
    summary = (
        "Analyze PDF layout structure: text labels, ruling lines, "
        "and checkbox shapes."
    )
    cli = argparse.ArgumentParser(description=summary)
    positional_args = (
        ("input_pdf", "Path to the source PDF to analyze."),
        ("output_json", "Destination path for the JSON structure output."),
    )
    for arg_name, help_text in positional_args:
        cli.add_argument(arg_name, type=Path, help=help_text)
    return cli
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def main() -> None:
    """Run the command-line workflow: parse arguments, analyze, save, report.

    Exits with ``EXIT_FAILURE`` after printing an error to stderr when the
    input path does not exist. Otherwise the layout is written as UTF-8 JSON
    and a per-category count summary is printed to stdout.
    """
    args = build_parser().parse_args()
    input_pdf: Path = args.input_pdf
    output_json: Path = args.output_json

    if not input_pdf.exists():
        print("ERROR: File not found: {}".format(input_pdf), file=sys.stderr)
        sys.exit(EXIT_FAILURE)

    print("Extracting structure from %s..." % input_pdf)
    layout = analyze_pdf_layout(input_pdf)

    with open(output_json, "w", encoding="utf-8") as sink:
        json.dump(layout, sink, indent=2)

    # One summary line per category, in a fixed order.
    summary_lines = (
        (" - %d pages", "pages"),
        (" - %d text labels", "labels"),
        (" - %d horizontal lines", "lines"),
        (" - %d checkboxes", "checkboxes"),
        (" - %d row boundaries", "row_boundaries"),
    )
    print("Found:")
    for template, key in summary_lines:
        print(template % len(layout[key]))
    print("Saved to %s" % output_json)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""Populate interactive PDF form fields with values specified in a JSON manifest.
|
|
2
|
+
|
|
3
|
+
Validates field IDs, page numbers, and value constraints before writing.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python fill_fillable_fields.py <input.pdf> <field_values.json> <output.pdf>
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import argparse
|
|
10
|
+
import json
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
|
|
15
|
+
import pypdf
|
|
16
|
+
|
|
17
|
+
from extract_form_field_info import get_field_info
|
|
18
|
+
|
|
19
|
+
# ---------------------------------------------------------------------------
|
|
20
|
+
# Constants
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
# Process exit statuses passed to sys.exit() by this script.
EXIT_SUCCESS: int = 0
EXIT_FAILURE: int = 1
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Validation
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _check_value_constraint(descriptor: Dict[str, Any], val: str) -> Optional[str]:
    """Check *val* against the constraints recorded in a field descriptor.

    Only ``checkbox``, ``radio_group`` and ``choice`` descriptors restrict
    their values; any other field type accepts every value.

    Args:
        descriptor: Field description providing ``type`` and ``field_id``,
            plus ``checked_value``/``unchecked_value`` for checkboxes,
            ``radio_options`` for radio groups, or ``choice_options`` for
            choice fields (each option is a mapping with a ``value`` key).
        val: The value requested for the field.

    Returns:
        ``None`` when the value is acceptable, otherwise a ready-to-print
        error message.
    """
    kind: str = descriptor["type"]
    name: str = descriptor["field_id"]

    if kind == "checkbox":
        checked: str = descriptor["checked_value"]
        unchecked: str = descriptor["unchecked_value"]
        if val == checked or val == unchecked:
            return None
        return (
            'ERROR: Invalid value "%s" for checkbox field "%s". '
            'The checked value is "%s" and the unchecked value is "%s"'
            % (val, name, checked, unchecked)
        )

    if kind == "radio_group":
        permitted: List[str] = [opt["value"] for opt in descriptor["radio_options"]]
        if val in permitted:
            return None
        return (
            'ERROR: Invalid value "%s" for radio group field "%s". '
            "Valid values are: %s" % (val, name, permitted)
        )

    if kind == "choice":
        permitted = [opt["value"] for opt in descriptor["choice_options"]]
        if val in permitted:
            return None
        return (
            'ERROR: Invalid value "%s" for choice field "%s". '
            "Valid values are: %s" % (val, name, permitted)
        )

    # Unconstrained field type: nothing to check.
    return None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# pypdf compatibility patch
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _apply_pypdf_option_patch() -> None:
    """Replace ``DictionaryObject.get_inherited`` with an option-flattening wrapper.

    Some PDFs encode choices as [[export_value, display_text], ...]. After
    this patch, a lookup of the ``Opt`` key whose result is a list made up
    solely of two-element lists returns only the first element of each pair.
    Every other lookup returns what the original method returned.
    """
    from pypdf.generic import DictionaryObject
    from pypdf.constants import FieldDictionaryAttributes

    original_lookup = DictionaryObject.get_inherited

    def _is_pair(entry: Any) -> bool:
        """Tell whether *entry* is a list of exactly two items."""
        return isinstance(entry, list) and len(entry) == 2

    def _get_inherited_flat(self: Any, key: str, default: Any = None) -> Any:
        value = original_lookup(self, key, default)
        if key != FieldDictionaryAttributes.Opt:
            return value
        if not isinstance(value, list):
            return value
        if not all(_is_pair(entry) for entry in value):
            return value
        # Keep only the first member of every pair.
        return [entry[0] for entry in value]

    DictionaryObject.get_inherited = _get_inherited_flat
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
# Core logic
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def populate_fields(src_pdf: Path, values_json: Path, dest_pdf: Path) -> None:
    """Fill the form fields of *src_pdf* from a JSON manifest and save the result.

    The manifest is a JSON list of entries with ``field_id`` and ``page`` keys
    and an optional ``value`` key. Every entry is checked against the fields
    reported by ``get_field_info``; entries without ``value`` are validated
    but not written.

    Args:
        src_pdf: PDF whose form fields are to be filled.
        values_json: Path of the JSON manifest.
        dest_pdf: Path the filled PDF is written to.

    Raises:
        SystemExit: With ``EXIT_FAILURE`` when any entry has an unknown field
            ID, the wrong page number, or a value rejected by
            ``_check_value_constraint``. All problems are printed first and
            no output file is written.
    """
    with open(values_json, "r", encoding="utf-8") as manifest:
        entries: List[Dict[str, Any]] = json.load(manifest)

    # Bucket the values to write by the page number given in the manifest.
    values_by_page: Dict[int, Dict[str, str]] = {}
    for entry in entries:
        if "value" in entry:
            bucket = values_by_page.setdefault(entry["page"], {})
            bucket[entry["field_id"]] = entry["value"]

    pdf = pypdf.PdfReader(str(src_pdf))

    # Check every entry and report all problems before giving up.
    descriptors: Dict[str, Dict[str, Any]] = {
        info["field_id"]: info for info in get_field_info(pdf)
    }
    failed: bool = False

    for entry in entries:
        field_id: str = entry["field_id"]
        descriptor = descriptors.get(field_id)
        problem = None

        if descriptor is None:
            problem = "ERROR: `%s` is not a valid field ID" % field_id
        elif entry["page"] != descriptor["page"]:
            problem = (
                "ERROR: Incorrect page number for `%s` (got %s, expected %s)"
                % (field_id, entry["page"], descriptor["page"])
            )
        elif "value" in entry:
            problem = _check_value_constraint(descriptor, entry["value"])

        if problem:
            print(problem)
            failed = True

    if failed:
        sys.exit(EXIT_FAILURE)

    # Apply the values page by page on a clone of the source document.
    writer = pypdf.PdfWriter(clone_from=pdf)
    for page_number, page_values in values_by_page.items():
        # Manifest page numbers are 1-based; writer.pages is 0-based.
        target_page = writer.pages[page_number - 1]
        writer.update_page_form_field_values(
            target_page, page_values, auto_regenerate=False
        )

    writer.set_need_appearances_writer(True)

    with open(dest_pdf, "wb") as sink:
        writer.write(sink)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# ---------------------------------------------------------------------------
|
|
156
|
+
# CLI
|
|
157
|
+
# ---------------------------------------------------------------------------
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the form-filling script.

    Returns:
        A parser accepting three positional arguments, ``input_pdf``,
        ``field_values_json`` and ``output_pdf``, each converted to a ``Path``.
    """
    cli = argparse.ArgumentParser(
        description="Fill interactive PDF form fields using a JSON value manifest."
    )

    # Positional arguments in the order they must appear on the command line.
    positionals = (
        ("input_pdf", "Path to the source PDF with form fields."),
        ("field_values_json", "JSON file specifying field IDs and values."),
        ("output_pdf", "Destination path for the filled PDF."),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, type=Path, help=help_text)

    return cli
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def main() -> None:
    """Run the form-filling workflow from the command line.

    Parses the arguments, checks that both input files exist (exiting with
    ``EXIT_FAILURE`` and a message on stderr at the first missing one),
    installs the pypdf option patch, and then fills the form.
    """
    args = build_parser().parse_args()

    input_pdf: Path = args.input_pdf
    values_json: Path = args.field_values_json
    output_pdf: Path = args.output_pdf

    # The PDF is checked before the manifest; the first missing file aborts.
    for required in (input_pdf, values_json):
        if not required.exists():
            print("ERROR: File not found: {}".format(required), file=sys.stderr)
            sys.exit(EXIT_FAILURE)

    _apply_pypdf_option_patch()
    populate_fields(input_pdf, values_json, output_pdf)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|