@panda-agent/panda-cli 0.1.29 → 0.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/pandacli.mjs +6 -1
- package/bundled-preset-skills/.gitkeep +0 -0
- package/bundled-preset-skills/README.md +17 -0
- package/bundled-preset-skills/docx/.skill-metadata.yaml +173 -0
- package/bundled-preset-skills/docx/LICENSE.txt +30 -0
- package/bundled-preset-skills/docx/SKILL.md +589 -0
- package/bundled-preset-skills/docx/scripts/__init__.py +1 -0
- package/bundled-preset-skills/docx/scripts/accept_changes.py +206 -0
- package/bundled-preset-skills/docx/scripts/comment.py +442 -0
- package/bundled-preset-skills/docx/scripts/office/helpers/__init__.py +1 -0
- package/bundled-preset-skills/docx/scripts/office/helpers/merge_runs.py +190 -0
- package/bundled-preset-skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
- package/bundled-preset-skills/docx/scripts/office/pack.py +167 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bundled-preset-skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bundled-preset-skills/docx/scripts/office/soffice.py +194 -0
- package/bundled-preset-skills/docx/scripts/office/unpack.py +145 -0
- package/bundled-preset-skills/docx/scripts/office/validate.py +114 -0
- package/bundled-preset-skills/docx/scripts/office/validators/__init__.py +16 -0
- package/bundled-preset-skills/docx/scripts/office/validators/base.py +733 -0
- package/bundled-preset-skills/docx/scripts/office/validators/docx.py +354 -0
- package/bundled-preset-skills/docx/scripts/office/validators/pptx.py +230 -0
- package/bundled-preset-skills/docx/scripts/office/validators/redlining.py +212 -0
- package/bundled-preset-skills/docx/scripts/templates/comments.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/bundled-preset-skills/docx/scripts/templates/people.xml +3 -0
- package/bundled-preset-skills/frontend-design/LICENSE.txt +177 -0
- package/bundled-preset-skills/frontend-design/SKILL.md +42 -0
- package/bundled-preset-skills/pdf/.skill-metadata.yaml +273 -0
- package/bundled-preset-skills/pdf/LICENSE.txt +30 -0
- package/bundled-preset-skills/pdf/SKILL.md +324 -0
- package/bundled-preset-skills/pdf/advanced-reference.md +609 -0
- package/bundled-preset-skills/pdf/form-filling-guide.md +318 -0
- package/bundled-preset-skills/pdf/forms.md +294 -0
- package/bundled-preset-skills/pdf/reference.md +612 -0
- package/bundled-preset-skills/pdf/scripts/check_bounding_boxes.py +198 -0
- package/bundled-preset-skills/pdf/scripts/check_fillable_fields.py +64 -0
- package/bundled-preset-skills/pdf/scripts/convert_pdf_to_images.py +102 -0
- package/bundled-preset-skills/pdf/scripts/create_validation_image.py +125 -0
- package/bundled-preset-skills/pdf/scripts/extract_form_field_info.py +220 -0
- package/bundled-preset-skills/pdf/scripts/extract_form_structure.py +202 -0
- package/bundled-preset-skills/pdf/scripts/fill_fillable_fields.py +205 -0
- package/bundled-preset-skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
- package/bundled-preset-skills/pptx-generator/SKILL.md +204 -0
- package/bundled-preset-skills/pptx-generator/assets/styles/business.json +8 -0
- package/bundled-preset-skills/pptx-generator/assets/styles/minimal.json +8 -0
- package/bundled-preset-skills/pptx-generator/assets/styles/modern.json +8 -0
- package/bundled-preset-skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
- package/bundled-preset-skills/pptx-generator/references/collaboration_guide.md +381 -0
- package/bundled-preset-skills/pptx-generator/references/json_format_spec.md +215 -0
- package/bundled-preset-skills/pptx-generator/references/layout_guide.md +290 -0
- package/bundled-preset-skills/pptx-generator/scripts/json_validator.py +194 -0
- package/bundled-preset-skills/pptx-generator/scripts/pptx_builder.py +340 -0
- package/bundled-preset-skills/pptx-generator/scripts/pptx_validator.py +162 -0
- package/bundled-preset-skills/skill-creator/LICENSE.txt +202 -0
- package/bundled-preset-skills/skill-creator/SKILL.md +479 -0
- package/bundled-preset-skills/skill-creator/agents/analyzer.md +274 -0
- package/bundled-preset-skills/skill-creator/agents/comparator.md +202 -0
- package/bundled-preset-skills/skill-creator/agents/grader.md +223 -0
- package/bundled-preset-skills/skill-creator/assets/eval_review.html +146 -0
- package/bundled-preset-skills/skill-creator/eval-viewer/generate_review.py +471 -0
- package/bundled-preset-skills/skill-creator/eval-viewer/viewer.html +1325 -0
- package/bundled-preset-skills/skill-creator/references/schemas.md +430 -0
- package/bundled-preset-skills/skill-creator/scripts/__init__.py +0 -0
- package/bundled-preset-skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/bundled-preset-skills/skill-creator/scripts/generate_report.py +326 -0
- package/bundled-preset-skills/skill-creator/scripts/improve_description.py +248 -0
- package/bundled-preset-skills/skill-creator/scripts/package_skill.py +136 -0
- package/bundled-preset-skills/skill-creator/scripts/quick_validate.py +103 -0
- package/bundled-preset-skills/skill-creator/scripts/run_eval.py +310 -0
- package/bundled-preset-skills/skill-creator/scripts/run_loop.py +332 -0
- package/bundled-preset-skills/skill-creator/scripts/utils.py +47 -0
- package/bundled-preset-skills/xlsx/.skill-metadata.yaml +185 -0
- package/bundled-preset-skills/xlsx/LICENSE.txt +30 -0
- package/bundled-preset-skills/xlsx/SKILL.md +233 -0
- package/bundled-preset-skills/xlsx/scripts/office/helpers/__init__.py +1 -0
- package/bundled-preset-skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
- package/bundled-preset-skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
- package/bundled-preset-skills/xlsx/scripts/office/pack.py +162 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/bundled-preset-skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/bundled-preset-skills/xlsx/scripts/office/soffice.py +185 -0
- package/bundled-preset-skills/xlsx/scripts/office/unpack.py +146 -0
- package/bundled-preset-skills/xlsx/scripts/office/validate.py +108 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/__init__.py +13 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/base.py +800 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/docx.py +383 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/pptx.py +250 -0
- package/bundled-preset-skills/xlsx/scripts/office/validators/redlining.py +229 -0
- package/bundled-preset-skills/xlsx/scripts/recalc.py +296 -0
- package/dist/panda-cli-ink.bundle.mjs +276 -342
- package/package.json +6 -4
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
examples:
|
|
2
|
+
- id: extract-text
|
|
3
|
+
title:
|
|
4
|
+
zh: 提取文本内容
|
|
5
|
+
en: Extract Text Content
|
|
6
|
+
description:
|
|
7
|
+
zh: 从 PDF 文件中准确提取文本和结构化信息
|
|
8
|
+
en: Accurately extract text and structured information from PDF files
|
|
9
|
+
prompt:
|
|
10
|
+
zh: |-
|
|
11
|
+
请帮我从这个 PDF 文件中提取内容:
|
|
12
|
+
|
|
13
|
+
文件路径:{{PDF文件路径}}
|
|
14
|
+
|
|
15
|
+
提取要求:
|
|
16
|
+
1. 完整文本提取:
|
|
17
|
+
- 保留原文本顺序和段落结构
|
|
18
|
+
- 处理多栏布局的文本流
|
|
19
|
+
- 识别和提取标题层级
|
|
20
|
+
2. 结构化信息:
|
|
21
|
+
- 目录/索引信息
|
|
22
|
+
- 页眉页脚内容
|
|
23
|
+
- 页码和章节信息
|
|
24
|
+
3. 特殊内容:
|
|
25
|
+
- 表格数据的结构化提取
|
|
26
|
+
- 图片的描述性文字
|
|
27
|
+
- 链接和引用信息
|
|
28
|
+
4. 输出格式:
|
|
29
|
+
- 纯文本版本
|
|
30
|
+
- 结构化的 JSON 格式
|
|
31
|
+
- Markdown 格式(保持层次结构)
|
|
32
|
+
|
|
33
|
+
请提供多种格式的提取结果。
|
|
34
|
+
en: |-
|
|
35
|
+
Please help me extract content from this PDF file:
|
|
36
|
+
|
|
37
|
+
File path: {{PDF file path}}
|
|
38
|
+
|
|
39
|
+
Extraction requirements:
|
|
40
|
+
1. Complete text extraction:
|
|
41
|
+
- Preserve original text order and paragraph structure
|
|
42
|
+
- Handle text flow in multi-column layouts
|
|
43
|
+
- Identify and extract heading hierarchies
|
|
44
|
+
2. Structured information:
|
|
45
|
+
- Table of contents/index information
|
|
46
|
+
- Header and footer content
|
|
47
|
+
- Page numbers and chapter information
|
|
48
|
+
3. Special content:
|
|
49
|
+
- Structured extraction of table data
|
|
50
|
+
- Descriptive text for images
|
|
51
|
+
- Link and citation information
|
|
52
|
+
4. Output formats:
|
|
53
|
+
- Plain text version
|
|
54
|
+
- Structured JSON format
|
|
55
|
+
- Markdown format (preserving hierarchy)
|
|
56
|
+
|
|
57
|
+
Please provide extraction results in multiple formats.
|
|
58
|
+
- id: merge-split
|
|
59
|
+
title:
|
|
60
|
+
zh: 合并分割PDF
|
|
61
|
+
en: Merge and Split PDF
|
|
62
|
+
description:
|
|
63
|
+
zh: 合并多个PDF文件或将PDF分割成多个部分
|
|
64
|
+
en: Merge multiple PDF files or split a PDF into multiple parts
|
|
65
|
+
prompt:
|
|
66
|
+
zh: |-
|
|
67
|
+
我需要处理这些 PDF 文件:
|
|
68
|
+
|
|
69
|
+
操作类型:{{合并/分割}}
|
|
70
|
+
|
|
71
|
+
如果是合并:
|
|
72
|
+
文件列表:
|
|
73
|
+
1. {{第一个PDF文件路径}}
|
|
74
|
+
2. {{第二个PDF文件路径}}
|
|
75
|
+
3. {{第三个PDF文件路径}}
|
|
76
|
+
|
|
77
|
+
合并要求:
|
|
78
|
+
- 按指定顺序合并
|
|
79
|
+
- 保持原有页面质量和格式
|
|
80
|
+
- 添加书签结构(可选)
|
|
81
|
+
- 生成目录页(可选)
|
|
82
|
+
|
|
83
|
+
如果是分割:
|
|
84
|
+
源文件:{{源PDF文件路径}}
|
|
85
|
+
分割方式:
|
|
86
|
+
- 按页码范围分割:{{起始页}}-{{结束页}}
|
|
87
|
+
- 按章节自动分割
|
|
88
|
+
- 按书签结构分割
|
|
89
|
+
- 每N页分割一次
|
|
90
|
+
|
|
91
|
+
输出要求:
|
|
92
|
+
- 保持原始质量
|
|
93
|
+
- 文件命名规范
|
|
94
|
+
- 添加页码信息
|
|
95
|
+
|
|
96
|
+
请执行相应操作并提供处理后的文件。
|
|
97
|
+
en: |-
|
|
98
|
+
I need to process these PDF files:
|
|
99
|
+
|
|
100
|
+
Operation type: {{Merge/Split}}
|
|
101
|
+
|
|
102
|
+
If merging:
|
|
103
|
+
File list:
|
|
104
|
+
1. {{First PDF file path}}
|
|
105
|
+
2. {{Second PDF file path}}
|
|
106
|
+
3. {{Third PDF file path}}
|
|
107
|
+
|
|
108
|
+
Merge requirements:
|
|
109
|
+
- Merge in specified order
|
|
110
|
+
- Maintain original page quality and formatting
|
|
111
|
+
- Add bookmark structure (optional)
|
|
112
|
+
- Generate table of contents page (optional)
|
|
113
|
+
|
|
114
|
+
If splitting:
|
|
115
|
+
Source file: {{Source PDF file path}}
|
|
116
|
+
Split method:
|
|
117
|
+
- Split by page range: {{Start page}}-{{End page}}
|
|
118
|
+
- Split by chapters automatically
|
|
119
|
+
- Split by bookmark structure
|
|
120
|
+
- Split every N pages
|
|
121
|
+
|
|
122
|
+
Output requirements:
|
|
123
|
+
- Maintain original quality
|
|
124
|
+
- Standardized file naming
|
|
125
|
+
- Add page number information
|
|
126
|
+
|
|
127
|
+
Please execute the corresponding operation and provide the processed files.
|
|
128
|
+
- id: form-processing
|
|
129
|
+
title:
|
|
130
|
+
zh: 表单处理
|
|
131
|
+
en: Form Processing
|
|
132
|
+
description:
|
|
133
|
+
zh: 处理PDF表单:填写、提取数据、验证等
|
|
134
|
+
en: "Process PDF forms: filling, data extraction, validation, etc."
|
|
135
|
+
prompt:
|
|
136
|
+
zh: |-
|
|
137
|
+
请帮我处理这个 PDF 表单文件:
|
|
138
|
+
|
|
139
|
+
表单文件:{{PDF表单文件路径}}
|
|
140
|
+
|
|
141
|
+
操作类型:{{填写表单/提取数据/验证表单}}
|
|
142
|
+
|
|
143
|
+
如果是填写表单:
|
|
144
|
+
数据来源:{{JSON数据或键值对}}
|
|
145
|
+
字段映射:
|
|
146
|
+
- 姓名 -> {{name_field}}
|
|
147
|
+
- 日期 -> {{date_field}}
|
|
148
|
+
- 金额 -> {{amount_field}}
|
|
149
|
+
- 选择项 -> {{choice_field}}
|
|
150
|
+
|
|
151
|
+
填写要求:
|
|
152
|
+
- 准确填入所有必填字段
|
|
153
|
+
- 保持原有表单格式
|
|
154
|
+
- 添加数字签名(如果需要)
|
|
155
|
+
- 验证数据有效性
|
|
156
|
+
|
|
157
|
+
如果是提取数据:
|
|
158
|
+
- 提取所有已填写的字段值
|
|
159
|
+
- 识别表单结构和字段类型
|
|
160
|
+
- 验证数据完整性
|
|
161
|
+
- 输出结构化数据
|
|
162
|
+
|
|
163
|
+
如果是验证表单:
|
|
164
|
+
- 检查必填字段是否完整
|
|
165
|
+
- 验证数据格式是否正确
|
|
166
|
+
- 检查逻辑关系
|
|
167
|
+
- 生成验证报告
|
|
168
|
+
|
|
169
|
+
请根据指定操作类型执行相应处理。
|
|
170
|
+
en: |-
|
|
171
|
+
Please help me process this PDF form file:
|
|
172
|
+
|
|
173
|
+
Form file: {{PDF form file path}}
|
|
174
|
+
|
|
175
|
+
Operation type: {{Fill form/Extract data/Validate form}}
|
|
176
|
+
|
|
177
|
+
If filling form:
|
|
178
|
+
Data source: {{JSON data or key-value pairs}}
|
|
179
|
+
Field mapping:
|
|
180
|
+
- Name -> {{name_field}}
|
|
181
|
+
- Date -> {{date_field}}
|
|
182
|
+
- Amount -> {{amount_field}}
|
|
183
|
+
- Choice -> {{choice_field}}
|
|
184
|
+
|
|
185
|
+
Filling requirements:
|
|
186
|
+
- Accurately fill all required fields
|
|
187
|
+
- Maintain original form formatting
|
|
188
|
+
- Add digital signature (if needed)
|
|
189
|
+
- Validate data validity
|
|
190
|
+
|
|
191
|
+
If extracting data:
|
|
192
|
+
- Extract all filled field values
|
|
193
|
+
- Identify form structure and field types
|
|
194
|
+
- Validate data completeness
|
|
195
|
+
- Output structured data
|
|
196
|
+
|
|
197
|
+
If validating form:
|
|
198
|
+
- Check if required fields are complete
|
|
199
|
+
- Validate data format correctness
|
|
200
|
+
- Check logical relationships
|
|
201
|
+
- Generate validation report
|
|
202
|
+
|
|
203
|
+
Please perform the corresponding processing based on the specified operation type.
|
|
204
|
+
- id: watermark-security
|
|
205
|
+
title:
|
|
206
|
+
zh: 水印和安全处理
|
|
207
|
+
en: Watermark and Security Processing
|
|
208
|
+
description:
|
|
209
|
+
zh: 添加水印、密码保护和权限控制
|
|
210
|
+
en: Add watermarks, password protection and permission control
|
|
211
|
+
prompt:
|
|
212
|
+
zh: |-
|
|
213
|
+
请帮我为这个 PDF 文件添加安全保护:
|
|
214
|
+
|
|
215
|
+
源文件:{{源PDF文件路径}}
|
|
216
|
+
|
|
217
|
+
安全措施:
|
|
218
|
+
1. 水印添加:
|
|
219
|
+
- 文本水印:{{公司名称}} - {{机密等级}}
|
|
220
|
+
- 图片水印:{{logo文件路径}}
|
|
221
|
+
- 水印位置:页面中心/对角线/平铺
|
|
222
|
+
- 透明度设置:{{透明度百分比}}
|
|
223
|
+
2. 密码保护:
|
|
224
|
+
- 打开密码:{{open_password}}
|
|
225
|
+
- 编辑密码:{{edit_password}}
|
|
226
|
+
- 密码强度要求
|
|
227
|
+
3. 权限控制:
|
|
228
|
+
- 禁止打印
|
|
229
|
+
- 禁止复制文本
|
|
230
|
+
- 禁止编辑内容
|
|
231
|
+
- 限制页面提取
|
|
232
|
+
4. 其他安全选项:
|
|
233
|
+
- 数字签名
|
|
234
|
+
- 文档加密算法
|
|
235
|
+
- 访问日志记录
|
|
236
|
+
|
|
237
|
+
输出要求:
|
|
238
|
+
- 保持文档原始质量
|
|
239
|
+
- 确保安全措施有效
|
|
240
|
+
- 提供安全设置说明
|
|
241
|
+
|
|
242
|
+
请实施相应的安全保护措施。
|
|
243
|
+
en: |-
|
|
244
|
+
Please help me add security protection to this PDF file:
|
|
245
|
+
|
|
246
|
+
Source file: {{Source PDF file path}}
|
|
247
|
+
|
|
248
|
+
Security measures:
|
|
249
|
+
1. Watermark addition:
|
|
250
|
+
- Text watermark: {{Company Name}} - {{Confidentiality Level}}
|
|
251
|
+
- Image watermark: {{logo file path}}
|
|
252
|
+
- Watermark position: Center/Diagonal/Tile
|
|
253
|
+
- Transparency setting: {{transparency percentage}}
|
|
254
|
+
2. Password protection:
|
|
255
|
+
- Open password: {{open_password}}
|
|
256
|
+
- Edit password: {{edit_password}}
|
|
257
|
+
- Password strength requirements
|
|
258
|
+
3. Permission control:
|
|
259
|
+
- Disable printing
|
|
260
|
+
- Disable text copying
|
|
261
|
+
- Disable content editing
|
|
262
|
+
- Restrict page extraction
|
|
263
|
+
4. Other security options:
|
|
264
|
+
- Digital signature
|
|
265
|
+
- Document encryption algorithm
|
|
266
|
+
- Access log recording
|
|
267
|
+
|
|
268
|
+
Output requirements:
|
|
269
|
+
- Maintain original document quality
|
|
270
|
+
- Ensure security measures are effective
|
|
271
|
+
- Provide security settings documentation
|
|
272
|
+
|
|
273
|
+
Please implement corresponding security protection measures.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
© 2025 Anthropic, PBC. All rights reserved.
|
|
2
|
+
|
|
3
|
+
LICENSE: Use of these materials (including all code, prompts, assets, files,
|
|
4
|
+
and other components of this Skill) is governed by your agreement with
|
|
5
|
+
Anthropic regarding use of Anthropic's services. If no separate agreement
|
|
6
|
+
exists, use is governed by Anthropic's Consumer Terms of Service or
|
|
7
|
+
Commercial Terms of Service, as applicable:
|
|
8
|
+
https://www.anthropic.com/legal/consumer-terms
|
|
9
|
+
https://www.anthropic.com/legal/commercial-terms
|
|
10
|
+
Your applicable agreement is referred to as the "Agreement." "Services" are
|
|
11
|
+
as defined in the Agreement.
|
|
12
|
+
|
|
13
|
+
ADDITIONAL RESTRICTIONS: Notwithstanding anything in the Agreement to the
|
|
14
|
+
contrary, users may not:
|
|
15
|
+
|
|
16
|
+
- Extract these materials from the Services or retain copies of these
|
|
17
|
+
materials outside the Services
|
|
18
|
+
- Reproduce or copy these materials, except for temporary copies created
|
|
19
|
+
automatically during authorized use of the Services
|
|
20
|
+
- Create derivative works based on these materials
|
|
21
|
+
- Distribute, sublicense, or transfer these materials to any third party
|
|
22
|
+
- Make, offer to sell, sell, or import any inventions embodied in these
|
|
23
|
+
materials
|
|
24
|
+
- Reverse engineer, decompile, or disassemble these materials
|
|
25
|
+
|
|
26
|
+
The receipt, viewing, or possession of these materials does not convey or
|
|
27
|
+
imply any license or right beyond those expressly granted above.
|
|
28
|
+
|
|
29
|
+
Anthropic retains all right, title, and interest in these materials,
|
|
30
|
+
including all copyrights, patents, and other intellectual property rights.
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: pdf
|
|
3
|
+
version: 1.0.1
|
|
4
|
+
description: Use this skill whenever the user wants to do anything with PDF files. This includes reading or extracting text/tables from PDFs, combining or merging multiple PDFs into one, splitting PDFs apart, rotating pages, adding watermarks, creating new PDFs, filling PDF forms, encrypting/decrypting PDFs, extracting images, and OCR on scanned PDFs to make them searchable. If the user mentions a .pdf file or asks to produce one, use this skill.
|
|
5
|
+
description_zh: 当用户需要对 PDF 文件执行任何操作时使用此技能。包括:读取或提取 PDF 中的文本/表格、合并多个 PDF、拆分 PDF、旋转页面、添加水印、创建新 PDF、填写 PDF 表单、加密/解密 PDF、提取图片,以及对扫描版 PDF 进行 OCR 使其可搜索。只要用户提及 .pdf 文件或要求生成 PDF,就使用此技能。
|
|
6
|
+
license: Proprietary. LICENSE.txt has complete terms
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Working with PDF Documents
|
|
10
|
+
|
|
11
|
+
## Introduction
|
|
12
|
+
|
|
13
|
+
A comprehensive toolkit for PDF manipulation through Python and shell utilities. Consult `advanced-reference.md` for extended capabilities (pypdfium2, JavaScript ecosystems, performance guidance). When form-filling is required, follow the workflow described in `form-filling-guide.md`.
|
|
14
|
+
|
|
15
|
+
## Getting Started
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
import pypdf
|
|
19
|
+
|
|
20
|
+
# Open and inspect a document
|
|
21
|
+
doc = pypdf.PdfReader("document.pdf")
|
|
22
|
+
total_pages = len(doc.pages)
|
|
23
|
+
print("Pages: {}".format(total_pages))
|
|
24
|
+
|
|
25
|
+
# Gather all textual content
|
|
26
|
+
content = "".join(pg.extract_text() for pg in doc.pages)
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Core Python Libraries
|
|
30
|
+
|
|
31
|
+
### pypdf — Structural Manipulation
|
|
32
|
+
|
|
33
|
+
#### Combining Multiple Documents
|
|
34
|
+
|
|
35
|
+
```python
|
|
36
|
+
import pypdf
|
|
37
|
+
|
|
38
|
+
output = pypdf.PdfWriter()
|
|
39
|
+
sources = ["doc1.pdf", "doc2.pdf", "doc3.pdf"]
|
|
40
|
+
for src in sources:
|
|
41
|
+
rdr = pypdf.PdfReader(src)
|
|
42
|
+
for pg in rdr.pages:
|
|
43
|
+
output.add_page(pg)
|
|
44
|
+
|
|
45
|
+
with open("merged.pdf", "wb") as dest:
|
|
46
|
+
output.write(dest)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
#### Separating Pages Into Individual Files
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
import pypdf
|
|
53
|
+
|
|
54
|
+
source = pypdf.PdfReader("input.pdf")
|
|
55
|
+
for idx, pg in enumerate(source.pages):
|
|
56
|
+
single = pypdf.PdfWriter()
|
|
57
|
+
single.add_page(pg)
|
|
58
|
+
with open("page_{}.pdf".format(idx + 1), "wb") as dest:
|
|
59
|
+
single.write(dest)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
#### Reading Document Properties
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
import pypdf
|
|
66
|
+
|
|
67
|
+
source = pypdf.PdfReader("document.pdf")
|
|
68
|
+
props = source.metadata
|
|
69
|
+
print("Title: {}".format(props.title))
|
|
70
|
+
print("Author: {}".format(props.author))
|
|
71
|
+
print("Subject: {}".format(props.subject))
|
|
72
|
+
print("Creator: {}".format(props.creator))
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
#### Changing Page Orientation
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import pypdf
|
|
79
|
+
|
|
80
|
+
source = pypdf.PdfReader("input.pdf")
|
|
81
|
+
output = pypdf.PdfWriter()
|
|
82
|
+
|
|
83
|
+
target = source.pages[0]
|
|
84
|
+
target.rotate(90) # 90-degree clockwise rotation
|
|
85
|
+
output.add_page(target)
|
|
86
|
+
|
|
87
|
+
with open("rotated.pdf", "wb") as dest:
|
|
88
|
+
output.write(dest)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### pdfplumber — Content Extraction
|
|
92
|
+
|
|
93
|
+
#### Retrieving Text Preserving Layout
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
import pdfplumber
|
|
97
|
+
|
|
98
|
+
with pdfplumber.open("document.pdf") as doc:
|
|
99
|
+
for pg in doc.pages:
|
|
100
|
+
content = pg.extract_text()
|
|
101
|
+
print(content)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
#### Pulling Tabular Data
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
import pdfplumber
|
|
108
|
+
|
|
109
|
+
with pdfplumber.open("document.pdf") as doc:
|
|
110
|
+
for pg_idx, pg in enumerate(doc.pages):
|
|
111
|
+
found_tables = pg.extract_tables()
|
|
112
|
+
for tbl_idx, tbl in enumerate(found_tables):
|
|
113
|
+
print("Table {} on page {}:".format(tbl_idx + 1, pg_idx + 1))
|
|
114
|
+
for row in tbl:
|
|
115
|
+
print(row)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
#### Structured Table Export
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
import pdfplumber
|
|
122
|
+
import pandas as pd
|
|
123
|
+
|
|
124
|
+
with pdfplumber.open("document.pdf") as doc:
|
|
125
|
+
collected = []
|
|
126
|
+
for pg in doc.pages:
|
|
127
|
+
for tbl in pg.extract_tables():
|
|
128
|
+
if tbl:
|
|
129
|
+
frame = pd.DataFrame(tbl[1:], columns=tbl[0])
|
|
130
|
+
collected.append(frame)
|
|
131
|
+
|
|
132
|
+
if collected:
|
|
133
|
+
merged = pd.concat(collected, ignore_index=True)
|
|
134
|
+
merged.to_excel("extracted_tables.xlsx", index=False)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### reportlab — Document Generation
|
|
138
|
+
|
|
139
|
+
#### Producing a Simple PDF
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from reportlab.lib.pagesizes import letter
|
|
143
|
+
from reportlab.pdfgen import canvas
|
|
144
|
+
|
|
145
|
+
doc = canvas.Canvas("hello.pdf", pagesize=letter)
|
|
146
|
+
w, h = letter
|
|
147
|
+
|
|
148
|
+
doc.drawString(100, h - 100, "Hello World!")
|
|
149
|
+
doc.drawString(100, h - 120, "This is a PDF created with reportlab")
|
|
150
|
+
|
|
151
|
+
doc.line(100, h - 140, 400, h - 140)
|
|
152
|
+
|
|
153
|
+
doc.save()
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### Multi-Page Document Construction
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
from reportlab.lib.pagesizes import letter
|
|
160
|
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
|
|
161
|
+
from reportlab.lib.styles import getSampleStyleSheet
|
|
162
|
+
|
|
163
|
+
template = SimpleDocTemplate("report.pdf", pagesize=letter)
|
|
164
|
+
styles = getSampleStyleSheet()
|
|
165
|
+
elements = []
|
|
166
|
+
|
|
167
|
+
elements.append(Paragraph("Report Title", styles['Title']))
|
|
168
|
+
elements.append(Spacer(1, 12))
|
|
169
|
+
elements.append(Paragraph("This is the body of the report. " * 20, styles['Normal']))
|
|
170
|
+
elements.append(PageBreak())
|
|
171
|
+
|
|
172
|
+
elements.append(Paragraph("Page 2", styles['Heading1']))
|
|
173
|
+
elements.append(Paragraph("Content for page 2", styles['Normal']))
|
|
174
|
+
|
|
175
|
+
template.build(elements)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
#### Handling Sub/Superscripts
|
|
179
|
+
|
|
180
|
+
**IMPORTANT**: Never use Unicode subscript/superscript characters (₀₁₂₃₄₅₆₇₈₉, ⁰¹²³⁴⁵⁶⁷⁸⁹) in ReportLab PDFs. The built-in fonts do not include these glyphs, causing them to render as solid black boxes.
|
|
181
|
+
|
|
182
|
+
Instead, use ReportLab's XML markup tags in Paragraph objects:
|
|
183
|
+
```python
|
|
184
|
+
from reportlab.platypus import Paragraph
|
|
185
|
+
from reportlab.lib.styles import getSampleStyleSheet
|
|
186
|
+
|
|
187
|
+
styles = getSampleStyleSheet()
|
|
188
|
+
|
|
189
|
+
# Subscripts: use <sub> tag
|
|
190
|
+
chemical = Paragraph("H<sub>2</sub>O", styles['Normal'])
|
|
191
|
+
|
|
192
|
+
# Superscripts: use <super> tag
|
|
193
|
+
squared = Paragraph("x<super>2</super> + y<super>2</super>", styles['Normal'])
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
For canvas-drawn text (not Paragraph objects), manually adjust the font size and position rather than using Unicode subscripts/superscripts.
|
|
197
|
+
|
|
198
|
+
## Shell Utilities
|
|
199
|
+
|
|
200
|
+
### poppler-utils (pdftotext)
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
# Plain text extraction
|
|
204
|
+
pdftotext input.pdf output.txt
|
|
205
|
+
|
|
206
|
+
# Layout-preserving extraction
|
|
207
|
+
pdftotext -layout input.pdf output.txt
|
|
208
|
+
|
|
209
|
+
# Page range selection
|
|
210
|
+
pdftotext -f 1 -l 5 input.pdf output.txt # Pages 1-5
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### qpdf
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
# Combine documents
|
|
217
|
+
qpdf --empty --pages file1.pdf file2.pdf -- merged.pdf
|
|
218
|
+
|
|
219
|
+
# Extract page subsets
|
|
220
|
+
qpdf input.pdf --pages . 1-5 -- pages1-5.pdf
|
|
221
|
+
qpdf input.pdf --pages . 6-10 -- pages6-10.pdf
|
|
222
|
+
|
|
223
|
+
# Orientation adjustment
|
|
224
|
+
qpdf input.pdf output.pdf --rotate=+90:1 # Rotate page 1 by 90 degrees
|
|
225
|
+
|
|
226
|
+
# Decrypt protected files
|
|
227
|
+
qpdf --password=mypassword --decrypt encrypted.pdf decrypted.pdf
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### pdftk (if available)
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
# Combine
|
|
234
|
+
pdftk file1.pdf file2.pdf cat output merged.pdf
|
|
235
|
+
|
|
236
|
+
# Burst into pages
|
|
237
|
+
pdftk input.pdf burst
|
|
238
|
+
|
|
239
|
+
# Orientation change
|
|
240
|
+
pdftk input.pdf rotate 1east output rotated.pdf
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## Typical Workflows
|
|
244
|
+
|
|
245
|
+
### OCR for Scanned Documents
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
import pytesseract
|
|
249
|
+
from pdf2image import convert_from_path
|
|
250
|
+
|
|
251
|
+
rendered = convert_from_path('scanned.pdf')
|
|
252
|
+
|
|
253
|
+
content = ""
|
|
254
|
+
for idx, frame in enumerate(rendered):
|
|
255
|
+
content += "Page {}:\n".format(idx + 1)
|
|
256
|
+
content += pytesseract.image_to_string(frame)
|
|
257
|
+
content += "\n\n"
|
|
258
|
+
|
|
259
|
+
print(content)
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Overlaying a Watermark
|
|
263
|
+
|
|
264
|
+
```python
|
|
265
|
+
import pypdf
|
|
266
|
+
|
|
267
|
+
stamp = pypdf.PdfReader("watermark.pdf").pages[0]
|
|
268
|
+
|
|
269
|
+
source = pypdf.PdfReader("document.pdf")
|
|
270
|
+
output = pypdf.PdfWriter()
|
|
271
|
+
|
|
272
|
+
for pg in source.pages:
|
|
273
|
+
pg.merge_page(stamp)
|
|
274
|
+
output.add_page(pg)
|
|
275
|
+
|
|
276
|
+
with open("watermarked.pdf", "wb") as dest:
|
|
277
|
+
output.write(dest)
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
### Extracting Embedded Graphics
|
|
281
|
+
|
|
282
|
+
```bash
|
|
283
|
+
# Using pdfimages (poppler-utils)
|
|
284
|
+
pdfimages -j input.pdf output_prefix
|
|
285
|
+
|
|
286
|
+
# This extracts images as output_prefix-000.jpg, output_prefix-001.jpg, etc. (with -j, only JPEG-encoded images are saved as .jpg; others are written as .ppm/.pbm)
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
### Applying Password Protection
|
|
290
|
+
|
|
291
|
+
```python
|
|
292
|
+
import pypdf
|
|
293
|
+
|
|
294
|
+
source = pypdf.PdfReader("input.pdf")
|
|
295
|
+
output = pypdf.PdfWriter()
|
|
296
|
+
|
|
297
|
+
for pg in source.pages:
|
|
298
|
+
output.add_page(pg)
|
|
299
|
+
|
|
300
|
+
output.encrypt("userpassword", "ownerpassword")
|
|
301
|
+
|
|
302
|
+
with open("encrypted.pdf", "wb") as dest:
|
|
303
|
+
output.write(dest)
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
## Capability Matrix
|
|
307
|
+
|
|
308
|
+
| Operation | Recommended Tool | Approach |
|
|
309
|
+
|-----------|-----------------|----------|
|
|
310
|
+
| Combine documents | pypdf | `writer.add_page(page)` |
|
|
311
|
+
| Separate pages | pypdf | One file per page |
|
|
312
|
+
| Read text content | pdfplumber | `page.extract_text()` |
|
|
313
|
+
| Parse tables | pdfplumber | `page.extract_tables()` |
|
|
314
|
+
| Generate new PDFs | reportlab | Canvas or Platypus |
|
|
315
|
+
| CLI merging | qpdf | `qpdf --empty --pages ...` |
|
|
316
|
+
| Scanned document OCR | pytesseract | Render to image first |
|
|
317
|
+
| Form completion | pdf-lib or pypdf | See form-filling-guide.md |
|
|
318
|
+
|
|
319
|
+
## Additional Resources
|
|
320
|
+
|
|
321
|
+
- Extended pypdfium2 documentation: `advanced-reference.md`
|
|
322
|
+
- JavaScript library details (pdf-lib): `advanced-reference.md`
|
|
323
|
+
- Form-filling procedures: `form-filling-guide.md`
|
|
324
|
+
- Error resolution guidance: `advanced-reference.md`
|