@panda-agent/panda-cli 0.1.29 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/dist/panda-cli-ink.bundle.mjs +258 -247
  2. package/package.json +6 -4
  3. package/skills/.gitkeep +0 -0
  4. package/skills/README.md +13 -0
  5. package/skills/docx/.skill-metadata.yaml +173 -0
  6. package/skills/docx/LICENSE.txt +30 -0
  7. package/skills/docx/SKILL.md +589 -0
  8. package/skills/docx/scripts/__init__.py +1 -0
  9. package/skills/docx/scripts/accept_changes.py +206 -0
  10. package/skills/docx/scripts/comment.py +442 -0
  11. package/skills/docx/scripts/office/helpers/__init__.py +1 -0
  12. package/skills/docx/scripts/office/helpers/merge_runs.py +190 -0
  13. package/skills/docx/scripts/office/helpers/simplify_redlines.py +185 -0
  14. package/skills/docx/scripts/office/pack.py +167 -0
  15. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  16. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  17. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  18. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  19. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  20. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  21. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  22. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  23. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  24. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  25. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  26. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  27. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  28. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  29. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  30. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  31. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  32. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  33. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  34. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  35. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  36. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  37. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  38. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  39. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  40. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  41. package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  42. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  43. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  44. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  45. package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  46. package/skills/docx/scripts/office/schemas/mce/mc.xsd +75 -0
  47. package/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  48. package/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  49. package/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  50. package/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  51. package/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  52. package/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  53. package/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  54. package/skills/docx/scripts/office/soffice.py +194 -0
  55. package/skills/docx/scripts/office/unpack.py +145 -0
  56. package/skills/docx/scripts/office/validate.py +114 -0
  57. package/skills/docx/scripts/office/validators/__init__.py +16 -0
  58. package/skills/docx/scripts/office/validators/base.py +733 -0
  59. package/skills/docx/scripts/office/validators/docx.py +354 -0
  60. package/skills/docx/scripts/office/validators/pptx.py +230 -0
  61. package/skills/docx/scripts/office/validators/redlining.py +212 -0
  62. package/skills/docx/scripts/templates/comments.xml +3 -0
  63. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  64. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  65. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  66. package/skills/docx/scripts/templates/people.xml +3 -0
  67. package/skills/frontend-design/LICENSE.txt +177 -0
  68. package/skills/frontend-design/SKILL.md +42 -0
  69. package/skills/pdf/.skill-metadata.yaml +273 -0
  70. package/skills/pdf/LICENSE.txt +30 -0
  71. package/skills/pdf/SKILL.md +324 -0
  72. package/skills/pdf/advanced-reference.md +609 -0
  73. package/skills/pdf/form-filling-guide.md +318 -0
  74. package/skills/pdf/forms.md +294 -0
  75. package/skills/pdf/reference.md +612 -0
  76. package/skills/pdf/scripts/check_bounding_boxes.py +198 -0
  77. package/skills/pdf/scripts/check_fillable_fields.py +64 -0
  78. package/skills/pdf/scripts/convert_pdf_to_images.py +102 -0
  79. package/skills/pdf/scripts/create_validation_image.py +125 -0
  80. package/skills/pdf/scripts/extract_form_field_info.py +220 -0
  81. package/skills/pdf/scripts/extract_form_structure.py +202 -0
  82. package/skills/pdf/scripts/fill_fillable_fields.py +205 -0
  83. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +193 -0
  84. package/skills/pptx-generator/SKILL.md +204 -0
  85. package/skills/pptx-generator/assets/styles/business.json +8 -0
  86. package/skills/pptx-generator/assets/styles/minimal.json +8 -0
  87. package/skills/pptx-generator/assets/styles/modern.json +8 -0
  88. package/skills/pptx-generator/assets/templates/ppt_data_template.json +40 -0
  89. package/skills/pptx-generator/references/collaboration_guide.md +381 -0
  90. package/skills/pptx-generator/references/json_format_spec.md +215 -0
  91. package/skills/pptx-generator/references/layout_guide.md +290 -0
  92. package/skills/pptx-generator/scripts/json_validator.py +194 -0
  93. package/skills/pptx-generator/scripts/pptx_builder.py +340 -0
  94. package/skills/pptx-generator/scripts/pptx_validator.py +162 -0
  95. package/skills/skill-creator/LICENSE.txt +202 -0
  96. package/skills/skill-creator/SKILL.md +479 -0
  97. package/skills/skill-creator/agents/analyzer.md +274 -0
  98. package/skills/skill-creator/agents/comparator.md +202 -0
  99. package/skills/skill-creator/agents/grader.md +223 -0
  100. package/skills/skill-creator/assets/eval_review.html +146 -0
  101. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  102. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  103. package/skills/skill-creator/references/schemas.md +430 -0
  104. package/skills/skill-creator/scripts/__init__.py +0 -0
  105. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  106. package/skills/skill-creator/scripts/generate_report.py +326 -0
  107. package/skills/skill-creator/scripts/improve_description.py +248 -0
  108. package/skills/skill-creator/scripts/package_skill.py +136 -0
  109. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  110. package/skills/skill-creator/scripts/run_eval.py +310 -0
  111. package/skills/skill-creator/scripts/run_loop.py +332 -0
  112. package/skills/skill-creator/scripts/utils.py +47 -0
  113. package/skills/xlsx/.skill-metadata.yaml +185 -0
  114. package/skills/xlsx/LICENSE.txt +30 -0
  115. package/skills/xlsx/SKILL.md +233 -0
  116. package/skills/xlsx/scripts/office/helpers/__init__.py +1 -0
  117. package/skills/xlsx/scripts/office/helpers/merge_runs.py +226 -0
  118. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +198 -0
  119. package/skills/xlsx/scripts/office/pack.py +162 -0
  120. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  121. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  122. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  123. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  124. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  125. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  126. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  127. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  128. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  129. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  130. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  131. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  132. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  133. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  134. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  135. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  136. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  137. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  138. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  139. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  140. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  141. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  142. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  143. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  144. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  145. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  146. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  147. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  148. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  149. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  150. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  151. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  152. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  153. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  154. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  155. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  156. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  157. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  158. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  159. package/skills/xlsx/scripts/office/soffice.py +185 -0
  160. package/skills/xlsx/scripts/office/unpack.py +146 -0
  161. package/skills/xlsx/scripts/office/validate.py +108 -0
  162. package/skills/xlsx/scripts/office/validators/__init__.py +13 -0
  163. package/skills/xlsx/scripts/office/validators/base.py +800 -0
  164. package/skills/xlsx/scripts/office/validators/docx.py +383 -0
  165. package/skills/xlsx/scripts/office/validators/pptx.py +250 -0
  166. package/skills/xlsx/scripts/office/validators/redlining.py +229 -0
  167. package/skills/xlsx/scripts/recalc.py +296 -0
@@ -0,0 +1,609 @@
1
+ # Advanced PDF Toolkit Reference
2
+
3
+ This companion document covers advanced capabilities, auxiliary libraries, and specialized workflows beyond the essentials in the primary skill guide.
4
+
5
+ ---
6
+
7
+ ## pypdfium2 (Apache/BSD Licensed)
8
+
9
+ ### About the Library
10
+
11
+ pypdfium2 is a Python binding for PDFium (the PDF rendering engine used in Chromium). It is well suited to high-fidelity page rasterization and can serve as an alternative to PyMuPDF.
12
+
13
+ ### Page Rasterization
14
+
15
+ ```python
16
+ import pypdfium2 as pdfium
17
+ from PIL import Image
18
+
19
+ doc = pdfium.PdfDocument("document.pdf")
20
+
21
+ # Render the first page at double resolution
22
+ first_page = doc[0]
23
+ bmp = first_page.render(scale=2.0, rotation=0)
24
+
25
+ # Export as PIL image
26
+ pil_img = bmp.to_pil()
27
+ pil_img.save("page_1.png", "PNG")
28
+
29
+ # Batch-render every page
30
+ for n, pg in enumerate(doc):
31
+ bmp = pg.render(scale=1.5)
32
+ pil_img = bmp.to_pil()
33
+ pil_img.save("page_{}.jpg".format(n + 1), "JPEG", quality=90)
34
+ ```
35
+
36
+ ### Textual Content Retrieval
37
+
38
+ ```python
39
+ import pypdfium2 as pdfium
40
+
41
+ doc = pdfium.PdfDocument("document.pdf")
42
+ for n, pg in enumerate(doc):
43
+ raw = pg.get_text()
44
+ print("Page {} text length: {} chars".format(n + 1, len(raw)))
45
+ ```
46
+
47
+ ---
48
+
49
+ ## JavaScript Ecosystem
50
+
51
+ ### pdf-lib (MIT Licensed)
52
+
53
+ A versatile JS library for constructing and editing PDF documents across all JavaScript runtimes.
54
+
55
+ #### Modifying an Existing Document
56
+
57
+ ```javascript
58
+ import { PDFDocument } from 'pdf-lib';
59
+ import fs from 'fs';
60
+
61
+ async function manipulatePDF() {
62
+ const rawBytes = fs.readFileSync('input.pdf');
63
+ const doc = await PDFDocument.load(rawBytes);
64
+
65
+ const numPages = doc.getPageCount();
66
+ console.log(`Document has ${numPages} pages`);
67
+
68
+ const fresh = doc.addPage([600, 400]);
69
+ fresh.drawText('Added by pdf-lib', {
70
+ x: 100,
71
+ y: 300,
72
+ size: 16
73
+ });
74
+
75
+ const output = await doc.save();
76
+ fs.writeFileSync('modified.pdf', output);
77
+ }
78
+ ```
79
+
80
+ #### Building a Document From Scratch
81
+
82
+ ```javascript
83
+ import { PDFDocument, rgb, StandardFonts } from 'pdf-lib';
84
+ import fs from 'fs';
85
+
86
+ async function createPDF() {
87
+ const doc = await PDFDocument.create();
88
+
89
+ const regular = await doc.embedFont(StandardFonts.Helvetica);
90
+ const bold = await doc.embedFont(StandardFonts.HelveticaBold);
91
+
92
+ const sheet = doc.addPage([595, 842]); // A4 dimensions
93
+ const { width, height } = sheet.getSize();
94
+
95
+ sheet.drawText('Invoice #12345', {
96
+ x: 50,
97
+ y: height - 50,
98
+ size: 18,
99
+ font: bold,
100
+ color: rgb(0.2, 0.2, 0.8)
101
+ });
102
+
103
+ sheet.drawRectangle({
104
+ x: 40,
105
+ y: height - 100,
106
+ width: width - 80,
107
+ height: 30,
108
+ color: rgb(0.9, 0.9, 0.9)
109
+ });
110
+
111
+ const rows = [
112
+ ['Item', 'Qty', 'Price', 'Total'],
113
+ ['Widget', '2', '$50', '$100'],
114
+ ['Gadget', '1', '$75', '$75']
115
+ ];
116
+
117
+ let cursorY = height - 150;
118
+ rows.forEach(cells => {
119
+ let cursorX = 50;
120
+ cells.forEach(val => {
121
+ sheet.drawText(val, {
122
+ x: cursorX,
123
+ y: cursorY,
124
+ size: 12,
125
+ font: regular
126
+ });
127
+ cursorX += 120;
128
+ });
129
+ cursorY -= 25;
130
+ });
131
+
132
+ const bytes = await doc.save();
133
+ fs.writeFileSync('created.pdf', bytes);
134
+ }
135
+ ```
136
+
137
+ #### Page Selection and Merging
138
+
139
+ ```javascript
140
+ import { PDFDocument } from 'pdf-lib';
141
+ import fs from 'fs';
142
+
143
+ async function mergePDFs() {
144
+ const combined = await PDFDocument.create();
145
+
146
+ const src1 = await PDFDocument.load(fs.readFileSync('doc1.pdf'));
147
+ const src2 = await PDFDocument.load(fs.readFileSync('doc2.pdf'));
148
+
149
+ const allFromFirst = await combined.copyPages(src1, src1.getPageIndices());
150
+ allFromFirst.forEach(p => combined.addPage(p));
151
+
152
+ const selectedFromSecond = await combined.copyPages(src2, [0, 2, 4]);
153
+ selectedFromSecond.forEach(p => combined.addPage(p));
154
+
155
+ const result = await combined.save();
156
+ fs.writeFileSync('merged.pdf', result);
157
+ }
158
+ ```
159
+
160
+ ### pdfjs-dist (Apache Licensed)
161
+
162
+ Mozilla's client-side PDF rendering engine.
163
+
164
+ #### Loading and Rendering
165
+
166
+ ```javascript
167
+ import * as pdfjsLib from 'pdfjs-dist';
168
+
169
+ pdfjsLib.GlobalWorkerOptions.workerSrc = './pdf.worker.js';
170
+
171
+ async function renderPDF() {
172
+ const task = pdfjsLib.getDocument('document.pdf');
173
+ const doc = await task.promise;
174
+
175
+ console.log(`Loaded PDF with ${doc.numPages} pages`);
176
+
177
+ const pg = await doc.getPage(1);
178
+ const vp = pg.getViewport({ scale: 1.5 });
179
+
180
+ const cvs = document.createElement('canvas');
181
+ const ctx = cvs.getContext('2d');
182
+ cvs.height = vp.height;
183
+ cvs.width = vp.width;
184
+
185
+ await pg.render({ canvasContext: ctx, viewport: vp }).promise;
186
+ document.body.appendChild(cvs);
187
+ }
188
+ ```
189
+
190
+ #### Positioned Text Extraction
191
+
192
+ ```javascript
193
+ import * as pdfjsLib from 'pdfjs-dist';
194
+
195
+ async function extractText() {
196
+ const task = pdfjsLib.getDocument('document.pdf');
197
+ const doc = await task.promise;
198
+
199
+ let accumulated = '';
200
+
201
+ for (let n = 1; n <= doc.numPages; n++) {
202
+ const pg = await doc.getPage(n);
203
+ const tc = await pg.getTextContent();
204
+
205
+ const pageStr = tc.items.map(el => el.str).join(' ');
206
+ accumulated += `\n--- Page ${n} ---\n${pageStr}`;
207
+
208
+ const positioned = tc.items.map(el => ({
209
+ text: el.str,
210
+ x: el.transform[4],
211
+ y: el.transform[5],
212
+ width: el.width,
213
+ height: el.height
214
+ }));
215
+ }
216
+
217
+ console.log(accumulated);
218
+ return accumulated;
219
+ }
220
+ ```
221
+
222
+ #### Annotation and Form Field Discovery
223
+
224
+ ```javascript
225
+ import * as pdfjsLib from 'pdfjs-dist';
226
+
227
+ async function extractAnnotations() {
228
+ const task = pdfjsLib.getDocument('annotated.pdf');
229
+ const doc = await task.promise;
230
+
231
+ for (let n = 1; n <= doc.numPages; n++) {
232
+ const pg = await doc.getPage(n);
233
+ const notes = await pg.getAnnotations();
234
+
235
+ notes.forEach(note => {
236
+ console.log(`Annotation type: ${note.subtype}`);
237
+ console.log(`Content: ${note.contents}`);
238
+ console.log(`Coordinates: ${JSON.stringify(note.rect)}`);
239
+ });
240
+ }
241
+ }
242
+ ```
243
+
244
+ ---
245
+
246
+ ## Advanced Shell Operations
247
+
248
+ ### poppler-utils: Deep Features
249
+
250
+ #### Coordinate-Tagged Text Export
251
+
252
+ ```bash
253
+ # Produce XML with precise bounding boxes per text element
254
+ pdftotext -bbox-layout document.pdf output.xml
255
+
256
+ # The XML contains exact spatial data for every text fragment
257
+ ```
258
+
259
+ #### High-Fidelity Image Conversion
260
+
261
+ ```bash
262
+ # PNG output at 300 DPI
263
+ pdftoppm -png -r 300 document.pdf output_prefix
264
+
265
+ # Selective pages at maximum quality
266
+ pdftoppm -png -r 600 -f 1 -l 3 document.pdf high_res_pages
267
+
268
+ # JPEG with compression control
269
+ pdftoppm -jpeg -jpegopt quality=85 -r 200 document.pdf jpeg_output
270
+ ```
271
+
272
+ #### Embedded Image Retrieval
273
+
274
+ ```bash
275
+ # Extract all images with page numbers in the file names (JPEG-encoded images stay JPEG; others are written as PPM/PBM)
276
+ pdfimages -j -p document.pdf page_images
277
+
278
+ # Catalogue images without extraction
279
+ pdfimages -list document.pdf
280
+
281
+ # Native-format extraction
282
+ pdfimages -all document.pdf images/img
283
+ ```
284
+
285
+ ### qpdf: Power Features
286
+
287
+ #### Sophisticated Page Operations
288
+
289
+ ```bash
290
+ # Chunk-split every 3 pages
291
+ qpdf --split-pages=3 input.pdf output_group_%d.pdf
292
+
293
+ # Complex range expressions
294
+ qpdf input.pdf --pages input.pdf 1,3-5,8,10-end -- extracted.pdf
295
+
296
+ # Cross-document page assembly
297
+ qpdf --empty --pages doc1.pdf 1-3 doc2.pdf 5-7 doc3.pdf 2,4 -- combined.pdf
298
+ ```
299
+
300
+ #### Optimization and Recovery
301
+
302
+ ```bash
303
+ # Web-optimized streaming layout
304
+ qpdf --linearize input.pdf optimized.pdf
305
+
306
+ # Aggressive size reduction
307
+ qpdf --object-streams=generate --recompress-flate --compression-level=9 input.pdf compressed.pdf
308
+
309
+ # Structural integrity check
310
+ qpdf --check input.pdf
311
+ qpdf damaged.pdf repaired.pdf  # qpdf attempts structural recovery automatically while rewriting
312
+
313
+ # Dump internal structure
314
+ qpdf --show-pages input.pdf > structure.txt
315
+ ```
316
+
317
+ #### Encryption Management
318
+
319
+ ```bash
320
+ # Apply 256-bit encryption with restricted permissions
321
+ qpdf --encrypt user_pass owner_pass 256 --print=none --modify=none -- input.pdf encrypted.pdf
322
+
323
+ # Inspect protection status
324
+ qpdf --show-encryption encrypted.pdf
325
+
326
+ # Strip encryption (password required)
327
+ qpdf --password=secret123 --decrypt encrypted.pdf decrypted.pdf
328
+ ```
329
+
330
+ ---
331
+
332
+ ## Advanced Python Patterns
333
+
334
+ ### pdfplumber: Precision Features
335
+
336
+ #### Character-Level Coordinate Access
337
+
338
+ ```python
339
+ import pdfplumber
340
+
341
+ with pdfplumber.open("document.pdf") as doc:
342
+ pg = doc.pages[0]
343
+
344
+ # Individual character positions
345
+ for ch in pg.chars[:10]:
346
+ print("Char: '{}' at x:{:.1f} y:{:.1f}".format(ch['text'], ch['x0'], ch['y0']))
347
+
348
+ # Region-bounded text extraction (left, top, right, bottom)
349
+ region_text = pg.within_bbox((100, 100, 400, 200)).extract_text()
350
+ ```
351
+
352
+ #### Custom Table Detection Parameters
353
+
354
+ ```python
355
+ import pdfplumber
356
+ import pandas as pd
357
+
358
+ with pdfplumber.open("complex_table.pdf") as doc:
359
+ pg = doc.pages[0]
360
+
361
+ config = {
362
+ "vertical_strategy": "lines",
363
+ "horizontal_strategy": "lines",
364
+ "snap_tolerance": 3,
365
+ "intersection_tolerance": 15
366
+ }
367
+ found = pg.extract_tables(config)
368
+
369
+ # Debug visualization
370
+ debug_img = pg.to_image(resolution=150)
371
+ debug_img.save("debug_layout.png")
372
+ ```
373
+
374
+ ### reportlab: Professional Output
375
+
376
+ #### Styled Tabular Reports
377
+
378
+ ```python
379
+ from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
380
+ from reportlab.lib.styles import getSampleStyleSheet
381
+ from reportlab.lib import colors
382
+
383
+ records = [
384
+ ['Product', 'Q1', 'Q2', 'Q3', 'Q4'],
385
+ ['Widgets', '120', '135', '142', '158'],
386
+ ['Gadgets', '85', '92', '98', '105']
387
+ ]
388
+
389
+ template = SimpleDocTemplate("report.pdf")
390
+ parts = []
391
+
392
+ styles = getSampleStyleSheet()
393
+ parts.append(Paragraph("Quarterly Sales Report", styles['Title']))
394
+
395
+ grid = Table(records)
396
+ grid.setStyle(TableStyle([
397
+ ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
398
+ ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
399
+ ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
400
+ ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
401
+ ('FONTSIZE', (0, 0), (-1, 0), 14),
402
+ ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
403
+ ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
404
+ ('GRID', (0, 0), (-1, -1), 1, colors.black)
405
+ ]))
406
+ parts.append(grid)
407
+
408
+ template.build(parts)
409
+ ```
410
+
411
+ ---
412
+
413
+ ## Composite Workflows
414
+
415
+ ### Extracting Visual Assets
416
+
417
+ #### Approach 1: Shell-Based (Fastest)
418
+
419
+ ```bash
420
+ pdfimages -all document.pdf images/img
421
+ ```
422
+
423
+ #### Approach 2: Programmatic with pypdfium2
424
+
425
+ ```python
426
+ import pypdfium2 as pdfium
427
+ from PIL import Image
428
+ import numpy as np
429
+
430
+ def extract_figures(pdf_path, output_dir):
431
+ doc = pdfium.PdfDocument(pdf_path)
432
+
433
+ for pg_idx, pg in enumerate(doc):
434
+ bmp = pg.render(scale=3.0)
435
+ frame = bmp.to_pil()
436
+
437
+ arr = np.array(frame)
438
+
439
+ # Naive figure detection: non-white pixel regions
440
+ non_white = np.any(arr != [255, 255, 255], axis=2)
441
+
442
+ # Contour analysis and bounding box extraction
443
+ # (Simplified — production use needs more robust detection)
444
+
445
+ # Save discovered regions
446
+ # ... details depend on requirements
447
+ ```
448
+
449
+ ### Batch Operations with Resilience
450
+
451
+ ```python
452
+ import os
453
+ import glob
454
+ import logging
455
+
456
+ import pypdf
457
+
458
+ logging.basicConfig(level=logging.INFO)
459
+ log = logging.getLogger(__name__)
460
+
461
+ def batch_process(source_dir, mode='merge'):
462
+ targets = glob.glob(os.path.join(source_dir, "*.pdf"))
463
+
464
+ if mode == 'merge':
465
+ output = pypdf.PdfWriter()
466
+ for path in targets:
467
+ try:
468
+ rdr = pypdf.PdfReader(path)
469
+ for pg in rdr.pages:
470
+ output.add_page(pg)
471
+ log.info("Processed: %s", path)
472
+ except Exception as exc:
473
+ log.error("Failed to process %s: %s", path, exc)
474
+ continue
475
+
476
+ with open("batch_merged.pdf", "wb") as dest:
477
+ output.write(dest)
478
+
479
+ elif mode == 'extract_text':
480
+ for path in targets:
481
+ try:
482
+ rdr = pypdf.PdfReader(path)
483
+ content = "".join(pg.extract_text() for pg in rdr.pages)
484
+
485
+ txt_path = path.replace('.pdf', '.txt')
486
+ with open(txt_path, 'w', encoding='utf-8') as out:
487
+ out.write(content)
488
+ log.info("Extracted text from: %s", path)
489
+
490
+ except Exception as exc:
491
+ log.error("Failed to extract text from %s: %s", path, exc)
492
+ continue
493
+ ```
494
+
495
+ ### Region Cropping
496
+
497
+ ```python
498
+ import pypdf
499
+
500
+ source = pypdf.PdfReader("input.pdf")
501
+ output = pypdf.PdfWriter()
502
+
503
+ # Define visible area (left, bottom, right, top in points)
504
+ pg = source.pages[0]
505
+ pg.mediabox.left = 50
506
+ pg.mediabox.bottom = 50
507
+ pg.mediabox.right = 550
508
+ pg.mediabox.top = 750
509
+
510
+ output.add_page(pg)
511
+ with open("cropped.pdf", "wb") as dest:
512
+ output.write(dest)
513
+ ```
514
+
515
+ ---
516
+
517
+ ## Performance Guidelines
518
+
519
+ ### 1. Handling Large Documents
520
+ - Process pages individually rather than loading entire files into memory
521
+ - Leverage `qpdf --split-pages` for breaking apart large PDFs
522
+ - Use pypdfium2 for per-page rendering without full document buffering
523
+
524
+ ### 2. Text Extraction Speed
525
+ - Plain `pdftotext` provides the fastest plain-text pipeline; add `-bbox-layout` only when text coordinates are needed (it emits XHTML rather than plain text)
526
+ - pdfplumber excels at structured/tabular content
527
+ - Avoid running pypdf's `page.extract_text()` over every page of very large files
528
+
529
+ ### 3. Image Extraction Efficiency
530
+ - `pdfimages` significantly outperforms page rendering for embedded assets
531
+ - Use low DPI for thumbnails, high DPI for production output
532
+
533
+ ### 4. Form Processing
534
+ - pdf-lib preserves form structure more reliably than most alternatives
535
+ - Always validate field specifications before bulk processing
536
+
537
+ ### 5. Memory-Conscious Processing
538
+
539
+ ```python
540
+ import pypdf
541
+
542
+ def chunked_processing(pdf_path, pages_per_chunk=10):
543
+ source = pypdf.PdfReader(pdf_path)
544
+ n_pages = len(source.pages)
545
+
546
+ for offset in range(0, n_pages, pages_per_chunk):
547
+ limit = min(offset + pages_per_chunk, n_pages)
548
+ chunk = pypdf.PdfWriter()
549
+
550
+ for k in range(offset, limit):
551
+ chunk.add_page(source.pages[k])
552
+
553
+ with open("chunk_{}.pdf".format(offset // pages_per_chunk), "wb") as dest:
554
+ chunk.write(dest)
555
+ ```
556
+
557
+ ---
558
+
559
+ ## Diagnosing Common Problems
560
+
561
+ ### Encrypted Documents
562
+
563
+ ```python
564
+ import pypdf
565
+
566
+ try:
567
+ doc = pypdf.PdfReader("encrypted.pdf")
568
+ if doc.is_encrypted:
569
+ doc.decrypt("password")
570
+ except Exception as exc:
571
+ print("Failed to decrypt: {}".format(exc))
572
+ ```
573
+
574
+ ### Damaged Files
575
+
576
+ ```bash
577
+ # Verify structural integrity, then rewrite the file in place (qpdf attempts recovery while reading)
578
+ qpdf --check corrupted.pdf
579
+ qpdf --replace-input corrupted.pdf
580
+ ```
581
+
582
+ ### Unreadable Scanned Content
583
+
584
+ ```python
585
+ import pytesseract
586
+ from pdf2image import convert_from_path
587
+
588
+ def ocr_fallback(pdf_path):
589
+ frames = convert_from_path(pdf_path)
590
+ content = ""
591
+ for frame in frames:
592
+ content += pytesseract.image_to_string(frame)
593
+ return content
594
+ ```
595
+
596
+ ---
597
+
598
+ ## Licensing Summary
599
+
600
+ | Library | License |
601
+ |---------|---------|
602
+ | pypdf | BSD |
603
+ | pdfplumber | MIT |
604
+ | pypdfium2 | Apache/BSD |
605
+ | reportlab | BSD |
606
+ | poppler-utils | GPL-2 |
607
+ | qpdf | Apache |
608
+ | pdf-lib | MIT |
609
+ | pdfjs-dist | Apache |