wormclaude 1.0.119 → 1.0.121

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. package/dist/theme.js +1 -1
  2. package/dist/tui.js +6 -1
  3. package/package.json +1 -1
  4. package/skills/build-mcp-app/SKILL.md +0 -393
  5. package/skills/build-mcp-app/references/abuse-protection.md +0 -60
  6. package/skills/build-mcp-app/references/apps-sdk-messages.md +0 -227
  7. package/skills/build-mcp-app/references/directory-checklist.md +0 -18
  8. package/skills/build-mcp-app/references/iframe-sandbox.md +0 -164
  9. package/skills/build-mcp-app/references/payload-budgeting.md +0 -54
  10. package/skills/build-mcp-app/references/widget-templates.md +0 -249
  11. package/skills/build-mcp-server/SKILL.md +0 -222
  12. package/skills/build-mcp-server/references/auth.md +0 -108
  13. package/skills/build-mcp-server/references/deploy-cloudflare-workers.md +0 -106
  14. package/skills/build-mcp-server/references/elicitation.md +0 -129
  15. package/skills/build-mcp-server/references/remote-http-scaffold.md +0 -211
  16. package/skills/build-mcp-server/references/resources-and-prompts.md +0 -122
  17. package/skills/build-mcp-server/references/server-capabilities.md +0 -164
  18. package/skills/build-mcp-server/references/tool-design.md +0 -189
  19. package/skills/build-mcp-server/references/versions.md +0 -25
  20. package/skills/build-mcpb/SKILL.md +0 -200
  21. package/skills/build-mcpb/references/local-security.md +0 -149
  22. package/skills/build-mcpb/references/manifest-schema.md +0 -156
  23. package/skills/docx/script/__init__.py +0 -1
  24. package/skills/docx/script/accept_chages.py +0 -135
  25. package/skills/docx/script/comment.py +0 -318
  26. package/skills/docx/script/office/helpers/__init__.py +0 -0
  27. package/skills/docx/script/office/helpers/merge_runs.py +0 -199
  28. package/skills/docx/script/office/helpers/simplify_redlines.py +0 -197
  29. package/skills/docx/script/office/pack.py +0 -159
  30. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  31. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  32. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  33. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  34. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  35. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  36. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  37. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  38. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  39. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  40. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  41. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  42. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  43. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  44. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  45. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  46. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  47. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  48. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  49. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  50. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  51. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  52. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  53. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  54. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  55. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  56. package/skills/docx/script/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  57. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  58. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  59. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  60. package/skills/docx/script/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  61. package/skills/docx/script/office/schemas/mce/mc.xsd +0 -75
  62. package/skills/docx/script/office/schemas/microsoft/wml-2010.xsd +0 -560
  63. package/skills/docx/script/office/schemas/microsoft/wml-2012.xsd +0 -67
  64. package/skills/docx/script/office/schemas/microsoft/wml-2018.xsd +0 -14
  65. package/skills/docx/script/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  66. package/skills/docx/script/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  67. package/skills/docx/script/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  68. package/skills/docx/script/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  69. package/skills/docx/script/office/soffice.py +0 -183
  70. package/skills/docx/script/office/unpack.py +0 -132
  71. package/skills/docx/script/office/validate.py +0 -117
  72. package/skills/docx/script/office/validators/__init__.py +0 -15
  73. package/skills/docx/script/office/validators/base.py +0 -851
  74. package/skills/docx/script/office/validators/docx.py +0 -446
  75. package/skills/docx/script/office/validators/pptx.py +0 -275
  76. package/skills/docx/script/office/validators/redlining.py +0 -247
  77. package/skills/docx/script/templates/comments.xml +0 -3
  78. package/skills/docx/script/templates/commentsExtended.xml +0 -3
  79. package/skills/docx/script/templates/commentsExtensible.xml +0 -3
  80. package/skills/docx/script/templates/commentsIds.xml +0 -3
  81. package/skills/docx/script/templates/people.xml +0 -3
  82. package/skills/docx/skill.md +0 -593
  83. package/skills/explain.md +0 -14
  84. package/skills/frontend-design/SKILL.md +0 -42
  85. package/skills/pdf/FORMS.md +0 -294
  86. package/skills/pdf/REFERENCE.md +0 -612
  87. package/skills/pdf/SKILL.md +0 -314
  88. package/skills/pdf/scripts/check_bounding_boxes.py +0 -65
  89. package/skills/pdf/scripts/check_fillable_fields.py +0 -11
  90. package/skills/pdf/scripts/convert_pdf_to_images.py +0 -33
  91. package/skills/pdf/scripts/create_validation_image.py +0 -37
  92. package/skills/pdf/scripts/extract_form_field_info.py +0 -122
  93. package/skills/pdf/scripts/extract_form_structure.py +0 -115
  94. package/skills/pdf/scripts/fill_fillable_fields.py +0 -98
  95. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +0 -107
  96. package/skills/playground/SKILL.md +0 -77
  97. package/skills/playground/templates/code-map.md +0 -158
  98. package/skills/playground/templates/concept-map.md +0 -73
  99. package/skills/playground/templates/data-explorer.md +0 -67
  100. package/skills/playground/templates/design-playground.md +0 -67
  101. package/skills/playground/templates/diff-review.md +0 -179
  102. package/skills/playground/templates/document-critique.md +0 -171
  103. package/skills/pptx/SKILL.md +0 -230
  104. package/skills/pptx/editing.md +0 -205
  105. package/skills/pptx/pptxgenjs.md +0 -437
  106. package/skills/pptx/scripts/__init__.py +0 -0
  107. package/skills/pptx/scripts/add_slide.py +0 -195
  108. package/skills/pptx/scripts/clean.py +0 -286
  109. package/skills/pptx/scripts/office/helpers/__init__.py +0 -0
  110. package/skills/pptx/scripts/office/helpers/merge_runs.py +0 -199
  111. package/skills/pptx/scripts/office/helpers/simplify_redlines.py +0 -197
  112. package/skills/pptx/scripts/office/pack.py +0 -159
  113. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  114. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  115. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  116. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  117. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  118. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  119. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  120. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  121. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  122. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  123. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  124. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  125. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  126. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  127. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  128. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  129. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  130. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  131. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  132. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  133. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  134. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  135. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  136. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  137. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  138. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  139. package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  140. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  141. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  142. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  143. package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  144. package/skills/pptx/scripts/office/schemas/mce/mc.xsd +0 -75
  145. package/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  146. package/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  147. package/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  148. package/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  149. package/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  150. package/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  151. package/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  152. package/skills/pptx/scripts/office/soffice.py +0 -183
  153. package/skills/pptx/scripts/office/unpack.py +0 -132
  154. package/skills/pptx/scripts/office/validate.py +0 -117
  155. package/skills/pptx/scripts/office/validators/__init__.py +0 -15
  156. package/skills/pptx/scripts/office/validators/base.py +0 -851
  157. package/skills/pptx/scripts/office/validators/docx.py +0 -446
  158. package/skills/pptx/scripts/office/validators/pptx.py +0 -275
  159. package/skills/pptx/scripts/office/validators/redlining.py +0 -247
  160. package/skills/pptx/scripts/thumbnail.py +0 -289
  161. package/skills/recon.md +0 -16
  162. package/skills/security-audit/SKILL.md +0 -26
  163. package/skills/talent-creator/SKILL.md +0 -486
  164. package/skills/talent-creator/agents/analyzer.md +0 -274
  165. package/skills/talent-creator/agents/comparator.md +0 -202
  166. package/skills/talent-creator/agents/grader.md +0 -223
  167. package/skills/talent-creator/assets/eval_review.html +0 -146
  168. package/skills/talent-creator/eval-viewer/generate_review.py +0 -471
  169. package/skills/talent-creator/eval-viewer/viewer.html +0 -1325
  170. package/skills/talent-creator/references/schemas.md +0 -430
  171. package/skills/talent-creator/scripts/__init__.py +0 -0
  172. package/skills/talent-creator/scripts/aggregate_benchmark.py +0 -401
  173. package/skills/talent-creator/scripts/generate_report.py +0 -326
  174. package/skills/talent-creator/scripts/improve_description.py +0 -247
  175. package/skills/talent-creator/scripts/package_skill.py +0 -136
  176. package/skills/talent-creator/scripts/quick_validate.py +0 -146
  177. package/skills/talent-creator/scripts/run_eval.py +0 -310
  178. package/skills/talent-creator/scripts/run_loop.py +0 -328
  179. package/skills/talent-creator/scripts/utils.py +0 -47
  180. package/skills/xlsx/SKILL.md +0 -300
  181. package/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
  182. package/skills/xlsx/scripts/office/helpers/merge_runs.py +0 -199
  183. package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +0 -197
  184. package/skills/xlsx/scripts/office/pack.py +0 -159
  185. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  186. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  187. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  188. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  189. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  190. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  191. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  192. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  193. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  194. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  195. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  196. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  197. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  198. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  199. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  200. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  201. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  202. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  203. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  204. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  205. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  206. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  207. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  208. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  209. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  210. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  211. package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  212. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  213. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  214. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  215. package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  216. package/skills/xlsx/scripts/office/schemas/mce/mc.xsd +0 -75
  217. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  218. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  219. package/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  220. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  221. package/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  222. package/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  223. package/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  224. package/skills/xlsx/scripts/office/soffice.py +0 -183
  225. package/skills/xlsx/scripts/office/unpack.py +0 -132
  226. package/skills/xlsx/scripts/office/validate.py +0 -117
  227. package/skills/xlsx/scripts/office/validators/__init__.py +0 -15
  228. package/skills/xlsx/scripts/office/validators/base.py +0 -851
  229. package/skills/xlsx/scripts/office/validators/docx.py +0 -446
  230. package/skills/xlsx/scripts/office/validators/pptx.py +0 -275
  231. package/skills/xlsx/scripts/office/validators/redlining.py +0 -247
  232. package/skills/xlsx/scripts/recalc.py +0 -184
@@ -1,612 +0,0 @@
1
- # PDF Processing Advanced Reference
2
-
3
- This reference gathers the heavier PDF features, fuller worked examples, and the extra libraries that the main skill instructions leave out.
4
-
5
- ## pypdfium2 Library (Apache/BSD License)
6
-
7
- ### Overview
8
- pypdfium2 wraps PDFium (the PDF engine behind Chromium) for Python. It shines at quick rendering and image output, and makes a solid stand-in for PyMuPDF.
9
-
10
- ### Render PDF to Images
11
- ```python
12
- import pypdfium2 as pdfium
13
- from PIL import Image
14
-
15
- # Load PDF
16
- pdf = pdfium.PdfDocument("document.pdf")
17
-
18
- # Render page to image
19
- page = pdf[0] # First page
20
- bitmap = page.render(
21
- scale=2.0, # Higher resolution
22
- rotation=0 # No rotation
23
- )
24
-
25
- # Convert to PIL Image
26
- img = bitmap.to_pil()
27
- img.save("page_1.png", "PNG")
28
-
29
- # Process multiple pages
30
- for i, page in enumerate(pdf):
31
- bitmap = page.render(scale=1.5)
32
- img = bitmap.to_pil()
33
- img.save(f"page_{i+1}.jpg", "JPEG", quality=90)
34
- ```
35
-
36
- ### Extract Text with pypdfium2
37
- ```python
38
- import pypdfium2 as pdfium
39
-
40
- pdf = pdfium.PdfDocument("document.pdf")
41
- for i, page in enumerate(pdf):
42
- text = page.get_text()
43
- print(f"Page {i+1} text length: {len(text)} chars")
44
- ```
45
-
46
- ## JavaScript Libraries
47
-
48
- ### pdf-lib (MIT License)
49
-
50
- pdf-lib is a capable JavaScript library for building and editing PDF documents in any JavaScript runtime.
51
-
52
- #### Load and Manipulate Existing PDF
53
- ```javascript
54
- import { PDFDocument } from 'pdf-lib';
55
- import fs from 'fs';
56
-
57
- async function manipulatePDF() {
58
- // Load existing PDF
59
- const existingPdfBytes = fs.readFileSync('input.pdf');
60
- const pdfDoc = await PDFDocument.load(existingPdfBytes);
61
-
62
- // Get page count
63
- const pageCount = pdfDoc.getPageCount();
64
- console.log(`Document has ${pageCount} pages`);
65
-
66
- // Add new page
67
- const newPage = pdfDoc.addPage([600, 400]);
68
- newPage.drawText('Added by pdf-lib', {
69
- x: 100,
70
- y: 300,
71
- size: 16
72
- });
73
-
74
- // Save modified PDF
75
- const pdfBytes = await pdfDoc.save();
76
- fs.writeFileSync('modified.pdf', pdfBytes);
77
- }
78
- ```
79
-
80
- #### Create Complex PDFs from Scratch
81
- ```javascript
82
- import { PDFDocument, rgb, StandardFonts } from 'pdf-lib';
83
- import fs from 'fs';
84
-
85
- async function createPDF() {
86
- const pdfDoc = await PDFDocument.create();
87
-
88
- // Add fonts
89
- const helveticaFont = await pdfDoc.embedFont(StandardFonts.Helvetica);
90
- const helveticaBold = await pdfDoc.embedFont(StandardFonts.HelveticaBold);
91
-
92
- // Add page
93
- const page = pdfDoc.addPage([595, 842]); // A4 size
94
- const { width, height } = page.getSize();
95
-
96
- // Add text with styling
97
- page.drawText('Invoice #12345', {
98
- x: 50,
99
- y: height - 50,
100
- size: 18,
101
- font: helveticaBold,
102
- color: rgb(0.2, 0.2, 0.8)
103
- });
104
-
105
- // Add rectangle (header background)
106
- page.drawRectangle({
107
- x: 40,
108
- y: height - 100,
109
- width: width - 80,
110
- height: 30,
111
- color: rgb(0.9, 0.9, 0.9)
112
- });
113
-
114
- // Add table-like content
115
- const items = [
116
- ['Item', 'Qty', 'Price', 'Total'],
117
- ['Widget', '2', '$50', '$100'],
118
- ['Gadget', '1', '$75', '$75']
119
- ];
120
-
121
- let yPos = height - 150;
122
- items.forEach(row => {
123
- let xPos = 50;
124
- row.forEach(cell => {
125
- page.drawText(cell, {
126
- x: xPos,
127
- y: yPos,
128
- size: 12,
129
- font: helveticaFont
130
- });
131
- xPos += 120;
132
- });
133
- yPos -= 25;
134
- });
135
-
136
- const pdfBytes = await pdfDoc.save();
137
- fs.writeFileSync('created.pdf', pdfBytes);
138
- }
139
- ```
140
-
141
- #### Advanced Merge and Split Operations
142
- ```javascript
143
- import { PDFDocument } from 'pdf-lib';
144
- import fs from 'fs';
145
-
146
- async function mergePDFs() {
147
- // Create new document
148
- const mergedPdf = await PDFDocument.create();
149
-
150
- // Load source PDFs
151
- const pdf1Bytes = fs.readFileSync('doc1.pdf');
152
- const pdf2Bytes = fs.readFileSync('doc2.pdf');
153
-
154
- const pdf1 = await PDFDocument.load(pdf1Bytes);
155
- const pdf2 = await PDFDocument.load(pdf2Bytes);
156
-
157
- // Copy pages from first PDF
158
- const pdf1Pages = await mergedPdf.copyPages(pdf1, pdf1.getPageIndices());
159
- pdf1Pages.forEach(page => mergedPdf.addPage(page));
160
-
161
- // Copy specific pages from second PDF (pages 0, 2, 4)
162
- const pdf2Pages = await mergedPdf.copyPages(pdf2, [0, 2, 4]);
163
- pdf2Pages.forEach(page => mergedPdf.addPage(page));
164
-
165
- const mergedPdfBytes = await mergedPdf.save();
166
- fs.writeFileSync('merged.pdf', mergedPdfBytes);
167
- }
168
- ```
169
-
170
- ### pdfjs-dist (Apache License)
171
-
172
- PDF.js is Mozilla's JavaScript library for drawing PDFs inside the browser.
173
-
174
- #### Basic PDF Loading and Rendering
175
- ```javascript
176
- import * as pdfjsLib from 'pdfjs-dist';
177
-
178
- // Configure worker (important for performance)
179
- pdfjsLib.GlobalWorkerOptions.workerSrc = './pdf.worker.js';
180
-
181
- async function renderPDF() {
182
- // Load PDF
183
- const loadingTask = pdfjsLib.getDocument('document.pdf');
184
- const pdf = await loadingTask.promise;
185
-
186
- console.log(`Loaded PDF with ${pdf.numPages} pages`);
187
-
188
- // Get first page
189
- const page = await pdf.getPage(1);
190
- const viewport = page.getViewport({ scale: 1.5 });
191
-
192
- // Render to canvas
193
- const canvas = document.createElement('canvas');
194
- const context = canvas.getContext('2d');
195
- canvas.height = viewport.height;
196
- canvas.width = viewport.width;
197
-
198
- const renderContext = {
199
- canvasContext: context,
200
- viewport: viewport
201
- };
202
-
203
- await page.render(renderContext).promise;
204
- document.body.appendChild(canvas);
205
- }
206
- ```
207
-
208
- #### Extract Text with Coordinates
209
- ```javascript
210
- import * as pdfjsLib from 'pdfjs-dist';
211
-
212
- async function extractText() {
213
- const loadingTask = pdfjsLib.getDocument('document.pdf');
214
- const pdf = await loadingTask.promise;
215
-
216
- let fullText = '';
217
-
218
- // Extract text from all pages
219
- for (let i = 1; i <= pdf.numPages; i++) {
220
- const page = await pdf.getPage(i);
221
- const textContent = await page.getTextContent();
222
-
223
- const pageText = textContent.items
224
- .map(item => item.str)
225
- .join(' ');
226
-
227
- fullText += `\n--- Page ${i} ---\n${pageText}`;
228
-
229
- // Get text with coordinates for advanced processing
230
- const textWithCoords = textContent.items.map(item => ({
231
- text: item.str,
232
- x: item.transform[4],
233
- y: item.transform[5],
234
- width: item.width,
235
- height: item.height
236
- }));
237
- }
238
-
239
- console.log(fullText);
240
- return fullText;
241
- }
242
- ```
243
-
244
- #### Extract Annotations and Forms
245
- ```javascript
246
- import * as pdfjsLib from 'pdfjs-dist';
247
-
248
- async function extractAnnotations() {
249
- const loadingTask = pdfjsLib.getDocument('annotated.pdf');
250
- const pdf = await loadingTask.promise;
251
-
252
- for (let i = 1; i <= pdf.numPages; i++) {
253
- const page = await pdf.getPage(i);
254
- const annotations = await page.getAnnotations();
255
-
256
- annotations.forEach(annotation => {
257
- console.log(`Annotation type: ${annotation.subtype}`);
258
- console.log(`Content: ${annotation.contents}`);
259
- console.log(`Coordinates: ${JSON.stringify(annotation.rect)}`);
260
- });
261
- }
262
- }
263
- ```
264
-
265
- ## Advanced Command-Line Operations
266
-
267
- ### poppler-utils Advanced Features
268
-
269
- #### Extract Text with Bounding Box Coordinates
270
- ```bash
271
- # Extract text with bounding box coordinates (essential for structured data)
272
- pdftotext -bbox-layout document.pdf output.xml
273
-
274
- # The XML output contains precise coordinates for each text element
275
- ```
276
-
277
- #### Advanced Image Conversion
278
- ```bash
279
- # Convert to PNG images with specific resolution
280
- pdftoppm -png -r 300 document.pdf output_prefix
281
-
282
- # Convert specific page range with high resolution
283
- pdftoppm -png -r 600 -f 1 -l 3 document.pdf high_res_pages
284
-
285
- # Convert to JPEG with quality setting
286
- pdftoppm -jpeg -jpegopt quality=85 -r 200 document.pdf jpeg_output
287
- ```
288
-
289
- #### Extract Embedded Images
290
- ```bash
291
- # Extract all embedded images with metadata
292
- pdfimages -j -p document.pdf page_images
293
-
294
- # List image info without extracting
295
- pdfimages -list document.pdf
296
-
297
- # Extract images in their original format
298
- pdfimages -all document.pdf images/img
299
- ```
300
-
301
- ### qpdf Advanced Features
302
-
303
- #### Complex Page Manipulation
304
- ```bash
305
- # Split PDF into groups of pages
306
- qpdf --split-pages=3 input.pdf output_group_%02d.pdf
307
-
308
- # Extract specific pages with complex ranges
309
- qpdf input.pdf --pages input.pdf 1,3-5,8,10-end -- extracted.pdf
310
-
311
- # Merge specific pages from multiple PDFs
312
- qpdf --empty --pages doc1.pdf 1-3 doc2.pdf 5-7 doc3.pdf 2,4 -- combined.pdf
313
- ```
314
-
315
- #### PDF Optimization and Repair
316
- ```bash
317
- # Optimize PDF for web (linearize for streaming)
318
- qpdf --linearize input.pdf optimized.pdf
319
-
320
- # Remove unused objects and compress
321
- qpdf --optimize-level=all input.pdf compressed.pdf
322
-
323
- # Attempt to repair corrupted PDF structure
324
- qpdf --check input.pdf
325
- qpdf --fix-qdf damaged.pdf repaired.pdf
326
-
327
- # Show detailed PDF structure for debugging
328
- qpdf --show-all-pages input.pdf > structure.txt
329
- ```
330
-
331
- #### Advanced Encryption
332
- ```bash
333
- # Add password protection with specific permissions
334
- qpdf --encrypt user_pass owner_pass 256 --print=none --modify=none -- input.pdf encrypted.pdf
335
-
336
- # Check encryption status
337
- qpdf --show-encryption encrypted.pdf
338
-
339
- # Remove password protection (requires password)
340
- qpdf --password=secret123 --decrypt encrypted.pdf decrypted.pdf
341
- ```
342
-
343
- ## Advanced Python Techniques
344
-
345
- ### pdfplumber Advanced Features
346
-
347
- #### Extract Text with Precise Coordinates
348
- ```python
349
- import pdfplumber
350
-
351
- with pdfplumber.open("document.pdf") as pdf:
352
- page = pdf.pages[0]
353
-
354
- # Extract all text with coordinates
355
- chars = page.chars
356
- for char in chars[:10]: # First 10 characters
357
- print(f"Char: '{char['text']}' at x:{char['x0']:.1f} y:{char['y0']:.1f}")
358
-
359
- # Extract text by bounding box (left, top, right, bottom)
360
- bbox_text = page.within_bbox((100, 100, 400, 200)).extract_text()
361
- ```
362
-
363
- #### Advanced Table Extraction with Custom Settings
364
- ```python
365
- import pdfplumber
366
- import pandas as pd
367
-
368
- with pdfplumber.open("complex_table.pdf") as pdf:
369
- page = pdf.pages[0]
370
-
371
- # Extract tables with custom settings for complex layouts
372
- table_settings = {
373
- "vertical_strategy": "lines",
374
- "horizontal_strategy": "lines",
375
- "snap_tolerance": 3,
376
- "intersection_tolerance": 15
377
- }
378
- tables = page.extract_tables(table_settings)
379
-
380
- # Visual debugging for table extraction
381
- img = page.to_image(resolution=150)
382
- img.save("debug_layout.png")
383
- ```
384
-
385
- ### reportlab Advanced Features
386
-
387
- #### Create Professional Reports with Tables
388
- ```python
389
- from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
390
- from reportlab.lib.styles import getSampleStyleSheet
391
- from reportlab.lib import colors
392
-
393
- # Sample data
394
- data = [
395
- ['Product', 'Q1', 'Q2', 'Q3', 'Q4'],
396
- ['Widgets', '120', '135', '142', '158'],
397
- ['Gadgets', '85', '92', '98', '105']
398
- ]
399
-
400
- # Create PDF with table
401
- doc = SimpleDocTemplate("report.pdf")
402
- elements = []
403
-
404
- # Add title
405
- styles = getSampleStyleSheet()
406
- title = Paragraph("Quarterly Sales Report", styles['Title'])
407
- elements.append(title)
408
-
409
- # Add table with advanced styling
410
- table = Table(data)
411
- table.setStyle(TableStyle([
412
- ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
413
- ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
414
- ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
415
- ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
416
- ('FONTSIZE', (0, 0), (-1, 0), 14),
417
- ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
418
- ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
419
- ('GRID', (0, 0), (-1, -1), 1, colors.black)
420
- ]))
421
- elements.append(table)
422
-
423
- doc.build(elements)
424
- ```
425
-
426
- ## Complex Workflows
427
-
428
- ### Extract Figures/Images from PDF
429
-
430
- #### Method 1: Using pdfimages (fastest)
431
- ```bash
432
- # Extract all images with original quality
433
- pdfimages -all document.pdf images/img
434
- ```
435
-
436
- #### Method 2: Using pypdfium2 + Image Processing
437
- ```python
438
- import pypdfium2 as pdfium
439
- from PIL import Image
440
- import numpy as np
441
-
442
- def extract_figures(pdf_path, output_dir):
443
- pdf = pdfium.PdfDocument(pdf_path)
444
-
445
- for page_num, page in enumerate(pdf):
446
- # Render high-resolution page
447
- bitmap = page.render(scale=3.0)
448
- img = bitmap.to_pil()
449
-
450
- # Convert to numpy for processing
451
- img_array = np.array(img)
452
-
453
- # Simple figure detection (non-white regions)
454
- mask = np.any(img_array != [255, 255, 255], axis=2)
455
-
456
- # Find contours and extract bounding boxes
457
- # (This is simplified - real implementation would need more sophisticated detection)
458
-
459
- # Save detected figures
460
- # ... implementation depends on specific needs
461
- ```
462
-
463
- ### Batch PDF Processing with Error Handling
464
- ```python
465
- import os
466
- import glob
467
- from pypdf import PdfReader, PdfWriter
468
- import logging
469
-
470
- logging.basicConfig(level=logging.INFO)
471
- logger = logging.getLogger(__name__)
472
-
473
- def batch_process_pdfs(input_dir, operation='merge'):
474
- pdf_files = glob.glob(os.path.join(input_dir, "*.pdf"))
475
-
476
- if operation == 'merge':
477
- writer = PdfWriter()
478
- for pdf_file in pdf_files:
479
- try:
480
- reader = PdfReader(pdf_file)
481
- for page in reader.pages:
482
- writer.add_page(page)
483
- logger.info(f"Processed: {pdf_file}")
484
- except Exception as e:
485
- logger.error(f"Failed to process {pdf_file}: {e}")
486
- continue
487
-
488
- with open("batch_merged.pdf", "wb") as output:
489
- writer.write(output)
490
-
491
- elif operation == 'extract_text':
492
- for pdf_file in pdf_files:
493
- try:
494
- reader = PdfReader(pdf_file)
495
- text = ""
496
- for page in reader.pages:
497
- text += page.extract_text()
498
-
499
- output_file = pdf_file.replace('.pdf', '.txt')
500
- with open(output_file, 'w', encoding='utf-8') as f:
501
- f.write(text)
502
- logger.info(f"Extracted text from: {pdf_file}")
503
-
504
- except Exception as e:
505
- logger.error(f"Failed to extract text from {pdf_file}: {e}")
506
- continue
507
- ```
508
-
509
- ### Advanced PDF Cropping
510
- ```python
511
- from pypdf import PdfWriter, PdfReader
512
-
513
- reader = PdfReader("input.pdf")
514
- writer = PdfWriter()
515
-
516
- # Crop page (left, bottom, right, top in points)
517
- page = reader.pages[0]
518
- page.mediabox.left = 50
519
- page.mediabox.bottom = 50
520
- page.mediabox.right = 550
521
- page.mediabox.top = 750
522
-
523
- writer.add_page(page)
524
- with open("cropped.pdf", "wb") as output:
525
- writer.write(output)
526
- ```
527
-
528
- ## Performance Optimization Tips
529
-
530
- ### 1. For Large PDFs
531
- - Stream the file rather than pulling the whole PDF into memory
532
- - Lean on `qpdf --split-pages` to break up big files
533
- - Handle pages one at a time with pypdfium2
534
-
535
- ### 2. For Text Extraction
536
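- - Plain `pdftotext` is the quickest way to grab plain text; add `-bbox-layout` only when you also need block/line/word coordinates (that flag emits XHTML rather than plain text)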
537
- - Reach for pdfplumber when you need structured data and tables
538
- - Steer clear of pypdf's `page.extract_text()` on very large documents
539
-
540
- ### 3. For Image Extraction
541
- - `pdfimages` runs far faster than rendering whole pages
542
- - Keep resolution low for previews and high for the final output
543
-
544
- ### 4. For Form Filling
545
- - pdf-lib preserves form structure better than most of the alternatives
546
- - Validate form fields up front before you process them
547
-
548
- ### 5. Memory Management
549
- ```python
550
- # Process PDFs in chunks
551
- def process_large_pdf(pdf_path, chunk_size=10):
552
- reader = PdfReader(pdf_path)
553
- total_pages = len(reader.pages)
554
-
555
- for start_idx in range(0, total_pages, chunk_size):
556
- end_idx = min(start_idx + chunk_size, total_pages)
557
- writer = PdfWriter()
558
-
559
- for i in range(start_idx, end_idx):
560
- writer.add_page(reader.pages[i])
561
-
562
- # Process chunk
563
- with open(f"chunk_{start_idx//chunk_size}.pdf", "wb") as output:
564
- writer.write(output)
565
- ```
566
-
567
- ## Troubleshooting Common Issues
568
-
569
- ### Encrypted PDFs
570
- ```python
571
- # Handle password-protected PDFs
572
- from pypdf import PdfReader
573
-
574
- try:
575
- reader = PdfReader("encrypted.pdf")
576
- if reader.is_encrypted:
577
- reader.decrypt("password")
578
- except Exception as e:
579
- print(f"Failed to decrypt: {e}")
580
- ```
581
-
582
- ### Corrupted PDFs
583
- ```bash
584
- # Use qpdf to repair
585
- qpdf --check corrupted.pdf
586
- qpdf --replace-input corrupted.pdf
587
- ```
588
-
589
- ### Text Extraction Issues
590
- ```python
591
- # Fallback to OCR for scanned PDFs
592
- import pytesseract
593
- from pdf2image import convert_from_path
594
-
595
- def extract_text_with_ocr(pdf_path):
596
- images = convert_from_path(pdf_path)
597
- text = ""
598
- for i, image in enumerate(images):
599
- text += pytesseract.image_to_string(image)
600
- return text
601
- ```
602
-
603
- ## License Information
604
-
605
- - **pypdf**: BSD License
606
- - **pdfplumber**: MIT License
607
- - **pypdfium2**: Apache/BSD License
608
- - **reportlab**: BSD License
609
- - **poppler-utils**: GPL-2 License
610
- - **qpdf**: Apache License
611
- - **pdf-lib**: MIT License
612
- - **pdfjs-dist**: Apache License