claude-code-templates 1.21.13 → 1.21.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -2
- package/.claude-plugin/marketplace.json +251 -0
- package/.github/workflows/component-security-validation.yml +129 -0
- package/README.md +13 -6
- package/cli-tool/README.md +56 -0
- package/cli-tool/bin/create-claude-config.js +2 -0
- package/cli-tool/components/.claude-plugin/marketplace.json +92 -0
- package/cli-tool/components/agents/development-tools/flutter-go-reviewer.md +163 -0
- package/cli-tool/components/agents/development-tools/unused-code-cleaner.md +194 -0
- package/cli-tool/components/commands/git-workflow/gemini-review.md +293 -0
- package/cli-tool/components/commands/testing/add-mutation-testing.md +2 -2
- package/cli-tool/components/commands/testing/add-property-based-testing.md +2 -2
- package/cli-tool/components/commands/testing/e2e-setup.md +2 -2
- package/cli-tool/components/commands/testing/generate-test-cases.md +2 -2
- package/cli-tool/components/commands/testing/generate-tests.md +8 -3
- package/cli-tool/components/commands/testing/setup-comprehensive-testing.md +2 -2
- package/cli-tool/components/commands/testing/setup-load-testing.md +2 -2
- package/cli-tool/components/commands/testing/setup-visual-testing.md +2 -2
- package/cli-tool/components/commands/testing/test-automation-orchestrator.md +2 -2
- package/cli-tool/components/commands/testing/test-changelog-automation.md +2 -2
- package/cli-tool/components/commands/testing/test-coverage.md +2 -2
- package/cli-tool/components/commands/testing/test-quality-analyzer.md +2 -2
- package/cli-tool/components/commands/testing/testing_plan_integration.md +2 -2
- package/cli-tool/components/commands/testing/write-tests.md +2 -2
- package/cli-tool/components/commands/utilities/ultra-think.md +10 -5
- package/cli-tool/components/hooks/git/validate-branch-name.json +1 -1
- package/cli-tool/components/mcps/devtools/chrome-devtools.json +9 -0
- package/cli-tool/components/mcps/devtools/grafana.json +15 -0
- package/cli-tool/components/mcps/devtools/pulumi.json +9 -0
- package/cli-tool/components/mcps/devtools/terraform.json +1 -1
- package/cli-tool/components/settings/statusline/context-monitor.py +1 -1
- package/cli-tool/components/skills/ANTHROPIC_ATTRIBUTION.md +81 -0
- package/cli-tool/components/skills/creative-design/algorithmic-art/LICENSE.txt +202 -0
- package/cli-tool/components/skills/creative-design/algorithmic-art/SKILL.md +405 -0
- package/cli-tool/components/skills/creative-design/algorithmic-art/templates/generator_template.js +223 -0
- package/cli-tool/components/skills/creative-design/algorithmic-art/templates/viewer.html +599 -0
- package/cli-tool/components/skills/creative-design/canvas-design/LICENSE.txt +202 -0
- package/cli-tool/components/skills/creative-design/canvas-design/SKILL.md +130 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/ArsenalSC-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/ArsenalSC-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BigShoulders-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BigShoulders-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BigShoulders-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Boldonse-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Boldonse-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BricolageGrotesque-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BricolageGrotesque-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BricolageGrotesque-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/CrimsonPro-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/CrimsonPro-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/CrimsonPro-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/CrimsonPro-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/DMMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/DMMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/EricaOne-OFL.txt +94 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/EricaOne-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/GeistMono-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/GeistMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/GeistMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Gloock-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Gloock-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexMono-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexSerif-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexSerif-BoldItalic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexSerif-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexSerif-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-BoldItalic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSerif-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSerif-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Italiana-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Italiana-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/JetBrainsMono-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/JetBrainsMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/JetBrainsMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Jura-Light.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Jura-Medium.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Jura-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/LibreBaskerville-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/LibreBaskerville-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-BoldItalic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NationalPark-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NationalPark-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NationalPark-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NothingYouCouldDo-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NothingYouCouldDo-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Outfit-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Outfit-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Outfit-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/PixelifySans-Medium.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/PixelifySans-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/PoiretOne-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/PoiretOne-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/RedHatMono-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/RedHatMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/RedHatMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Silkscreen-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Silkscreen-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/SmoochSans-Medium.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/SmoochSans-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Tektur-Medium.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Tektur-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Tektur-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-BoldItalic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/YoungSerif-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/YoungSerif-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/LICENSE.txt +202 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/SKILL.md +646 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/color_palettes.py +302 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/easing.py +230 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/frame_composer.py +469 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/gif_builder.py +246 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/typography.py +357 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/validators.py +264 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/visual_effects.py +494 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/requirements.txt +4 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/bounce.py +106 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/explode.py +331 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/fade.py +329 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/flip.py +291 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/kaleidoscope.py +211 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/morph.py +329 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/move.py +293 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/pulse.py +268 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/shake.py +127 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/slide.py +291 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/spin.py +269 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/wiggle.py +300 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/zoom.py +312 -0
- package/cli-tool/components/skills/creative-design/theme-factory/LICENSE.txt +202 -0
- package/cli-tool/components/skills/creative-design/theme-factory/SKILL.md +59 -0
- package/cli-tool/components/skills/creative-design/theme-factory/theme-showcase.pdf +0 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/arctic-frost.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/botanical-garden.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/desert-rose.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/forest-canopy.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/golden-hour.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/midnight-galaxy.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/modern-minimalist.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/ocean-depths.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/sunset-boulevard.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/tech-innovation.md +19 -0
- package/cli-tool/components/skills/development/artifacts-builder/LICENSE.txt +202 -0
- package/cli-tool/components/skills/development/artifacts-builder/SKILL.md +74 -0
- package/cli-tool/components/skills/development/artifacts-builder/scripts/bundle-artifact.sh +54 -0
- package/cli-tool/components/skills/development/artifacts-builder/scripts/init-artifact.sh +322 -0
- package/cli-tool/components/skills/development/artifacts-builder/scripts/shadcn-components.tar.gz +0 -0
- package/cli-tool/components/skills/development/git-commit-helper/SKILL.md +203 -0
- package/cli-tool/components/skills/development/mcp-builder/LICENSE.txt +202 -0
- package/cli-tool/components/skills/development/mcp-builder/SKILL.md +328 -0
- package/cli-tool/components/skills/development/mcp-builder/reference/evaluation.md +602 -0
- package/cli-tool/components/skills/development/mcp-builder/reference/mcp_best_practices.md +915 -0
- package/cli-tool/components/skills/development/mcp-builder/reference/node_mcp_server.md +916 -0
- package/cli-tool/components/skills/development/mcp-builder/reference/python_mcp_server.md +752 -0
- package/cli-tool/components/skills/development/mcp-builder/scripts/connections.py +151 -0
- package/cli-tool/components/skills/development/mcp-builder/scripts/evaluation.py +373 -0
- package/cli-tool/components/skills/development/mcp-builder/scripts/example_evaluation.xml +22 -0
- package/cli-tool/components/skills/development/mcp-builder/scripts/requirements.txt +2 -0
- package/cli-tool/components/skills/development/skill-creator/LICENSE.txt +202 -0
- package/cli-tool/components/skills/development/skill-creator/SKILL.md +209 -0
- package/cli-tool/components/skills/development/skill-creator/scripts/init_skill.py +303 -0
- package/cli-tool/components/skills/development/skill-creator/scripts/package_skill.py +110 -0
- package/cli-tool/components/skills/development/skill-creator/scripts/quick_validate.py +65 -0
- package/cli-tool/components/skills/development/webapp-testing/LICENSE.txt +202 -0
- package/cli-tool/components/skills/development/webapp-testing/SKILL.md +96 -0
- package/cli-tool/components/skills/development/webapp-testing/examples/console_logging.py +35 -0
- package/cli-tool/components/skills/development/webapp-testing/examples/element_discovery.py +40 -0
- package/cli-tool/components/skills/development/webapp-testing/examples/static_html_automation.py +33 -0
- package/cli-tool/components/skills/development/webapp-testing/scripts/with_server.py +106 -0
- package/cli-tool/components/skills/document-processing/docx/LICENSE.txt +30 -0
- package/cli-tool/components/skills/document-processing/docx/SKILL.md +197 -0
- package/cli-tool/components/skills/document-processing/docx/docx-js.md +350 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/mce/mc.xsd +75 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/pack.py +159 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/unpack.py +29 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validate.py +69 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/__init__.py +15 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/base.py +951 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/docx.py +274 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/pptx.py +315 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/redlining.py +279 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml.md +610 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/__init__.py +1 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/document.py +1276 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/comments.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/commentsExtended.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/commentsIds.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/people.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/utilities.py +374 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/LICENSE.txt +30 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/SKILL.md +294 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/forms.md +205 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/reference.md +612 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/check_bounding_boxes.py +70 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/check_bounding_boxes_test.py +226 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/check_fillable_fields.py +12 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/convert_pdf_to_images.py +35 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/create_validation_image.py +41 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/extract_form_field_info.py +152 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/fill_fillable_fields.py +114 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/fill_pdf_form_with_annotations.py +108 -0
- package/cli-tool/components/skills/document-processing/pdf-processing/FORMS.md +143 -0
- package/cli-tool/components/skills/document-processing/pdf-processing/SKILL.md +149 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/FORMS.md +610 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/OCR.md +137 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/SKILL.md +296 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/TABLES.md +626 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/scripts/analyze_form.py +307 -0
- package/cli-tool/components/skills/document-processing/pptx/LICENSE.txt +30 -0
- package/cli-tool/components/skills/document-processing/pptx/SKILL.md +484 -0
- package/cli-tool/components/skills/document-processing/pptx/html2pptx.md +625 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/mce/mc.xsd +75 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/pack.py +159 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/unpack.py +29 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validate.py +69 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/__init__.py +15 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/base.py +951 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/docx.py +274 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/pptx.py +315 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/redlining.py +279 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml.md +427 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/html2pptx.js +979 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/inventory.py +1020 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/rearrange.py +231 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/replace.py +385 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/thumbnail.py +450 -0
- package/cli-tool/components/skills/document-processing/xlsx/LICENSE.txt +30 -0
- package/cli-tool/components/skills/document-processing/xlsx/SKILL.md +289 -0
- package/cli-tool/components/skills/document-processing/xlsx/recalc.py +178 -0
- package/cli-tool/components/skills/enterprise-communication/brand-guidelines/LICENSE.txt +202 -0
- package/cli-tool/components/skills/enterprise-communication/brand-guidelines/SKILL.md +73 -0
- package/cli-tool/components/skills/enterprise-communication/email-composer/SKILL.md +317 -0
- package/cli-tool/components/skills/enterprise-communication/excel-analysis/SKILL.md +247 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/LICENSE.txt +202 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/SKILL.md +32 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/examples/3p-updates.md +47 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/examples/company-newsletter.md +65 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/examples/faq-answers.md +30 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/examples/general-comms.md +16 -0
- package/cli-tool/package-lock.json +39 -16
- package/cli-tool/package.json +7 -2
- package/cli-tool/security-report.json +62361 -0
- package/cli-tool/src/analytics-web/chats_mobile.html +17 -16
- package/cli-tool/src/console-bridge.js +3 -3
- package/cli-tool/src/index.js +157 -10
- package/cli-tool/src/plugin-dashboard-web/app.js +806 -0
- package/cli-tool/src/plugin-dashboard-web/index.html +292 -0
- package/cli-tool/src/plugin-dashboard-web/styles.css +1781 -0
- package/cli-tool/src/plugin-dashboard.js +689 -0
- package/cli-tool/src/security-audit.js +164 -0
- package/cli-tool/src/validation/ARCHITECTURE.md +309 -0
- package/cli-tool/src/validation/BaseValidator.js +152 -0
- package/cli-tool/src/validation/README.md +543 -0
- package/cli-tool/src/validation/ValidationOrchestrator.js +305 -0
- package/cli-tool/src/validation/validators/IntegrityValidator.js +338 -0
- package/cli-tool/src/validation/validators/ProvenanceValidator.js +399 -0
- package/cli-tool/src/validation/validators/ReferenceValidator.js +373 -0
- package/cli-tool/src/validation/validators/SemanticValidator.js +449 -0
- package/cli-tool/src/validation/validators/StructuralValidator.js +376 -0
- package/docs/CLAUDE.md +363 -0
- package/docs/api/README.md +297 -0
- package/docs/api/package.json +7 -0
- package/docs/api/track-download-supabase.js +150 -0
- package/docs/blog/README.md +199 -0
- package/docs/blog/blog-articles.json +133 -0
- package/docs/blog/css/blog-controls.css +254 -0
- package/docs/blog/e2b-claude-code-sandbox/index.html +8 -0
- package/docs/blog/index.html +81 -124
- package/docs/blog/js/blog-loader.js +602 -0
- package/docs/blog/nextjs-vercel-claude-code-integration/index.html +8 -0
- package/docs/blog/supabase-claude-code-integration/index.html +8 -0
- package/docs/component.html +226 -48
- package/docs/components.json +61610 -604
- package/docs/css/blog.css +292 -0
- package/docs/css/component-page.css +840 -3
- package/docs/css/plugin-page.css +648 -0
- package/docs/css/styles.css +504 -1
- package/docs/css/trending.css +110 -6
- package/docs/download-stats.html +8 -0
- package/docs/index.html +48 -22
- package/docs/jobs.html +8 -0
- package/docs/js/cart-manager.js +21 -8
- package/docs/js/component-page.js +1013 -12
- package/docs/js/data-loader.js +11 -8
- package/docs/js/index-events.js +305 -53
- package/docs/js/plugin-page.js +390 -0
- package/docs/js/script.js +50 -3
- package/docs/js/search-functionality.js +19 -16
- package/docs/js/trending.js +55 -20
- package/docs/plugin.html +262 -0
- package/docs/sandbox-interface.html +8 -0
- package/docs/static/favicon/about.txt +6 -0
- package/docs/static/favicon/android-chrome-192x192.png +0 -0
- package/docs/static/favicon/android-chrome-512x512.png +0 -0
- package/docs/static/favicon/apple-touch-icon.png +0 -0
- package/docs/static/favicon/favicon-16x16.png +0 -0
- package/docs/static/favicon/favicon-32x32.png +0 -0
- package/docs/static/favicon/favicon.ico +0 -0
- package/docs/static/favicon/site.webmanifest +1 -0
- package/docs/trending-data.json +616 -579
- package/docs/trending.html +24 -3
- package/docs/vercel.json +12 -0
- package/docs/workflows.html +8 -0
- package/generate_components_json.py +386 -11
- package/package.json +1 -1
- package/sync-api.sh +50 -0
- package/vercel.json +10 -75
- package/ROADMAP.md +0 -278
- package/test_serpapi.py +0 -36
- /package/cli-tool/components/commands/svelte/{svelte:a11y.md → svelte-a11y.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:component.md → svelte-component.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:debug.md → svelte-debug.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:migrate.md → svelte-migrate.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:optimize.md → svelte-optimize.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:scaffold.md → svelte-scaffold.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-migrate.md → svelte-storybook-migrate.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-mock.md → svelte-storybook-mock.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-setup.md → svelte-storybook-setup.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-story.md → svelte-storybook-story.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-troubleshoot.md → svelte-storybook-troubleshoot.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook.md → svelte-storybook.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:test-coverage.md → svelte-test-coverage.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:test-fix.md → svelte-test-fix.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:test-setup.md → svelte-test-setup.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:test.md → svelte-test.md} +0 -0
|
@@ -0,0 +1,612 @@
|
|
|
1
|
+
# PDF Processing Advanced Reference
|
|
2
|
+
|
|
3
|
+
This document contains advanced PDF processing features, detailed examples, and additional libraries not covered in the main skill instructions.
|
|
4
|
+
|
|
5
|
+
## pypdfium2 Library (Apache/BSD License)
|
|
6
|
+
|
|
7
|
+
### Overview
|
|
8
|
+
pypdfium2 is a Python binding for PDFium (Chromium's PDF library). It's excellent for fast PDF rendering and image generation, and serves as a PyMuPDF replacement.
|
|
9
|
+
|
|
10
|
+
### Render PDF to Images
|
|
11
|
+
```python
|
|
12
|
+
import pypdfium2 as pdfium
|
|
13
|
+
from PIL import Image
|
|
14
|
+
|
|
15
|
+
# Load PDF
|
|
16
|
+
pdf = pdfium.PdfDocument("document.pdf")
|
|
17
|
+
|
|
18
|
+
# Render page to image
|
|
19
|
+
page = pdf[0] # First page
|
|
20
|
+
bitmap = page.render(
|
|
21
|
+
scale=2.0, # Higher resolution
|
|
22
|
+
rotation=0 # No rotation
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Convert to PIL Image
|
|
26
|
+
img = bitmap.to_pil()
|
|
27
|
+
img.save("page_1.png", "PNG")
|
|
28
|
+
|
|
29
|
+
# Process multiple pages
|
|
30
|
+
for i, page in enumerate(pdf):
|
|
31
|
+
bitmap = page.render(scale=1.5)
|
|
32
|
+
img = bitmap.to_pil()
|
|
33
|
+
img.save(f"page_{i+1}.jpg", "JPEG", quality=90)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Extract Text with pypdfium2
|
|
37
|
+
```python
|
|
38
|
+
import pypdfium2 as pdfium
|
|
39
|
+
|
|
40
|
+
pdf = pdfium.PdfDocument("document.pdf")
|
|
41
|
+
for i, page in enumerate(pdf):
|
|
42
|
+
text = page.get_textpage().get_text_range()
|
|
43
|
+
print(f"Page {i+1} text length: {len(text)} chars")
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## JavaScript Libraries
|
|
47
|
+
|
|
48
|
+
### pdf-lib (MIT License)
|
|
49
|
+
|
|
50
|
+
pdf-lib is a powerful JavaScript library for creating and modifying PDF documents in any JavaScript environment.
|
|
51
|
+
|
|
52
|
+
#### Load and Manipulate Existing PDF
|
|
53
|
+
```javascript
|
|
54
|
+
import { PDFDocument } from 'pdf-lib';
|
|
55
|
+
import fs from 'fs';
|
|
56
|
+
|
|
57
|
+
async function manipulatePDF() {
|
|
58
|
+
// Load existing PDF
|
|
59
|
+
const existingPdfBytes = fs.readFileSync('input.pdf');
|
|
60
|
+
const pdfDoc = await PDFDocument.load(existingPdfBytes);
|
|
61
|
+
|
|
62
|
+
// Get page count
|
|
63
|
+
const pageCount = pdfDoc.getPageCount();
|
|
64
|
+
console.log(`Document has ${pageCount} pages`);
|
|
65
|
+
|
|
66
|
+
// Add new page
|
|
67
|
+
const newPage = pdfDoc.addPage([600, 400]);
|
|
68
|
+
newPage.drawText('Added by pdf-lib', {
|
|
69
|
+
x: 100,
|
|
70
|
+
y: 300,
|
|
71
|
+
size: 16
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
// Save modified PDF
|
|
75
|
+
const pdfBytes = await pdfDoc.save();
|
|
76
|
+
fs.writeFileSync('modified.pdf', pdfBytes);
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
#### Create Complex PDFs from Scratch
|
|
81
|
+
```javascript
|
|
82
|
+
import { PDFDocument, rgb, StandardFonts } from 'pdf-lib';
|
|
83
|
+
import fs from 'fs';
|
|
84
|
+
|
|
85
|
+
async function createPDF() {
|
|
86
|
+
const pdfDoc = await PDFDocument.create();
|
|
87
|
+
|
|
88
|
+
// Add fonts
|
|
89
|
+
const helveticaFont = await pdfDoc.embedFont(StandardFonts.Helvetica);
|
|
90
|
+
const helveticaBold = await pdfDoc.embedFont(StandardFonts.HelveticaBold);
|
|
91
|
+
|
|
92
|
+
// Add page
|
|
93
|
+
const page = pdfDoc.addPage([595, 842]); // A4 size
|
|
94
|
+
const { width, height } = page.getSize();
|
|
95
|
+
|
|
96
|
+
// Add text with styling
|
|
97
|
+
page.drawText('Invoice #12345', {
|
|
98
|
+
x: 50,
|
|
99
|
+
y: height - 50,
|
|
100
|
+
size: 18,
|
|
101
|
+
font: helveticaBold,
|
|
102
|
+
color: rgb(0.2, 0.2, 0.8)
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
// Add rectangle (header background)
|
|
106
|
+
page.drawRectangle({
|
|
107
|
+
x: 40,
|
|
108
|
+
y: height - 100,
|
|
109
|
+
width: width - 80,
|
|
110
|
+
height: 30,
|
|
111
|
+
color: rgb(0.9, 0.9, 0.9)
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
// Add table-like content
|
|
115
|
+
const items = [
|
|
116
|
+
['Item', 'Qty', 'Price', 'Total'],
|
|
117
|
+
['Widget', '2', '$50', '$100'],
|
|
118
|
+
['Gadget', '1', '$75', '$75']
|
|
119
|
+
];
|
|
120
|
+
|
|
121
|
+
let yPos = height - 150;
|
|
122
|
+
items.forEach(row => {
|
|
123
|
+
let xPos = 50;
|
|
124
|
+
row.forEach(cell => {
|
|
125
|
+
page.drawText(cell, {
|
|
126
|
+
x: xPos,
|
|
127
|
+
y: yPos,
|
|
128
|
+
size: 12,
|
|
129
|
+
font: helveticaFont
|
|
130
|
+
});
|
|
131
|
+
xPos += 120;
|
|
132
|
+
});
|
|
133
|
+
yPos -= 25;
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
const pdfBytes = await pdfDoc.save();
|
|
137
|
+
fs.writeFileSync('created.pdf', pdfBytes);
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
#### Advanced Merge and Split Operations
|
|
142
|
+
```javascript
|
|
143
|
+
import { PDFDocument } from 'pdf-lib';
|
|
144
|
+
import fs from 'fs';
|
|
145
|
+
|
|
146
|
+
async function mergePDFs() {
|
|
147
|
+
// Create new document
|
|
148
|
+
const mergedPdf = await PDFDocument.create();
|
|
149
|
+
|
|
150
|
+
// Load source PDFs
|
|
151
|
+
const pdf1Bytes = fs.readFileSync('doc1.pdf');
|
|
152
|
+
const pdf2Bytes = fs.readFileSync('doc2.pdf');
|
|
153
|
+
|
|
154
|
+
const pdf1 = await PDFDocument.load(pdf1Bytes);
|
|
155
|
+
const pdf2 = await PDFDocument.load(pdf2Bytes);
|
|
156
|
+
|
|
157
|
+
// Copy pages from first PDF
|
|
158
|
+
const pdf1Pages = await mergedPdf.copyPages(pdf1, pdf1.getPageIndices());
|
|
159
|
+
pdf1Pages.forEach(page => mergedPdf.addPage(page));
|
|
160
|
+
|
|
161
|
+
// Copy specific pages from second PDF (pages 0, 2, 4)
|
|
162
|
+
const pdf2Pages = await mergedPdf.copyPages(pdf2, [0, 2, 4]);
|
|
163
|
+
pdf2Pages.forEach(page => mergedPdf.addPage(page));
|
|
164
|
+
|
|
165
|
+
const mergedPdfBytes = await mergedPdf.save();
|
|
166
|
+
fs.writeFileSync('merged.pdf', mergedPdfBytes);
|
|
167
|
+
}
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### pdfjs-dist (Apache License)
|
|
171
|
+
|
|
172
|
+
PDF.js is Mozilla's JavaScript library for rendering PDFs in the browser.
|
|
173
|
+
|
|
174
|
+
#### Basic PDF Loading and Rendering
|
|
175
|
+
```javascript
|
|
176
|
+
import * as pdfjsLib from 'pdfjs-dist';
|
|
177
|
+
|
|
178
|
+
// Configure worker (important for performance)
|
|
179
|
+
pdfjsLib.GlobalWorkerOptions.workerSrc = './pdf.worker.js';
|
|
180
|
+
|
|
181
|
+
async function renderPDF() {
|
|
182
|
+
// Load PDF
|
|
183
|
+
const loadingTask = pdfjsLib.getDocument('document.pdf');
|
|
184
|
+
const pdf = await loadingTask.promise;
|
|
185
|
+
|
|
186
|
+
console.log(`Loaded PDF with ${pdf.numPages} pages`);
|
|
187
|
+
|
|
188
|
+
// Get first page
|
|
189
|
+
const page = await pdf.getPage(1);
|
|
190
|
+
const viewport = page.getViewport({ scale: 1.5 });
|
|
191
|
+
|
|
192
|
+
// Render to canvas
|
|
193
|
+
const canvas = document.createElement('canvas');
|
|
194
|
+
const context = canvas.getContext('2d');
|
|
195
|
+
canvas.height = viewport.height;
|
|
196
|
+
canvas.width = viewport.width;
|
|
197
|
+
|
|
198
|
+
const renderContext = {
|
|
199
|
+
canvasContext: context,
|
|
200
|
+
viewport: viewport
|
|
201
|
+
};
|
|
202
|
+
|
|
203
|
+
await page.render(renderContext).promise;
|
|
204
|
+
document.body.appendChild(canvas);
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
#### Extract Text with Coordinates
|
|
209
|
+
```javascript
|
|
210
|
+
import * as pdfjsLib from 'pdfjs-dist';
|
|
211
|
+
|
|
212
|
+
async function extractText() {
|
|
213
|
+
const loadingTask = pdfjsLib.getDocument('document.pdf');
|
|
214
|
+
const pdf = await loadingTask.promise;
|
|
215
|
+
|
|
216
|
+
let fullText = '';
|
|
217
|
+
|
|
218
|
+
// Extract text from all pages
|
|
219
|
+
for (let i = 1; i <= pdf.numPages; i++) {
|
|
220
|
+
const page = await pdf.getPage(i);
|
|
221
|
+
const textContent = await page.getTextContent();
|
|
222
|
+
|
|
223
|
+
const pageText = textContent.items
|
|
224
|
+
.map(item => item.str)
|
|
225
|
+
.join(' ');
|
|
226
|
+
|
|
227
|
+
fullText += `\n--- Page ${i} ---\n${pageText}`;
|
|
228
|
+
|
|
229
|
+
// Get text with coordinates for advanced processing
|
|
230
|
+
const textWithCoords = textContent.items.map(item => ({
|
|
231
|
+
text: item.str,
|
|
232
|
+
x: item.transform[4],
|
|
233
|
+
y: item.transform[5],
|
|
234
|
+
width: item.width,
|
|
235
|
+
height: item.height
|
|
236
|
+
}));
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
console.log(fullText);
|
|
240
|
+
return fullText;
|
|
241
|
+
}
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
#### Extract Annotations and Forms
|
|
245
|
+
```javascript
|
|
246
|
+
import * as pdfjsLib from 'pdfjs-dist';
|
|
247
|
+
|
|
248
|
+
async function extractAnnotations() {
|
|
249
|
+
const loadingTask = pdfjsLib.getDocument('annotated.pdf');
|
|
250
|
+
const pdf = await loadingTask.promise;
|
|
251
|
+
|
|
252
|
+
for (let i = 1; i <= pdf.numPages; i++) {
|
|
253
|
+
const page = await pdf.getPage(i);
|
|
254
|
+
const annotations = await page.getAnnotations();
|
|
255
|
+
|
|
256
|
+
annotations.forEach(annotation => {
|
|
257
|
+
console.log(`Annotation type: ${annotation.subtype}`);
|
|
258
|
+
console.log(`Content: ${annotation.contents}`);
|
|
259
|
+
console.log(`Coordinates: ${JSON.stringify(annotation.rect)}`);
|
|
260
|
+
});
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## Advanced Command-Line Operations
|
|
266
|
+
|
|
267
|
+
### poppler-utils Advanced Features
|
|
268
|
+
|
|
269
|
+
#### Extract Text with Bounding Box Coordinates
|
|
270
|
+
```bash
|
|
271
|
+
# Extract text with bounding box coordinates (essential for structured data)
|
|
272
|
+
pdftotext -bbox-layout document.pdf output.xml
|
|
273
|
+
|
|
274
|
+
# The XML output contains precise coordinates for each text element
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
#### Advanced Image Conversion
|
|
278
|
+
```bash
|
|
279
|
+
# Convert to PNG images with specific resolution
|
|
280
|
+
pdftoppm -png -r 300 document.pdf output_prefix
|
|
281
|
+
|
|
282
|
+
# Convert specific page range with high resolution
|
|
283
|
+
pdftoppm -png -r 600 -f 1 -l 3 document.pdf high_res_pages
|
|
284
|
+
|
|
285
|
+
# Convert to JPEG with quality setting
|
|
286
|
+
pdftoppm -jpeg -jpegopt quality=85 -r 200 document.pdf jpeg_output
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
#### Extract Embedded Images
|
|
290
|
+
```bash
|
|
291
|
+
# Extract all embedded images (JPEGs kept as JPEG) with page numbers in the file names
|
|
292
|
+
pdfimages -j -p document.pdf page_images
|
|
293
|
+
|
|
294
|
+
# List image info without extracting
|
|
295
|
+
pdfimages -list document.pdf
|
|
296
|
+
|
|
297
|
+
# Extract images in their original format
|
|
298
|
+
pdfimages -all document.pdf images/img
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### qpdf Advanced Features
|
|
302
|
+
|
|
303
|
+
#### Complex Page Manipulation
|
|
304
|
+
```bash
|
|
305
|
+
# Split PDF into groups of pages
|
|
306
|
+
qpdf --split-pages=3 input.pdf output_group_%d.pdf
|
|
307
|
+
|
|
308
|
+
# Extract specific pages with complex ranges
|
|
309
|
+
qpdf input.pdf --pages input.pdf 1,3-5,8,10-z -- extracted.pdf
|
|
310
|
+
|
|
311
|
+
# Merge specific pages from multiple PDFs
|
|
312
|
+
qpdf --empty --pages doc1.pdf 1-3 doc2.pdf 5-7 doc3.pdf 2,4 -- combined.pdf
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
#### PDF Optimization and Repair
|
|
316
|
+
```bash
|
|
317
|
+
# Optimize PDF for web (linearize for streaming)
|
|
318
|
+
qpdf --linearize input.pdf optimized.pdf
|
|
319
|
+
|
|
320
|
+
# Remove unused objects and compress
|
|
321
|
+
qpdf --object-streams=generate --compress-streams=y input.pdf compressed.pdf
|
|
322
|
+
|
|
323
|
+
# Attempt to repair corrupted PDF structure
|
|
324
|
+
qpdf --check input.pdf
|
|
325
|
+
qpdf damaged.pdf repaired.pdf
|
|
326
|
+
|
|
327
|
+
# Show detailed PDF structure for debugging
|
|
328
|
+
qpdf --show-pages input.pdf > structure.txt
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
#### Advanced Encryption
|
|
332
|
+
```bash
|
|
333
|
+
# Add password protection with specific permissions
|
|
334
|
+
qpdf --encrypt user_pass owner_pass 256 --print=none --modify=none -- input.pdf encrypted.pdf
|
|
335
|
+
|
|
336
|
+
# Check encryption status
|
|
337
|
+
qpdf --show-encryption encrypted.pdf
|
|
338
|
+
|
|
339
|
+
# Remove password protection (requires password)
|
|
340
|
+
qpdf --password=secret123 --decrypt encrypted.pdf decrypted.pdf
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
## Advanced Python Techniques
|
|
344
|
+
|
|
345
|
+
### pdfplumber Advanced Features
|
|
346
|
+
|
|
347
|
+
#### Extract Text with Precise Coordinates
|
|
348
|
+
```python
|
|
349
|
+
import pdfplumber
|
|
350
|
+
|
|
351
|
+
with pdfplumber.open("document.pdf") as pdf:
|
|
352
|
+
page = pdf.pages[0]
|
|
353
|
+
|
|
354
|
+
# Extract all text with coordinates
|
|
355
|
+
chars = page.chars
|
|
356
|
+
for char in chars[:10]: # First 10 characters
|
|
357
|
+
print(f"Char: '{char['text']}' at x:{char['x0']:.1f} y:{char['y0']:.1f}")
|
|
358
|
+
|
|
359
|
+
# Extract text by bounding box (left, top, right, bottom)
|
|
360
|
+
bbox_text = page.within_bbox((100, 100, 400, 200)).extract_text()
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
#### Advanced Table Extraction with Custom Settings
|
|
364
|
+
```python
|
|
365
|
+
import pdfplumber
|
|
366
|
+
import pandas as pd
|
|
367
|
+
|
|
368
|
+
with pdfplumber.open("complex_table.pdf") as pdf:
|
|
369
|
+
page = pdf.pages[0]
|
|
370
|
+
|
|
371
|
+
# Extract tables with custom settings for complex layouts
|
|
372
|
+
table_settings = {
|
|
373
|
+
"vertical_strategy": "lines",
|
|
374
|
+
"horizontal_strategy": "lines",
|
|
375
|
+
"snap_tolerance": 3,
|
|
376
|
+
"intersection_tolerance": 15
|
|
377
|
+
}
|
|
378
|
+
tables = page.extract_tables(table_settings)
|
|
379
|
+
|
|
380
|
+
# Visual debugging for table extraction
|
|
381
|
+
img = page.to_image(resolution=150)
|
|
382
|
+
img.save("debug_layout.png")
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
### reportlab Advanced Features
|
|
386
|
+
|
|
387
|
+
#### Create Professional Reports with Tables
|
|
388
|
+
```python
|
|
389
|
+
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph
|
|
390
|
+
from reportlab.lib.styles import getSampleStyleSheet
|
|
391
|
+
from reportlab.lib import colors
|
|
392
|
+
|
|
393
|
+
# Sample data
|
|
394
|
+
data = [
|
|
395
|
+
['Product', 'Q1', 'Q2', 'Q3', 'Q4'],
|
|
396
|
+
['Widgets', '120', '135', '142', '158'],
|
|
397
|
+
['Gadgets', '85', '92', '98', '105']
|
|
398
|
+
]
|
|
399
|
+
|
|
400
|
+
# Create PDF with table
|
|
401
|
+
doc = SimpleDocTemplate("report.pdf")
|
|
402
|
+
elements = []
|
|
403
|
+
|
|
404
|
+
# Add title
|
|
405
|
+
styles = getSampleStyleSheet()
|
|
406
|
+
title = Paragraph("Quarterly Sales Report", styles['Title'])
|
|
407
|
+
elements.append(title)
|
|
408
|
+
|
|
409
|
+
# Add table with advanced styling
|
|
410
|
+
table = Table(data)
|
|
411
|
+
table.setStyle(TableStyle([
|
|
412
|
+
('BACKGROUND', (0, 0), (-1, 0), colors.grey),
|
|
413
|
+
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
|
|
414
|
+
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
|
|
415
|
+
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
|
|
416
|
+
('FONTSIZE', (0, 0), (-1, 0), 14),
|
|
417
|
+
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
|
|
418
|
+
('BACKGROUND', (0, 1), (-1, -1), colors.beige),
|
|
419
|
+
('GRID', (0, 0), (-1, -1), 1, colors.black)
|
|
420
|
+
]))
|
|
421
|
+
elements.append(table)
|
|
422
|
+
|
|
423
|
+
doc.build(elements)
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
## Complex Workflows
|
|
427
|
+
|
|
428
|
+
### Extract Figures/Images from PDF
|
|
429
|
+
|
|
430
|
+
#### Method 1: Using pdfimages (fastest)
|
|
431
|
+
```bash
|
|
432
|
+
# Extract all images with original quality
|
|
433
|
+
pdfimages -all document.pdf images/img
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
#### Method 2: Using pypdfium2 + Image Processing
|
|
437
|
+
```python
|
|
438
|
+
import pypdfium2 as pdfium
|
|
439
|
+
from PIL import Image
|
|
440
|
+
import numpy as np
|
|
441
|
+
|
|
442
|
+
def extract_figures(pdf_path, output_dir):
|
|
443
|
+
pdf = pdfium.PdfDocument(pdf_path)
|
|
444
|
+
|
|
445
|
+
for page_num, page in enumerate(pdf):
|
|
446
|
+
# Render high-resolution page
|
|
447
|
+
bitmap = page.render(scale=3.0)
|
|
448
|
+
img = bitmap.to_pil()
|
|
449
|
+
|
|
450
|
+
# Convert to numpy for processing
|
|
451
|
+
img_array = np.array(img)
|
|
452
|
+
|
|
453
|
+
# Simple figure detection (non-white regions)
|
|
454
|
+
mask = np.any(img_array != [255, 255, 255], axis=2)
|
|
455
|
+
|
|
456
|
+
# Find contours and extract bounding boxes
|
|
457
|
+
# (This is simplified - real implementation would need more sophisticated detection)
|
|
458
|
+
|
|
459
|
+
# Save detected figures
|
|
460
|
+
# ... implementation depends on specific needs
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
### Batch PDF Processing with Error Handling
|
|
464
|
+
```python
|
|
465
|
+
import os
|
|
466
|
+
import glob
|
|
467
|
+
from pypdf import PdfReader, PdfWriter
|
|
468
|
+
import logging
|
|
469
|
+
|
|
470
|
+
logging.basicConfig(level=logging.INFO)
|
|
471
|
+
logger = logging.getLogger(__name__)
|
|
472
|
+
|
|
473
|
+
def batch_process_pdfs(input_dir, operation='merge'):
|
|
474
|
+
pdf_files = glob.glob(os.path.join(input_dir, "*.pdf"))
|
|
475
|
+
|
|
476
|
+
if operation == 'merge':
|
|
477
|
+
writer = PdfWriter()
|
|
478
|
+
for pdf_file in pdf_files:
|
|
479
|
+
try:
|
|
480
|
+
reader = PdfReader(pdf_file)
|
|
481
|
+
for page in reader.pages:
|
|
482
|
+
writer.add_page(page)
|
|
483
|
+
logger.info(f"Processed: {pdf_file}")
|
|
484
|
+
except Exception as e:
|
|
485
|
+
logger.error(f"Failed to process {pdf_file}: {e}")
|
|
486
|
+
continue
|
|
487
|
+
|
|
488
|
+
with open("batch_merged.pdf", "wb") as output:
|
|
489
|
+
writer.write(output)
|
|
490
|
+
|
|
491
|
+
elif operation == 'extract_text':
|
|
492
|
+
for pdf_file in pdf_files:
|
|
493
|
+
try:
|
|
494
|
+
reader = PdfReader(pdf_file)
|
|
495
|
+
text = ""
|
|
496
|
+
for page in reader.pages:
|
|
497
|
+
text += page.extract_text()
|
|
498
|
+
|
|
499
|
+
output_file = pdf_file.replace('.pdf', '.txt')
|
|
500
|
+
with open(output_file, 'w', encoding='utf-8') as f:
|
|
501
|
+
f.write(text)
|
|
502
|
+
logger.info(f"Extracted text from: {pdf_file}")
|
|
503
|
+
|
|
504
|
+
except Exception as e:
|
|
505
|
+
logger.error(f"Failed to extract text from {pdf_file}: {e}")
|
|
506
|
+
continue
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
### Advanced PDF Cropping
|
|
510
|
+
```python
|
|
511
|
+
from pypdf import PdfWriter, PdfReader
|
|
512
|
+
|
|
513
|
+
reader = PdfReader("input.pdf")
|
|
514
|
+
writer = PdfWriter()
|
|
515
|
+
|
|
516
|
+
# Crop page (left, bottom, right, top in points)
|
|
517
|
+
page = reader.pages[0]
|
|
518
|
+
page.mediabox.left = 50
|
|
519
|
+
page.mediabox.bottom = 50
|
|
520
|
+
page.mediabox.right = 550
|
|
521
|
+
page.mediabox.top = 750
|
|
522
|
+
|
|
523
|
+
writer.add_page(page)
|
|
524
|
+
with open("cropped.pdf", "wb") as output:
|
|
525
|
+
writer.write(output)
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
## Performance Optimization Tips
|
|
529
|
+
|
|
530
|
+
### 1. For Large PDFs
|
|
531
|
+
- Use streaming approaches instead of loading entire PDF in memory
|
|
532
|
+
- Use `qpdf --split-pages` for splitting large files
|
|
533
|
+
- Process pages individually with pypdfium2
|
|
534
|
+
|
|
535
|
+
### 2. For Text Extraction
|
|
536
|
+
- `pdftotext` is fastest for plain text extraction; add `-bbox-layout` only when you need coordinates
|
|
537
|
+
- Use pdfplumber for structured data and tables
|
|
538
|
+
- Avoid pypdf's `page.extract_text()` for very large documents
|
|
539
|
+
|
|
540
|
+
### 3. For Image Extraction
|
|
541
|
+
- `pdfimages` is much faster than rendering pages
|
|
542
|
+
- Use low resolution for previews, high resolution for final output
|
|
543
|
+
|
|
544
|
+
### 4. For Form Filling
|
|
545
|
+
- pdf-lib maintains form structure better than most alternatives
|
|
546
|
+
- Pre-validate form fields before processing
|
|
547
|
+
|
|
548
|
+
### 5. Memory Management
|
|
549
|
+
```python
|
|
550
|
+
# Process PDFs in chunks
|
|
551
|
+
def process_large_pdf(pdf_path, chunk_size=10):
|
|
552
|
+
reader = PdfReader(pdf_path)
|
|
553
|
+
total_pages = len(reader.pages)
|
|
554
|
+
|
|
555
|
+
for start_idx in range(0, total_pages, chunk_size):
|
|
556
|
+
end_idx = min(start_idx + chunk_size, total_pages)
|
|
557
|
+
writer = PdfWriter()
|
|
558
|
+
|
|
559
|
+
for i in range(start_idx, end_idx):
|
|
560
|
+
writer.add_page(reader.pages[i])
|
|
561
|
+
|
|
562
|
+
# Process chunk
|
|
563
|
+
with open(f"chunk_{start_idx//chunk_size}.pdf", "wb") as output:
|
|
564
|
+
writer.write(output)
|
|
565
|
+
```
|
|
566
|
+
|
|
567
|
+
## Troubleshooting Common Issues
|
|
568
|
+
|
|
569
|
+
### Encrypted PDFs
|
|
570
|
+
```python
|
|
571
|
+
# Handle password-protected PDFs
|
|
572
|
+
from pypdf import PdfReader
|
|
573
|
+
|
|
574
|
+
try:
|
|
575
|
+
reader = PdfReader("encrypted.pdf")
|
|
576
|
+
if reader.is_encrypted:
|
|
577
|
+
reader.decrypt("password")
|
|
578
|
+
except Exception as e:
|
|
579
|
+
print(f"Failed to decrypt: {e}")
|
|
580
|
+
```
|
|
581
|
+
|
|
582
|
+
### Corrupted PDFs
|
|
583
|
+
```bash
|
|
584
|
+
# Use qpdf to repair
|
|
585
|
+
qpdf --check corrupted.pdf
|
|
586
|
+
qpdf --replace-input corrupted.pdf
|
|
587
|
+
```
|
|
588
|
+
|
|
589
|
+
### Text Extraction Issues
|
|
590
|
+
```python
|
|
591
|
+
# Fallback to OCR for scanned PDFs
|
|
592
|
+
import pytesseract
|
|
593
|
+
from pdf2image import convert_from_path
|
|
594
|
+
|
|
595
|
+
def extract_text_with_ocr(pdf_path):
|
|
596
|
+
images = convert_from_path(pdf_path)
|
|
597
|
+
text = ""
|
|
598
|
+
for i, image in enumerate(images):
|
|
599
|
+
text += pytesseract.image_to_string(image)
|
|
600
|
+
return text
|
|
601
|
+
```
|
|
602
|
+
|
|
603
|
+
## License Information
|
|
604
|
+
|
|
605
|
+
- **pypdf**: BSD License
|
|
606
|
+
- **pdfplumber**: MIT License
|
|
607
|
+
- **pypdfium2**: Apache/BSD License
|
|
608
|
+
- **reportlab**: BSD License
|
|
609
|
+
- **poppler-utils**: GPL-2 License
|
|
610
|
+
- **qpdf**: Apache License
|
|
611
|
+
- **pdf-lib**: MIT License
|
|
612
|
+
- **pdfjs-dist**: Apache License
|
package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/check_bounding_boxes.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
import json
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Script to check that the `fields.json` file that Claude creates when analyzing PDFs
|
|
7
|
+
# does not have overlapping bounding boxes. See forms.md.
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class RectAndField:
    """One bounding box taken from a form field, tagged with its role.

    Every entry of ``form_fields`` contributes two instances: one built from
    its ``label_bounding_box`` and one from its ``entry_bounding_box``.
    """

    # Box coordinates. Indices 0 and 2 are compared as the horizontal extent,
    # indices 1 and 3 as the vertical extent.
    rect: list[float]
    # "label" or "entry", depending on which box of the field this is.
    rect_type: str
    # The form-field dict this box was read from.
    field: dict


def _rects_intersect(r1, r2) -> bool:
    """Return True when two boxes overlap on both axes.

    Boxes that only touch along an edge count as disjoint. The comparison
    assumes index 0 <= index 2 and index 1 <= index 3 within each box.
    """
    disjoint_horizontal = r1[0] >= r2[2] or r1[2] <= r2[0]
    disjoint_vertical = r1[1] >= r2[3] or r1[3] <= r2[1]
    return not (disjoint_horizontal or disjoint_vertical)


def get_bounding_box_messages(fields_json_stream, *, max_messages: int = 20) -> list[str]:
    """Check the bounding boxes in a ``fields.json`` stream.

    Returns a list of messages that are printed to stdout for Claude to read.

    Two checks are made:

    * no two boxes (label or entry, from any field) on the same
      ``page_number`` may intersect;
    * when a field has ``entry_text``, its entry box must be at least as tall
      as the text's ``font_size`` (14 when not given).

    Args:
        fields_json_stream: Readable text stream containing a JSON object with
            a ``form_fields`` list. Each field needs ``description``,
            ``page_number``, ``label_bounding_box`` and ``entry_bounding_box``.
        max_messages: Once the message list (including the initial
            "Read N fields" line) reaches this length, checking stops and an
            "Aborting" line is appended.

    Returns:
        The "Read N fields" line, followed by one ``FAILURE: ...`` line per
        problem found, or a single ``SUCCESS: ...`` line when there is none.

    Raises:
        KeyError: If a required key is missing from the input.
        json.JSONDecodeError: If the stream is not valid JSON.
    """
    abort_message = "Aborting further checks; fix bounding boxes and try again"

    fields = json.load(fields_json_stream)
    form_fields = fields["form_fields"]
    messages = [f"Read {len(form_fields)} fields"]

    rects_and_fields = []
    for f in form_fields:
        rects_and_fields.append(RectAndField(f["label_bounding_box"], "label", f))
        rects_and_fields.append(RectAndField(f["entry_bounding_box"], "entry", f))

    has_error = False
    for i, ri in enumerate(rects_and_fields):
        # This is O(N^2); we can optimize if it becomes a problem.
        for rj in rects_and_fields[i + 1:]:
            if ri.field["page_number"] != rj.field["page_number"]:
                continue
            if not _rects_intersect(ri.rect, rj.rect):
                continue
            has_error = True
            if ri.field is rj.field:
                # Same field: this can only be its own label/entry pair.
                messages.append(
                    "FAILURE: intersection between label and entry bounding boxes "
                    f"for `{ri.field['description']}` ({ri.rect}, {rj.rect})"
                )
            else:
                messages.append(
                    f"FAILURE: intersection between {ri.rect_type} bounding box "
                    f"for `{ri.field['description']}` ({ri.rect}) "
                    f"and {rj.rect_type} bounding box "
                    f"for `{rj.field['description']}` ({rj.rect})"
                )
            if len(messages) >= max_messages:
                messages.append(abort_message)
                return messages

        # The height check applies only to entry boxes that will receive text.
        if ri.rect_type == "entry" and "entry_text" in ri.field:
            font_size = ri.field["entry_text"].get("font_size", 14)
            entry_height = ri.rect[3] - ri.rect[1]
            if entry_height < font_size:
                has_error = True
                messages.append(
                    f"FAILURE: entry bounding box height ({entry_height}) "
                    f"for `{ri.field['description']}` is too short for the text content "
                    f"(font size: {font_size}). "
                    "Increase the box height or decrease the font size."
                )
                if len(messages) >= max_messages:
                    messages.append(abort_message)
                    return messages

    if not has_error:
        messages.append("SUCCESS: All bounding boxes are valid")
    return messages
|
|
61
|
+
|
|
62
|
+
if __name__ == "__main__":
    # Command-line entry point: validate one fields.json file and print every
    # resulting message to stdout, one per line.
    if len(sys.argv) != 2:
        print("Usage: check_bounding_boxes.py [fields.json]")
        sys.exit(1)
    # Input file should be in the `fields.json` format described in forms.md.
    # Decode explicitly as UTF-8 so non-ASCII field descriptions are read
    # correctly even where the platform default encoding is something else.
    with open(sys.argv[1], encoding="utf-8") as f:
        messages = get_bounding_box_messages(f)
    for msg in messages:
        print(msg)
|