claude-code-templates 1.21.13 → 1.21.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -2
- package/.claude-plugin/marketplace.json +251 -0
- package/.github/workflows/component-security-validation.yml +129 -0
- package/README.md +13 -6
- package/cli-tool/README.md +56 -0
- package/cli-tool/bin/create-claude-config.js +2 -0
- package/cli-tool/components/.claude-plugin/marketplace.json +92 -0
- package/cli-tool/components/agents/development-tools/flutter-go-reviewer.md +163 -0
- package/cli-tool/components/agents/development-tools/unused-code-cleaner.md +194 -0
- package/cli-tool/components/commands/git-workflow/gemini-review.md +293 -0
- package/cli-tool/components/commands/testing/add-mutation-testing.md +2 -2
- package/cli-tool/components/commands/testing/add-property-based-testing.md +2 -2
- package/cli-tool/components/commands/testing/e2e-setup.md +2 -2
- package/cli-tool/components/commands/testing/generate-test-cases.md +2 -2
- package/cli-tool/components/commands/testing/generate-tests.md +8 -3
- package/cli-tool/components/commands/testing/setup-comprehensive-testing.md +2 -2
- package/cli-tool/components/commands/testing/setup-load-testing.md +2 -2
- package/cli-tool/components/commands/testing/setup-visual-testing.md +2 -2
- package/cli-tool/components/commands/testing/test-automation-orchestrator.md +2 -2
- package/cli-tool/components/commands/testing/test-changelog-automation.md +2 -2
- package/cli-tool/components/commands/testing/test-coverage.md +2 -2
- package/cli-tool/components/commands/testing/test-quality-analyzer.md +2 -2
- package/cli-tool/components/commands/testing/testing_plan_integration.md +2 -2
- package/cli-tool/components/commands/testing/write-tests.md +2 -2
- package/cli-tool/components/commands/utilities/ultra-think.md +10 -5
- package/cli-tool/components/hooks/git/validate-branch-name.json +1 -1
- package/cli-tool/components/mcps/devtools/chrome-devtools.json +9 -0
- package/cli-tool/components/mcps/devtools/grafana.json +15 -0
- package/cli-tool/components/mcps/devtools/pulumi.json +9 -0
- package/cli-tool/components/mcps/devtools/terraform.json +1 -1
- package/cli-tool/components/settings/statusline/context-monitor.py +1 -1
- package/cli-tool/components/skills/ANTHROPIC_ATTRIBUTION.md +81 -0
- package/cli-tool/components/skills/creative-design/algorithmic-art/LICENSE.txt +202 -0
- package/cli-tool/components/skills/creative-design/algorithmic-art/SKILL.md +405 -0
- package/cli-tool/components/skills/creative-design/algorithmic-art/templates/generator_template.js +223 -0
- package/cli-tool/components/skills/creative-design/algorithmic-art/templates/viewer.html +599 -0
- package/cli-tool/components/skills/creative-design/canvas-design/LICENSE.txt +202 -0
- package/cli-tool/components/skills/creative-design/canvas-design/SKILL.md +130 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/ArsenalSC-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/ArsenalSC-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BigShoulders-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BigShoulders-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BigShoulders-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Boldonse-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Boldonse-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BricolageGrotesque-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BricolageGrotesque-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/BricolageGrotesque-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/CrimsonPro-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/CrimsonPro-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/CrimsonPro-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/CrimsonPro-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/DMMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/DMMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/EricaOne-OFL.txt +94 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/EricaOne-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/GeistMono-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/GeistMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/GeistMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Gloock-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Gloock-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexMono-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexSerif-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexSerif-BoldItalic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexSerif-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/IBMPlexSerif-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-BoldItalic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSans-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSerif-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/InstrumentSerif-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Italiana-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Italiana-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/JetBrainsMono-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/JetBrainsMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/JetBrainsMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Jura-Light.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Jura-Medium.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Jura-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/LibreBaskerville-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/LibreBaskerville-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-BoldItalic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Lora-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NationalPark-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NationalPark-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NationalPark-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NothingYouCouldDo-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/NothingYouCouldDo-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Outfit-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Outfit-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Outfit-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/PixelifySans-Medium.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/PixelifySans-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/PoiretOne-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/PoiretOne-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/RedHatMono-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/RedHatMono-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/RedHatMono-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Silkscreen-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Silkscreen-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/SmoochSans-Medium.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/SmoochSans-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Tektur-Medium.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Tektur-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/Tektur-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-Bold.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-BoldItalic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-Italic.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/WorkSans-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/YoungSerif-OFL.txt +93 -0
- package/cli-tool/components/skills/creative-design/canvas-design/canvas-fonts/YoungSerif-Regular.ttf +0 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/LICENSE.txt +202 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/SKILL.md +646 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/color_palettes.py +302 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/easing.py +230 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/frame_composer.py +469 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/gif_builder.py +246 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/typography.py +357 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/validators.py +264 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/core/visual_effects.py +494 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/requirements.txt +4 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/bounce.py +106 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/explode.py +331 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/fade.py +329 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/flip.py +291 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/kaleidoscope.py +211 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/morph.py +329 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/move.py +293 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/pulse.py +268 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/shake.py +127 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/slide.py +291 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/spin.py +269 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/wiggle.py +300 -0
- package/cli-tool/components/skills/creative-design/slack-gif-creator/templates/zoom.py +312 -0
- package/cli-tool/components/skills/creative-design/theme-factory/LICENSE.txt +202 -0
- package/cli-tool/components/skills/creative-design/theme-factory/SKILL.md +59 -0
- package/cli-tool/components/skills/creative-design/theme-factory/theme-showcase.pdf +0 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/arctic-frost.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/botanical-garden.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/desert-rose.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/forest-canopy.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/golden-hour.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/midnight-galaxy.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/modern-minimalist.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/ocean-depths.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/sunset-boulevard.md +19 -0
- package/cli-tool/components/skills/creative-design/theme-factory/themes/tech-innovation.md +19 -0
- package/cli-tool/components/skills/development/artifacts-builder/LICENSE.txt +202 -0
- package/cli-tool/components/skills/development/artifacts-builder/SKILL.md +74 -0
- package/cli-tool/components/skills/development/artifacts-builder/scripts/bundle-artifact.sh +54 -0
- package/cli-tool/components/skills/development/artifacts-builder/scripts/init-artifact.sh +322 -0
- package/cli-tool/components/skills/development/artifacts-builder/scripts/shadcn-components.tar.gz +0 -0
- package/cli-tool/components/skills/development/git-commit-helper/SKILL.md +203 -0
- package/cli-tool/components/skills/development/mcp-builder/LICENSE.txt +202 -0
- package/cli-tool/components/skills/development/mcp-builder/SKILL.md +328 -0
- package/cli-tool/components/skills/development/mcp-builder/reference/evaluation.md +602 -0
- package/cli-tool/components/skills/development/mcp-builder/reference/mcp_best_practices.md +915 -0
- package/cli-tool/components/skills/development/mcp-builder/reference/node_mcp_server.md +916 -0
- package/cli-tool/components/skills/development/mcp-builder/reference/python_mcp_server.md +752 -0
- package/cli-tool/components/skills/development/mcp-builder/scripts/connections.py +151 -0
- package/cli-tool/components/skills/development/mcp-builder/scripts/evaluation.py +373 -0
- package/cli-tool/components/skills/development/mcp-builder/scripts/example_evaluation.xml +22 -0
- package/cli-tool/components/skills/development/mcp-builder/scripts/requirements.txt +2 -0
- package/cli-tool/components/skills/development/skill-creator/LICENSE.txt +202 -0
- package/cli-tool/components/skills/development/skill-creator/SKILL.md +209 -0
- package/cli-tool/components/skills/development/skill-creator/scripts/init_skill.py +303 -0
- package/cli-tool/components/skills/development/skill-creator/scripts/package_skill.py +110 -0
- package/cli-tool/components/skills/development/skill-creator/scripts/quick_validate.py +65 -0
- package/cli-tool/components/skills/development/webapp-testing/LICENSE.txt +202 -0
- package/cli-tool/components/skills/development/webapp-testing/SKILL.md +96 -0
- package/cli-tool/components/skills/development/webapp-testing/examples/console_logging.py +35 -0
- package/cli-tool/components/skills/development/webapp-testing/examples/element_discovery.py +40 -0
- package/cli-tool/components/skills/development/webapp-testing/examples/static_html_automation.py +33 -0
- package/cli-tool/components/skills/development/webapp-testing/scripts/with_server.py +106 -0
- package/cli-tool/components/skills/document-processing/docx/LICENSE.txt +30 -0
- package/cli-tool/components/skills/document-processing/docx/SKILL.md +197 -0
- package/cli-tool/components/skills/document-processing/docx/docx-js.md +350 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/mce/mc.xsd +75 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/pack.py +159 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/unpack.py +29 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validate.py +69 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/__init__.py +15 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/base.py +951 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/docx.py +274 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/pptx.py +315 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml/scripts/validation/redlining.py +279 -0
- package/cli-tool/components/skills/document-processing/docx/ooxml.md +610 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/__init__.py +1 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/document.py +1276 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/comments.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/commentsExtended.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/commentsIds.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/templates/people.xml +3 -0
- package/cli-tool/components/skills/document-processing/docx/scripts/utilities.py +374 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/LICENSE.txt +30 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/SKILL.md +294 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/forms.md +205 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/reference.md +612 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/check_bounding_boxes.py +70 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/check_bounding_boxes_test.py +226 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/check_fillable_fields.py +12 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/convert_pdf_to_images.py +35 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/create_validation_image.py +41 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/extract_form_field_info.py +152 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/fill_fillable_fields.py +114 -0
- package/cli-tool/components/skills/document-processing/pdf-anthropic/scripts/fill_pdf_form_with_annotations.py +108 -0
- package/cli-tool/components/skills/document-processing/pdf-processing/FORMS.md +143 -0
- package/cli-tool/components/skills/document-processing/pdf-processing/SKILL.md +149 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/FORMS.md +610 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/OCR.md +137 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/SKILL.md +296 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/TABLES.md +626 -0
- package/cli-tool/components/skills/document-processing/pdf-processing-pro/scripts/analyze_form.py +307 -0
- package/cli-tool/components/skills/document-processing/pptx/LICENSE.txt +30 -0
- package/cli-tool/components/skills/document-processing/pptx/SKILL.md +484 -0
- package/cli-tool/components/skills/document-processing/pptx/html2pptx.md +625 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/mce/mc.xsd +75 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/pack.py +159 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/unpack.py +29 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validate.py +69 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/__init__.py +15 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/base.py +951 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/docx.py +274 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/pptx.py +315 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml/scripts/validation/redlining.py +279 -0
- package/cli-tool/components/skills/document-processing/pptx/ooxml.md +427 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/html2pptx.js +979 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/inventory.py +1020 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/rearrange.py +231 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/replace.py +385 -0
- package/cli-tool/components/skills/document-processing/pptx/scripts/thumbnail.py +450 -0
- package/cli-tool/components/skills/document-processing/xlsx/LICENSE.txt +30 -0
- package/cli-tool/components/skills/document-processing/xlsx/SKILL.md +289 -0
- package/cli-tool/components/skills/document-processing/xlsx/recalc.py +178 -0
- package/cli-tool/components/skills/enterprise-communication/brand-guidelines/LICENSE.txt +202 -0
- package/cli-tool/components/skills/enterprise-communication/brand-guidelines/SKILL.md +73 -0
- package/cli-tool/components/skills/enterprise-communication/email-composer/SKILL.md +317 -0
- package/cli-tool/components/skills/enterprise-communication/excel-analysis/SKILL.md +247 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/LICENSE.txt +202 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/SKILL.md +32 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/examples/3p-updates.md +47 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/examples/company-newsletter.md +65 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/examples/faq-answers.md +30 -0
- package/cli-tool/components/skills/enterprise-communication/internal-comms/examples/general-comms.md +16 -0
- package/cli-tool/package-lock.json +39 -16
- package/cli-tool/package.json +7 -2
- package/cli-tool/security-report.json +62361 -0
- package/cli-tool/src/analytics-web/chats_mobile.html +17 -16
- package/cli-tool/src/console-bridge.js +3 -3
- package/cli-tool/src/index.js +157 -10
- package/cli-tool/src/plugin-dashboard-web/app.js +806 -0
- package/cli-tool/src/plugin-dashboard-web/index.html +292 -0
- package/cli-tool/src/plugin-dashboard-web/styles.css +1781 -0
- package/cli-tool/src/plugin-dashboard.js +689 -0
- package/cli-tool/src/security-audit.js +164 -0
- package/cli-tool/src/validation/ARCHITECTURE.md +309 -0
- package/cli-tool/src/validation/BaseValidator.js +152 -0
- package/cli-tool/src/validation/README.md +543 -0
- package/cli-tool/src/validation/ValidationOrchestrator.js +305 -0
- package/cli-tool/src/validation/validators/IntegrityValidator.js +338 -0
- package/cli-tool/src/validation/validators/ProvenanceValidator.js +399 -0
- package/cli-tool/src/validation/validators/ReferenceValidator.js +373 -0
- package/cli-tool/src/validation/validators/SemanticValidator.js +449 -0
- package/cli-tool/src/validation/validators/StructuralValidator.js +376 -0
- package/docs/CLAUDE.md +363 -0
- package/docs/api/README.md +297 -0
- package/docs/api/package.json +7 -0
- package/docs/api/track-download-supabase.js +150 -0
- package/docs/blog/README.md +199 -0
- package/docs/blog/blog-articles.json +133 -0
- package/docs/blog/css/blog-controls.css +254 -0
- package/docs/blog/e2b-claude-code-sandbox/index.html +8 -0
- package/docs/blog/index.html +81 -124
- package/docs/blog/js/blog-loader.js +602 -0
- package/docs/blog/nextjs-vercel-claude-code-integration/index.html +8 -0
- package/docs/blog/supabase-claude-code-integration/index.html +8 -0
- package/docs/component.html +226 -48
- package/docs/components.json +61610 -604
- package/docs/css/blog.css +292 -0
- package/docs/css/component-page.css +840 -3
- package/docs/css/plugin-page.css +648 -0
- package/docs/css/styles.css +504 -1
- package/docs/css/trending.css +110 -6
- package/docs/download-stats.html +8 -0
- package/docs/index.html +48 -22
- package/docs/jobs.html +8 -0
- package/docs/js/cart-manager.js +21 -8
- package/docs/js/component-page.js +1013 -12
- package/docs/js/data-loader.js +11 -8
- package/docs/js/index-events.js +305 -53
- package/docs/js/plugin-page.js +390 -0
- package/docs/js/script.js +50 -3
- package/docs/js/search-functionality.js +19 -16
- package/docs/js/trending.js +55 -20
- package/docs/plugin.html +262 -0
- package/docs/sandbox-interface.html +8 -0
- package/docs/static/favicon/about.txt +6 -0
- package/docs/static/favicon/android-chrome-192x192.png +0 -0
- package/docs/static/favicon/android-chrome-512x512.png +0 -0
- package/docs/static/favicon/apple-touch-icon.png +0 -0
- package/docs/static/favicon/favicon-16x16.png +0 -0
- package/docs/static/favicon/favicon-32x32.png +0 -0
- package/docs/static/favicon/favicon.ico +0 -0
- package/docs/static/favicon/site.webmanifest +1 -0
- package/docs/trending-data.json +616 -579
- package/docs/trending.html +24 -3
- package/docs/vercel.json +12 -0
- package/docs/workflows.html +8 -0
- package/generate_components_json.py +386 -11
- package/package.json +1 -1
- package/sync-api.sh +50 -0
- package/vercel.json +10 -75
- package/ROADMAP.md +0 -278
- package/test_serpapi.py +0 -36
- /package/cli-tool/components/commands/svelte/{svelte:a11y.md → svelte-a11y.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:component.md → svelte-component.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:debug.md → svelte-debug.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:migrate.md → svelte-migrate.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:optimize.md → svelte-optimize.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:scaffold.md → svelte-scaffold.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-migrate.md → svelte-storybook-migrate.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-mock.md → svelte-storybook-mock.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-setup.md → svelte-storybook-setup.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-story.md → svelte-storybook-story.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook-troubleshoot.md → svelte-storybook-troubleshoot.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:storybook.md → svelte-storybook.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:test-coverage.md → svelte-test-coverage.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:test-fix.md → svelte-test-fix.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:test-setup.md → svelte-test-setup.md} +0 -0
- /package/cli-tool/components/commands/svelte/{svelte:test.md → svelte-test.md} +0 -0
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
# PDF Table Extraction Guide
|
|
2
|
+
|
|
3
|
+
Advanced table extraction strategies for production environments.
|
|
4
|
+
|
|
5
|
+
## Table of contents
|
|
6
|
+
|
|
7
|
+
- Basic table extraction
|
|
8
|
+
- Multi-page tables
|
|
9
|
+
- Complex table structures
|
|
10
|
+
- Export formats
|
|
11
|
+
- Table detection algorithms
|
|
12
|
+
- Custom extraction rules
|
|
13
|
+
- Performance optimization
|
|
14
|
+
- Production examples
|
|
15
|
+
|
|
16
|
+
## Basic table extraction
|
|
17
|
+
|
|
18
|
+
### Using pdfplumber (recommended)
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
import pdfplumber
|
|
22
|
+
|
|
23
|
+
with pdfplumber.open("report.pdf") as pdf:
|
|
24
|
+
page = pdf.pages[0]
|
|
25
|
+
tables = page.extract_tables()
|
|
26
|
+
|
|
27
|
+
for i, table in enumerate(tables):
|
|
28
|
+
print(f"\nTable {i + 1}:")
|
|
29
|
+
for row in table:
|
|
30
|
+
print(row)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Using included script
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
python scripts/extract_tables.py report.pdf --output tables.csv
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Output:
|
|
40
|
+
```csv
|
|
41
|
+
Name,Age,City
|
|
42
|
+
John Doe,30,New York
|
|
43
|
+
Jane Smith,25,Los Angeles
|
|
44
|
+
Bob Johnson,35,Chicago
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Table extraction strategies
|
|
48
|
+
|
|
49
|
+
### Strategy 1: Automatic detection
|
|
50
|
+
|
|
51
|
+
Let pdfplumber auto-detect tables:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
import pdfplumber
|
|
55
|
+
|
|
56
|
+
with pdfplumber.open("document.pdf") as pdf:
|
|
57
|
+
for page_num, page in enumerate(pdf.pages, 1):
|
|
58
|
+
tables = page.extract_tables()
|
|
59
|
+
|
|
60
|
+
if tables:
|
|
61
|
+
print(f"Found {len(tables)} table(s) on page {page_num}")
|
|
62
|
+
|
|
63
|
+
for table_num, table in enumerate(tables, 1):
|
|
64
|
+
print(f"\nTable {table_num}:")
|
|
65
|
+
# First row is usually headers
|
|
66
|
+
headers = table[0]
|
|
67
|
+
print(f"Columns: {headers}")
|
|
68
|
+
|
|
69
|
+
# Data rows
|
|
70
|
+
for row in table[1:]:
|
|
71
|
+
print(row)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Strategy 2: Custom table settings
|
|
75
|
+
|
|
76
|
+
Fine-tune detection with custom settings:
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import pdfplumber
|
|
80
|
+
|
|
81
|
+
table_settings = {
|
|
82
|
+
"vertical_strategy": "lines", # or "text", "lines_strict"
|
|
83
|
+
"horizontal_strategy": "lines",
|
|
84
|
+
"explicit_vertical_lines": [],
|
|
85
|
+
"explicit_horizontal_lines": [],
|
|
86
|
+
"snap_tolerance": 3,
|
|
87
|
+
"join_tolerance": 3,
|
|
88
|
+
"edge_min_length": 3,
|
|
89
|
+
"min_words_vertical": 3,
|
|
90
|
+
"min_words_horizontal": 1,
|
|
91
|
+
"keep_blank_chars": False,
|
|
92
|
+
"text_tolerance": 3,
|
|
93
|
+
"text_x_tolerance": 3,
|
|
94
|
+
"text_y_tolerance": 3,
|
|
95
|
+
"intersection_tolerance": 3
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
with pdfplumber.open("document.pdf") as pdf:
|
|
99
|
+
page = pdf.pages[0]
|
|
100
|
+
tables = page.extract_tables(table_settings=table_settings)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Strategy 3: Explicit boundaries
|
|
104
|
+
|
|
105
|
+
Define table boundaries manually:
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
import pdfplumber
|
|
109
|
+
|
|
110
|
+
with pdfplumber.open("document.pdf") as pdf:
|
|
111
|
+
page = pdf.pages[0]
|
|
112
|
+
|
|
113
|
+
# Define bounding box (x0, top, x1, bottom)
|
|
114
|
+
bbox = (50, 100, 550, 700)
|
|
115
|
+
|
|
116
|
+
# Extract table within bounding box
|
|
117
|
+
cropped = page.within_bbox(bbox)
|
|
118
|
+
tables = cropped.extract_tables()
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Multi-page tables
|
|
122
|
+
|
|
123
|
+
### Detect and merge multi-page tables
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
import pdfplumber
|
|
127
|
+
|
|
128
|
+
def extract_multipage_table(pdf_path, start_page=0, end_page=None):
    """Extract a table that spans multiple pages and merge it into one.

    Only the first table detected on each page is used. The first row of
    the first table found becomes the header; on later pages a first row
    equal to that header is treated as a repeated header and dropped.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.
        start_page: Zero-based index of the first page to scan.
        end_page: Zero-based index one past the last page to scan (slice
            semantics); ``None`` scans to the end of the document.

    Returns:
        A list of rows with the header row first, or an empty list when
        no table was found in the page range.
    """
    all_rows = []
    headers = None

    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages[start_page:end_page]:
            tables = page.extract_tables()

            # Skip pages without a table (or with an empty first table).
            if not tables or not tables[0]:
                continue

            # Assume the first table on the page is the one being continued.
            table = tables[0]

            if headers is None:
                # First table found, even if it is not on the first page of
                # the range: capture headers and data.
                headers = table[0]
                all_rows.extend(table[1:])
            elif table[0] == headers:
                # Continuation page that repeats the header row.
                all_rows.extend(table[1:])
            else:
                all_rows.extend(table)

    return [headers] + all_rows if headers else all_rows
|
|
158
|
+
|
|
159
|
+
# Usage
|
|
160
|
+
table = extract_multipage_table("report.pdf", start_page=2, end_page=5)
|
|
161
|
+
|
|
162
|
+
print(f"Extracted {len(table) - 1} rows")
|
|
163
|
+
print(f"Columns: {table[0]}")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Complex table structures
|
|
167
|
+
|
|
168
|
+
### Handling merged cells
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
import pdfplumber
|
|
172
|
+
|
|
173
|
+
def handle_merged_cells(table):
    """Fill blank cells with the nearest non-blank value to their left.

    A cell counts as blank when it is ``None`` or an empty string. Blank
    cells at the start of a row (nothing to their left) become ``None``.

    Args:
        table: Iterable of rows, each an iterable of cell values.

    Returns:
        A new list of rows (lists); the input is not modified.
    """
    filled_table = []

    for source_row in table:
        carried = None  # most recent non-blank value seen in this row
        filled_row = []

        for value in source_row:
            if value is not None and value != "":
                carried = value
            filled_row.append(carried)

        filled_table.append(filled_row)

    return filled_table
|
|
193
|
+
|
|
194
|
+
# Usage
|
|
195
|
+
with pdfplumber.open("document.pdf") as pdf:
|
|
196
|
+
table = pdf.pages[0].extract_tables()[0]
|
|
197
|
+
clean_table = handle_merged_cells(table)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Nested tables
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
def extract_nested_tables(page, bbox):
    """Extract the tables found inside a rectangular region of a page.

    Args:
        page: Page object providing ``within_bbox``.
        bbox: Bounding box passed unchanged to ``page.within_bbox``.

    Returns:
        A list with one ``{"type": "nested", "data": table}`` dict per
        non-empty table detected in the cropped region.
    """
    region = page.within_bbox(bbox)

    # Empty tables are dropped; every other table is wrapped in a record.
    return [
        {"type": "nested", "data": found}
        for found in region.extract_tables()
        if found
    ]
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Tables with varying column counts
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
def normalize_table_columns(table):
    """Pad short rows with empty strings so every row has the same width.

    Args:
        table: List of rows (lists). A falsy table is returned unchanged.

    Returns:
        A new list of rows, each as long as the longest input row. Rows
        that are already full width are reused as-is, not copied.
    """
    if not table:
        return table

    width = max(map(len, table))

    return [
        row if len(row) >= width else row + [""] * (width - len(row))
        for row in table
    ]
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## Export formats
|
|
247
|
+
|
|
248
|
+
### Export to CSV
|
|
249
|
+
|
|
250
|
+
```python
|
|
251
|
+
import csv
|
|
252
|
+
|
|
253
|
+
def export_to_csv(table, output_path):
    """Write the rows of a table to a UTF-8 encoded CSV file.

    Args:
        table: Iterable of rows to write, in order.
        output_path: Destination file path; an existing file is overwritten.
    """
    # newline="" lets the csv module control line endings itself.
    with open(output_path, "w", newline="", encoding="utf-8") as out_file:
        emit_row = csv.writer(out_file).writerow
        for row in table:
            emit_row(row)
|
|
259
|
+
|
|
260
|
+
# Usage
|
|
261
|
+
table = extract_table("report.pdf")
|
|
262
|
+
export_to_csv(table, "output.csv")
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### Export to Excel
|
|
266
|
+
|
|
267
|
+
```python
|
|
268
|
+
import pandas as pd
|
|
269
|
+
|
|
270
|
+
def export_to_excel(tables, output_path):
    """Export multiple tables to one Excel workbook, one sheet per table.

    The first row of each table is used as the column header. Empty tables
    are skipped; sheets are named ``Table_N`` where ``N`` is the table's
    1-based position in ``tables`` (so skipped tables leave gaps).

    Args:
        tables: Iterable of tables, each a list of rows.
        output_path: Destination ``.xlsx`` path.

    Returns:
        None. When there is no non-empty table nothing is written, because
        a workbook without any sheet cannot be saved.
    """
    numbered = [(number, table) for number, table in enumerate(tables, 1) if table]
    if not numbered:
        return

    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
        for number, table in numbered:
            # Convert to DataFrame: first row is the header, the rest is data.
            df = pd.DataFrame(table[1:], columns=table[0])

            sheet_name = f"Table_{number}"
            df.to_excel(writer, sheet_name=sheet_name, index=False)

            # Auto-adjust column widths to the longest rendered value.
            worksheet = writer.sheets[sheet_name]
            for column in worksheet.columns:
                # Empty cells hold None; do not size them as the text "None".
                max_length = max(
                    (len(str(cell.value)) for cell in column if cell.value is not None),
                    default=0,
                )
                column_letter = column[0].column_letter
                worksheet.column_dimensions[column_letter].width = max_length + 2
|
|
296
|
+
|
|
297
|
+
# Usage
|
|
298
|
+
tables = extract_all_tables("report.pdf")
|
|
299
|
+
export_to_excel(tables, "output.xlsx")
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### Export to JSON
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
import json
|
|
306
|
+
|
|
307
|
+
def export_to_json(table, output_path):
    """Write a table to a JSON file as a list of row objects.

    The first row supplies the keys. Rows shorter than the header get
    ``None`` for the missing keys; cells beyond the header are ignored.
    A falsy table writes nothing and returns immediately.

    Args:
        table: List of rows; ``table[0]`` is the header row.
        output_path: Destination file path (written as UTF-8).
    """
    if not table:
        return

    columns = table[0]

    # One dict per data row, keyed by header name.
    records = [
        {
            name: (row[position] if position < len(row) else None)
            for position, name in enumerate(columns)
        }
        for row in table[1:]
    ]

    with open(output_path, "w", encoding="utf-8") as out_file:
        json.dump(records, out_file, indent=2)
|
|
328
|
+
|
|
329
|
+
# Usage
|
|
330
|
+
table = extract_table("report.pdf")
|
|
331
|
+
export_to_json(table, "output.json")
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
## Table detection algorithms
|
|
335
|
+
|
|
336
|
+
### Visual debugging
|
|
337
|
+
|
|
338
|
+
```python
|
|
339
|
+
import pdfplumber
|
|
340
|
+
|
|
341
|
+
def visualize_table_detection(pdf_path, page_num=0, output_path="debug.png"):
    """Save an image of one page with the table finder's debug overlay.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.
        page_num: Zero-based index of the page to render.
        output_path: Where the debug image is saved.
    """
    with pdfplumber.open(pdf_path) as document:
        target_page = document.pages[page_num]

        # Render the page at 150 DPI, then draw what the table finder detected.
        rendered = target_page.to_image(resolution=150)
        annotated = rendered.debug_tablefinder()
        annotated.save(output_path)

        print(f"Saved debug image to {output_path}")
|
|
353
|
+
|
|
354
|
+
# Usage
|
|
355
|
+
visualize_table_detection("document.pdf", page_num=0)
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### Algorithm: Line-based detection
|
|
359
|
+
|
|
360
|
+
Best for tables with visible borders:
|
|
361
|
+
|
|
362
|
+
```python
|
|
363
|
+
table_settings = {
|
|
364
|
+
"vertical_strategy": "lines",
|
|
365
|
+
"horizontal_strategy": "lines"
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
tables = page.extract_tables(table_settings=table_settings)
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
### Algorithm: Text-based detection
|
|
372
|
+
|
|
373
|
+
Best for tables without borders:
|
|
374
|
+
|
|
375
|
+
```python
|
|
376
|
+
table_settings = {
|
|
377
|
+
"vertical_strategy": "text",
|
|
378
|
+
"horizontal_strategy": "text"
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
tables = page.extract_tables(table_settings=table_settings)
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
### Algorithm: Explicit lines
|
|
385
|
+
|
|
386
|
+
For complex layouts, define lines manually:
|
|
387
|
+
|
|
388
|
+
```python
|
|
389
|
+
# Define vertical lines at x-coordinates
|
|
390
|
+
vertical_lines = [50, 150, 250, 350, 450, 550]
|
|
391
|
+
|
|
392
|
+
# Define horizontal lines at y-coordinates
|
|
393
|
+
horizontal_lines = [100, 130, 160, 190, 220, 250]
|
|
394
|
+
|
|
395
|
+
table_settings = {
|
|
396
|
+
"explicit_vertical_lines": vertical_lines,
|
|
397
|
+
"explicit_horizontal_lines": horizontal_lines
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
tables = page.extract_tables(table_settings=table_settings)
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
## Custom extraction rules
|
|
404
|
+
|
|
405
|
+
### Rule-based extraction
|
|
406
|
+
|
|
407
|
+
```python
|
|
408
|
+
def extract_with_rules(page, rules):
    """Skeleton showing where custom extraction rules would hook in.

    Every rule branch is currently a placeholder: no rule changes the
    result, and the function always returns ``page.extract_tables()``
    called with default settings.

    Args:
        page: Page object providing ``chars`` and ``extract_tables``.
        rules: Mapping of rule names to truthy/falsy flags. Keys read:
            ``bold_headers``, ``left_align_first_col``, ``currency_last_col``.

    Returns:
        The list of tables returned by ``page.extract_tables()``.
    """
    # Rule: "Headers are bold"
    wants_bold_headers = rules.get("bold_headers")
    if wants_bold_headers:
        # Characters whose font name contains "Bold"; collected but not yet
        # used to identify the header row.
        bold_chars = [
            char for char in page.chars if "Bold" in char.get("fontname", "")
        ]

    # Rule: "First column is always left-aligned"
    if rules.get("left_align_first_col"):
        # Placeholder: adjust extraction to respect alignment.
        pass

    # Rule: "Currency values in last column"
    if rules.get("currency_last_col"):
        # Placeholder: parse currency format.
        pass

    # No rule alters the settings yet, so defaults are used.
    return page.extract_tables()
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
### Post-processing rules
|
|
433
|
+
|
|
434
|
+
```python
|
|
435
|
+
def apply_post_processing(table, rules):
    """Apply post-processing rules to an extracted table.

    Supported rules (keys of ``rules`` with truthy values):
        ``strip_whitespace``: strip leading/trailing whitespace from string
            cells.
        ``parse_currency``: convert a ``$`` amount in the last cell of each
            row to ``float``. Values that cannot be parsed are left as text.
        ``parse_dates``: reserved; currently has no effect.

    Args:
        table: Iterable of rows, each a sequence of cell values.
        rules: Mapping of rule names to flags.

    Returns:
        A new list of rows (lists); the input is not modified.
    """
    strip_whitespace = rules.get("strip_whitespace")
    parse_currency = rules.get("parse_currency")

    processed = []

    for row in table:
        last_index = len(row) - 1
        new_row = []

        for i, cell in enumerate(row):
            value = cell

            # Rule: strip whitespace (only meaningful for strings).
            if strip_whitespace and isinstance(value, str):
                value = value.strip()

            # Rule: convert currency in the last column to float.
            if (
                parse_currency
                and i == last_index
                and isinstance(value, str)
                and "$" in value
            ):
                value = _parse_currency(value)

            # Rule: parse dates in the first column -- not implemented yet,
            # so the value passes through unchanged.

            new_row.append(value)

        processed.append(new_row)

    return processed


def _parse_currency(text):
    """Return ``text`` as a float, or ``text`` itself when it is not numeric.

    Handles thousands separators and accounting-style negatives, e.g.
    ``"$1,234.50"`` -> ``1234.5`` and ``"($75.00)"`` -> ``-75.0``.
    """
    cleaned = text.replace("$", "").replace(",", "").strip()

    # Accounting notation wraps negative amounts in parentheses.
    negative = cleaned.startswith("(") and cleaned.endswith(")")
    if negative:
        cleaned = cleaned[1:-1]

    try:
        amount = float(cleaned)
    except ValueError:
        # Not a number (e.g. "$ n/a"): keep the text rather than abort.
        return text

    return -amount if negative else amount
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
## Performance optimization
|
|
468
|
+
|
|
469
|
+
### Process large PDFs efficiently
|
|
470
|
+
|
|
471
|
+
```python
|
|
472
|
+
def extract_tables_optimized(pdf_path):
    """Extract all tables page by page while keeping memory use low.

    After each page is processed its cached layout objects are flushed.
    Garbage collection alone cannot free them, because the ``pdf.pages``
    list keeps a reference to every page for the life of the document.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.

    Returns:
        A flat list of every table found, in page order.
    """
    import gc

    results = []

    with pdfplumber.open(pdf_path) as pdf:
        pages = pdf.pages
        total_pages = len(pages)  # hoisted: constant for the whole loop

        for page_num, page in enumerate(pages, 1):
            print(f"Processing page {page_num}/{total_pages}")

            # Extract tables from current page
            results.extend(page.extract_tables())

            # Drop the page's cached objects, then let the collector
            # reclaim them.
            page.flush_cache()
            gc.collect()

    return results
|
|
491
|
+
```
|
|
492
|
+
|
|
493
|
+
### Parallel processing
|
|
494
|
+
|
|
495
|
+
```python
|
|
496
|
+
from concurrent.futures import ProcessPoolExecutor
|
|
497
|
+
import pdfplumber
|
|
498
|
+
|
|
499
|
+
def extract_page_tables(args):
    """Extract the tables of a single page.

    Takes one tuple argument so it can be used directly with
    ``executor.map``.

    Args:
        args: ``(pdf_path, page_num)`` pair; ``page_num`` is zero-based.

    Returns:
        The list of tables found on that page.
    """
    source_path, page_index = args

    # The document is opened from its path inside this call.
    with pdfplumber.open(source_path) as document:
        return document.pages[page_index].extract_tables()
|
|
506
|
+
|
|
507
|
+
def extract_tables_parallel(pdf_path, max_workers=4):
    """Extract tables from every page using a pool of worker processes.

    Args:
        pdf_path: Path of the PDF file to process.
        max_workers: Maximum number of worker processes.

    Returns:
        A flat list of all tables, ordered by page.
    """
    # Open once just to learn how many pages there are.
    with pdfplumber.open(pdf_path) as document:
        total_pages = len(document.pages)

    # One (path, page index) job per page.
    jobs = [(pdf_path, page_index) for page_index in range(total_pages)]

    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        per_page = list(pool.map(extract_page_tables, jobs))

    # Flatten the per-page lists; map() preserves page order.
    return [table for page_tables in per_page for table in page_tables]
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
## Production examples
|
|
529
|
+
|
|
530
|
+
### Example 1: Financial report extraction
|
|
531
|
+
|
|
532
|
+
```python
|
|
533
|
+
import pdfplumber
|
|
534
|
+
import pandas as pd
|
|
535
|
+
from decimal import Decimal
|
|
536
|
+
|
|
537
|
+
def extract_financial_tables(pdf_path):
    """Extract every table as a DataFrame, converting currency columns.

    The first row of each table is used as the column header. A column is
    treated as currency when at least one cell contains a literal ``$``.
    Such columns are converted to numbers: ``$`` and thousands separators
    are removed, accounting-style ``(1,234)`` becomes ``-1234``, and cells
    that still are not numeric become ``NaN``.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.

    Returns:
        A list of ``pandas.DataFrame`` objects, one per non-empty table,
        in page order.
    """
    tables = []

    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            for table in page.extract_tables():
                if not table:
                    continue

                # Convert to DataFrame
                df = pd.DataFrame(table[1:], columns=table[0])

                # Parse currency columns
                for col in df.columns:
                    # regex=False: as a regex, "$" is the end-of-string
                    # anchor and would match every cell in every column.
                    if df[col].str.contains("$", regex=False, na=False).any():
                        cleaned = df[col].str.replace(r"[$,]", "", regex=True).str.strip()
                        # Parentheses denote negative amounts.
                        cleaned = cleaned.str.replace(r"^\((.*)\)$", r"-\1", regex=True)
                        df[col] = pd.to_numeric(cleaned, errors="coerce")

                tables.append(df)

    return tables
|
|
559
|
+
```
|
|
560
|
+
|
|
561
|
+
### Example 2: Batch table extraction
|
|
562
|
+
|
|
563
|
+
```python
|
|
564
|
+
import glob
|
|
565
|
+
from pathlib import Path
|
|
566
|
+
|
|
567
|
+
def batch_extract_tables(input_dir, output_dir):
    """Extract tables from all PDFs in a directory into Excel workbooks.

    Each ``<name>.pdf`` directly inside ``input_dir`` produces
    ``<name>_tables.xlsx`` in ``output_dir``. A failure on one file is
    reported and does not stop the remaining files.

    Args:
        input_dir: Directory searched (non-recursively) for ``*.pdf`` files.
        output_dir: Directory for the workbooks; created, including any
            missing parent directories, if it does not exist.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)
    # parents=True so a nested output path works on the first run.
    output_path.mkdir(parents=True, exist_ok=True)

    # Sorted so files are processed in a reproducible order.
    for pdf_file in sorted(input_path.glob("*.pdf")):
        print(f"Processing: {pdf_file.name}")

        try:
            # Extract tables
            tables = extract_all_tables(str(pdf_file))

            # Export to Excel
            output_file = output_path / f"{pdf_file.stem}_tables.xlsx"
            export_to_excel(tables, str(output_file))

            print(f" ✓ Extracted {len(tables)} table(s)")

        except Exception as e:
            # Batch boundary: report the failure and continue with the
            # next file.
            print(f" ✗ Error: {e}")
|
|
589
|
+
|
|
590
|
+
# Usage
|
|
591
|
+
batch_extract_tables("invoices/", "extracted/")
|
|
592
|
+
```
|
|
593
|
+
|
|
594
|
+
## Best practices
|
|
595
|
+
|
|
596
|
+
1. **Visualize first**: Use debug mode to understand table structure
|
|
597
|
+
2. **Test settings**: Try different strategies for best results
|
|
598
|
+
3. **Handle errors**: PDFs vary widely in quality
|
|
599
|
+
4. **Validate output**: Check extracted data makes sense
|
|
600
|
+
5. **Post-process**: Clean and normalize extracted data
|
|
601
|
+
6. **Use pandas**: Leverage DataFrame operations for analysis
|
|
602
|
+
7. **Cache results**: Avoid re-processing large files
|
|
603
|
+
8. **Monitor performance**: Profile for bottlenecks
|
|
604
|
+
|
|
605
|
+
## Troubleshooting
|
|
606
|
+
|
|
607
|
+
### Tables not detected
|
|
608
|
+
|
|
609
|
+
1. Try different detection strategies
|
|
610
|
+
2. Use visual debugging to see structure
|
|
611
|
+
3. Define explicit lines manually
|
|
612
|
+
4. Check whether the table is actually an image
|
|
613
|
+
|
|
614
|
+
### Incorrect cell values
|
|
615
|
+
|
|
616
|
+
1. Adjust snap/join tolerance
|
|
617
|
+
2. Check text extraction quality
|
|
618
|
+
3. Use post-processing to clean data
|
|
619
|
+
4. Verify that the PDF is not a scanned image
|
|
620
|
+
|
|
621
|
+
### Performance issues
|
|
622
|
+
|
|
623
|
+
1. Process pages individually
|
|
624
|
+
2. Use parallel processing
|
|
625
|
+
3. Reduce image resolution
|
|
626
|
+
4. Extract only needed pages
|