@_vrsen/openswarm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +152 -0
- package/bin/openswarm.js +38 -0
- package/config.py +34 -0
- package/data_analyst_agent/.cursor/rules/data_analyst.mdc +43 -0
- package/data_analyst_agent/__init__.py +3 -0
- package/data_analyst_agent/__pycache__/__init__.cpython-312.pyc +0 -0
- package/data_analyst_agent/__pycache__/data_analyst_agent.cpython-312.pyc +0 -0
- package/data_analyst_agent/data_analyst_agent.py +45 -0
- package/data_analyst_agent/instructions.md +173 -0
- package/data_analyst_agent/test_files/test_file.csv +21 -0
- package/data_analyst_agent/tools/__init__.py +6 -0
- package/deep_research/__init__.py +1 -0
- package/deep_research/__pycache__/__init__.cpython-312.pyc +0 -0
- package/deep_research/__pycache__/deep_research.cpython-312.pyc +0 -0
- package/deep_research/deep_research.py +27 -0
- package/deep_research/instructions.md +104 -0
- package/deep_research/tools/__init__.py +1 -0
- package/docs_agent/__init__.py +3 -0
- package/docs_agent/__pycache__/__init__.cpython-312.pyc +0 -0
- package/docs_agent/__pycache__/docs_agent.cpython-312.pyc +0 -0
- package/docs_agent/docs_agent.py +61 -0
- package/docs_agent/instructions.md +418 -0
- package/docs_agent/tools/ConvertDocument.py +323 -0
- package/docs_agent/tools/CreateDocument.py +287 -0
- package/docs_agent/tools/ListDocuments.py +134 -0
- package/docs_agent/tools/ModifyDocument.py +247 -0
- package/docs_agent/tools/RestoreDocument.py +79 -0
- package/docs_agent/tools/ViewDocument.py +153 -0
- package/docs_agent/tools/__init__.py +1 -0
- package/docs_agent/tools/__pycache__/ConvertDocument.cpython-312.pyc +0 -0
- package/docs_agent/tools/__pycache__/CreateDocument.cpython-312.pyc +0 -0
- package/docs_agent/tools/__pycache__/ListDocuments.cpython-312.pyc +0 -0
- package/docs_agent/tools/__pycache__/ModifyDocument.cpython-312.pyc +0 -0
- package/docs_agent/tools/__pycache__/RestoreDocument.cpython-312.pyc +0 -0
- package/docs_agent/tools/__pycache__/ViewDocument.cpython-312.pyc +0 -0
- package/docs_agent/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__init__.py +1 -0
- package/docs_agent/tools/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/doc_file_utils.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_blocks.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_constants.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_core.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_css.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_images.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_page.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_paragraphs.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_playwright.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_selectors.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_docx_shared.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/__pycache__/html_validation.cpython-312.pyc +0 -0
- package/docs_agent/tools/utils/doc_file_utils.py +29 -0
- package/docs_agent/tools/utils/html_docx_blocks.py +262 -0
- package/docs_agent/tools/utils/html_docx_constants.py +78 -0
- package/docs_agent/tools/utils/html_docx_core.py +138 -0
- package/docs_agent/tools/utils/html_docx_css.py +262 -0
- package/docs_agent/tools/utils/html_docx_images.py +293 -0
- package/docs_agent/tools/utils/html_docx_page.py +185 -0
- package/docs_agent/tools/utils/html_docx_paragraphs.py +342 -0
- package/docs_agent/tools/utils/html_docx_playwright.py +184 -0
- package/docs_agent/tools/utils/html_docx_selectors.py +196 -0
- package/docs_agent/tools/utils/html_docx_shared.py +23 -0
- package/docs_agent/tools/utils/html_docx_tables.py +942 -0
- package/docs_agent/tools/utils/html_validation.py +102 -0
- package/helpers.py +59 -0
- package/image_generation_agent/__init__.py +1 -0
- package/image_generation_agent/__pycache__/__init__.cpython-312.pyc +0 -0
- package/image_generation_agent/__pycache__/image_generation_agent.cpython-312.pyc +0 -0
- package/image_generation_agent/image_generation_agent.py +31 -0
- package/image_generation_agent/instructions.md +80 -0
- package/image_generation_agent/tools/CombineImages.py +211 -0
- package/image_generation_agent/tools/EditImages.py +200 -0
- package/image_generation_agent/tools/GenerateImages.py +184 -0
- package/image_generation_agent/tools/RemoveBackground.py +108 -0
- package/image_generation_agent/tools/__init__.py +2 -0
- package/image_generation_agent/tools/__pycache__/CombineImages.cpython-312.pyc +0 -0
- package/image_generation_agent/tools/__pycache__/EditImages.cpython-312.pyc +0 -0
- package/image_generation_agent/tools/__pycache__/GenerateImages.cpython-312.pyc +0 -0
- package/image_generation_agent/tools/__pycache__/RemoveBackground.cpython-312.pyc +0 -0
- package/image_generation_agent/tools/utils/__init__.py +2 -0
- package/image_generation_agent/tools/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- package/image_generation_agent/tools/utils/__pycache__/image_io.cpython-312.pyc +0 -0
- package/image_generation_agent/tools/utils/image_io.py +308 -0
- package/onboard.py +325 -0
- package/orchestrator/__init__.py +3 -0
- package/orchestrator/__pycache__/__init__.cpython-312.pyc +0 -0
- package/orchestrator/__pycache__/orchestrator.cpython-312.pyc +0 -0
- package/orchestrator/instructions.md +90 -0
- package/orchestrator/orchestrator.py +33 -0
- package/package.json +49 -0
- package/patches/__init__.py +1 -0
- package/patches/__pycache__/__init__.cpython-312.pyc +0 -0
- package/patches/__pycache__/patch_agency_swarm_dual_comms.cpython-312.pyc +0 -0
- package/patches/__pycache__/patch_file_attachment_refs.cpython-312.pyc +0 -0
- package/patches/__pycache__/patch_ipython_interpreter_composio.cpython-312.pyc +0 -0
- package/patches/dom-to-pptx+1.1.5.patch +133440 -0
- package/patches/patch_agency_swarm_dual_comms.py +199 -0
- package/patches/patch_file_attachment_refs.py +74 -0
- package/patches/patch_ipython_interpreter_composio.py +54 -0
- package/pyproject.toml +67 -0
- package/run.py +343 -0
- package/server.py +26 -0
- package/shared_instructions.md +119 -0
- package/shared_tools/CopyFile.py +68 -0
- package/shared_tools/ExecuteTool.py +184 -0
- package/shared_tools/FindTools.py +101 -0
- package/shared_tools/ManageConnections.py +43 -0
- package/shared_tools/SearchTools.py +44 -0
- package/shared_tools/__init__.py +7 -0
- package/shared_tools/__pycache__/CopyFile.cpython-312.pyc +0 -0
- package/shared_tools/__pycache__/ExecuteTool.cpython-312.pyc +0 -0
- package/shared_tools/__pycache__/FindTools.cpython-312.pyc +0 -0
- package/shared_tools/__pycache__/ManageConnections.cpython-312.pyc +0 -0
- package/shared_tools/__pycache__/SearchTools.cpython-312.pyc +0 -0
- package/shared_tools/__pycache__/__init__.cpython-312.pyc +0 -0
- package/slides_agent/.cursor/rules/slides-agent-workflow.mdc +9 -0
- package/slides_agent/__init__.py +1 -0
- package/slides_agent/__pycache__/__init__.cpython-312.pyc +0 -0
- package/slides_agent/__pycache__/slides_agent.cpython-312.pyc +0 -0
- package/slides_agent/instructions.md +298 -0
- package/slides_agent/pptx/SKILL.md +528 -0
- package/slides_agent/pptx/html2pptx.md +625 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/slides_agent/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/slides_agent/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/slides_agent/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/slides_agent/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/slides_agent/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/slides_agent/pptx/ooxml/schemas/mce/mc.xsd +75 -0
- package/slides_agent/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/slides_agent/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/slides_agent/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/slides_agent/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/slides_agent/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/slides_agent/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/slides_agent/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/slides_agent/pptx/ooxml/scripts/pack.py +169 -0
- package/slides_agent/pptx/ooxml/scripts/unpack.py +29 -0
- package/slides_agent/pptx/ooxml/scripts/validate.py +69 -0
- package/slides_agent/pptx/ooxml/scripts/validation/__init__.py +15 -0
- package/slides_agent/pptx/ooxml/scripts/validation/base.py +951 -0
- package/slides_agent/pptx/ooxml/scripts/validation/docx.py +274 -0
- package/slides_agent/pptx/ooxml/scripts/validation/pptx.py +315 -0
- package/slides_agent/pptx/ooxml/scripts/validation/redlining.py +279 -0
- package/slides_agent/pptx/ooxml.md +427 -0
- package/slides_agent/pptx/scripts/html2pptx.js +1092 -0
- package/slides_agent/pptx/scripts/inventory.py +1020 -0
- package/slides_agent/pptx/scripts/rearrange.py +231 -0
- package/slides_agent/pptx/scripts/replace.py +385 -0
- package/slides_agent/pptx/scripts/thumbnail.py +451 -0
- package/slides_agent/slides_agent.py +109 -0
- package/slides_agent/test_deck/_theme.css +92 -0
- package/slides_agent/test_deck/assets/placeholder.svg +11 -0
- package/slides_agent/test_deck/slide_01_title.html +10 -0
- package/slides_agent/test_deck/slide_02_image_split.html +23 -0
- package/slides_agent/test_deck/slide_03_kpi.html +21 -0
- package/slides_agent/tools/ApplyPptxTextReplacements.py +91 -0
- package/slides_agent/tools/BuildPptxFromHtmlSlides.py +221 -0
- package/slides_agent/tools/CheckSlide.py +218 -0
- package/slides_agent/tools/CheckSlideCanvasOverflow.py +221 -0
- package/slides_agent/tools/CreateImageMontage.py +261 -0
- package/slides_agent/tools/CreatePptxThumbnailGrid.py +168 -0
- package/slides_agent/tools/DeleteSlide.py +78 -0
- package/slides_agent/tools/DownloadImage.py +79 -0
- package/slides_agent/tools/EnsureRasterImage.py +157 -0
- package/slides_agent/tools/ExtractPptxTextInventory.py +104 -0
- package/slides_agent/tools/GenerateImage.py +189 -0
- package/slides_agent/tools/ImageSearch.py +127 -0
- package/slides_agent/tools/InsertNewSlides.py +393 -0
- package/slides_agent/tools/ManageTheme.py +112 -0
- package/slides_agent/tools/ModifySlide.py +563 -0
- package/slides_agent/tools/ReadSlide.py +26 -0
- package/slides_agent/tools/RearrangePptxSlidesFromTemplate.py +114 -0
- package/slides_agent/tools/RestoreSnapshot.py +89 -0
- package/slides_agent/tools/SlideScreenshot.py +66 -0
- package/slides_agent/tools/__init__.py +54 -0
- package/slides_agent/tools/__pycache__/ApplyPptxTextReplacements.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/BuildPptxFromHtmlSlides.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/CheckSlide.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/CheckSlideCanvasOverflow.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/CreateImageMontage.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/CreatePptxThumbnailGrid.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/DeleteSlide.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/DownloadImage.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/EnsureRasterImage.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/ExtractPptxTextInventory.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/GenerateImage.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/ImageSearch.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/InsertNewSlides.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/ManageTheme.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/ModifySlide.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/ReadSlide.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/RearrangePptxSlidesFromTemplate.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/RestoreSnapshot.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/SlideScreenshot.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/slide_file_utils.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/slide_html_utils.cpython-312.pyc +0 -0
- package/slides_agent/tools/__pycache__/template_registry.cpython-312.pyc +0 -0
- package/slides_agent/tools/deck_utils.py +31 -0
- package/slides_agent/tools/html2pptx_runner.js +1183 -0
- package/slides_agent/tools/html_writer_instructions.md +149 -0
- package/slides_agent/tools/slide_file_utils.py +108 -0
- package/slides_agent/tools/slide_html_utils.py +354 -0
- package/slides_agent/tools/template_registry.py +55 -0
- package/swarm.py +82 -0
- package/video_generation_agent/__init__.py +1 -0
- package/video_generation_agent/__pycache__/__init__.cpython-312.pyc +0 -0
- package/video_generation_agent/__pycache__/video_generation_agent.cpython-312.pyc +0 -0
- package/video_generation_agent/instructions.md +178 -0
- package/video_generation_agent/tools/AddSubtitles.py +425 -0
- package/video_generation_agent/tools/CombineImages.py +166 -0
- package/video_generation_agent/tools/CombineVideos.py +113 -0
- package/video_generation_agent/tools/EditAudio.py +297 -0
- package/video_generation_agent/tools/EditImage.py +144 -0
- package/video_generation_agent/tools/EditVideoContent.py +369 -0
- package/video_generation_agent/tools/GenerateImage.py +133 -0
- package/video_generation_agent/tools/GenerateVideo.py +556 -0
- package/video_generation_agent/tools/TrimVideo.py +233 -0
- package/video_generation_agent/tools/__init__.py +1 -0
- package/video_generation_agent/tools/__pycache__/AddSubtitles.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/__pycache__/CombineImages.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/__pycache__/CombineVideos.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/__pycache__/EditAudio.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/__pycache__/EditImage.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/__pycache__/EditVideoContent.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/__pycache__/GenerateImage.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/__pycache__/GenerateVideo.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/__pycache__/TrimVideo.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/utils/__init__.py +1 -0
- package/video_generation_agent/tools/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/utils/__pycache__/image_utils.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/utils/__pycache__/video_utils.cpython-312.pyc +0 -0
- package/video_generation_agent/tools/utils/image_utils.py +174 -0
- package/video_generation_agent/tools/utils/video_utils.py +522 -0
- package/video_generation_agent/video_generation_agent.py +26 -0
- package/virtual_assistant/__init__.py +1 -0
- package/virtual_assistant/__pycache__/__init__.cpython-312.pyc +0 -0
- package/virtual_assistant/__pycache__/virtual_assistant.cpython-312.pyc +0 -0
- package/virtual_assistant/instructions.md +206 -0
- package/virtual_assistant/tools/AddLabelToEmail.py +154 -0
- package/virtual_assistant/tools/CheckEventsForDate.py +218 -0
- package/virtual_assistant/tools/CheckUnreadSlackMessages.py +216 -0
- package/virtual_assistant/tools/CreateCalendarEvent.py +261 -0
- package/virtual_assistant/tools/DeleteCalendarEvent.py +137 -0
- package/virtual_assistant/tools/DeleteDraft.py +95 -0
- package/virtual_assistant/tools/DraftEmail.py +239 -0
- package/virtual_assistant/tools/EditFile.py +113 -0
- package/virtual_assistant/tools/FindEmails.py +330 -0
- package/virtual_assistant/tools/GetCurrentTime.py +69 -0
- package/virtual_assistant/tools/GetSlackUserInfo.py +117 -0
- package/virtual_assistant/tools/ListDirectory.py +113 -0
- package/virtual_assistant/tools/ListSkills.py +94 -0
- package/virtual_assistant/tools/ManageLabels.py +295 -0
- package/virtual_assistant/tools/ProductSearch.py +254 -0
- package/virtual_assistant/tools/ReadEmail.py +251 -0
- package/virtual_assistant/tools/ReadFile.py +108 -0
- package/virtual_assistant/tools/ReadSlackMessages.py +191 -0
- package/virtual_assistant/tools/RemoveLabelFromEmail.py +137 -0
- package/virtual_assistant/tools/RescheduleCalendarEvent.py +227 -0
- package/virtual_assistant/tools/ScholarSearch.py +216 -0
- package/virtual_assistant/tools/SendDraft.py +101 -0
- package/virtual_assistant/tools/SendSlackMessage.py +148 -0
- package/virtual_assistant/tools/WriteFile.py +95 -0
- package/virtual_assistant/tools/__init__.py +1 -0
- package/virtual_assistant/tools/__pycache__/AddLabelToEmail.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/CheckEventsForDate.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/CheckUnreadSlackMessages.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/CreateCalendarEvent.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/DeleteCalendarEvent.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/DeleteDraft.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/DraftEmail.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/EditFile.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/FindEmails.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/GetCurrentTime.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/GetSlackUserInfo.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/ListDirectory.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/ListSkills.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/ManageLabels.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/ProductSearch.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/ReadEmail.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/ReadFile.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/ReadSlackMessages.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/RemoveLabelFromEmail.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/RescheduleCalendarEvent.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/ScholarSearch.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/SendDraft.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/SendSlackMessage.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/WriteFile.cpython-312.pyc +0 -0
- package/virtual_assistant/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- package/virtual_assistant/virtual_assistant.py +52 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Utilities for managing document project directories."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_mnt_dir() -> Path:
    """Return the root ``mnt`` directory under which project folders live.

    If ``/.dockerenv`` exists as a regular file, the fixed path ``/app/mnt``
    is returned. Otherwise the result is the ``mnt`` directory located in the
    fourth ancestor directory of this module's file.
    """
    if Path("/.dockerenv").is_file():
        return Path("/app/mnt")
    anchor = Path(__file__).parents[3]
    return anchor / "mnt"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_project_dir(project_name: str) -> Path:
    """Return the ``documents`` folder of *project_name* inside the mnt directory."""
    mnt_root = get_mnt_dir()
    return mnt_root.joinpath(project_name, "documents")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def next_docx_version(desired: Path) -> Path:
    """Return ``desired`` if it doesn't exist, otherwise the next free _vN path.

    Any existing ``_vN`` suffix is stripped before searching, so passing
    ``report_v2.docx`` when that file already exists yields ``report_v3.docx``,
    not ``report_v2_v2.docx``.

    The search starts just above the version carried by ``desired`` (or at 2
    when it has no suffix), so the result is never a lower version than the
    file it was derived from: an existing ``report_v5.docx`` yields
    ``report_v6.docx`` even if ``report_v2.docx`` happens to be free.

    Args:
        desired: The path the caller would like to write to.

    Returns:
        ``desired`` itself when nothing exists at that path, otherwise a
        sibling path ``<base>_v<N><suffix>`` that does not exist yet.
    """
    if not desired.exists():
        return desired

    suffix_match = re.search(r"_v(\d+)$", desired.stem)
    if suffix_match:
        base = desired.stem[: suffix_match.start()]
        # Never hand out a version at or below the one we were given.
        n = max(2, int(suffix_match.group(1)) + 1)
    else:
        base = desired.stem
        n = 2

    while True:
        candidate = desired.parent / f"{base}_v{n}{desired.suffix}"
        if not candidate.exists():
            return candidate
        n += 1
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
from typing import Dict, Optional
|
|
2
|
+
|
|
3
|
+
from bs4.element import Comment, NavigableString, Tag
|
|
4
|
+
from docx.oxml.ns import qn
|
|
5
|
+
from docx.shared import Pt
|
|
6
|
+
|
|
7
|
+
from .html_docx_constants import _BLOCK_TAGS, _INHERITABLE_STYLES, _INLINE_TAGS
|
|
8
|
+
from .html_docx_css import _parse_background_color, _parse_border_left, _parse_padding
|
|
9
|
+
from .html_docx_images import _add_image_run, _add_svg_run
|
|
10
|
+
from .html_docx_paragraphs import (
|
|
11
|
+
_apply_paragraph_style,
|
|
12
|
+
_apply_run_style,
|
|
13
|
+
_apply_ul_margin_indent,
|
|
14
|
+
_resolve_list_indent_pt,
|
|
15
|
+
)
|
|
16
|
+
from .html_docx_selectors import _compute_style_map
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Tags that _handle_block drops outright: it returns as soon as it sees one of
# these, so neither the element nor anything nested inside it is rendered.
_SKIP_TAGS = {"style", "script", "head", "meta", "link", "title", "noscript"}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _handle_block(node, target, css_rules, parent_style: Dict[str, str], table_auto_widths) -> None:
    """Render one parsed HTML node (and its subtree) into a docx container.

    Dispatch order:

    1. Comments and tags listed in ``_SKIP_TAGS`` are ignored.
    2. A non-blank text node becomes a run in a (possibly reused) paragraph.
    3. ``img`` / ``svg`` become picture runs; ``table`` is delegated to
       ``_handle_table``.
    4. Tags in ``_BLOCK_TAGS`` are either wrapped in a table-based container,
       recursed into when they hold further block children, or emitted as a
       single paragraph whose inline content comes from ``_add_inline_runs``.
    5. Tags in ``_INLINE_TAGS`` get their own paragraph.
    6. Any other tag is transparent: its children are handled in place.

    Args:
        node: The BeautifulSoup node to render.
        target: Object to render into; normalised through
            ``_ensure_container`` first.
        css_rules: Parsed stylesheet rules, forwarded to ``_compute_style_map``.
        parent_style: Style map in effect for the parent element.
        table_auto_widths: Forwarded unchanged to the table helpers.
    """
    target_container = _ensure_container(target)

    # Comment subclasses NavigableString, so it has to be ruled out first.
    if isinstance(node, Comment):
        return
    if isinstance(node, Tag) and node.name in _SKIP_TAGS:
        return

    if isinstance(node, NavigableString):
        text = node.strip()
        if not text:
            return
        paragraph = _ensure_paragraph(target_container)
        _apply_paragraph_style(paragraph, parent_style)
        run = paragraph.add_run(_transform_text(_normalize_text(text), parent_style))
        _apply_run_style(run, parent_style)
        return

    if not isinstance(node, Tag):
        return

    if node.name == "img":
        paragraph = _ensure_paragraph(target_container)
        _apply_paragraph_style(paragraph, parent_style)
        _add_image_run(paragraph, node, parent_style)
        return

    if node.name == "svg":
        paragraph = _ensure_paragraph(target_container)
        _apply_paragraph_style(paragraph, parent_style)
        _add_svg_run(paragraph, node, parent_style)
        return

    if node.name == "table":
        from .html_docx_tables import _handle_table

        _handle_table(node, target_container, css_rules, parent_style, table_auto_widths)
        return

    if node.name in _BLOCK_TAGS:
        current_style = _merge_styles(parent_style, _compute_style_map(node, css_rules))

        if _should_wrap_container(node, current_style):
            container = _add_container(
                target_container, node, current_style, css_rules, table_auto_widths
            )
            # For a two-column flex layout _add_container hands back the table
            # itself and has already rendered both element children into its
            # cells. Walking those elements again would duplicate them in the
            # first cell, so in that case only loose text nodes (which
            # _add_container does not touch) are still pending.
            children_already_rendered = not hasattr(container, "add_paragraph")
            for child in node.children:
                if children_already_rendered and isinstance(child, Tag):
                    continue
                _handle_block(child, container, css_rules, current_style, table_auto_widths)
            return

        if _has_block_children(node):
            for child in node.children:
                _handle_block(child, target_container, css_rules, current_style, table_auto_widths)
            return

        paragraph = _ensure_paragraph(
            target_container,
            style="List Bullet" if node.name == "li" else None,
        )
        _apply_paragraph_style(paragraph, current_style)
        if node.name == "li":
            paragraph.paragraph_format.space_before = Pt(0)
            paragraph.paragraph_format.space_after = Pt(0)
            # Apply the parent <ul> margin-left as additional left indent so the
            # list is offset the same amount it is in the HTML rendering.
            _apply_ul_margin_indent(paragraph, _resolve_list_indent_pt(parent_style))
        _add_inline_runs(node, paragraph, css_rules, current_style)
        return

    if node.name in _INLINE_TAGS:
        paragraph = _ensure_paragraph(target_container)
        _add_inline_runs(node, paragraph, css_rules, parent_style)
        return

    # Unknown wrapper element: render its children as if it were not there.
    for child in node.children:
        _handle_block(child, target_container, css_rules, parent_style, table_auto_widths)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _add_inline_runs(node, paragraph, css_rules, parent_style: Dict[str, str]) -> None:
    """Append the inline content of ``node`` to ``paragraph`` as runs.

    Text nodes become styled runs, ``img`` / ``svg`` become picture runs and
    ``br`` becomes a line break. Any other tag contributes its own computed
    style (plus the implicit bold / italic / underline of ``strong``/``b``,
    ``em``/``i`` and ``u``) and is then recursed into.

    Whitespace is collapsed across node boundaries: a leading space, or a
    whitespace-only text node, is dropped when the paragraph is still empty
    or already ends in whitespace, so source indentation does not turn into
    a spurious leading or doubled space.

    Args:
        node: The BeautifulSoup node whose content is appended.
        paragraph: The docx paragraph receiving the runs.
        css_rules: Parsed stylesheet rules, forwarded to ``_compute_style_map``.
        parent_style: Style map in effect for the parent element.
    """
    if isinstance(node, Comment):
        return

    if isinstance(node, NavigableString):
        raw_text = str(node)
        existing = paragraph.text
        # True when another space here would be leading or doubled.
        at_boundary = not existing or existing[-1].isspace()
        if not raw_text.strip():
            # Whitespace between inline elements still separates words,
            # but only when there is something before it to separate from.
            if not at_boundary:
                paragraph.add_run(" ")
            return
        normalized = _normalize_inline_text(raw_text)
        if at_boundary and normalized.startswith(" "):
            normalized = normalized.lstrip()
        if not normalized:
            return
        run = paragraph.add_run(_transform_text(normalized, parent_style))
        _apply_run_style(run, parent_style)
        return

    if not isinstance(node, Tag):
        return

    if node.name == "img":
        _add_image_run(paragraph, node, parent_style)
        return

    if node.name == "svg":
        _add_svg_run(paragraph, node, parent_style)
        return

    if node.name == "br":
        paragraph.add_run().add_break()
        return

    current_style = _merge_styles(parent_style, _compute_style_map(node, css_rules))

    if node.name in _INLINE_TAGS:
        # Semantic tags imply formatting even without any CSS rule.
        if node.name in {"strong", "b"}:
            current_style = {**current_style, "font-weight": "bold"}
        if node.name in {"em", "i"}:
            current_style = {**current_style, "font-style": "italic"}
        if node.name == "u":
            current_style = {**current_style, "text-decoration": "underline"}

    for child in node.children:
        _add_inline_runs(child, paragraph, css_rules, current_style)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _transform_text(text: str, style_map: Dict[str, str]) -> str:
|
|
145
|
+
transform = style_map.get("text-transform", "").strip().lower()
|
|
146
|
+
if transform == "uppercase":
|
|
147
|
+
return text.upper()
|
|
148
|
+
if transform == "lowercase":
|
|
149
|
+
return text.lower()
|
|
150
|
+
if transform == "capitalize":
|
|
151
|
+
return " ".join(word.capitalize() for word in text.split())
|
|
152
|
+
return text
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _has_block_children(node: Tag) -> bool:
    """Return True if any direct child of ``node`` is a tag in ``_BLOCK_TAGS``."""
    return any(
        isinstance(candidate, Tag) and candidate.name in _BLOCK_TAGS
        for candidate in node.children
    )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _merge_styles(parent_style: Dict[str, str], own_style: Dict[str, str]) -> Dict[str, str]:
    """Combine an element's own style with what it inherits from its parent.

    Only the parent properties named in ``_INHERITABLE_STYLES`` are carried
    over; every property in ``own_style`` is kept and wins on conflict.
    Neither input mapping is modified.
    """
    inherited: Dict[str, str] = {
        name: parent_style[name] for name in _INHERITABLE_STYLES if name in parent_style
    }
    inherited.update(own_style)
    return inherited
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _should_wrap_container(node: Tag, style_map: Dict[str, str]) -> bool:
    """Decide whether a block element needs a wrapping table container.

    Returns True when the element is a flex container, has a parseable
    background colour or left border, has any positive padding component, or
    is a ``header`` / ``section`` element.
    """
    display = style_map.get("display", "").strip().lower()
    if display == "flex":
        return True

    if _parse_background_color(style_map) is not None:
        return True
    if _parse_border_left(style_map) is not None:
        return True

    padding = _parse_padding(style_map.get("padding", ""))
    if padding and any(side > 0 for side in padding):
        return True

    return node.name in {"header", "section"}
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _add_container(target, node: Tag, style_map: Dict[str, str], css_rules, table_auto_widths):
    """Create the table that visually wraps a styled block element.

    A flex element with exactly two element children becomes a 1x2 table: the
    contents of the first child are rendered into the left cell and those of
    the second into the right cell, and the table itself is returned.

    In every other case a 1x1 table is created and its single cell is
    returned, still empty, for the caller to fill.
    """
    from .html_docx_tables import _apply_cell_styles, _apply_table_styles

    is_flex = style_map.get("display", "").strip().lower() == "flex"
    if is_flex:
        element_children = [item for item in node.children if isinstance(item, Tag)]
        if len(element_children) == 2:
            two_column = target.add_table(rows=1, cols=2)
            _apply_table_styles(two_column, style_map)
            cells = (two_column.cell(0, 0), two_column.cell(0, 1))
            for cell in cells:
                _apply_cell_styles(cell, style_map)
            # Left column first, then right, each filled from its own element.
            for column, cell in zip(element_children, cells):
                for grandchild in column.children:
                    _handle_block(grandchild, cell, css_rules, style_map, table_auto_widths)
            return two_column

    wrapper = target.add_table(rows=1, cols=1)
    _apply_table_styles(wrapper, style_map)
    only_cell = wrapper.cell(0, 0)
    _apply_cell_styles(only_cell, style_map)
    return only_cell
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _ensure_container(target):
|
|
213
|
+
if hasattr(target, "add_paragraph"):
|
|
214
|
+
return target
|
|
215
|
+
if hasattr(target, "cell"):
|
|
216
|
+
return target.cell(0, 0)
|
|
217
|
+
return target
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _ensure_paragraph(container, style: Optional[str] = None):
    """Return a paragraph in ``container`` that is ready to receive content.

    The container's last paragraph is reused when it has no text, no runs and
    passes ``_is_pristine_paragraph``; ``style`` is assigned to it when given.
    Otherwise a new paragraph is appended with ``style``.

    Args:
        container: Object with ``add_paragraph`` and, optionally, ``paragraphs``.
        style: Paragraph style to apply, or ``None`` to leave it untouched.

    Returns:
        The reused or newly added paragraph.
    """
    existing = getattr(container, "paragraphs", None)
    if existing:
        tail = existing[-1]
        reusable = not tail.text and not tail.runs and _is_pristine_paragraph(tail)
        if reusable:
            if style is not None:
                tail.style = style
            return tail
    return container.add_paragraph(style=style)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _is_pristine_paragraph(paragraph) -> bool:
    """Tell whether ``paragraph`` is still an unformatted placeholder.

    Paragraphs that already carry visual formatting (borders, exact spacing,
    shading) from an earlier block element must not be reused, because the
    next element would overwrite that formatting. A paragraph counts as
    pristine when it has no ``w:pPr`` element at all, or when every child of
    its ``w:pPr`` is a ``w:pStyle`` or ``w:rPr`` element.
    """
    properties = paragraph._p.find(qn("w:pPr"))
    if properties is None:
        return True
    harmless = (qn("w:pStyle"), qn("w:rPr"))
    for element in properties:
        if element.tag not in harmless:
            return False
    return True
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _normalize_text(text: str) -> str:
    """Collapse every run of whitespace into one space and trim both ends."""
    words = text.split()
    return " ".join(words)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _normalize_inline_text(text: str) -> str:
    """Collapse whitespace in inline text while keeping edge spacing.

    Non-breaking spaces are first turned into regular spaces. Interior runs
    of whitespace become a single space. If the input started or ended with
    whitespace, exactly one space is kept on that side so neighbouring inline
    fragments stay separated. Whitespace-only input yields ``" "`` and empty
    input yields ``""``.
    """
    text = text.replace("\xa0", " ")
    if text == "":
        return ""

    starts_blank = text[0].isspace()
    ends_blank = text[-1].isspace()
    words = text.split()
    if not words:
        # Only whitespace: keep a single separating space.
        return " " if (starts_blank or ends_blank) else ""

    prefix = " " if starts_blank else ""
    suffix = " " if ends_blank else ""
    return prefix + " ".join(words) + suffix
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# HTML tag names classified as block-level (per the constant's name; the code
# that consumes this set lives in other modules).
_BLOCK_TAGS = {
    # headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    # paragraphs and list items
    "p", "li",
    # generic containers
    "div", "section", "header",
    # lists and tables
    "ul", "ol", "table",
}

# HTML tag names classified as inline.
_INLINE_TAGS = {
    "span", "a",
    "strong", "em", "b", "i", "u",
    "small", "sup", "sub",
}

# CSS property names listed as inheritable.
_INHERITABLE_STYLES = {
    "font-family", "font-size", "font-weight", "font-style",
    "color",
    "text-decoration", "text-transform", "text-align",
    "line-height",
}
|
|
29
|
+
|
|
30
|
+
# Lowercase color keyword -> (red, green, blue) tuple with 0-255 channels.
# Both the "gray" and "grey" spellings are present for each gray shade.
_NAMED_COLORS = {
    "white": (255, 255, 255),
    "black": (0, 0, 0),
    "gray": (128, 128, 128),
    "grey": (128, 128, 128),
    "lightgray": (211, 211, 211),
    "lightgrey": (211, 211, 211),
    "darkgray": (169, 169, 169),
    "darkgrey": (169, 169, 169),
    "red": (255, 0, 0),
    "green": (0, 128, 0),
    "blue": (0, 0, 255),
    "navy": (0, 0, 128),
    "purple": (128, 0, 128),
    "teal": (0, 128, 128),
    "orange": (255, 165, 0),
    "yellow": (255, 255, 0),
}
|
|
48
|
+
|
|
49
|
+
# Scale factors for padding and border sizes.
# NOTE(review): the call sites are not in this module — presumably applied
# when CSS padding/border values are converted for Word; confirm there.
_PADDING_SCALE = 0.7
_BORDER_SCALE = 1.5

# Lowercase page-size name -> (width, height) in points, portrait orientation.
_PAGE_SIZES_PT = {
    "a4": (595.3, 841.9),
    "letter": (612.0, 792.0),
}

# Base list indentation values in twips (per the constant names).
_LIST_BASE_LEFT_TWIPS = 360
_LIST_BASE_HANGING_TWIPS = 360  # matches ilvl-0 hanging in the default Word template abstractNum
|
|
59
|
+
|
|
60
|
+
_UA_RESET_STYLE = """<style>
|
|
61
|
+
/* UA reset to neutralize browser defaults */
|
|
62
|
+
* {
|
|
63
|
+
margin: 0;
|
|
64
|
+
padding: 0;
|
|
65
|
+
border: 0;
|
|
66
|
+
font: inherit;
|
|
67
|
+
vertical-align: baseline;
|
|
68
|
+
}
|
|
69
|
+
h1, h2, h3, h4, h5, h6 {
|
|
70
|
+
font-weight: normal;
|
|
71
|
+
}
|
|
72
|
+
p, ul, ol, li {
|
|
73
|
+
margin: 0;
|
|
74
|
+
padding: 0;
|
|
75
|
+
}
|
|
76
|
+
ul { list-style: disc; padding-left: 20px; }
|
|
77
|
+
ol { list-style: decimal; padding-left: 20px; }
|
|
78
|
+
</style>"""
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from bs4 import BeautifulSoup
|
|
5
|
+
from bs4.element import Comment, NavigableString, Tag
|
|
6
|
+
from docx import Document
|
|
7
|
+
from docx.oxml import OxmlElement
|
|
8
|
+
from docx.oxml.ns import qn
|
|
9
|
+
from docx.shared import Pt
|
|
10
|
+
|
|
11
|
+
from .html_docx_blocks import _handle_block
|
|
12
|
+
from .html_docx_page import (
|
|
13
|
+
_apply_page_background,
|
|
14
|
+
_apply_page_settings,
|
|
15
|
+
_ensure_display_background_shape,
|
|
16
|
+
)
|
|
17
|
+
from .html_docx_playwright import _annotate_tables, _compute_table_auto_widths
|
|
18
|
+
from .html_docx_selectors import _compute_style_map, _extract_css_rules
|
|
19
|
+
from .html_docx_shared import _remove_trailing_empty_paragraph
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def html_to_docx(html_content: str, output_path: Path) -> None:
    """Convert an HTML string into a DOCX file saved at ``output_path``.

    A new ``Document`` is created, page-level settings and background are
    applied, and each child of the content root is handed to
    ``_handle_block`` with the document as the target. The content root is
    the cell returned by ``_unwrap_layout_table`` when the body is a single
    layout-wrapper table, otherwise the body itself.

    Args:
        html_content: HTML markup to convert.
        output_path: Destination file; passed to ``doc.save`` as a string.
    """
    doc = Document()
    # Zero the Normal style's space before/after up front.
    _set_default_paragraph_style(doc)
    _apply_page_settings(doc, html_content)
    # NOTE(review): presumably removes the empty paragraph a blank document
    # starts with — helper is defined in html_docx_shared; confirm there.
    _remove_trailing_empty_paragraph(doc)
    soup = BeautifulSoup(html_content, "html.parser")
    # Tables are annotated first; the annotated soup feeds the width computation.
    table_auto_widths = _compute_table_auto_widths(_annotate_tables(soup))
    css_rules = _extract_css_rules(soup)

    # Fall back to the whole parse tree when the markup has no <body> element.
    body = soup.body or soup
    body_style = _compute_style_map(body, css_rules)
    _apply_page_background(doc, body_style)
    _ensure_display_background_shape(doc)
    # Skip the outer layout table, if present, so its cell content is emitted
    # directly at document level.
    content_root = _unwrap_layout_table(body) or body
    for child in content_root.children:
        _handle_block(child, doc, css_rules, body_style, table_auto_widths)

    # When the document body starts with a table, Word renders an implicit gap above it.
    # Inserting a 1pt-height anchor paragraph before the first table eliminates that gap.
    _insert_top_anchor_paragraph(doc)

    doc.save(str(output_path))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Tag names that _unwrap_layout_table ignores when it counts the body's
# content children.
_SKIP_TAGS = {
    "style",
    "script",
    "head",
    "meta",
    "link",
    "title",
    "noscript",
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _unwrap_layout_table(body: Tag) -> Optional[Tag]:
    """Return the lone content cell of a body-wrapping layout table, if any.

    Many generated documents wrap the whole body in a single-row, single-cell
    table whose only job is to constrain and center the content width. Kept
    as is, the entire DOCX would be one giant table cell, which Word
    paginates poorly. When that wrapper is detected, its cell is returned so
    the caller can process the cell's children at document level instead.

    Detection requires all of the following:
      * ``body`` has exactly one content child, ignoring comments,
        whitespace-only text and tags listed in ``_SKIP_TAGS``;
      * that child is a ``<table>`` accepted by ``_is_layout_wrapper_table``;
      * the table has exactly one direct row, with exactly one direct
        ``td``/``th`` cell.

    Returns:
        The wrapper's single cell, or ``None`` when the pattern does not match.
    """

    def _is_noise(item) -> bool:
        # Comments, blank text and non-rendered tags do not count as content.
        if isinstance(item, Comment):
            return True
        if isinstance(item, NavigableString):
            return not item.strip()
        return isinstance(item, Tag) and item.name in _SKIP_TAGS

    meaningful = [item for item in body.children if not _is_noise(item)]
    if len(meaningful) != 1:
        return None

    (wrapper,) = meaningful
    is_wrapper_table = (
        isinstance(wrapper, Tag)
        and wrapper.name == "table"
        and _is_layout_wrapper_table(wrapper)
    )
    if not is_wrapper_table:
        return None

    rows = _direct_rows(wrapper)
    if len(rows) == 1:
        cells = rows[0].find_all(["td", "th"], recursive=False)
        if len(cells) == 1:
            return cells[0]
    return None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _is_layout_wrapper_table(table: Tag) -> bool:
    """Check whether ``table``'s inline style marks it as the layout wrapper.

    The ``style`` attribute is lowercased and stripped of space characters,
    then must contain all three of ``width:547pt``, ``margin-left:auto`` and
    ``margin-right:auto``. This is a plain substring test, so the shorthand
    ``margin:auto`` does not match. A missing or empty ``style`` yields
    ``False``.
    """
    raw_style = table.get("style") or ""
    compact = raw_style.replace(" ", "").lower()
    required = ("width:547pt", "margin-left:auto", "margin-right:auto")
    return all(token in compact for token in required)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _direct_rows(table: Tag) -> list[Tag]:
    """Collect the rows that belong to ``table`` itself, not to nested tables.

    ``<tr>`` elements that are direct children of the table win. If there are
    none, the direct ``<tr>`` children of the table's direct ``<thead>``,
    ``<tbody>`` and ``<tfoot>`` sections are gathered in document order.
    """
    own_rows = table.find_all("tr", recursive=False)
    if own_rows:
        return own_rows

    sections = table.find_all(["thead", "tbody", "tfoot"], recursive=False)
    return [
        row
        for section in sections
        for row in section.find_all("tr", recursive=False)
    ]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _insert_top_anchor_paragraph(doc: Document) -> None:
    """Put a near-zero-height paragraph in front of a leading body table.

    Nothing happens unless the first element of the document body is a
    ``w:tbl``. Otherwise an empty ``w:p`` is inserted at index 0 whose
    properties set zero spacing before/after, an exact 1pt line height,
    contextual spacing, and a 1pt paragraph-mark font size.
    """
    body_el = doc._body._body
    if not len(body_el) or body_el[0].tag != qn("w:tbl"):
        return

    spacing = OxmlElement("w:spacing")
    spacing_attrs = (
        ("w:before", "0"),
        ("w:after", "0"),
        ("w:line", "20"),  # 1pt exact
        ("w:lineRule", "exact"),
    )
    for attr, value in spacing_attrs:
        spacing.set(qn(attr), value)

    font_size = OxmlElement("w:sz")
    font_size.set(qn("w:val"), "2")  # 1pt font
    run_props = OxmlElement("w:rPr")
    run_props.append(font_size)

    # Children are appended in the order spacing, contextualSpacing, rPr.
    para_props = OxmlElement("w:pPr")
    for part in (spacing, OxmlElement("w:contextualSpacing"), run_props):
        para_props.append(part)

    anchor = OxmlElement("w:p")
    anchor.append(para_props)
    body_el.insert(0, anchor)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _set_default_paragraph_style(doc: Document) -> None:
    """Zero the space before and after paragraphs in the "Normal" style.

    Does nothing when the document has no style named "Normal".
    """
    styles = doc.styles
    try:
        base_style = styles["Normal"]
    except KeyError:
        # No "Normal" style to adjust.
        return

    fmt = base_style.paragraph_format
    for side in ("space_before", "space_after"):
        setattr(fmt, side, Pt(0))
|