ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
parrot/tools/msword.py
ADDED
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MS Word Tool migrated to use AbstractDocumentTool framework.
|
|
3
|
+
"""
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
|
5
|
+
import re
|
|
6
|
+
import tempfile
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import io
|
|
10
|
+
from urllib.parse import urlparse
|
|
11
|
+
import aiohttp
|
|
12
|
+
import aiofiles
|
|
13
|
+
from docx import Document
|
|
14
|
+
from docx.shared import Inches, Pt
|
|
15
|
+
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
16
|
+
from docx.enum.style import WD_STYLE_TYPE
|
|
17
|
+
from jinja2 import Environment, FileSystemLoader
|
|
18
|
+
from pydantic import BaseModel, Field, field_validator
|
|
19
|
+
import mammoth
|
|
20
|
+
import markdown
|
|
21
|
+
from bs4 import BeautifulSoup, NavigableString
|
|
22
|
+
from markdownify import markdownify as md
|
|
23
|
+
from .document import AbstractDocumentTool, DocumentGenerationArgs
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MSWordArgs(DocumentGenerationArgs):
    """Input schema for MS Word document generation.

    Adds optional Word-specific settings on top of the shared document
    generation arguments: an HTML template and its variables, a base DOCX
    template, a style configuration and page margins. Every field defaults
    to ``None``.
    """

    # HTML template rendered before the content is converted.
    template_name: Optional[str] = Field(
        default=None,
        description="Name of the HTML template (e.g., 'report.html') to render before conversion",
    )
    # Values handed to the HTML template at render time.
    template_vars: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Variables to pass to the HTML template (e.g., title, author, date)",
    )
    # Existing DOCX file used as the starting document.
    docx_template: Optional[str] = Field(
        default=None,
        description="Path to a DOCX template file to use as base document",
    )
    # Free-form styling options.
    style_config: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Custom styling configuration for the document",
    )
    # Margins keyed by side name, expressed in inches.
    page_margins: Optional[Dict[str, float]] = Field(
        default=None,
        description="Page margins in inches (top, bottom, left, right)",
    )

    @field_validator('template_name')
    @classmethod
    def validate_template_name(cls, v):
        """Append ``.html`` to a non-empty template name that lacks the suffix."""
        # Empty/None values and already-suffixed names pass through untouched.
        if not v or v.endswith('.html'):
            return v
        return f"{v}.html"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class MSWordTool(AbstractDocumentTool):
|
|
59
|
+
"""
|
|
60
|
+
Microsoft Word Document Generation Tool.
|
|
61
|
+
|
|
62
|
+
This tool converts text content (including Markdown and HTML) into professionally
|
|
63
|
+
formatted Word documents (.docx). It supports custom templates, styling, and
|
|
64
|
+
advanced document formatting features.
|
|
65
|
+
|
|
66
|
+
Features:
|
|
67
|
+
- Markdown to Word conversion with proper formatting
|
|
68
|
+
- HTML to Word conversion support
|
|
69
|
+
- Custom DOCX template support
|
|
70
|
+
- Jinja2 HTML template processing
|
|
71
|
+
- Configurable styling and page setup
|
|
72
|
+
- Table, list, and heading support
|
|
73
|
+
- Professional document formatting
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
name = "msword_generator"
|
|
77
|
+
description = (
|
|
78
|
+
"Generate Microsoft Word documents from text, Markdown, or HTML content. "
|
|
79
|
+
"Supports custom templates, styling, and professional document formatting. "
|
|
80
|
+
"Perfect for creating reports, documentation, and formatted documents."
|
|
81
|
+
)
|
|
82
|
+
args_schema = MSWordArgs
|
|
83
|
+
|
|
84
|
+
# Document type configuration
|
|
85
|
+
document_type = "document"
|
|
86
|
+
default_extension = "docx"
|
|
87
|
+
supported_extensions = [".docx", ".dotx"]
|
|
88
|
+
|
|
89
|
+
def __init__(
    self,
    templates_dir: Optional[Path] = None,
    default_html_template: Optional[str] = None,
    **kwargs
):
    """
    Set up the tool and, when a templates directory is available, the
    Jinja2 environment used for HTML templates.

    Args:
        templates_dir: Directory containing HTML and DOCX templates
        default_html_template: Default HTML template for content processing
        **kwargs: Additional arguments for AbstractDocumentTool
    """
    super().__init__(templates_dir=templates_dir, **kwargs)
    self.default_html_template = default_html_template

    # `self.templates_dir` is read back from the instance after the base
    # initializer ran (presumably set there -- confirm in the base class).
    # Without a directory there is nothing to load templates from, so the
    # environment is left as None.
    self.html_env = (
        Environment(
            loader=FileSystemLoader(str(self.templates_dir)),
            autoescape=True,
        )
        if self.templates_dir
        else None
    )
|
|
115
|
+
|
|
116
|
+
def _detect_content_type(self, text: str) -> str:
|
|
117
|
+
"""Detect if content is HTML, Markdown, or plain text."""
|
|
118
|
+
text_stripped = text.strip()
|
|
119
|
+
|
|
120
|
+
# Simple HTML detection
|
|
121
|
+
if (text_stripped.startswith('<') and text_stripped.endswith('>')) or \
|
|
122
|
+
any(tag in text_stripped.lower() for tag in ['<html', '<div', '<p', '<h1']):
|
|
123
|
+
return 'html'
|
|
124
|
+
|
|
125
|
+
# Markdown detection
|
|
126
|
+
markdown_patterns = [
|
|
127
|
+
r'^#{1,6}\s', # Headers
|
|
128
|
+
r'^\*\s', # Bullet points
|
|
129
|
+
r'^\d+\.\s', # Numbered lists
|
|
130
|
+
r'\*\*.*?\*\*', # Bold
|
|
131
|
+
r'\*.*?\*', # Italic
|
|
132
|
+
r'`.*?`', # Code
|
|
133
|
+
r'\[.*?\]\(.*?\)', # Links
|
|
134
|
+
]
|
|
135
|
+
|
|
136
|
+
for pattern in markdown_patterns:
|
|
137
|
+
if re.search(pattern, text_stripped, re.MULTILINE):
|
|
138
|
+
return 'markdown'
|
|
139
|
+
|
|
140
|
+
return 'markdown' # Default to markdown for processing
|
|
141
|
+
|
|
142
|
+
def _render_html_template(
|
|
143
|
+
self,
|
|
144
|
+
content: str,
|
|
145
|
+
template_name: Optional[str],
|
|
146
|
+
template_vars: Optional[Dict[str, Any]]
|
|
147
|
+
) -> str:
|
|
148
|
+
"""Render content through Jinja2 HTML template if provided."""
|
|
149
|
+
if not template_name or not self.html_env:
|
|
150
|
+
return content
|
|
151
|
+
|
|
152
|
+
try:
|
|
153
|
+
template = self.html_env.get_template(template_name)
|
|
154
|
+
vars_dict = template_vars or {}
|
|
155
|
+
|
|
156
|
+
# Add default variables
|
|
157
|
+
vars_dict.setdefault('content', content)
|
|
158
|
+
vars_dict.setdefault('date', self._get_current_date())
|
|
159
|
+
vars_dict.setdefault('timestamp', self._get_current_timestamp())
|
|
160
|
+
|
|
161
|
+
rendered = template.render(**vars_dict)
|
|
162
|
+
self.logger.info(
|
|
163
|
+
f"Rendered content through HTML template: {template_name}"
|
|
164
|
+
)
|
|
165
|
+
return rendered
|
|
166
|
+
|
|
167
|
+
except Exception as e:
|
|
168
|
+
self.logger.error(f"HTML template rendering failed: {e}")
|
|
169
|
+
return content
|
|
170
|
+
|
|
171
|
+
def _preprocess_markdown(self, text: str) -> str:
|
|
172
|
+
"""Preprocess markdown to handle common issues."""
|
|
173
|
+
# Replace placeholder variables with empty strings
|
|
174
|
+
text = re.sub(r'\{[a-zA-Z0-9_]+\}', '', text)
|
|
175
|
+
|
|
176
|
+
# Handle f-strings that weren't evaluated
|
|
177
|
+
text = re.sub(r'f"""(.*?)"""', r'\1', text, flags=re.DOTALL)
|
|
178
|
+
text = re.sub(r"f'''(.*?)'''", r'\1', text, flags=re.DOTALL)
|
|
179
|
+
|
|
180
|
+
# Remove triple backticks and language indicators
|
|
181
|
+
text = re.sub(r'```[a-zA-Z]*\n', '', text)
|
|
182
|
+
text = re.sub(r'```', '', text)
|
|
183
|
+
|
|
184
|
+
# Fix heading issues (ensure space after #)
|
|
185
|
+
text = re.sub(r'(#+)([^ \n])', r'\1 \2', text)
|
|
186
|
+
|
|
187
|
+
# Fix escaped newlines if any
|
|
188
|
+
text = text.replace('\\n', '\n')
|
|
189
|
+
|
|
190
|
+
return text
|
|
191
|
+
|
|
192
|
+
def _markdown_to_html(self, markdown_text: str) -> str:
    """Render Markdown as HTML, falling back to plain paragraphs on error.

    Args:
        markdown_text: Markdown source.

    Returns:
        The HTML produced by the ``markdown`` package, or -- when the
        conversion raises -- one ``<p>`` element per blank-line-separated
        chunk with the newlines inside a chunk replaced by ``<br>``.
    """
    try:
        # 'toc' is deliberately not part of the extension list.
        return markdown.markdown(
            markdown_text,
            extensions=['extra', 'codehilite', 'tables'],
        )
    except Exception as e:
        self.logger.error(f"Markdown conversion failed: {e}")

    # Fallback: wrap every non-blank chunk in a paragraph.
    wrapped = []
    for chunk in markdown_text.split('\n\n'):
        if chunk.strip():
            wrapped.append('<p>' + chunk.replace('\n', '<br>') + '</p>')
    return '\n'.join(wrapped)
|
|
206
|
+
|
|
207
|
+
def _create_document(self, template_path: Optional[str] = None) -> Document:
    """Create or load the DOCX document that content is written into.

    Args:
        template_path: Optional DOCX template, resolved through
            ``self._get_template_path``.

    Returns:
        The document loaded from the template when it resolves to an
        existing file; otherwise a new document with the basic styles
        applied.
    """
    if template_path:
        template_file = self._get_template_path(template_path)
        if template_file and template_file.exists():
            self.logger.info(f"Loading DOCX template: {template_file}")
            return Document(str(template_file))

        # Keep the best-effort fallback, but make it visible: a template
        # was requested and is being replaced by a blank document.
        self.logger.warning(
            f"DOCX template not found: {template_path}; using a blank document"
        )

    # Create new document with basic styling
    doc = Document()
    self._setup_document_styles(doc)
    return doc
|
|
219
|
+
|
|
220
|
+
def _setup_document_styles(self, doc: Document) -> None:
    """Apply the default fonts to the Normal and Heading 1-6 styles.

    Styles missing from the document are skipped. Any error is logged and
    not propagated.

    Args:
        doc: Document whose styles are configured in place.
    """
    try:
        available = doc.styles

        # Body text: Calibri 11pt.
        if 'Normal' in available:
            body_font = available['Normal'].font
            body_font.name = 'Calibri'
            body_font.size = Pt(11)

        # Headings: Calibri, 2pt smaller per level (16pt at level 1 down
        # to 6pt at level 6).
        for level in range(1, 7):
            style_name = f'Heading {level}'
            if style_name not in available:
                continue
            heading_font = available[style_name].font
            heading_font.name = 'Calibri'
            heading_font.size = Pt(18 - level * 2)

        self.logger.debug("Document styles configured successfully")

    except Exception as e:
        self.logger.error(f"Style setup failed: {e}")
|
|
243
|
+
|
|
244
|
+
def _apply_page_margins(self, doc: Document, margins: Dict[str, float]) -> None:
    """Set the margins of the document's first section.

    Args:
        doc: Document whose first section is modified.
        margins: Mapping with any of the keys 'top', 'bottom', 'left' and
            'right'; each value is passed to ``Inches``. Missing keys leave
            the corresponding margin untouched.

    Any exception is logged and swallowed.
    """
    try:
        first_section = doc.sections[0]

        # Same order as the section attributes: top, bottom, left, right.
        for side in ('top', 'bottom', 'left', 'right'):
            if side in margins:
                setattr(first_section, f'{side}_margin', Inches(margins[side]))

        self.logger.debug(f"Applied page margins: {margins}")

    except Exception as exc:
        self.logger.error(f"Failed to apply page margins: {exc}")
|
|
262
|
+
|
|
263
|
+
def _html_to_docx(self, html_content: str, doc: Document) -> None:
    """Convert HTML content into paragraphs, headings, lists and tables of ``doc``.

    ``find_all`` returns every matching descendant, not only top-level
    ones, while the per-element handlers already render the full text of
    the element they receive. Blocks nested inside an element that has
    been rendered are therefore skipped, otherwise markup such as
    ``<li><p>text</p></li>`` or ``<div><p>text</p></div>`` would be written
    to the document twice.

    Args:
        html_content: HTML markup to convert.
        doc: Target document; it is modified in place.

    On any exception the error is logged and the raw ``html_content`` is
    appended as a single plain paragraph.
    """
    block_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'ul', 'ol', 'table', 'br']

    try:
        soup = BeautifulSoup(html_content, 'html.parser')

        # ids of the elements already handed to _process_html_element
        rendered = set()

        for element in soup.find_all(block_tags):
            # Already covered by an enclosing block that was rendered.
            if any(id(ancestor) in rendered for ancestor in element.parents):
                continue

            if element.name == 'div':
                # A div holding nothing but block elements (and whitespace)
                # is only a wrapper: leave it unrendered so its children
                # keep their own structure (headings, lists, tables).
                children = [
                    child for child in element.contents
                    if not (isinstance(child, NavigableString) and not str(child).strip())
                ]
                if children and all(
                    getattr(child, 'name', None) in block_tags for child in children
                ):
                    continue

            self._process_html_element(element, doc)
            rendered.add(id(element))

    except Exception as e:
        self.logger.error(f"HTML to DOCX conversion failed: {e}")
        # Fallback: add as plain text
        doc.add_paragraph(html_content)
|
|
276
|
+
|
|
277
|
+
def _process_html_element(self, element, doc: Document) -> None:
    """Append the DOCX equivalent of one HTML element to ``doc``.

    Handled tags: h1-h6 (headings), p/div (formatted paragraph), table,
    ul/ol (one list paragraph per direct ``li`` child) and br (empty
    paragraph). Elements without visible text are skipped, and any other
    tag is ignored.
    """
    tag = element.name.lower()

    if tag == 'br':
        doc.add_paragraph()
        return

    if tag == 'table':
        self._process_table(element, doc)
        return

    if tag in ('ul', 'ol'):
        style_name = 'List Bullet' if tag == 'ul' else 'List Number'
        for item in element.find_all('li', recursive=False):
            item_text = self._get_text_content(item)
            if item_text.strip():
                doc.add_paragraph(item_text, style=style_name)
        return

    if tag in ('p', 'div'):
        if self._get_text_content(element).strip():
            self._add_formatted_text(doc.add_paragraph(), element)
        return

    if tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
        # The digit in the tag name is the heading level.
        heading_level = int(tag[1])
        title = self._get_text_content(element)
        if title.strip():
            doc.add_heading(title, level=heading_level)
|
|
305
|
+
|
|
306
|
+
def _get_text_content(self, element) -> str:
    """Return the concatenated text of an HTML node and its descendants.

    A bare string node is returned unchanged. For a tag, the text of every
    child is joined in document order and the result is stripped of
    leading and trailing whitespace (this also happens at every nested tag
    level because of the recursion).
    """
    if isinstance(element, NavigableString):
        return str(element)

    pieces = (
        str(child) if isinstance(child, NavigableString) else self._get_text_content(child)
        for child in element.contents
    )
    return ''.join(pieces).strip()
|
|
319
|
+
|
|
320
|
+
def _process_table(self, table_element, doc: Document) -> None:
    """Convert an HTML table into a DOCX table appended to ``doc``.

    Only the rows and cells that belong to ``table_element`` itself are
    used. ``find_all`` is recursive, so without that restriction the rows
    and cells of a table nested inside a cell would be mixed into the
    outer table. The text of a nested table still ends up in its
    enclosing cell through ``_get_text_content``.

    Args:
        table_element: The ``<table>`` tag to convert.
        doc: Target document; it is modified in place.
    """
    rows = [
        row for row in table_element.find_all('tr')
        if row.find_parent('table') is table_element
    ]
    if not rows:
        return

    # Direct <td>/<th> children of every row, computed once.
    cells_per_row = [row.find_all(['td', 'th'], recursive=False) for row in rows]

    # Create table with appropriate dimensions
    max_cols = max(len(cells) for cells in cells_per_row)
    if max_cols == 0:
        # Rows without a single cell: there is nothing to render.
        return

    table = doc.add_table(rows=0, cols=max_cols)
    table.style = 'Table Grid'

    for html_cells in cells_per_row:
        # Read ``cells`` once per row; the original re-evaluated the
        # property on every access.
        docx_cells = table.add_row().cells

        for docx_cell, html_cell in zip(docx_cells, html_cells):
            docx_cell.text = self._get_text_content(html_cell)

            # Make header cells bold
            if html_cell.name == 'th':
                for paragraph in docx_cell.paragraphs:
                    for run in paragraph.runs:
                        run.bold = True
|
|
345
|
+
|
|
346
|
+
def _add_formatted_text(self, paragraph, element) -> None:
    """Add the content of ``element`` to ``paragraph`` with basic formatting.

    Every direct child becomes one run: strings are added verbatim,
    ``strong``/``b`` runs are bold, ``em``/``i`` runs are italic and
    ``code`` runs use Courier New 10pt. Other tags contribute their plain
    text. A ``br`` child becomes a line break inside the paragraph, so the
    text before and after it no longer runs together.

    Args:
        paragraph: DOCX paragraph receiving the runs.
        element: HTML node (string or tag) to render.
    """
    if isinstance(element, NavigableString):
        paragraph.add_run(str(element))
        return

    for content in element.contents:
        if isinstance(content, NavigableString):
            paragraph.add_run(str(content))
            continue

        tag = content.name

        if tag == 'br':
            # Keep the line break instead of emitting an empty run.
            paragraph.add_run().add_break()
            continue

        run = paragraph.add_run(self._get_text_content(content))

        # Apply basic formatting based on HTML tags
        if tag in ('strong', 'b'):
            run.bold = True
        elif tag in ('em', 'i'):
            run.italic = True
        elif tag == 'code':
            run.font.name = 'Courier New'
            run.font.size = Pt(10)
|
|
368
|
+
|
|
369
|
+
async def _generate_document_content(self, content: str, **kwargs) -> bytes:
    """
    Build a Word document from the input and return it as bytes.

    Args:
        content: Input content (text, markdown, or HTML)
        **kwargs: Additional arguments from MSWordArgs. The keys read here
            are 'template_name', 'template_vars', 'docx_template' and
            'page_margins'.

    Returns:
        DOCX document as bytes

    Raises:
        Exception: any error raised during generation is logged and
            re-raised unchanged.
    """
    # NOTE(review): 'style_config' is passed in by _execute but is not
    # applied anywhere in this method.
    try:
        # Run the content through the HTML template step first.
        rendered = self._render_html_template(
            content,
            kwargs.get('template_name'),
            kwargs.get('template_vars'),
        )

        detected_type = self._detect_content_type(rendered)
        self.logger.info(f"Detected content type: {detected_type}")

        doc = self._create_document(kwargs.get('docx_template'))

        margins = kwargs.get('page_margins')
        if margins:
            self._apply_page_margins(doc, margins)

        # Anything that is not HTML is treated as markdown / plain text and
        # converted to HTML before it is written to the document.
        if detected_type != 'html':
            rendered = self._markdown_to_html(self._preprocess_markdown(rendered))
        self._html_to_docx(rendered, doc)

        # Serialise the document in memory.
        buffer = io.BytesIO()
        doc.save(buffer)
        return buffer.getvalue()

    except Exception as exc:
        self.logger.error(f"Error generating Word document: {exc}")
        raise
|
|
421
|
+
|
|
422
|
+
async def _execute(
    self,
    content: str,
    output_filename: Optional[str] = None,
    file_prefix: str = "document",
    output_dir: Optional[str] = None,
    overwrite_existing: bool = False,
    template_name: Optional[str] = None,
    template_vars: Optional[Dict[str, Any]] = None,
    docx_template: Optional[str] = None,
    style_config: Optional[Dict[str, Any]] = None,
    page_margins: Optional[Dict[str, float]] = None,
    **kwargs
) -> Dict[str, Any]:
    """
    Generate a Word document (AbstractTool interface).

    Args:
        content: Content to convert to Word document
        output_filename: Custom filename (without extension)
        file_prefix: Prefix for auto-generated filenames
        output_dir: Custom output directory
        overwrite_existing: Whether to overwrite existing files; the
            instance-level ``self.overwrite_existing`` also enables it
        template_name: HTML template name for content processing
        template_vars: Variables for HTML template
        docx_template: DOCX template file path
        style_config: Custom styling configuration
        page_margins: Page margins configuration
        **kwargs: Additional arguments (not forwarded)

    Returns:
        The result dictionary produced by ``_create_document_safely``

    Raises:
        Exception: any error is logged and re-raised unchanged.
    """
    try:
        self.logger.info(
            f"Starting Word document generation with {len(content)} characters of content"
        )

        # Everything is delegated to the shared safe-creation workflow.
        request = dict(
            content=content,
            output_filename=output_filename,
            file_prefix=file_prefix,
            output_dir=output_dir,
            overwrite_existing=overwrite_existing or self.overwrite_existing,
            extension="docx",
            template_name=template_name,
            template_vars=template_vars,
            docx_template=docx_template,
            style_config=style_config,
            page_margins=page_margins,
        )
        outcome = await self._create_document_safely(**request)

        if outcome['status'] == 'success':
            created_name = outcome['metadata']['filename']
            self.logger.info(
                f"Word document created successfully: {created_name}"
            )

        return outcome

    except Exception as exc:
        self.logger.error(f"Error in Word document generation: {exc}")
        raise
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
class WordToMarkdownTool(AbstractDocumentTool):
    """
    Tool for converting Word documents to Markdown format.

    This tool downloads Word documents from URLs and converts them to Markdown
    format for easier processing by LLMs and other text analysis tools.
    """

    name = "word_to_markdown"
    description = (
        "Convert Word documents to Markdown format from URLs. "
        "Downloads Word documents and converts them to clean Markdown text. "
        "Useful for processing and analyzing Word documents."
    )

    # Document type configuration
    document_type = "conversion"
    default_extension = "md"
    supported_extensions = [".md", ".txt"]

    def __init__(self, **kwargs):
        """Initialize the Word to Markdown tool."""
        super().__init__(**kwargs)
        # Created lazily by _download_file, removed by _cleanup_temp_files.
        self._temp_dir = None

    async def _download_file(self, url: str) -> str:
        """Download a Word document from ``url`` into the temporary directory.

        Args:
            url: Location of the document.

        Returns:
            Path of the downloaded file.

        Raises:
            Exception: if the server answers with a status other than 200.
        """
        # Create temporary directory if needed
        if not self._temp_dir:
            self._temp_dir = tempfile.mkdtemp()

        # Generate filename from URL. The extension check ignores case so
        # that e.g. "REPORT.DOCX" is not renamed to "REPORT.DOCX.docx".
        filename = os.path.basename(urlparse(url).path)
        if not filename.lower().endswith(('.docx', '.doc')):
            filename += '.docx'

        file_path = os.path.join(self._temp_dir, filename)

        # Download file
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    raise Exception(f"Download failed with status {response.status}")

                async with aiofiles.open(file_path, 'wb') as f:
                    await f.write(await response.read())

        self.logger.info(f"Downloaded Word document: {filename}")
        return file_path

    async def _convert_to_markdown(self, file_path: str) -> str:
        """Convert a Word document to Markdown (mammoth to HTML, then ``md``).

        Messages reported by mammoth are prepended to the result as HTML
        comments.

        Raises:
            Exception: any conversion error is logged and re-raised.
        """
        try:
            with open(file_path, "rb") as docx_file:
                result = mammoth.convert_to_html(docx_file)

            markdown_text = md(result.value)

            # Add conversion warnings as comments. Mammoth messages carry
            # their text in ``message``; use it so the comment holds the
            # text rather than the repr of the whole message object.
            if result.messages:
                warnings = "\n".join(
                    f"<!-- Warning: {getattr(msg, 'message', msg)} -->"
                    for msg in result.messages
                )
                markdown_text = f"{warnings}\n\n{markdown_text}"

            return markdown_text

        except Exception as e:
            self.logger.error(f"Conversion to markdown failed: {e}")
            raise

    async def _cleanup_temp_files(self, file_path: Optional[str] = None) -> None:
        """Remove the downloaded file and, if it is empty, the temporary directory.

        Failures are logged as warnings and never raised.
        """
        try:
            if file_path and os.path.exists(file_path):
                os.remove(file_path)

            if self._temp_dir:
                if not os.path.exists(self._temp_dir):
                    # The directory is gone; drop the stale reference so the
                    # next download creates a fresh one instead of writing
                    # into a missing directory.
                    self._temp_dir = None
                elif not os.listdir(self._temp_dir):  # Only remove if empty
                    os.rmdir(self._temp_dir)
                    self._temp_dir = None

        except Exception as e:
            self.logger.warning(f"Cleanup failed: {e}")

    async def convert_from_url(self, url: str, save_markdown: bool = False, **kwargs) -> Dict[str, Any]:
        """
        Convert Word document from URL to Markdown.

        Args:
            url: URL of the Word document
            save_markdown: Whether to save the markdown to a file
            **kwargs: Additional arguments for file saving

        Returns:
            Dictionary with conversion results. On failure the dictionary
            has "status" set to "error"; no exception is raised.
        """
        file_path = None
        try:
            # Download the file
            file_path = await self._download_file(url)

            # Convert to markdown
            markdown_content = await self._convert_to_markdown(file_path)

            result = {
                "status": "success",
                "markdown_content": markdown_content,
                "source_url": url,
                "content_length": len(markdown_content),
                "message": "Word document converted to Markdown successfully"
            }

            # Optionally save markdown to file
            if save_markdown:
                file_result = await self._create_document_safely(
                    content=markdown_content,
                    extension="md",
                    **kwargs
                )
                if file_result['status'] == 'success':
                    result.update({
                        "saved_file": file_result['metadata'],
                        "file_path": file_result['metadata']['file_path'],
                        "file_url": file_result['metadata']['file_url']
                    })
                else:
                    # The conversion itself succeeded, so the result stays
                    # a success, but a failed save is reported in the log.
                    self.logger.warning(
                        "Markdown was converted but could not be saved to a file"
                    )

            return result

        except Exception as e:
            self.logger.error(f"Word to Markdown conversion failed: {e}")
            return {
                "status": "error",
                "error": str(e),
                "source_url": url,
                "message": f"Failed to convert Word document: {str(e)}"
            }

        finally:
            # Clean up temporary files
            await self._cleanup_temp_files(file_path)

    async def _generate_document_content(self, content: str, **kwargs) -> str:
        """Generate markdown content (implementation required by AbstractDocumentTool)."""
        # This tool is primarily for URL conversion, but we implement this for completeness
        return content

    async def _execute(self, url: str, save_markdown: bool = False, **kwargs) -> Dict[str, Any]:
        """Execute Word to Markdown conversion."""
        return await self.convert_from_url(url, save_markdown, **kwargs)
|