ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
parrot/tools/quickeda.py
ADDED
|
@@ -0,0 +1,827 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Quick EDA Tool - Comprehensive Exploratory Data Analysis for pandas DataFrames.
|
|
3
|
+
"""
|
|
4
|
+
from typing import Any, Dict, Optional, List
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
import base64
|
|
7
|
+
import io
|
|
8
|
+
from html import escape
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import matplotlib
|
|
11
|
+
import matplotlib.pyplot as plt
|
|
12
|
+
import seaborn as sns
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
from pydantic import BaseModel, Field
|
|
16
|
+
from .abstract import AbstractTool
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
matplotlib.use('Agg') # Use non-interactive backend
|
|
20
|
+
|
|
21
|
+
class QuickEdaArgs(BaseModel):
    """Input schema for a Quick EDA run.

    ``dataframe`` is declared without a default; every other option
    carries the default shown on its field.
    """

    # --- data and report identity ---
    dataframe: Any = Field(description="Pandas DataFrame to analyze")
    filename: Optional[str] = Field(
        None,
        description="Optional filename to save the EDA report (without extension)",
    )
    title: str = Field("Quick EDA Report", description="Title for the EDA report")

    # --- limits on how many columns get plotted ---
    max_numeric_plots: int = Field(
        5, description="Maximum number of numerical columns to plot"
    )
    max_categorical_plots: int = Field(
        5, description="Maximum number of categorical columns to plot"
    )

    # --- switches for optional report content ---
    include_correlations: bool = Field(
        True, description="Whether to include correlation matrix"
    )
    include_distributions: bool = Field(
        True, description="Whether to include distribution plots"
    )
    include_value_counts: bool = Field(
        True,
        description="Whether to include value counts for categorical columns",
    )

    # --- plot appearance ---
    plot_style: str = Field("whitegrid", description="Seaborn plot style")
    color_palette: str = Field("husl", description="Color palette for plots")
    figure_size: tuple = Field(
        (12, 8), description="Default figure size for plots"
    )

    # --- missing-value reporting ---
    include_missing_analysis: bool = Field(
        True,
        description="Whether to include detailed missing value analysis",
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class QuickEdaTool(AbstractTool):
|
|
74
|
+
"""
|
|
75
|
+
Tool for performing comprehensive Exploratory Data Analysis on pandas DataFrames.
|
|
76
|
+
|
|
77
|
+
This tool generates a detailed HTML report with statistics, visualizations,
|
|
78
|
+
and insights about the DataFrame structure and data distribution.
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
name: str = "quick_eda"
|
|
82
|
+
description: str = "Perform comprehensive Exploratory Data Analysis on pandas DataFrame"
|
|
83
|
+
args_schema = QuickEdaArgs
|
|
84
|
+
return_direct: bool = False
|
|
85
|
+
|
|
86
|
+
def _default_output_dir(self) -> Optional[Path]:
|
|
87
|
+
"""Default output directory for EDA reports."""
|
|
88
|
+
return self.static_dir / "eda_reports" if self.static_dir else None
|
|
89
|
+
|
|
90
|
+
def _get_eda_css(self) -> str:
|
|
91
|
+
"""Get comprehensive CSS styles for the EDA report."""
|
|
92
|
+
return """
|
|
93
|
+
<style>
|
|
94
|
+
body {
|
|
95
|
+
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
|
96
|
+
margin: 0;
|
|
97
|
+
padding: 20px;
|
|
98
|
+
background-color: #f8f9fa;
|
|
99
|
+
line-height: 1.6;
|
|
100
|
+
}
|
|
101
|
+
.container {
|
|
102
|
+
max-width: 1200px;
|
|
103
|
+
margin: 0 auto;
|
|
104
|
+
background-color: white;
|
|
105
|
+
padding: 30px;
|
|
106
|
+
border-radius: 10px;
|
|
107
|
+
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
|
108
|
+
}
|
|
109
|
+
h1 {
|
|
110
|
+
color: #2c3e50;
|
|
111
|
+
border-bottom: 3px solid #3498db;
|
|
112
|
+
padding-bottom: 10px;
|
|
113
|
+
text-align: center;
|
|
114
|
+
font-size: 2.5em;
|
|
115
|
+
margin-bottom: 30px;
|
|
116
|
+
}
|
|
117
|
+
h2 {
|
|
118
|
+
color: #34495e;
|
|
119
|
+
border-bottom: 2px solid #ecf0f1;
|
|
120
|
+
padding-bottom: 8px;
|
|
121
|
+
margin-top: 40px;
|
|
122
|
+
font-size: 1.8em;
|
|
123
|
+
}
|
|
124
|
+
h3 {
|
|
125
|
+
color: #2c3e50;
|
|
126
|
+
margin-top: 25px;
|
|
127
|
+
font-size: 1.3em;
|
|
128
|
+
}
|
|
129
|
+
.dataframe {
|
|
130
|
+
border-collapse: collapse;
|
|
131
|
+
margin: 20px 0;
|
|
132
|
+
font-size: 0.9em;
|
|
133
|
+
width: 100%;
|
|
134
|
+
border-radius: 8px;
|
|
135
|
+
overflow: hidden;
|
|
136
|
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
|
137
|
+
}
|
|
138
|
+
.dataframe th, .dataframe td {
|
|
139
|
+
border: 1px solid #bdc3c7;
|
|
140
|
+
padding: 12px 15px;
|
|
141
|
+
text-align: left;
|
|
142
|
+
}
|
|
143
|
+
.dataframe th {
|
|
144
|
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
|
145
|
+
color: white;
|
|
146
|
+
font-weight: 600;
|
|
147
|
+
text-transform: uppercase;
|
|
148
|
+
letter-spacing: 0.5px;
|
|
149
|
+
}
|
|
150
|
+
.dataframe tbody tr:nth-child(even) {
|
|
151
|
+
background-color: #f8f9fa;
|
|
152
|
+
}
|
|
153
|
+
.dataframe tbody tr:hover {
|
|
154
|
+
background-color: #e3f2fd;
|
|
155
|
+
transition: background-color 0.3s ease;
|
|
156
|
+
}
|
|
157
|
+
.dataframe caption {
|
|
158
|
+
caption-side: top;
|
|
159
|
+
font-weight: bold;
|
|
160
|
+
margin-bottom: 10px;
|
|
161
|
+
text-align: left;
|
|
162
|
+
font-size: 1.2em;
|
|
163
|
+
color: #2c3e50;
|
|
164
|
+
}
|
|
165
|
+
img {
|
|
166
|
+
max-width: 100%;
|
|
167
|
+
height: auto;
|
|
168
|
+
display: block;
|
|
169
|
+
margin: 20px auto;
|
|
170
|
+
border-radius: 8px;
|
|
171
|
+
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
|
|
172
|
+
}
|
|
173
|
+
.plot-container {
|
|
174
|
+
margin-bottom: 30px;
|
|
175
|
+
background-color: #fafafa;
|
|
176
|
+
padding: 20px;
|
|
177
|
+
border-radius: 8px;
|
|
178
|
+
border-left: 4px solid #3498db;
|
|
179
|
+
}
|
|
180
|
+
.section {
|
|
181
|
+
margin-bottom: 40px;
|
|
182
|
+
padding: 25px;
|
|
183
|
+
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
|
|
184
|
+
border-radius: 10px;
|
|
185
|
+
box-shadow: 0 2px 10px rgba(0,0,0,0.08);
|
|
186
|
+
}
|
|
187
|
+
.missing-values {
|
|
188
|
+
color: #e74c3c;
|
|
189
|
+
font-weight: bold;
|
|
190
|
+
}
|
|
191
|
+
.info-box {
|
|
192
|
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
|
193
|
+
color: white;
|
|
194
|
+
padding: 20px;
|
|
195
|
+
border-radius: 8px;
|
|
196
|
+
margin: 20px 0;
|
|
197
|
+
text-align: center;
|
|
198
|
+
}
|
|
199
|
+
.info-box h3 {
|
|
200
|
+
color: white;
|
|
201
|
+
margin-top: 0;
|
|
202
|
+
}
|
|
203
|
+
.stats-grid {
|
|
204
|
+
display: grid;
|
|
205
|
+
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
|
206
|
+
gap: 20px;
|
|
207
|
+
margin: 20px 0;
|
|
208
|
+
}
|
|
209
|
+
.stat-card {
|
|
210
|
+
background: white;
|
|
211
|
+
padding: 15px;
|
|
212
|
+
border-radius: 8px;
|
|
213
|
+
text-align: center;
|
|
214
|
+
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
|
215
|
+
}
|
|
216
|
+
.stat-number {
|
|
217
|
+
font-size: 2em;
|
|
218
|
+
font-weight: bold;
|
|
219
|
+
color: #3498db;
|
|
220
|
+
}
|
|
221
|
+
.stat-label {
|
|
222
|
+
color: #7f8c8d;
|
|
223
|
+
font-size: 0.9em;
|
|
224
|
+
text-transform: uppercase;
|
|
225
|
+
letter-spacing: 0.5px;
|
|
226
|
+
}
|
|
227
|
+
.footer {
|
|
228
|
+
text-align: center;
|
|
229
|
+
color: #95a5a6;
|
|
230
|
+
font-size: 0.9em;
|
|
231
|
+
margin-top: 50px;
|
|
232
|
+
padding-top: 20px;
|
|
233
|
+
border-top: 1px solid #ecf0f1;
|
|
234
|
+
}
|
|
235
|
+
.alert {
|
|
236
|
+
padding: 15px;
|
|
237
|
+
margin: 15px 0;
|
|
238
|
+
border: 1px solid transparent;
|
|
239
|
+
border-radius: 4px;
|
|
240
|
+
}
|
|
241
|
+
.alert-warning {
|
|
242
|
+
color: #856404;
|
|
243
|
+
background-color: #fff3cd;
|
|
244
|
+
border-color: #ffeaa7;
|
|
245
|
+
}
|
|
246
|
+
.alert-info {
|
|
247
|
+
color: #0c5460;
|
|
248
|
+
background-color: #d1ecf1;
|
|
249
|
+
border-color: #bee5eb;
|
|
250
|
+
}
|
|
251
|
+
</style>
|
|
252
|
+
"""
|
|
253
|
+
|
|
254
|
+
def _plot_to_base64(self, plt_figure) -> str:
    """Render a matplotlib figure as PNG and return it base64-encoded.

    Args:
        plt_figure: Figure object whose ``savefig`` method is called with an
            in-memory buffer as the target.

    Returns:
        The PNG bytes encoded as a base64 text string.

    Raises:
        Any exception raised by ``savefig`` propagates unchanged; the figure
        is still closed and the buffer released in that case.
    """
    try:
        # The context manager frees the buffer even when savefig fails.
        with io.BytesIO() as buf:
            plt_figure.savefig(buf, format='png', bbox_inches='tight', dpi=100, facecolor='white')
            return base64.b64encode(buf.getvalue()).decode('utf-8')
    finally:
        # Close on every path so a failed render does not leave the figure
        # registered with pyplot.
        plt.close(plt_figure)
|
|
263
|
+
|
|
264
|
+
def _df_to_html_with_style(self, df_input: pd.DataFrame, title: str = "") -> str:
|
|
265
|
+
"""Convert DataFrame to HTML with styling."""
|
|
266
|
+
styler = df_input.style.set_table_attributes('class="dataframe"')
|
|
267
|
+
if title:
|
|
268
|
+
styler = styler.set_caption(title)
|
|
269
|
+
return styler.to_html()
|
|
270
|
+
|
|
271
|
+
def _generate_basic_info_section(self, df: pd.DataFrame) -> str:
|
|
272
|
+
"""Generate basic information section."""
|
|
273
|
+
html = ['<div class="section">']
|
|
274
|
+
html.append('<h2>📏 Dataset Overview</h2>')
|
|
275
|
+
|
|
276
|
+
# Create info cards
|
|
277
|
+
html.append('<div class="stats-grid">')
|
|
278
|
+
html.append(f'''
|
|
279
|
+
<div class="stat-card">
|
|
280
|
+
<div class="stat-number">{df.shape[0]:,}</div>
|
|
281
|
+
<div class="stat-label">Rows</div>
|
|
282
|
+
</div>
|
|
283
|
+
<div class="stat-card">
|
|
284
|
+
<div class="stat-number">{df.shape[1]:,}</div>
|
|
285
|
+
<div class="stat-label">Columns</div>
|
|
286
|
+
</div>
|
|
287
|
+
<div class="stat-card">
|
|
288
|
+
<div class="stat-number">{df.memory_usage(deep=True).sum() / 1024**2:.1f} MB</div>
|
|
289
|
+
<div class="stat-label">Memory Usage</div>
|
|
290
|
+
</div>
|
|
291
|
+
<div class="stat-card">
|
|
292
|
+
<div class="stat-number">{df.duplicated().sum():,}</div>
|
|
293
|
+
<div class="stat-label">Duplicate Rows</div>
|
|
294
|
+
</div>
|
|
295
|
+
''')
|
|
296
|
+
html.append('</div>')
|
|
297
|
+
html.append('</div>')
|
|
298
|
+
return '\n'.join(html)
|
|
299
|
+
|
|
300
|
+
def _generate_data_types_section(self, df: pd.DataFrame) -> str:
    """Render the "Column Information" section.

    Builds one summary row per column (dtype, non-null count, null count,
    null percentage, number of distinct values) and renders the resulting
    table through ``_df_to_html_with_style`` with the caption
    "Column Details".

    Args:
        df: DataFrame being profiled.

    Returns:
        HTML fragment for the section.
    """
    total_rows = len(df)

    def describe_column(name):
        # One row of the summary table; counts are pre-formatted as text.
        series = df[name]
        type_name = str(series.dtype)
        nulls = series.isna().sum()
        null_share = (nulls / total_rows) * 100
        distinct = series.nunique()
        return {
            'Column': name,
            'Data Type': type_name,
            'Non-Null Count': f"{total_rows - nulls:,}",
            'Null Count': f"{nulls:,}",
            'Null %': f"{null_share:.1f}%",
            'Unique Values': f"{distinct:,}",
        }

    summary_table = pd.DataFrame([describe_column(name) for name in df.columns])

    return '\n'.join([
        '<div class="section">',
        '<h2>📋 Column Information</h2>',
        self._df_to_html_with_style(summary_table, "Column Details"),
        '</div>',
    ])
|
|
326
|
+
|
|
327
|
+
def _generate_missing_values_section(self, df: pd.DataFrame) -> str:
|
|
328
|
+
"""Generate missing values analysis section."""
|
|
329
|
+
html = ['<div class="section">']
|
|
330
|
+
html.append('<h2><span class="missing-values">🔍 Missing Values Analysis</span></h2>')
|
|
331
|
+
|
|
332
|
+
missing = df.isna().sum()
|
|
333
|
+
missing_filtered = missing[missing > 0].sort_values(ascending=False)
|
|
334
|
+
|
|
335
|
+
if not missing_filtered.empty:
|
|
336
|
+
missing_df = missing_filtered.to_frame(name='Missing Count')
|
|
337
|
+
missing_df['Percentage (%)'] = (missing_df['Missing Count'] / len(df) * 100).round(2)
|
|
338
|
+
html.append(self._df_to_html_with_style(missing_df, "Missing Values Summary"))
|
|
339
|
+
|
|
340
|
+
# Add alert if high missing values
|
|
341
|
+
high_missing = missing_df[missing_df['Percentage (%)'] > 50]
|
|
342
|
+
if not high_missing.empty:
|
|
343
|
+
html.append('<div class="alert alert-warning">')
|
|
344
|
+
html.append(f'<strong>Warning:</strong> {len(high_missing)} column(s) have more than 50% missing values.')
|
|
345
|
+
html.append('</div>')
|
|
346
|
+
else:
|
|
347
|
+
html.append('<div class="alert alert-info">')
|
|
348
|
+
html.append('<strong>Great!</strong> No missing values found in the dataset.')
|
|
349
|
+
html.append('</div>')
|
|
350
|
+
|
|
351
|
+
html.append('</div>')
|
|
352
|
+
return '\n'.join(html)
|
|
353
|
+
|
|
354
|
+
def _generate_descriptive_stats_section(self, df: pd.DataFrame) -> str:
|
|
355
|
+
"""Generate descriptive statistics section."""
|
|
356
|
+
html = ['<div class="section">']
|
|
357
|
+
html.append('<h2>📈 Descriptive Statistics</h2>')
|
|
358
|
+
|
|
359
|
+
try:
|
|
360
|
+
# Numerical statistics
|
|
361
|
+
numeric_cols = df.select_dtypes(include=['number']).columns
|
|
362
|
+
if len(numeric_cols) > 0:
|
|
363
|
+
html.append('<h3>Numerical Columns</h3>')
|
|
364
|
+
desc_stats = df[numeric_cols].describe().T
|
|
365
|
+
desc_stats = desc_stats.round(3)
|
|
366
|
+
html.append(self._df_to_html_with_style(desc_stats, "Numerical Statistics"))
|
|
367
|
+
|
|
368
|
+
# Categorical statistics
|
|
369
|
+
cat_cols = df.select_dtypes(include=['object', 'category']).columns
|
|
370
|
+
if len(cat_cols) > 0:
|
|
371
|
+
html.append('<h3>Categorical Columns</h3>')
|
|
372
|
+
cat_stats = []
|
|
373
|
+
for col in cat_cols:
|
|
374
|
+
stats = {
|
|
375
|
+
'Column': col,
|
|
376
|
+
'Unique Values': df[col].nunique(),
|
|
377
|
+
'Most Frequent': df[col].mode().iloc[0] if not df[col].mode().empty else 'N/A',
|
|
378
|
+
'Frequency': df[col].value_counts().iloc[0] if not df[col].value_counts().empty else 0
|
|
379
|
+
}
|
|
380
|
+
cat_stats.append(stats)
|
|
381
|
+
|
|
382
|
+
cat_stats_df = pd.DataFrame(cat_stats)
|
|
383
|
+
html.append(self._df_to_html_with_style(cat_stats_df, "Categorical Statistics"))
|
|
384
|
+
|
|
385
|
+
except Exception as e:
|
|
386
|
+
html.append(f'<div class="alert alert-warning">Could not generate descriptive statistics: {escape(str(e))}</div>')
|
|
387
|
+
|
|
388
|
+
html.append('</div>')
|
|
389
|
+
return '\n'.join(html)
|
|
390
|
+
|
|
391
|
+
def _generate_correlation_section(self, df: pd.DataFrame, plot_style: str, color_palette: str, figure_size: tuple) -> str:
|
|
392
|
+
"""Generate correlation analysis section."""
|
|
393
|
+
html = ['<div class="section">']
|
|
394
|
+
html.append('<h2>🔗 Correlation Analysis</h2>')
|
|
395
|
+
|
|
396
|
+
numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
|
|
397
|
+
|
|
398
|
+
if len(numeric_cols) > 1:
|
|
399
|
+
try:
|
|
400
|
+
sns.set_style(plot_style)
|
|
401
|
+
sns.set_palette(color_palette)
|
|
402
|
+
|
|
403
|
+
fig_corr, ax_corr = plt.subplots(figsize=figure_size)
|
|
404
|
+
corr = df[numeric_cols].corr()
|
|
405
|
+
|
|
406
|
+
# Create heatmap
|
|
407
|
+
sns.heatmap(corr, annot=True, cmap='RdYlBu_r', fmt=".2f",
|
|
408
|
+
center=0, square=True, ax=ax_corr, cbar_kws={"shrink": .8})
|
|
409
|
+
ax_corr.set_title("Correlation Matrix", fontsize=16, pad=20)
|
|
410
|
+
plt.tight_layout()
|
|
411
|
+
|
|
412
|
+
img_base64 = self._plot_to_base64(fig_corr)
|
|
413
|
+
html.append('<div class="plot-container">')
|
|
414
|
+
html.append(f'<img src="data:image/png;base64,{img_base64}" alt="Correlation Matrix">')
|
|
415
|
+
html.append('</div>')
|
|
416
|
+
|
|
417
|
+
# Find high correlations
|
|
418
|
+
high_corr_pairs = []
|
|
419
|
+
for i in range(len(corr.columns)):
|
|
420
|
+
for j in range(i+1, len(corr.columns)):
|
|
421
|
+
corr_val = corr.iloc[i, j]
|
|
422
|
+
if abs(corr_val) > 0.7: # High correlation threshold
|
|
423
|
+
high_corr_pairs.append({
|
|
424
|
+
'Variable 1': corr.columns[i],
|
|
425
|
+
'Variable 2': corr.columns[j],
|
|
426
|
+
'Correlation': round(corr_val, 3)
|
|
427
|
+
})
|
|
428
|
+
|
|
429
|
+
if high_corr_pairs:
|
|
430
|
+
html.append('<h3>High Correlations (|r| > 0.7)</h3>')
|
|
431
|
+
high_corr_df = pd.DataFrame(high_corr_pairs)
|
|
432
|
+
html.append(self._df_to_html_with_style(high_corr_df))
|
|
433
|
+
|
|
434
|
+
except Exception as e:
|
|
435
|
+
html.append(f'<div class="alert alert-warning">Could not generate correlation matrix: {escape(str(e))}</div>')
|
|
436
|
+
else:
|
|
437
|
+
html.append('<div class="alert alert-info">Need at least 2 numerical columns to calculate correlations.</div>')
|
|
438
|
+
|
|
439
|
+
html.append('</div>')
|
|
440
|
+
return '\n'.join(html)
|
|
441
|
+
|
|
442
|
+
def _generate_distribution_section(self, df: pd.DataFrame, max_plots: int, plot_style: str, color_palette: str, figure_size: tuple) -> str:
|
|
443
|
+
"""Generate distribution analysis section."""
|
|
444
|
+
html = ['<div class="section">']
|
|
445
|
+
html.append('<h2>📊 Distribution Analysis</h2>')
|
|
446
|
+
|
|
447
|
+
numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
|
|
448
|
+
|
|
449
|
+
if numeric_cols:
|
|
450
|
+
cols_to_plot = numeric_cols[:min(len(numeric_cols), max_plots)]
|
|
451
|
+
html.append(f'<p>Displaying distributions for {len(cols_to_plot)} numerical columns: <strong>{", ".join(map(escape, cols_to_plot))}</strong></p>')
|
|
452
|
+
|
|
453
|
+
sns.set_style(plot_style)
|
|
454
|
+
sns.set_palette(color_palette)
|
|
455
|
+
|
|
456
|
+
for col in cols_to_plot:
|
|
457
|
+
html.append('<div class="plot-container">')
|
|
458
|
+
html.append(f'<h3>Distribution of {escape(col)}</h3>')
|
|
459
|
+
|
|
460
|
+
try:
|
|
461
|
+
fig_dist, axes = plt.subplots(2, 2, figsize=(15, 10))
|
|
462
|
+
fig_dist.suptitle(f'Distribution Analysis: {escape(col)}', fontsize=16)
|
|
463
|
+
|
|
464
|
+
# Remove NaN values for plotting
|
|
465
|
+
data = df[col].dropna()
|
|
466
|
+
|
|
467
|
+
# Histogram with KDE
|
|
468
|
+
sns.histplot(data, kde=True, ax=axes[0, 0], alpha=0.7)
|
|
469
|
+
axes[0, 0].set_title('Histogram with KDE')
|
|
470
|
+
|
|
471
|
+
# Boxplot
|
|
472
|
+
sns.boxplot(y=data, ax=axes[0, 1])
|
|
473
|
+
axes[0, 1].set_title('Boxplot')
|
|
474
|
+
|
|
475
|
+
# Q-Q plot
|
|
476
|
+
from scipy import stats
|
|
477
|
+
stats.probplot(data, dist="norm", plot=axes[1, 0])
|
|
478
|
+
axes[1, 0].set_title('Q-Q Plot (Normal)')
|
|
479
|
+
|
|
480
|
+
# Violin plot
|
|
481
|
+
sns.violinplot(y=data, ax=axes[1, 1])
|
|
482
|
+
axes[1, 1].set_title('Violin Plot')
|
|
483
|
+
|
|
484
|
+
plt.tight_layout()
|
|
485
|
+
img_base64 = self._plot_to_base64(fig_dist)
|
|
486
|
+
html.append(f'<img src="data:image/png;base64,{img_base64}" alt="Distribution analysis for {escape(col)}">')
|
|
487
|
+
|
|
488
|
+
except Exception as e:
|
|
489
|
+
html.append(f'<div class="alert alert-warning">Could not generate distribution plot for {escape(col)}: {escape(str(e))}</div>')
|
|
490
|
+
|
|
491
|
+
html.append('</div>')
|
|
492
|
+
else:
|
|
493
|
+
html.append('<div class="alert alert-info">No numerical columns found for distribution analysis.</div>')
|
|
494
|
+
|
|
495
|
+
html.append('</div>')
|
|
496
|
+
return '\n'.join(html)
|
|
497
|
+
|
|
498
|
+
def _generate_categorical_section(self, df: pd.DataFrame, max_plots: int, plot_style: str, color_palette: str, figure_size: tuple) -> str:
|
|
499
|
+
"""Generate categorical analysis section."""
|
|
500
|
+
html = ['<div class="section">']
|
|
501
|
+
html.append('<h2>📊 Categorical Analysis</h2>')
|
|
502
|
+
|
|
503
|
+
cat_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
|
|
504
|
+
|
|
505
|
+
if cat_cols:
|
|
506
|
+
cols_to_plot = cat_cols[:min(len(cat_cols), max_plots)]
|
|
507
|
+
html.append(f'<p>Displaying analysis for {len(cols_to_plot)} categorical columns: <strong>{", ".join(map(escape, cols_to_plot))}</strong></p>')
|
|
508
|
+
|
|
509
|
+
sns.set_style(plot_style)
|
|
510
|
+
sns.set_palette(color_palette)
|
|
511
|
+
|
|
512
|
+
for col in cols_to_plot:
|
|
513
|
+
html.append('<div class="plot-container">')
|
|
514
|
+
html.append(f'<h3>Analysis of {escape(col)}</h3>')
|
|
515
|
+
|
|
516
|
+
try:
|
|
517
|
+
# Value counts table
|
|
518
|
+
value_counts = df[col].value_counts().head(15) # Top 15 values
|
|
519
|
+
if not value_counts.empty:
|
|
520
|
+
vc_df = value_counts.to_frame(name='Count')
|
|
521
|
+
vc_df['Percentage (%)'] = (vc_df['Count'] / len(df[col].dropna()) * 100).round(2)
|
|
522
|
+
html.append(self._df_to_html_with_style(vc_df, f"Top {len(value_counts)} values"))
|
|
523
|
+
|
|
524
|
+
# Create visualization
|
|
525
|
+
fig_cat, axes = plt.subplots(1, 2, figsize=(15, 6))
|
|
526
|
+
fig_cat.suptitle(f'Categorical Analysis: {escape(col)}', fontsize=16)
|
|
527
|
+
|
|
528
|
+
# Bar chart
|
|
529
|
+
value_counts.head(10).plot(kind='bar', ax=axes[0], color=sns.color_palette(color_palette, len(value_counts.head(10))))
|
|
530
|
+
axes[0].set_title(f'Top 10 Values')
|
|
531
|
+
axes[0].set_ylabel('Count')
|
|
532
|
+
axes[0].tick_params(axis='x', rotation=45)
|
|
533
|
+
|
|
534
|
+
# Pie chart (for top 8 values + others)
|
|
535
|
+
pie_data = value_counts.head(8)
|
|
536
|
+
if len(value_counts) > 8:
|
|
537
|
+
others_count = value_counts.iloc[8:].sum()
|
|
538
|
+
pie_data = pd.concat([pie_data, pd.Series([others_count], index=['Others'])])
|
|
539
|
+
|
|
540
|
+
axes[1].pie(pie_data.values, labels=pie_data.index, autopct='%1.1f%%', startangle=90)
|
|
541
|
+
axes[1].set_title('Distribution (Top 8 + Others)')
|
|
542
|
+
|
|
543
|
+
plt.tight_layout()
|
|
544
|
+
img_base64 = self._plot_to_base64(fig_cat)
|
|
545
|
+
html.append(f'<img src="data:image/png;base64,{img_base64}" alt="Categorical analysis for {escape(col)}">')
|
|
546
|
+
else:
|
|
547
|
+
html.append('<div class="alert alert-info">No values found for this column.</div>')
|
|
548
|
+
|
|
549
|
+
except Exception as e:
|
|
550
|
+
html.append(f'<div class="alert alert-warning">Could not generate analysis for {escape(col)}: {escape(str(e))}</div>')
|
|
551
|
+
|
|
552
|
+
html.append('</div>')
|
|
553
|
+
else:
|
|
554
|
+
html.append('<div class="alert alert-info">No categorical columns found for analysis.</div>')
|
|
555
|
+
|
|
556
|
+
html.append('</div>')
|
|
557
|
+
return '\n'.join(html)
|
|
558
|
+
|
|
559
|
+
async def _execute(
    self,
    dataframe: pd.DataFrame,
    filename: Optional[str] = None,
    title: str = "Quick EDA Report",
    max_numeric_plots: int = 5,
    max_categorical_plots: int = 5,
    include_correlations: bool = True,
    include_distributions: bool = True,
    include_value_counts: bool = True,
    plot_style: str = "whitegrid",
    color_palette: str = "husl",
    figure_size: tuple = (12, 8),
    include_missing_analysis: bool = True,
    **kwargs
) -> Dict[str, Any]:
    """
    Execute the EDA analysis and assemble the HTML report.

    Args:
        dataframe: Data to profile; must be a non-empty pandas DataFrame.
        filename: When given, the report is also written to disk. A name
            that does not end in ``.html`` gets ``_<timestamp>.html``
            appended. The file goes under ``self.output_dir`` when that is
            set, otherwise ``filename`` is used as the path.
        title: Report title; HTML-escaped before being placed in the page.
        max_numeric_plots: Limit passed to the distribution section.
        max_categorical_plots: Limit passed to the categorical section.
        include_correlations: Include the correlation section.
        include_distributions: Include the distribution section.
        include_value_counts: Include the categorical section.
        plot_style: Style name forwarded to the plotting sections.
        color_palette: Palette name forwarded to the plotting sections.
        figure_size: Figure size forwarded to the plotting sections.
        include_missing_analysis: Include the missing-values section.
        **kwargs: Accepted and ignored.

    Returns:
        Dictionary containing the HTML report (``html``), ``title``,
        ``timestamp``, ``dataset_shape``, ``columns``, ``data_types`` and
        ``missing_values``; plus ``file_path``/``file_url``/``file_size``
        when the report was saved, or ``save_error`` when saving failed.

    Raises:
        ValueError: If ``dataframe`` is not a DataFrame or is empty.
    """

    # Validate input
    if not isinstance(dataframe, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame")

    if dataframe.empty:
        raise ValueError("DataFrame is empty - cannot perform EDA")

    # Sample the clock once so the identifier and the header always agree.
    generated_at = datetime.now()
    timestamp = generated_at.strftime("%Y%m%d_%H%M%S")

    # The title is caller-supplied text placed into markup; escape it the
    # same way error text is escaped below.
    safe_title = escape(str(title))

    # HTML head, styling, title and header
    html_parts = [
        '<!DOCTYPE html>',
        '<html lang="en">',
        '<head>',
        '<meta charset="UTF-8">',
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
        f'<title>{safe_title}</title>',
        self._get_eda_css(),
        '</head>',
        '<body>',
        '<div class="container">',
        f'<h1>📊 {safe_title}</h1>',
        '<div class="info-box">',
        f'<h3>Generated on: {generated_at.strftime("%Y-%m-%d %H:%M:%S")}</h3>',
        f'<p>Dataset: {dataframe.shape[0]:,} rows × {dataframe.shape[1]:,} columns</p>',
        '</div>',
    ]

    # Generate sections
    try:
        # Basic information
        html_parts.append(self._generate_basic_info_section(dataframe))

        # Data types and column info
        html_parts.append(self._generate_data_types_section(dataframe))

        # Missing values analysis
        if include_missing_analysis:
            html_parts.append(self._generate_missing_values_section(dataframe))

        # Descriptive statistics
        html_parts.append(self._generate_descriptive_stats_section(dataframe))

        # Correlation analysis
        if include_correlations:
            html_parts.append(self._generate_correlation_section(
                dataframe, plot_style, color_palette, figure_size
            ))

        # Distribution analysis
        if include_distributions:
            html_parts.append(self._generate_distribution_section(
                dataframe, max_numeric_plots, plot_style, color_palette, figure_size
            ))

        # Categorical analysis
        if include_value_counts:
            html_parts.append(self._generate_categorical_section(
                dataframe, max_categorical_plots, plot_style, color_palette, figure_size
            ))

    except Exception as e:
        # Best effort: keep whatever sections were built and note the
        # failure in the page; sections after the failing one are skipped.
        html_parts.append(f'<div class="alert alert-warning">Error generating some sections: {escape(str(e))}</div>')
        self.logger.error(f"Error generating EDA sections: {e}")

    # Footer and closing tags
    html_parts.extend([
        '<div class="footer">',
        '✅ EDA Report Generated Successfully',
        '</div>',
        '</div>',  # Close container
        '</body>',
        '</html>',
    ])

    # Combine HTML
    complete_html = '\n'.join(html_parts)

    # Prepare result
    result = {
        "html": complete_html,
        "title": title,
        "timestamp": timestamp,
        "dataset_shape": dataframe.shape,
        "columns": dataframe.columns.tolist(),
        "data_types": dataframe.dtypes.to_dict(),
        "missing_values": dataframe.isna().sum().to_dict()
    }

    # Save to file if filename provided
    if filename:
        if not filename.endswith('.html'):
            filename = f"{filename}_{timestamp}.html"

        # Ensure output directory exists
        if self.output_dir:
            self.output_dir.mkdir(parents=True, exist_ok=True)
            file_path = self.output_dir / filename
        else:
            file_path = Path(filename)

        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(complete_html)

            self.logger.info(f"EDA report saved to: {file_path}")

            result.update({
                "file_path": str(file_path),
                "file_url": self.to_static_url(file_path),
                "file_size": file_path.stat().st_size
            })

        except Exception as e:
            # A failed save must not discard the in-memory report.
            self.logger.error(f"Failed to save EDA report: {e}")
            result["save_error"] = str(e)

    return result
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
# Additional utility functions for EDA
|
|
706
|
+
class EdaUtils:
|
|
707
|
+
"""Utility functions for EDA operations."""
|
|
708
|
+
|
|
709
|
+
@staticmethod
|
|
710
|
+
def detect_outliers(df: pd.DataFrame, columns: List[str] = None, method: str = 'iqr') -> Dict[str, List]:
|
|
711
|
+
"""
|
|
712
|
+
Detect outliers in numerical columns.
|
|
713
|
+
|
|
714
|
+
Args:
|
|
715
|
+
df: DataFrame to analyze
|
|
716
|
+
columns: Specific columns to check (default: all numerical)
|
|
717
|
+
method: Method to use ('iqr', 'zscore', 'isolation_forest')
|
|
718
|
+
|
|
719
|
+
Returns:
|
|
720
|
+
Dictionary with column names as keys and outlier indices as values
|
|
721
|
+
"""
|
|
722
|
+
if columns is None:
|
|
723
|
+
columns = df.select_dtypes(include=['number']).columns.tolist()
|
|
724
|
+
|
|
725
|
+
outliers = {}
|
|
726
|
+
|
|
727
|
+
outlier_indices = []
|
|
728
|
+
|
|
729
|
+
for col in columns:
|
|
730
|
+
if method == 'iqr':
|
|
731
|
+
Q1 = df[col].quantile(0.25)
|
|
732
|
+
Q3 = df[col].quantile(0.75)
|
|
733
|
+
IQR = Q3 - Q1
|
|
734
|
+
lower_bound = Q1 - 1.5 * IQR
|
|
735
|
+
upper_bound = Q3 + 1.5 * IQR
|
|
736
|
+
outlier_indices = df[(df[col] < lower_bound) | (df[col] > upper_bound)].index.tolist()
|
|
737
|
+
|
|
738
|
+
elif method == 'zscore':
|
|
739
|
+
from scipy import stats
|
|
740
|
+
z_scores = np.abs(stats.zscore(df[col].dropna()))
|
|
741
|
+
outlier_indices = df[col].dropna()[z_scores > 3].index.tolist()
|
|
742
|
+
|
|
743
|
+
elif method == 'isolation_forest':
|
|
744
|
+
from sklearn.ensemble import IsolationForest
|
|
745
|
+
iso_forest = IsolationForest(contamination=0.1, random_state=42)
|
|
746
|
+
outlier_pred = iso_forest.fit_predict(df[[col]].dropna())
|
|
747
|
+
outlier_indices = df[col].dropna()[outlier_pred == -1].index.tolist()
|
|
748
|
+
|
|
749
|
+
outliers[col] = outlier_indices
|
|
750
|
+
|
|
751
|
+
return outliers
|
|
752
|
+
|
|
753
|
+
@staticmethod
|
|
754
|
+
def suggest_data_types(df: pd.DataFrame) -> Dict[str, str]:
|
|
755
|
+
"""
|
|
756
|
+
Suggest optimal data types for DataFrame columns.
|
|
757
|
+
|
|
758
|
+
Args:
|
|
759
|
+
df: DataFrame to analyze
|
|
760
|
+
|
|
761
|
+
Returns:
|
|
762
|
+
Dictionary with column names and suggested data types
|
|
763
|
+
"""
|
|
764
|
+
suggestions = {}
|
|
765
|
+
|
|
766
|
+
for col in df.columns:
|
|
767
|
+
current_dtype = str(df[col].dtype)
|
|
768
|
+
|
|
769
|
+
# Check if numeric column can be downcasted
|
|
770
|
+
if df[col].dtype in ['int64', 'float64']:
|
|
771
|
+
if df[col].dtype == 'int64':
|
|
772
|
+
max_val = df[col].max()
|
|
773
|
+
min_val = df[col].min()
|
|
774
|
+
|
|
775
|
+
if min_val >= 0 and max_val <= 255:
|
|
776
|
+
suggestions[col] = 'uint8'
|
|
777
|
+
elif min_val >= -128 and max_val <= 127:
|
|
778
|
+
suggestions[col] = 'int8'
|
|
779
|
+
elif min_val >= -32768 and max_val <= 32767:
|
|
780
|
+
suggestions[col] = 'int16'
|
|
781
|
+
elif min_val >= -2147483648 and max_val <= 2147483647:
|
|
782
|
+
suggestions[col] = 'int32'
|
|
783
|
+
else:
|
|
784
|
+
suggestions[col] = current_dtype
|
|
785
|
+
|
|
786
|
+
elif df[col].dtype == 'float64':
|
|
787
|
+
# Check if can be converted to float32
|
|
788
|
+
if df[col].max() <= np.finfo(np.float32).max and df[col].min() >= np.finfo(np.float32).min:
|
|
789
|
+
suggestions[col] = 'float32'
|
|
790
|
+
else:
|
|
791
|
+
suggestions[col] = current_dtype
|
|
792
|
+
|
|
793
|
+
# Check if object column should be category
|
|
794
|
+
elif df[col].dtype == 'object':
|
|
795
|
+
unique_ratio = df[col].nunique() / len(df)
|
|
796
|
+
if unique_ratio < 0.5: # Less than 50% unique values
|
|
797
|
+
suggestions[col] = 'category'
|
|
798
|
+
else:
|
|
799
|
+
suggestions[col] = current_dtype
|
|
800
|
+
|
|
801
|
+
else:
|
|
802
|
+
suggestions[col] = current_dtype
|
|
803
|
+
|
|
804
|
+
return suggestions
|
|
805
|
+
|
|
806
|
+
@staticmethod
|
|
807
|
+
def memory_usage_analysis(df: pd.DataFrame) -> Dict[str, Any]:
|
|
808
|
+
"""
|
|
809
|
+
Analyze memory usage of DataFrame.
|
|
810
|
+
|
|
811
|
+
Args:
|
|
812
|
+
df: DataFrame to analyze
|
|
813
|
+
|
|
814
|
+
Returns:
|
|
815
|
+
Dictionary with memory usage analysis
|
|
816
|
+
"""
|
|
817
|
+
memory_usage = df.memory_usage(deep=True)
|
|
818
|
+
total_memory = memory_usage.sum()
|
|
819
|
+
|
|
820
|
+
analysis = {
|
|
821
|
+
'total_memory_mb': total_memory / 1024**2,
|
|
822
|
+
'column_memory': memory_usage.to_dict(),
|
|
823
|
+
'largest_columns': memory_usage.nlargest(5).to_dict(),
|
|
824
|
+
'memory_by_dtype': df.memory_usage(deep=True).groupby(df.dtypes).sum().to_dict()
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
return analysis
|