ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
|
@@ -0,0 +1,1159 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database Query Tool migrated to use AbstractTool framework.
|
|
3
|
+
"""
|
|
4
|
+
import re
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import asyncio
|
|
8
|
+
from typing import Dict, Optional, Any, Tuple, Union, Literal, List
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import pandas as pd
|
|
13
|
+
from pydantic import BaseModel, Field, field_validator
|
|
14
|
+
from asyncdb import AsyncDB
|
|
15
|
+
from navconfig import config, BASE_DIR
|
|
16
|
+
# from querysource.conf import default_dsn, INFLUX_TOKEN
|
|
17
|
+
from .abstract import AbstractTool
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class QueryLanguage(str, Enum):
    """Supported query languages.

    Inherits from ``str`` as well as ``Enum`` so members compare equal to
    their plain string values (e.g. ``QueryLanguage.SQL == "sql"``) and
    serialize naturally.
    """
    SQL = "sql"        # Relational databases (PostgreSQL, MySQL, BigQuery, ...)
    FLUX = "flux"      # InfluxDB
    MQL = "mql"        # MongoDB Query Language
    CYPHER = "cypher"  # Neo4j
    JSON = "json"      # Elasticsearch/OpenSearch JSON DSL
    AQL = "aql"        # ArangoDB Query Language
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DriverInfo:
    """Registry of supported database drivers and their characteristics.

    ``DRIVER_MAP`` is keyed by canonical driver name.  Every entry carries
    the same core keys — ``name``, ``query_language``, ``description``,
    ``aliases`` and ``asyncdb_driver`` — so lookups and ``list_drivers()``
    yield uniform records.  Mongo-compatible backends additionally carry a
    ``dbtype`` key because they all share the ``'mongo'`` asyncdb driver;
    the Elasticsearch family additionally carries ``supports_limit``.
    """

    DRIVER_MAP = {
        # SQL-based databases
        'pg': {
            'name': 'PostgreSQL',
            'query_language': QueryLanguage.SQL,
            'description': 'PostgreSQL database',
            'aliases': ['postgres', 'postgresql'],
            'asyncdb_driver': 'pg'
        },
        'mysql': {
            'name': 'MySQL',
            'query_language': QueryLanguage.SQL,
            'description': 'MySQL/MariaDB database',
            'aliases': ['mariadb'],
            'asyncdb_driver': 'mysql'
        },
        'bigquery': {
            'name': 'Google BigQuery',
            'query_language': QueryLanguage.SQL,
            'description': 'Google BigQuery data warehouse',
            'aliases': ['bq'],
            'asyncdb_driver': 'bigquery'
        },
        'sqlite': {
            'name': 'SQLite',
            'query_language': QueryLanguage.SQL,
            'description': 'SQLite embedded database',
            'aliases': [],
            'asyncdb_driver': 'sqlite'
        },
        'oracle': {
            'name': 'Oracle Database',
            'query_language': QueryLanguage.SQL,
            'description': 'Oracle Database',
            'aliases': [],
            'asyncdb_driver': 'oracle'
        },
        'mssql': {
            'name': 'Microsoft SQL Server',
            'query_language': QueryLanguage.SQL,
            'description': 'Microsoft SQL Server database',
            'aliases': ['sqlserver'],
            'asyncdb_driver': 'mssql'
        },
        'clickhouse': {
            'name': 'ClickHouse',
            'query_language': QueryLanguage.SQL,
            'description': 'ClickHouse OLAP database',
            'aliases': [],
            'asyncdb_driver': 'clickhouse'
        },
        'duckdb': {
            'name': 'DuckDB',
            'query_language': QueryLanguage.SQL,
            'description': 'DuckDB embedded analytical database',
            'aliases': [],
            'asyncdb_driver': 'duckdb'
        },
        # Non-SQL databases
        'influx': {
            'name': 'InfluxDB',
            'query_language': QueryLanguage.FLUX,
            'description': 'InfluxDB time-series database (uses Flux query language)',
            'aliases': ['influxdb'],
            'asyncdb_driver': 'influx'
        },
        # MongoDB and compatible databases (all use the 'mongo' driver in asyncdb)
        'mongo': {
            'name': 'MongoDB',
            'query_language': QueryLanguage.MQL,
            'description': 'MongoDB document-oriented database',
            # Canonical names are matched before aliases in normalize_driver(),
            # so the previous 'mongo' self-alias was redundant and is dropped.
            'aliases': [],
            'asyncdb_driver': 'mongo',
            'dbtype': 'mongodb'
        },
        'atlas': {
            'name': 'MongoDB Atlas',
            'query_language': QueryLanguage.MQL,
            'description': 'MongoDB Atlas cloud database',
            'aliases': [],
            'asyncdb_driver': 'mongo',
            'dbtype': 'atlas'
        },
        'documentdb': {
            'name': 'DocumentDB',
            'query_language': QueryLanguage.MQL,
            'description': 'AWS DocumentDB (MongoDB-compatible) document database',
            'aliases': [],
            'asyncdb_driver': 'mongo',  # Uses mongo driver with dbtype parameter
            'dbtype': 'documentdb'
        },
        # Elasticsearch family.  These entries now carry the same core keys
        # as every other driver so get_driver_info()/list_drivers() return
        # uniform records (previously they were missing description/aliases/
        # asyncdb_driver and relied on fallbacks).
        'elastic': {
            'name': 'Elasticsearch/OpenSearch',
            'query_language': QueryLanguage.JSON,
            'description': 'Elasticsearch/OpenSearch search engine',
            'aliases': [],
            'asyncdb_driver': 'elastic',
            'supports_limit': True
        },
        'elasticsearch': {
            'name': 'Elasticsearch',
            'query_language': QueryLanguage.JSON,
            'description': 'Elasticsearch search engine',
            'aliases': [],
            'asyncdb_driver': 'elasticsearch',
            'supports_limit': True
        },
        'opensearch': {
            'name': 'OpenSearch',
            'query_language': QueryLanguage.JSON,
            'description': 'OpenSearch search engine',
            'aliases': [],
            'asyncdb_driver': 'opensearch',
            'supports_limit': True
        },
    }

    @classmethod
    def normalize_driver(cls, driver: str) -> str:
        """Resolve *driver* (case-insensitive, possibly an alias) to its canonical name.

        Unknown names are returned lowercased and unchanged; callers then
        fall back to SQL defaults via :meth:`get_driver_info`.
        """
        driver_lower = driver.lower()

        # Canonical names win over aliases.
        if driver_lower in cls.DRIVER_MAP:
            return driver_lower

        # Check aliases (e.g. 'postgres' -> 'pg', 'bq' -> 'bigquery').
        for canonical_name, info in cls.DRIVER_MAP.items():
            if driver_lower in info.get('aliases', []):
                return canonical_name

        return driver_lower

    @classmethod
    def get_asyncdb_driver(cls, driver: str) -> str:
        """Return the actual asyncdb driver name (defaults to *driver* if unknown)."""
        driver = cls.normalize_driver(driver)
        driver_info = cls.DRIVER_MAP.get(driver, {})
        return driver_info.get('asyncdb_driver', driver)

    @classmethod
    def get_dbtype(cls, driver: str) -> Optional[str]:
        """Return the ``dbtype`` parameter for drivers that need it (mongo-based), else None."""
        driver = cls.normalize_driver(driver)
        driver_info = cls.DRIVER_MAP.get(driver, {})
        return driver_info.get('dbtype')

    @classmethod
    def get_query_language(cls, driver: str) -> QueryLanguage:
        """Return the query language for *driver* (defaults to SQL if unknown)."""
        driver = cls.normalize_driver(driver)
        driver_info = cls.DRIVER_MAP.get(driver, {})
        return driver_info.get('query_language', QueryLanguage.SQL)

    @classmethod
    def get_driver_info(cls, driver: str) -> Dict[str, Any]:
        """Return full information about *driver*; unknown drivers get a SQL-shaped stub."""
        driver = cls.normalize_driver(driver)
        return cls.DRIVER_MAP.get(driver, {
            'name': driver,
            'query_language': QueryLanguage.SQL,
            'description': f'{driver} database',
            'aliases': [],
            'asyncdb_driver': driver
        })

    @classmethod
    def list_drivers(cls) -> List[Dict[str, Any]]:
        """List all supported drivers, each record flattened with a 'driver' key."""
        return [
            {
                'driver': driver,
                **info
            }
            for driver, info in cls.DRIVER_MAP.items()
        ]
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class DatabaseQueryArgs(BaseModel):
|
|
204
|
+
"""Arguments schema for DatabaseQueryTool."""
|
|
205
|
+
|
|
206
|
+
driver: str = Field(
|
|
207
|
+
...,
|
|
208
|
+
description=(
|
|
209
|
+
"Database driver to use. Supported drivers:\n"
|
|
210
|
+
"SQL-based: 'pg' (PostgreSQL), 'mysql', 'bigquery', 'sqlite', 'oracle', "
|
|
211
|
+
"'mssql' (Microsoft SQL Server), 'clickhouse', 'duckdb'\n"
|
|
212
|
+
"Time-series: 'influx' (InfluxDB - uses Flux query language)\n"
|
|
213
|
+
"Document-based: 'mongo' (MongoDB), 'atlas' (MongoDB Atlas), 'documentdb' (AWS DocumentDB)\n"
|
|
214
|
+
"Note: Query syntax must match the driver's query language."
|
|
215
|
+
)
|
|
216
|
+
)
|
|
217
|
+
query: Union[str, Dict[str, Any]] = Field(
|
|
218
|
+
...,
|
|
219
|
+
description=(
|
|
220
|
+
"Query to execute for data retrieval. Query syntax depends on the driver:\n\n"
|
|
221
|
+
"SQL drivers (pg, mysql, bigquery, etc.):\n"
|
|
222
|
+
" Use SQL SELECT statements, e.g.: SELECT * FROM users WHERE age > 25\n\n"
|
|
223
|
+
"InfluxDB (influx):\n"
|
|
224
|
+
" Use Flux query language, e.g.: from(bucket:\"my-bucket\") |> range(start: -1h)\n\n"
|
|
225
|
+
"MongoDB/DocumentDB (mongo, atlas, documentdb):\n"
|
|
226
|
+
" Provide the MongoDB query filter as JSON.\n"
|
|
227
|
+
" The collection_name must be specified in the 'credentials' parameter, OR in the query.\n"
|
|
228
|
+
" Examples:\n"
|
|
229
|
+
" - Filter only: {\"status\": \"active\"}\n"
|
|
230
|
+
" - Command style: { \"find\": \"users\", \"filter\": {\"status\": \"active\"}, \"limit\": 10, \"sort\": {\"created_at\": -1} }\n\n"
|
|
231
|
+
"Only data retrieval queries are allowed - no DDL or DML operations."
|
|
232
|
+
)
|
|
233
|
+
)
|
|
234
|
+
credentials: Optional[Dict[str, Any]] = Field(
|
|
235
|
+
default=None,
|
|
236
|
+
description=(
|
|
237
|
+
"Dictionary containing database connection credentials (optional if defaults available).\n\n"
|
|
238
|
+
"For SQL databases:\n"
|
|
239
|
+
" {'host': 'localhost', 'port': 5432, 'database': 'mydb', 'user': 'admin', 'password': 'secret'}\n\n"
|
|
240
|
+
"For MongoDB/DocumentDB (mongo, atlas, documentdb):\n"
|
|
241
|
+
" REQUIRED: 'collection_name' - The collection to query\n"
|
|
242
|
+
" Example: {\n"
|
|
243
|
+
" 'host': 'cluster.docdb.amazonaws.com',\n"
|
|
244
|
+
" 'port': 27017,\n"
|
|
245
|
+
" 'database': 'mydb',\n"
|
|
246
|
+
" 'collection_name': 'users', # REQUIRED for mongo-based drivers\n"
|
|
247
|
+
" 'username': 'admin',\n"
|
|
248
|
+
" 'password': 'secret',\n"
|
|
249
|
+
" 'ssl': True, # For DocumentDB\n"
|
|
250
|
+
" 'tlsCAFile': '/path/to/cert.pem' # For DocumentDB\n"
|
|
251
|
+
" }"
|
|
252
|
+
)
|
|
253
|
+
)
|
|
254
|
+
dsn: Optional[str] = Field(
|
|
255
|
+
default=None,
|
|
256
|
+
description="Optional DSN string for database connection (overrides credentials if provided)"
|
|
257
|
+
)
|
|
258
|
+
output_format: Literal["pandas", "json", 'native', 'arrow'] = Field(
|
|
259
|
+
"pandas",
|
|
260
|
+
description="Output format for query results: 'pandas' for DataFrame, 'json' for JSON string, 'native' for native format, 'arrow' for Apache Arrow format"
|
|
261
|
+
)
|
|
262
|
+
query_timeout: int = Field(
|
|
263
|
+
300,
|
|
264
|
+
description="Query timeout in seconds (default: 300)"
|
|
265
|
+
)
|
|
266
|
+
max_rows: int = Field(
|
|
267
|
+
10000,
|
|
268
|
+
description="Maximum number of rows to return (default: 10000)"
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
@field_validator('query_timeout')
@classmethod
def validate_timeout(cls, v):
    """Reject non-positive timeout values."""
    if v > 0:
        return v
    raise ValueError("Query timeout must be positive")
|
277
|
+
|
|
278
|
+
@field_validator('max_rows')
@classmethod
def validate_max_rows(cls, v):
    """Reject non-positive row limits."""
    if v > 0:
        return v
    raise ValueError("Max rows must be positive")
|
|
284
|
+
|
|
285
|
+
@field_validator('driver')
@classmethod
def validate_driver(cls, v):
    """Normalize the driver alias and ensure it maps to a supported driver."""
    canonical = DriverInfo.normalize_driver(v)
    if canonical in DriverInfo.DRIVER_MAP:
        return canonical
    supported = list(DriverInfo.DRIVER_MAP.keys())
    raise ValueError(f"Database driver must be one of: {supported}")
|
|
294
|
+
|
|
295
|
+
@field_validator('credentials', mode='before')
@classmethod
def validate_credentials(cls, v):
    """Ensure credentials is either None, a dict, or a DSN string."""
    # A bare DSN string is wrapped so downstream code always sees a dict.
    return {"dsn": v} if isinstance(v, str) else v
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class QueryValidator:
    """Validates queries based on query language.

    Every validator returns a dict with:
      - 'is_safe' (bool): whether the query passed validation
      - 'message' (str): human-readable result
      - 'suggestions' (list, on failure only): remediation hints
    """

    @staticmethod
    def validate_sql_query(query: str) -> Dict[str, Any]:
        """Validate SQL query for safety (read-only statements only)."""
        query_upper = query.upper().strip()

        # Remove comments and extra whitespace before scanning.
        # '--[^\n]*' (rather than '--.*?\n') also strips a trailing line
        # comment that is not terminated by a newline, so words that appear
        # only inside comments are not misflagged as dangerous operations.
        query_cleaned = re.sub(r'--[^\n]*', '', query_upper)
        query_cleaned = re.sub(r'/\*.*?\*/', '', query_cleaned, flags=re.DOTALL)
        query_cleaned = ' '.join(query_cleaned.split())

        # Dangerous operations to block (DDL, DML, administrative).
        dangerous_operations = [
            'CREATE', 'ALTER', 'DROP', 'TRUNCATE',
            'INSERT', 'UPDATE', 'DELETE', 'MERGE',
            'GRANT', 'REVOKE', 'EXEC', 'EXECUTE',
            'CALL', 'DECLARE', 'SET @'
        ]

        # Check for dangerous operations anywhere in the cleaned query.
        for operation in dangerous_operations:
            if re.search(rf'\b{operation}\b', query_cleaned):
                return {
                    'is_safe': False,
                    'message': f"SQL query contains dangerous operation: {operation}",
                    'suggestions': [
                        "Use SELECT statements for data retrieval",
                        "Use aggregate functions (COUNT, SUM, AVG) for analysis",
                        "Use WHERE clauses to filter data"
                    ]
                }

        # Check if query starts with SELECT or another safe operation.
        safe_starts = ['SELECT', 'WITH', 'SHOW', 'DESCRIBE', 'DESC', 'EXPLAIN']
        if not any(query_cleaned.startswith(safe_op) for safe_op in safe_starts):
            # (debug print removed: it leaked query text to stdout)
            return {
                'is_safe': False,
                'message': "SQL query should start with SELECT, WITH, SHOW, DESCRIBE, or EXPLAIN",
                'suggestions': [
                    "Start queries with SELECT for data retrieval",
                    "Use WITH clauses for complex queries with CTEs",
                    "Use EXPLAIN for query analysis"
                ]
            }

        return {'is_safe': True, 'message': 'SQL query validation passed'}

    @staticmethod
    def validate_flux_query(query: str) -> Dict[str, Any]:
        """Validate InfluxDB Flux query for safety (read-only pipelines only)."""
        query_lower = query.lower().strip()

        # Flux queries typically start with from() or import.
        if not (query_lower.startswith('from(') or query_lower.startswith('import')):
            return {
                'is_safe': False,
                'message': "Flux query should typically start with from() or import",
                'suggestions': [
                    "Use from(bucket: \"...\") to query data",
                    "Chain with |> range() to specify time range",
                    "Use |> filter() to filter data"
                ]
            }

        # Check for Flux write/delete operations.
        dangerous_patterns = [
            r'\bto\s*\(',       # to() function writes data
            r'\bdelete\s*\(',   # delete() function
        ]

        for pattern in dangerous_patterns:
            if re.search(pattern, query_lower):
                return {
                    'is_safe': False,
                    'message': "Flux query contains write/delete operation",
                    'suggestions': [
                        "Use queries for data retrieval only",
                        "Use from() |> range() |> filter() for reading data"
                    ]
                }

        return {'is_safe': True, 'message': 'Flux query validation passed'}

    @classmethod
    def validate_query(cls, query: str, query_language: "QueryLanguage") -> Dict[str, Any]:
        """Dispatch to the validator matching the query language.

        `QueryLanguage` is declared earlier in this module; the annotation is
        a forward reference so this class can also be loaded in isolation.
        """
        if query_language == QueryLanguage.SQL:
            return cls.validate_sql_query(query)
        elif query_language == QueryLanguage.FLUX:
            return cls.validate_flux_query(query)
        elif query_language == QueryLanguage.JSON:
            return cls.validate_elasticsearch_query(query)
        else:
            # For unknown query languages, do minimal validation.
            return {
                'is_safe': True,
                'message': f'Basic validation passed for {query_language.value}'
            }

    @staticmethod
    def validate_elasticsearch_query(query: str) -> Dict[str, Any]:
        """Validate Elasticsearch query (JSON DSL format).

        Only checks that the query is a JSON object; the search DSL itself is
        treated as read-only.
        """
        try:
            # Parse the query to ensure it's valid JSON.
            query_dict = json.loads(query) if isinstance(query, str) else query

            if not isinstance(query_dict, dict):
                return {
                    'is_safe': False,
                    'message': 'Query must be a valid JSON object',
                    'suggestions': ['Ensure query is a valid JSON object']
                }
            # No per-clause checks: the search DSL is read-only by design.
            return {
                'is_safe': True,
                'message': 'Elasticsearch query validation passed'
            }
        except json.JSONDecodeError as e:
            return {
                'is_safe': False,
                'message': f'Invalid JSON: {str(e)}',
                'suggestions': ['Fix JSON syntax errors']
            }
        except Exception as e:
            return {
                'is_safe': False,
                'message': f'Query validation failed: {str(e)}',
                'suggestions': []
            }
|
|
439
|
+
|
|
440
|
+
class DatabaseQueryTool(AbstractTool):
|
|
441
|
+
"""
|
|
442
|
+
Multi-language Database Query Tool for executing queries across multiple database systems.
|
|
443
|
+
|
|
444
|
+
This tool can execute SELECT queries on various databases including BigQuery, PostgreSQL,
|
|
445
|
+
MySQL, InfluxDB, SQLite, Oracle, and others supported by asyncdb library.
|
|
446
|
+
|
|
447
|
+
Supports multiple query languages:
|
|
448
|
+
- SQL: PostgreSQL (pg), MySQL, BigQuery, SQLite, Oracle, MS SQL Server (mssql),
|
|
449
|
+
ClickHouse, DuckDB
|
|
450
|
+
- Flux: InfluxDB (influx) - time-series database with Flux query language
|
|
451
|
+
- DocumentDB: DocumentDB (documentdb) - document-oriented database
|
|
452
|
+
|
|
453
|
+
DRIVER REFERENCE:
|
|
454
|
+
- 'pg' or 'postgres' or 'postgresql' → PostgreSQL
|
|
455
|
+
- 'mysql' or 'mariadb' → MySQL/MariaDB
|
|
456
|
+
- 'bigquery' or 'bq' → Google BigQuery
|
|
457
|
+
- 'mssql' or 'sqlserver' → Microsoft SQL Server
|
|
458
|
+
- 'influx' or 'influxdb' → InfluxDB (uses Flux, not SQL)
|
|
459
|
+
- 'sqlite' → SQLite
|
|
460
|
+
- 'oracle' → Oracle Database
|
|
461
|
+
- 'clickhouse' → ClickHouse
|
|
462
|
+
- 'duckdb' → DuckDB
|
|
463
|
+
- 'documentdb' → DocumentDB (MongoDB-compatible)
|
|
464
|
+
- 'elastic' → Elasticsearch (Elasticsearch/OpenSearch)
|
|
465
|
+
|
|
466
|
+
QUERY LANGUAGE EXAMPLES:
|
|
467
|
+
|
|
468
|
+
SQL (pg, mysql, bigquery, etc.):
|
|
469
|
+
SELECT column1, column2 FROM table WHERE condition
|
|
470
|
+
|
|
471
|
+
Flux (influx):
|
|
472
|
+
from(bucket: "my-bucket")
|
|
473
|
+
|> range(start: -12h)
|
|
474
|
+
|> filter(fn: (r) => r["_measurement"] == "temperature")
|
|
475
|
+
|> filter(fn: (r) => r["location"] == "room1")
|
|
476
|
+
|
|
477
|
+
DocumentDB:
|
|
478
|
+
{ find: "collection", filter: { field: "value" } }
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
IMPORTANT: This tool is designed for data retrieval and analysis queries (SELECT statements).
|
|
482
|
+
It should NOT be used for:
|
|
483
|
+
- DDL operations (CREATE, ALTER, DROP tables/schemas)
|
|
484
|
+
- DML operations (INSERT, UPDATE, DELETE data)
|
|
485
|
+
- Administrative operations (GRANT, REVOKE permissions)
|
|
486
|
+
- Database structure modifications
|
|
487
|
+
|
|
488
|
+
Use this tool for:
|
|
489
|
+
- Data exploration and analysis
|
|
490
|
+
- Generating reports from existing data
|
|
491
|
+
- Aggregating and summarizing information
|
|
492
|
+
- Filtering and searching database records
|
|
493
|
+
- Joining data from multiple tables for analysis
|
|
494
|
+
"""
|
|
495
|
+
|
|
496
|
+
name = "database_query"
|
|
497
|
+
description = (
|
|
498
|
+
"Execute queries on various databases for data retrieval. "
|
|
499
|
+
"Supports SQL (PostgreSQL, MySQL, BigQuery, etc.), InfluxDB (Flux), "
|
|
500
|
+
"and MongoDB/DocumentDB (MQL). For MongoDB/DocumentDB: provide collection_name "
|
|
501
|
+
"in credentials and only the query filter in the query parameter. "
|
|
502
|
+
"Returns pandas DataFrame or JSON. Read-only operations only."
|
|
503
|
+
)
|
|
504
|
+
args_schema = DatabaseQueryArgs
|
|
505
|
+
|
|
506
|
+
def __init__(self, **kwargs):
    """Initialize the Database Query tool.

    All keyword arguments are forwarded to AbstractTool.
    """
    super().__init__(**kwargs)
    # Per-instance credential defaults; empty until populated by callers.
    self.default_credentials = {}
|
|
510
|
+
|
|
511
|
+
def _default_output_dir(self) -> Optional[Path]:
|
|
512
|
+
"""Get the default output directory for database query results."""
|
|
513
|
+
return self.static_dir / "database_queries" if self.static_dir else None
|
|
514
|
+
|
|
515
|
+
def _validate_query_safety(self, query: str, driver: str) -> Dict[str, Any]:
    """Validate query safety based on the driver's query language.

    Maps the driver to its query language (SQL / Flux / JSON) and delegates
    to the matching QueryValidator check. Returns the validator's result
    dict ('is_safe', 'message', optional 'suggestions').
    """
    query_language = DriverInfo.get_query_language(driver)
    return QueryValidator.validate_query(query, query_language)
|
|
519
|
+
|
|
520
|
+
def _get_default_credentials(
    self,
    driver: str,
    provided_credentials: Optional[Dict[str, Any]] = None
) -> Tuple[Dict[str, Any], Optional[str]]:
    """
    Get default credentials for the specified database driver.
    Handles mongo-based drivers (mongodb, atlas, documentdb) correctly.

    Returns a (credentials, dsn) tuple; dsn is only populated for the
    'pg' driver (from querysource's default_dsn), otherwise None.
    Raises ValueError when no defaults are configured for the driver.
    """
    # Imported lazily so the module loads without querysource configured.
    from querysource.conf import default_dsn, INFLUX_TOKEN
    dsn = None
    normalized_driver = DriverInfo.normalize_driver(driver)
    if driver == 'postgresql':
        driver = 'pg'
    if driver == 'pg':
        dsn = default_dsn

    # Get dbtype for mongo-based drivers
    # NOTE(review): dbtype is computed but never used below — presumably
    # left over from an earlier revision; confirm before removing.
    dbtype = DriverInfo.get_dbtype(normalized_driver)
    # Environment-driven defaults per driver; None values are stripped at
    # the end so unset config keys simply disappear.
    default_credentials = {
        'bigquery': {
            'credentials_file': config.get('GOOGLE_APPLICATION_CREDENTIALS'),
            'project_id': config.get('GOOGLE_CLOUD_PROJECT'),
        },
        'pg': {
            'host': config.get('POSTGRES_HOST', fallback='localhost'),
            'port': config.get('POSTGRES_PORT', fallback='5432'),
            'database': config.get('POSTGRES_DB', fallback='postgres'),
            'user': config.get('POSTGRES_USER', fallback='postgres'),
            'password': config.get('POSTGRES_PASSWORD'),
        },
        'mysql': {
            'host': config.get('MYSQL_HOST', fallback='localhost'),
            'port': config.get('MYSQL_PORT', fallback='3306'),
            'database': config.get('MYSQL_DATABASE', fallback='mysql'),
            'user': config.get('MYSQL_USER', fallback='root'),
            'password': config.get('MYSQL_PASSWORD'),
        },
        'sqlite': {
            'database': config.get('SQLITE_DATABASE', fallback=':memory:'),
        },
        'influx': {
            'host': config.get('INFLUX_HOST', fallback='localhost'),
            'port': config.get('INFLUX_PORT', fallback='8086'),
            'database': config.get('INFLUX_DATABASE', fallback='default'),
            'username': config.get('INFLUX_USERNAME'),
            'password': config.get('INFLUX_PASSWORD'),
            'token': INFLUX_TOKEN,
            'org': config.get('INFLUX_ORG', fallback='my-org'),
        },
        'oracle': {
            'host': config.get('ORACLE_HOST', fallback='localhost'),
            'port': config.get('ORACLE_PORT', fallback='1521'),
            'service_name': config.get('ORACLE_SERVICE_NAME', fallback='xe'),
            'user': config.get('ORACLE_USER'),
            'password': config.get('ORACLE_PASSWORD'),
        },
        'mssql': {
            'host': config.get('MSSQL_HOST', fallback='localhost'),
            'port': config.get('MSSQL_PORT', fallback='1433'),
            'database': config.get('MSSQL_DATABASE', fallback='master'),
            'user': config.get('MSSQL_USER'),
            'password': config.get('MSSQL_PASSWORD'),
        },
        # MongoDB - standard configuration
        'mongo': {
            'driver': 'mongo',
            'host': config.get('MONGODB_HOST', fallback='localhost'),
            'port': config.get('MONGODB_PORT', fallback='27017'),
            'database': config.get('MONGODB_DATABASE', fallback='test'),
            'username': config.get('MONGODB_USER'),
            'password': config.get('MONGODB_PASSWORD'),
            'dbtype': 'mongodb'
        },
        # MongoDB Atlas - cloud configuration
        'atlas': {
            'driver': 'mongo',
            'host': config.get('ATLAS_HOST'),
            'port': config.get('ATLAS_PORT', fallback='27017'),
            'database': config.get('ATLAS_DATABASE', fallback='test'),
            'username': config.get('ATLAS_USER'),
            'password': config.get('ATLAS_PASSWORD'),
            'dbtype': 'atlas'
        },
        # AWS DocumentDB - MongoDB-compatible with SSL
        'documentdb': {
            'driver': 'mongo',
            'host': config.get('DOCUMENTDB_HOSTNAME', fallback='localhost'),
            'port': config.get('DOCUMENTDB_PORT', fallback='27017'),
            'database': config.get('DOCUMENTDB_DATABASE', fallback='test'),
            'username': config.get('DOCUMENTDB_USERNAME'),
            'password': config.get('DOCUMENTDB_PASSWORD'),
            'tlsCAFile': BASE_DIR.joinpath('env', "global-bundle.pem"),
            'ssl': config.get('DOCUMENTDB_USE_SSL', fallback=True),
            'collection_name': config.get('DOCUMENTDB_COLLECTION', fallback='mycollection'),
            'dbtype': 'documentdb'
        },
        # Elasticsearch/OpenSearch
        'elastic': {
            'host': config.get('ELASTICSEARCH_HOST', fallback='localhost'),
            'port': config.get('ELASTICSEARCH_PORT', fallback='9200'),
            'db': config.get('ELASTICSEARCH_INDEX', fallback='logstash-*'),
            'user': config.get('ELASTICSEARCH_USER'),
            'password': config.get('ELASTICSEARCH_PASSWORD'),
            'protocol': config.get('ELASTICSEARCH_PROTOCOL', fallback='http'),
            'client_type': config.get('ELASTICSEARCH_CLIENT_TYPE', fallback='auto')
        },
        'elasticsearch': {
            'host': config.get('ELASTICSEARCH_HOST', fallback='localhost'),
            'port': config.get('ELASTICSEARCH_PORT', fallback='9200'),
            'db': config.get('ELASTICSEARCH_INDEX', fallback='logstash-*'),
            'user': config.get('ELASTICSEARCH_USER'),
            'password': config.get('ELASTICSEARCH_PASSWORD'),
            'protocol': config.get('ELASTICSEARCH_PROTOCOL', fallback='http'),
            'client_type': config.get('ELASTICSEARCH_CLIENT_TYPE', fallback='elasticsearch')
        },
        'opensearch': {
            'host': config.get('ELASTICSEARCH_HOST', fallback='localhost'),
            'port': config.get('ELASTICSEARCH_PORT', fallback='9200'),
            'db': config.get('ELASTICSEARCH_INDEX', fallback='logstash-*'),
            'user': config.get('ELASTICSEARCH_USER'),
            'password': config.get('ELASTICSEARCH_PASSWORD'),
            'protocol': config.get('ELASTICSEARCH_PROTOCOL', fallback='http'),
            'client_type': 'opensearch'
        },
    }

    if normalized_driver not in default_credentials:
        raise ValueError(
            f"No default credentials configured for database driver: {normalized_driver}"
        )

    creds = default_credentials[normalized_driver].copy()

    # Override with provided credentials if any
    if provided_credentials:
        creds.update(provided_credentials)

    # Remove None values
    creds = {k: v for k, v in creds.items() if v is not None}
    return creds, dsn
|
|
661
|
+
|
|
662
|
+
def _get_credentials(
|
|
663
|
+
self,
|
|
664
|
+
driver: str,
|
|
665
|
+
provided_credentials: Optional[Dict[str, Any]]
|
|
666
|
+
) -> Tuple[Dict[str, Any], str]:
|
|
667
|
+
"""Get database credentials, either provided or default."""
|
|
668
|
+
|
|
669
|
+
try:
|
|
670
|
+
default_creds, dsn = self._get_default_credentials(driver, provided_credentials)
|
|
671
|
+
return default_creds, dsn
|
|
672
|
+
except Exception as e:
|
|
673
|
+
raise ValueError(
|
|
674
|
+
f"No credentials provided and could not get default for {driver}: {e}"
|
|
675
|
+
)
|
|
676
|
+
|
|
677
|
+
def _add_row_limit(self, query: str, max_rows: int, driver: str) -> str:
    """Add a row limit to the query based on its query language.

    Returns the query unchanged when max_rows is falsy/non-positive, when
    a limit is already present, or when the query cannot be modified.
    """
    if not max_rows or max_rows <= 0:
        return query

    language = DriverInfo.get_query_language(driver)

    if language == QueryLanguage.SQL:
        if not isinstance(query, str):
            return query

        # Respect an existing LIMIT clause.
        if re.search(r'\bLIMIT\b', query, re.IGNORECASE):
            return query

        # Strip the trailing "tail" (semicolons, whitespace, comments)
        # so LIMIT lands on the statement itself.
        stripped = re.sub(r'(?:\s+|;|--[^\n]*|/\*[\s\S]*?\*/)*$', '', query)
        if not stripped:
            return query
        return f"{stripped} LIMIT {max_rows}"

    if language == QueryLanguage.FLUX:
        if not isinstance(query, str):
            return query
        # Append |> limit() to the pipeline unless one is already present.
        if '|> limit(' in query.lower():
            return query
        return f"{query.rstrip()} |> limit(n: {max_rows})"

    if language == QueryLanguage.JSON:
        # Elasticsearch/OpenSearch JSON DSL: cap the 'size' parameter.
        try:
            payload = json.loads(query) if isinstance(query, str) else query
            if 'size' not in payload or payload['size'] > max_rows:
                payload['size'] = max_rows
            return json.dumps(payload)
        except Exception:
            # Unparseable payloads pass through untouched.
            return query

    # Unknown query languages pass through untouched.
    return query
|
|
725
|
+
|
|
726
|
+
def get_driver_info_list(self) -> List[Dict[str, Any]]:
    """Get detailed information about all supported drivers.

    Thin delegation to DriverInfo.list_drivers().
    """
    return DriverInfo.list_drivers()
|
|
729
|
+
|
|
730
|
+
async def _execute_database_query(
    self,
    driver: str,
    credentials: Dict[str, Any],
    dsn: Optional[str],
    query: str,
    output_format: str,
    timeout: int,
    max_rows: int
) -> Union[pd.DataFrame, str]:
    """Execute the actual database query using AsyncDB.

    Opens a connection (DSN takes precedence over credentials), applies a
    row limit, then dispatches on driver: 'influx' uses recordset format,
    'mongo' parses the query into collection/filter/kwargs, the elastic
    family extracts an 'index' key, everything else runs the query as-is.
    Returns a DataFrame when output_format == 'pandas', else a JSON string.
    Raises RuntimeError on timeout, driver errors, or unexpected results.
    """

    # TODO: combine AsyncDB with Ibis for better abstraction.
    try:
        # DSN (if given) wins over discrete credential parameters.
        db = AsyncDB(driver, dsn=dsn) if dsn else AsyncDB(driver, params=credentials)

        async with await db.connection() as conn:  # pylint: disable=E1101  # noqa
            # Tell the connection how to serialize results.
            conn.output_format(output_format)
            # For mongo-based drivers, ensure we're using the correct database.
            if driver == 'mongo':
                if database_name := credentials.get('database'):
                    await conn.use(database_name)

            # Add row limit to query if not already present.
            # NOTE(review): _execute() already applies _add_row_limit before
            # calling this method; the second application is idempotent
            # because an existing limit is detected and left alone.
            modified_query = self._add_row_limit(query, max_rows, driver)

            if isinstance(modified_query, str):
                self.logger.info(
                    f"Executing query on {driver}: {modified_query[:100]}..."
                )
            else:
                self.logger.info(
                    f"Executing query on {driver}: {modified_query}..."
                )

            # Execute query with timeout.
            if driver == 'influx':
                # InfluxDB requires a different method to execute Flux queries.
                result, errors = await asyncio.wait_for(
                    conn.query(modified_query, frmt='recordset'),
                    timeout=timeout
                )
            elif driver == 'mongo':
                # Mongo-based drivers accept either a plain JSON filter or the
                # command style {'find': 'collection', 'filter': {...}};
                # collection_name may also come from credentials.
                collection_name = credentials.get('collection_name')
                query_dict = {}
                possible_limit = None
                mongo_kwargs = {}

                # 1. Parsing logic
                if modified_query:
                    # Handle legacy 'collection::json_query' format first.
                    if isinstance(modified_query, str) and '::' in modified_query:
                        self.logger.warning(
                            "Detected '::' format in query. For MongoDB/DocumentDB, "
                            "please provide collection_name in credentials or use the "
                            "{'find': 'collection', 'filter': {...}} syntax."
                        )
                        c_name, json_query = modified_query.split('::', 1)
                        collection_name = c_name.strip()
                        try:
                            query_dict = json.loads(json_query.strip()) if json_query.strip() else {}
                        except Exception:
                            query_dict = {}

                    else:
                        # Parse JSON if string.
                        if isinstance(modified_query, str):
                            try:
                                query_dict = json.loads(modified_query.strip())
                            except Exception:
                                # Fallback if not valid JSON, though it should be.
                                query_dict = {}
                        elif isinstance(modified_query, dict):
                            query_dict = modified_query
                        else:
                            query_dict = {}

                # 2. Extract structured command components
                # ('find' names the collection; limit/sort/projection are options).
                if isinstance(query_dict, dict) and ('filter' in query_dict or 'find' in query_dict):
                    if 'find' in query_dict and isinstance(query_dict['find'], str):
                        collection_name = query_dict['find']

                    # Extract limit/sort/projection.
                    if 'limit' in query_dict:
                        possible_limit = query_dict['limit']
                    if 'sort' in query_dict:
                        mongo_kwargs['sort'] = query_dict['sort']
                    if 'projection' in query_dict:
                        mongo_kwargs['projection'] = query_dict['projection']

                    # The actual query is the filter.
                    query_dict = query_dict.get('filter', {})

                # 3. Validation
                if not collection_name:
                    raise ValueError(
                        "For MongoDB/DocumentDB queries, 'collection_name' must be "
                        "provided in the 'credentials', or in the query as "
                        "{'find': 'collection_name', ...}."
                    )

                if not isinstance(query_dict, dict):
                    query_dict = {}

                self.logger.info(
                    f"Querying collection '{collection_name}' with filter: {query_dict}"
                )

                # 4. Enforce Limits
                # Baseline hard limit — mongo results are capped at 20 rows
                # regardless of max_rows (min() below only lowers it further).
                final_max_rows = 20

                # Consider user-provided max_rows.
                if max_rows and max_rows > 0:
                    final_max_rows = min(final_max_rows, max_rows)

                # Consider query-embedded limit.
                if possible_limit is not None and isinstance(possible_limit, int):
                    final_max_rows = min(final_max_rows, possible_limit)

                # NOTE(review): unlike the other branches, this call is not
                # wrapped in asyncio.wait_for, so query_timeout is not
                # enforced for mongo — confirm whether that is intentional.
                result, errors = await conn.query(
                    collection_name=collection_name,
                    query=query_dict,
                    limit=final_max_rows,
                    **mongo_kwargs
                )
            elif driver in ('elastic', 'elasticsearch', 'opensearch'):
                # Handle index parameter for Elastic/OpenSearch.
                query_obj = None
                is_json_str = False

                if isinstance(modified_query, str):
                    try:
                        query_obj = json.loads(modified_query)
                        is_json_str = True
                    except Exception:
                        pass
                elif isinstance(modified_query, dict):
                    query_obj = modified_query

                if isinstance(query_obj, dict):
                    # Extract index if present and switch the connection to it.
                    if 'index' in query_obj:
                        target_index = query_obj.pop('index')
                        if target_index:
                            await conn.use(target_index)
                            self.logger.info(f"Switched to index: {target_index}")

                    # Re-serialize in the same shape the query arrived in.
                    if is_json_str:
                        modified_query = json.dumps(query_obj)
                    else:
                        modified_query = query_obj

                result, errors = await asyncio.wait_for(
                    conn.query(modified_query),
                    timeout=timeout
                )
            else:
                result, errors = await asyncio.wait_for(
                    conn.query(modified_query),
                    timeout=timeout
                )

            # Handle "Empty Data" error from asyncdb's pandas serializer.
            # This is NOT a real error for Elasticsearch/OpenSearch - it just
            # means the query returned 0 hits, which is a valid result.
            if errors:
                error_str = str(errors)
                if "Empty Data" in error_str and driver in ('elastic', 'elasticsearch', 'opensearch'):
                    self.logger.info(
                        f"OpenSearch/Elasticsearch query returned 0 hits (empty result)"
                    )
                    # Return an empty DataFrame or empty JSON instead of raising.
                    if output_format == 'pandas':
                        return pd.DataFrame()
                    else:
                        return "[]"
                else:
                    raise RuntimeError(
                        f"Database query errors: {errors}"
                    )

            # Return the actual result based on format.
            if output_format == 'pandas':
                if result is None:
                    return pd.DataFrame()
                if not isinstance(result, pd.DataFrame):
                    raise RuntimeError(
                        f"Expected pandas DataFrame but got {type(result)}"
                    )
                return result
            else:  # json
                if isinstance(result, str):
                    return result
                elif isinstance(result, pd.DataFrame):
                    return result.to_json(orient='records', date_format='iso')
                else:
                    return json.dumps(result, default=str, indent=2)

    except asyncio.TimeoutError as e:
        raise RuntimeError(
            f"Query execution exceeded {timeout} seconds"
        ) from e
    except Exception as e:
        raise RuntimeError(
            f"Database query failed: {str(e)}"
        ) from e
|
|
947
|
+
|
|
948
|
+
async def _execute(
    self,
    driver: str,
    query: str,
    credentials: Optional[Dict[str, Any]] = None,
    dsn: Optional[str] = None,
    output_format: str = "pandas",
    query_timeout: int = 300,
    max_rows: int = 10000,
    **kwargs
) -> Dict[str, Any]:
    """
    Execute the database query with multi-language support.

    Args:
        driver: Database driver (pg, mysql, bigquery, influx, mssql, etc.)
        query: Query to execute (SQL or Flux depending on driver)
        credentials: Optional database credentials
        dsn: Optional DSN string
        output_format: Output format ('pandas' or 'json')
        query_timeout: Query timeout in seconds
        max_rows: Maximum number of rows to return
        **kwargs: Additional arguments (currently unused)

    Returns:
        Dict with 'status', 'result' (DataFrame or JSON string depending on
        output_format) and 'metadata' (query, driver, row/column counts,
        execution time, output format).
        # Annotation corrected: the method returns this dict, not the raw
        # DataFrame/str — the raw result lives under the 'result' key.

    Raises:
        ValueError: if query validation or credential resolution fails.
        RuntimeError: on timeout or driver-level query failure.
    """
    start_time = datetime.now()

    try:
        # Normalize driver name (resolve aliases like 'postgres' -> 'pg').
        driver = DriverInfo.normalize_driver(driver)
        driver_info = DriverInfo.get_driver_info(driver)

        self.logger.info(
            f"Starting query on {driver_info['name']} "
            f"(language: {driver_info['query_language'].value})"
        )

        # Validate query safety based on query language (read-only only).
        validation_result = self._validate_query_safety(query, driver)
        if not validation_result['is_safe']:
            raise ValueError(
                f"Query validation failed: {validation_result['message']}\n"
                f"Suggestions: {', '.join(validation_result.get('suggestions', []))}"
            )

        # Resolve credentials; an explicit dsn argument overrides the
        # DSN discovered from defaults.
        creds, resolved_dsn = self._get_credentials(driver, credentials)
        final_dsn = dsn or resolved_dsn
        # Credential maps may carry their own driver key (e.g. mongo-family
        # defaults set driver='mongo'); it replaces the requested driver.
        if 'driver' in creds:
            driver = creds.pop('driver')

        # Add row limit if applicable (idempotent — existing limits are kept).
        modified_query = self._add_row_limit(query, max_rows, driver)

        # Execute query.
        result = await self._execute_database_query(
            driver,
            creds,
            final_dsn,
            modified_query,
            output_format,
            query_timeout,
            max_rows
        )

        end_time = datetime.now()
        execution_time = (end_time - start_time).total_seconds()

        # Log execution details.
        if output_format == 'pandas' and isinstance(result, pd.DataFrame):
            self.logger.info(
                f"Query executed successfully in {execution_time:.2f}s. "
                f"Retrieved {len(result)} rows, {len(result.columns)} columns "
                f"from {driver_info['name']}."
            )
        else:
            self.logger.info(
                f"Query executed successfully in {execution_time:.2f}s "
                f"on {driver_info['name']}."
            )

        return {
            "status": "success",
            "result": result,
            'metadata': {
                "query": modified_query,
                "driver": driver_info['name'],
                'rows_returned': len(result) if isinstance(result, pd.DataFrame) else None,
                'columns_returned': len(result.columns) if isinstance(result, pd.DataFrame) else None,
                'execution_time_seconds': execution_time,
                'output_format': output_format
            }
        }

    except Exception as e:
        end_time = datetime.now()
        execution_time = (end_time - start_time).total_seconds()

        self.logger.error(
            f"Query failed on {driver} after {execution_time:.2f}s: {e}"
        )
        raise
|
|
1052
|
+
|
|
1053
|
+
def get_supported_drivers(self) -> List[str]:
    """Return the database driver identifiers this tool accepts."""
    # NOTE: 'pg', 'postgres' and 'postgresql' are aliases resolved to the
    # same backend by the execution layer.
    return [
        'bigquery',
        'pg',
        'postgres',
        'postgresql',
        'mysql',
        'influx',
        'sqlite',
        'oracle',
        'mssql',
        'clickhouse',
        'snowflake',
    ]
|
|
1059
|
+
|
|
1060
|
+
async def test_connection(
|
|
1061
|
+
self,
|
|
1062
|
+
driver: str,
|
|
1063
|
+
credentials: Optional[Dict[str, Any]] = None
|
|
1064
|
+
) -> Dict[str, Any]:
|
|
1065
|
+
"""
|
|
1066
|
+
Test database connection.
|
|
1067
|
+
|
|
1068
|
+
Args:
|
|
1069
|
+
driver: Database driver to test
|
|
1070
|
+
credentials: Optional credentials to use
|
|
1071
|
+
|
|
1072
|
+
Returns:
|
|
1073
|
+
Dictionary with connection test results
|
|
1074
|
+
"""
|
|
1075
|
+
try:
|
|
1076
|
+
# Simple test query
|
|
1077
|
+
test_query = "SELECT 1 as test_column"
|
|
1078
|
+
|
|
1079
|
+
result = await self._execute(
|
|
1080
|
+
driver=driver,
|
|
1081
|
+
query=test_query,
|
|
1082
|
+
credentials=credentials,
|
|
1083
|
+
output_format="pandas",
|
|
1084
|
+
query_timeout=30,
|
|
1085
|
+
max_rows=1
|
|
1086
|
+
)
|
|
1087
|
+
|
|
1088
|
+
return {
|
|
1089
|
+
"status": "success",
|
|
1090
|
+
"message": f"Successfully connected to {driver}",
|
|
1091
|
+
"test_result": result.to_dict('records') if isinstance(result, pd.DataFrame) else result
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
except Exception as e:
|
|
1095
|
+
return {
|
|
1096
|
+
"status": "error",
|
|
1097
|
+
"message": f"Failed to connect to {driver}: {str(e)}"
|
|
1098
|
+
}
|
|
1099
|
+
|
|
1100
|
+
def save_query_result(
    self,
    result: Union[pd.DataFrame, str],
    filename: Optional[str] = None,
    file_format: str = "csv"
) -> Dict[str, Any]:
    """
    Save a query result to a file under ``self.output_dir``.

    Args:
        result: Query result to save — a DataFrame, or a string assumed
            to already be JSON.
        filename: Optional base filename (without extension); a
            timestamped name is generated when omitted.
        file_format: File format ('csv', 'json', 'excel'). Ignored for
            string results, which are always written as ``.json``.

    Returns:
        Dictionary with file information: name, path, static URL, size
        in bytes, and the requested format.

    Raises:
        ValueError: If the output directory is not configured, the format
            is unsupported, or writing the file fails.
    """
    if not self.output_dir:
        raise ValueError("Output directory not configured")

    if not filename:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"query_result_{timestamp}"

    # Ensure output directory exists
    self.output_dir.mkdir(parents=True, exist_ok=True)

    try:
        if isinstance(result, pd.DataFrame):
            fmt = file_format.lower()
            # BUG FIX: file paths were built from a literal placeholder,
            # ignoring `filename` entirely (and the timestamp fallback),
            # so successive saves collided on the same file name.
            if fmt == 'csv':
                file_path = self.output_dir / f"{filename}.csv"
                result.to_csv(file_path, index=False)
            elif fmt == 'excel':
                file_path = self.output_dir / f"{filename}.xlsx"
                result.to_excel(file_path, index=False)
            elif fmt == 'json':
                file_path = self.output_dir / f"{filename}.json"
                result.to_json(file_path, orient='records', date_format='iso', indent=2)
            else:
                raise ValueError(f"Unsupported file format: {file_format}")
        else:
            # Assume it's JSON string
            file_path = self.output_dir / f"{filename}.json"
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(result)

        file_url = self.to_static_url(file_path)

        return {
            "filename": file_path.name,
            "file_path": str(file_path),
            "file_url": file_url,
            "file_size": file_path.stat().st_size,
            "format": file_format
        }

    except Exception as e:
        # Chain the original cause so the underlying I/O error survives.
        raise ValueError(
            f"Error saving query result: {e}"
        ) from e
|