ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
parrot/tools/multidb.py
ADDED
|
@@ -0,0 +1,580 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Multi-Tier Schema Metadata Caching System for AI-Parrot DatabaseTool
|
|
3
|
+
|
|
4
|
+
This system implements intelligent schema caching with three tiers:
|
|
5
|
+
1. In-memory cache for frequently accessed tables
|
|
6
|
+
2. Vector database for semantic discovery of related tables
|
|
7
|
+
3. Direct database extraction as last resort
|
|
8
|
+
|
|
9
|
+
The key insight: 90% of queries hit the same 10% of tables, so we optimize
|
|
10
|
+
for this common case while gracefully handling discovery of new tables.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Dict, List, Optional, Any, Set
|
|
14
|
+
from collections import defaultdict, OrderedDict
|
|
15
|
+
import asyncio
|
|
16
|
+
from datetime import datetime, timedelta
|
|
17
|
+
from dataclasses import dataclass, asdict
|
|
18
|
+
from enum import Enum
|
|
19
|
+
import logging
|
|
20
|
+
import json
|
|
21
|
+
import yaml
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
from ..stores.abstract import AbstractStore
|
|
25
|
+
from .abstract import AbstractTool
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class MetadataFormat(str, Enum):
    """
    Output formats available when rendering table schema metadata.

    The class mixes in ``str``, so every member is also a plain string
    that compares equal to its value.
    """

    # Custom YAML format for LLM context
    YAML_OPTIMIZED = "yaml_optimized"

    # Full JSON Schema specification
    JSON_SCHEMA = "json_schema"

    # Avro schema format
    AVRO_SCHEMA = "avro_schema"

    # Minimal YAML for quick parsing
    COMPACT_YAML = "compact_yaml"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class TableMetadata:
    """
    Optimized table metadata structure designed for both caching efficiency
    and LLM comprehension. This format balances completeness with conciseness.

    Each entry of ``columns`` is a dict; the rendering methods read its
    ``name`` key (required) and its ``type``, ``nullable`` and
    ``description`` keys (optional). Each entry of ``foreign_keys`` is a
    dict with ``column`` and ``references`` keys.
    """
    table_name: str
    schema_name: str
    database_type: str

    # Core structure information
    columns: List[Dict[str, Any]]  # Simplified column definitions
    primary_keys: List[str]
    foreign_keys: List[Dict[str, str]]  # {column, references_table.column}
    indexes: List[Dict[str, Any]]

    # Semantic information for LLM context
    description: Optional[str] = None
    business_purpose: Optional[str] = None  # Inferred from usage patterns
    common_joins: Optional[List[str]] = None  # Tables commonly joined with this one
    sample_data: Optional[Dict[str, List]] = None  # Small sample for context

    # Metadata about the metadata
    row_count_estimate: Optional[int] = None
    last_updated: Optional[datetime] = None
    access_frequency: int = 0  # How often this table is queried
    cache_timestamp: Optional[datetime] = None

    # Class-level lookup tables. They deliberately carry no annotation so
    # that ``@dataclass`` does not turn them into instance fields.
    _TYPE_MAPPINGS = {
        # Numeric types
        'bigint': 'integer', 'int8': 'integer', 'serial8': 'integer',
        'bigserial': 'integer',
        'integer': 'integer', 'int4': 'integer', 'serial': 'integer',
        'int': 'integer',
        'smallint': 'integer', 'int2': 'integer', 'smallserial': 'integer',
        'tinyint': 'integer',
        'decimal': 'decimal', 'numeric': 'decimal', 'money': 'decimal',
        'real': 'decimal', 'float4': 'decimal', 'float': 'decimal',
        'double precision': 'decimal', 'float8': 'decimal', 'double': 'decimal',

        # Text types
        'character varying': 'text', 'varchar': 'text', 'char': 'text',
        'character': 'text', 'nvarchar': 'text', 'nchar': 'text',
        'text': 'text', 'string': 'text',

        # Date/time types
        'timestamp': 'datetime', 'timestamptz': 'datetime',
        'timestamp without time zone': 'datetime',
        'timestamp with time zone': 'datetime',
        'datetime': 'datetime',
        'date': 'date',
        'time': 'time', 'timetz': 'time',
        'time without time zone': 'time',
        'time with time zone': 'time',

        # Boolean
        'boolean': 'boolean', 'bool': 'boolean',

        # JSON
        'json': 'json', 'jsonb': 'json',

        # Arrays and special types
        'array': 'array', 'uuid': 'uuid'
    }

    # Simplified type -> JSON Schema fragment. 'json' and 'unknown' are
    # intentionally absent: they map to an unconstrained property ({}).
    _JSON_SCHEMA_TYPES = {
        'integer': {'type': 'integer'},
        'decimal': {'type': 'number'},
        'text': {'type': 'string'},
        'datetime': {'type': 'string', 'format': 'date-time'},
        'date': {'type': 'string', 'format': 'date'},
        'time': {'type': 'string', 'format': 'time'},
        'boolean': {'type': 'boolean'},
        'array': {'type': 'array'},
        'uuid': {'type': 'string', 'format': 'uuid'},
    }

    def to_llm_context(self, format_type: MetadataFormat = MetadataFormat.YAML_OPTIMIZED) -> str:
        """
        Convert table metadata to LLM-friendly format.

        This is a critical method - the format needs to be:
        1. Concise enough to fit in LLM context windows
        2. Rich enough to generate accurate queries
        3. Structured enough for reliable parsing

        Args:
            format_type: Which representation to produce.

        Returns:
            The rendered metadata as a string (YAML or JSON text).

        Raises:
            ValueError: If ``format_type`` has no renderer (currently
                ``AVRO_SCHEMA`` and any non-member value).
        """
        if format_type == MetadataFormat.YAML_OPTIMIZED:
            return self._to_yaml_optimized()
        elif format_type == MetadataFormat.COMPACT_YAML:
            return self._to_compact_yaml()
        elif format_type == MetadataFormat.JSON_SCHEMA:
            return self._to_json_schema()
        else:
            raise ValueError(f"Unsupported format: {format_type}")

    def _qualified_name(self) -> str:
        """Return the ``schema.table`` name used as the title in every format."""
        return f"{self.schema_name}.{self.table_name}"

    def _to_yaml_optimized(self) -> str:
        """
        Generate YAML format optimized for LLM understanding.

        This format prioritizes clarity and includes business context
        that helps the LLM generate more intuitive queries.

        Returns:
            A block-style YAML document with keys emitted in insertion order.
        """
        # Build the core structure
        schema_dict = {
            'table': self._qualified_name(),
            'purpose': self.business_purpose or self.description or "Data table",
            'columns': {}
        }

        # Guard against None the same way the foreign-key helpers do.
        primary_keys = self.primary_keys or []

        # Simplify column information for LLM consumption
        for col in (self.columns or []):
            name = col['name']
            col_info = {
                'type': self._simplify_data_type(col.get('type', 'unknown')),
                'nullable': col.get('nullable', True)
            }

            # Add semantic hints that help LLMs generate better queries
            if col.get('description'):
                col_info['description'] = col['description']
            if name in primary_keys:
                col_info['primary_key'] = True
            if self._is_foreign_key(name):
                col_info['references'] = self._get_foreign_key_reference(name)

            schema_dict['columns'][name] = col_info

        # Add relationship information
        if self.common_joins:
            schema_dict['commonly_joined_with'] = self.common_joins[:5]  # Limit context size

        # Add sample data if available (helps LLM understand data patterns)
        if self.sample_data:
            schema_dict['sample_data'] = {
                col: values[:3]  # Just first 3 samples to avoid context bloat
                for col, values in self.sample_data.items()
            }

        # Add usage hints. A falsy estimate (None or 0) produces no hint.
        if self.row_count_estimate:
            if self.row_count_estimate > 1000000:
                schema_dict['size_hint'] = 'large_table'
            elif self.row_count_estimate > 10000:
                schema_dict['size_hint'] = 'medium_table'
            else:
                schema_dict['size_hint'] = 'small_table'

        return yaml.dump(schema_dict, default_flow_style=False, sort_keys=False)

    def _to_compact_yaml(self) -> str:
        """
        Generate a minimal YAML rendering: table name, column -> simplified
        type, and key information only when present.

        Descriptions, samples and size hints are omitted to keep the output
        as small as possible.
        """
        compact = {
            'table': self._qualified_name(),
            'columns': {
                col['name']: self._simplify_data_type(col.get('type', 'unknown'))
                for col in (self.columns or [])
            },
        }
        if self.primary_keys:
            compact['primary_keys'] = list(self.primary_keys)
        if self.foreign_keys:
            compact['foreign_keys'] = {
                fk['column']: fk['references'] for fk in self.foreign_keys
            }
        return yaml.dump(compact, default_flow_style=False, sort_keys=False)

    def _to_json_schema(self) -> str:
        """
        Generate a JSON Schema document describing one row of the table.

        Every column becomes a property; columns whose ``nullable`` flag is
        explicitly false are listed under ``required``. Columns whose type
        simplifies to 'json' or 'unknown' get an unconstrained property.

        Returns:
            The schema serialized as indented JSON text.
        """
        properties = {}
        required = []
        for col in (self.columns or []):
            name = col['name']
            simple_type = self._simplify_data_type(col.get('type', 'unknown'))
            # Copy the fragment so the shared class-level table is never mutated.
            prop = dict(self._JSON_SCHEMA_TYPES.get(simple_type, {}))
            if col.get('description'):
                prop['description'] = col['description']
            properties[name] = prop
            if not col.get('nullable', True):
                required.append(name)

        schema = {
            '$schema': 'https://json-schema.org/draft/2020-12/schema',
            'title': self._qualified_name(),
            'type': 'object',
            'properties': properties,
        }
        purpose = self.business_purpose or self.description
        if purpose:
            schema['description'] = purpose
        if required:
            schema['required'] = required
        return json.dumps(schema, indent=2)

    def _simplify_data_type(self, db_type: str) -> str:
        """
        Convert database-specific types to LLM-friendly generic types.

        This mapping helps LLMs understand what operations are valid
        on different column types without getting lost in database-specific details.

        Args:
            db_type: Type name as reported by the database; size/precision
                suffixes such as ``(255)`` are ignored.

        Returns:
            One of the generic names in ``_TYPE_MAPPINGS``, or 'unknown' when
            the type is missing or not recognised.
        """
        # A column dict may carry an explicit None/empty type.
        if not db_type:
            return 'unknown'

        normalized = str(db_type).lower().strip()

        # PostgreSQL-style array notation, e.g. "integer[]" or "numeric(10,2)[]".
        if normalized.endswith('[]'):
            return 'array'

        # Extract base type (remove size specifications, etc.)
        base_type = normalized.split('(')[0].strip()
        return self._TYPE_MAPPINGS.get(base_type, 'unknown')

    def _is_foreign_key(self, column_name: str) -> bool:
        """Check if a column is a foreign key."""
        return any(fk['column'] == column_name for fk in (self.foreign_keys or []))

    def _get_foreign_key_reference(self, column_name: str) -> Optional[str]:
        """
        Get the table.column that this foreign key references.

        Returns None when ``column_name`` is not listed in ``foreign_keys``.
        """
        for fk in (self.foreign_keys or []):
            if fk['column'] == column_name:
                return fk['references']
        return None
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class SchemaMetadataCache:
|
|
186
|
+
"""
|
|
187
|
+
Multi-tier caching system for database schema metadata.
|
|
188
|
+
|
|
189
|
+
This class orchestrates the three-tier caching strategy:
|
|
190
|
+
Tier 1: In-memory LRU cache for hot tables
|
|
191
|
+
Tier 2: Vector store for semantic discovery
|
|
192
|
+
Tier 3: Direct database extraction
|
|
193
|
+
|
|
194
|
+
The cache learns from usage patterns and optimizes for common access patterns.
|
|
195
|
+
"""
|
|
196
|
+
|
|
197
|
+
def __init__(
    self,
    vector_store: Optional[AbstractStore] = None,
    memory_cache_size: int = 100,
    cache_ttl_hours: int = 24,
    background_update: bool = True
):
    """
    Create an empty cache and record its configuration.

    Args:
        vector_store: Vector database for semantic schema search
            (optional; stored as given).
        memory_cache_size: Maximum number of tables to keep in memory.
        cache_ttl_hours: How long cached metadata stays valid, in hours;
            kept on the instance as a ``timedelta``.
        background_update: Whether to update the vector store in background.
    """
    # --- configuration -------------------------------------------------
    self.vector_store = vector_store
    self.cache_ttl = timedelta(hours=cache_ttl_hours)
    self.background_update = background_update

    # --- tier 1: ordered mapping used as the in-memory LRU cache --------
    self.memory_cache: OrderedDict[str, TableMetadata] = OrderedDict()
    self.memory_cache_size = memory_cache_size

    # --- usage tracking: per-key hit counter and last-access record -----
    self.access_counts = defaultdict(int)
    self.last_access = dict()

    # --- bookkeeping for non-blocking vector store updates --------------
    self.pending_updates: Set[str] = set()
    self.update_tasks = list()

    self.logger = logging.getLogger(__name__)
|
|
230
|
+
|
|
231
|
+
def _generate_cache_key(self, schema_name: str, table_name: str, database_type: str) -> str:
|
|
232
|
+
"""Generate a unique cache key for a table."""
|
|
233
|
+
return f"{database_type}:{schema_name}.{table_name}"
|
|
234
|
+
|
|
235
|
+
async def get_table_metadata(
|
|
236
|
+
self,
|
|
237
|
+
schema_name: str,
|
|
238
|
+
table_name: str,
|
|
239
|
+
database_type: str,
|
|
240
|
+
database_extractor_func: Optional[callable] = None
|
|
241
|
+
) -> Optional[TableMetadata]:
|
|
242
|
+
"""
|
|
243
|
+
Get table metadata using the three-tier caching strategy.
|
|
244
|
+
|
|
245
|
+
This is the main entry point that implements the intelligent caching logic:
|
|
246
|
+
1. Check in-memory cache first (fastest)
|
|
247
|
+
2. Fall back to vector store semantic search
|
|
248
|
+
3. Extract directly from database if needed
|
|
249
|
+
4. Update higher tiers with new information
|
|
250
|
+
"""
|
|
251
|
+
cache_key = self._generate_cache_key(schema_name, table_name, database_type)
|
|
252
|
+
self._record_access(cache_key)
|
|
253
|
+
|
|
254
|
+
# Tier 1: Check in-memory cache
|
|
255
|
+
if cache_key in self.memory_cache:
|
|
256
|
+
metadata = self.memory_cache[cache_key]
|
|
257
|
+
|
|
258
|
+
# Check if cache entry is still valid
|
|
259
|
+
if self._is_cache_valid(metadata):
|
|
260
|
+
self.logger.debug(f"Cache hit (memory): {cache_key}")
|
|
261
|
+
self._move_to_front(cache_key) # LRU bookkeeping
|
|
262
|
+
return metadata
|
|
263
|
+
else:
|
|
264
|
+
self.logger.debug(f"Cache expired (memory): {cache_key}")
|
|
265
|
+
del self.memory_cache[cache_key]
|
|
266
|
+
|
|
267
|
+
# Tier 2: Check vector store
|
|
268
|
+
if self.vector_store:
|
|
269
|
+
vector_metadata = await self._search_vector_store(schema_name, table_name, database_type)
|
|
270
|
+
if vector_metadata and self._is_cache_valid(vector_metadata):
|
|
271
|
+
self.logger.debug(f"Cache hit (vector): {cache_key}")
|
|
272
|
+
self._add_to_memory_cache(cache_key, vector_metadata)
|
|
273
|
+
return vector_metadata
|
|
274
|
+
|
|
275
|
+
# Tier 3: Extract from database directly
|
|
276
|
+
if database_extractor_func:
|
|
277
|
+
self.logger.debug(f"Cache miss, extracting from database: {cache_key}")
|
|
278
|
+
fresh_metadata = await database_extractor_func(schema_name, table_name)
|
|
279
|
+
|
|
280
|
+
if fresh_metadata:
|
|
281
|
+
# Update all cache tiers with fresh data
|
|
282
|
+
fresh_metadata.cache_timestamp = datetime.utcnow()
|
|
283
|
+
self._add_to_memory_cache(cache_key, fresh_metadata)
|
|
284
|
+
|
|
285
|
+
# Schedule background update to vector store (non-blocking)
|
|
286
|
+
if self.vector_store and self.background_update:
|
|
287
|
+
self._schedule_vector_store_update(cache_key, fresh_metadata)
|
|
288
|
+
|
|
289
|
+
return fresh_metadata
|
|
290
|
+
|
|
291
|
+
# Nothing found
|
|
292
|
+
self.logger.warning(f"No metadata found for {cache_key}")
|
|
293
|
+
return None
|
|
294
|
+
|
|
295
|
+
async def get_context_for_query(
|
|
296
|
+
self,
|
|
297
|
+
table_names: List[str],
|
|
298
|
+
schema_name: str = "public",
|
|
299
|
+
database_type: str = "postgresql",
|
|
300
|
+
format_type: MetadataFormat = MetadataFormat.YAML_OPTIMIZED,
|
|
301
|
+
database_extractor_func: Optional[callable] = None
|
|
302
|
+
) -> str:
|
|
303
|
+
"""
|
|
304
|
+
Build comprehensive LLM context for a set of tables.
|
|
305
|
+
|
|
306
|
+
This method orchestrates the retrieval of multiple table metadata
|
|
307
|
+
and formats it into a cohesive context for LLM query generation.
|
|
308
|
+
"""
|
|
309
|
+
context_parts = []
|
|
310
|
+
retrieved_tables = []
|
|
311
|
+
|
|
312
|
+
# Collect metadata for all requested tables
|
|
313
|
+
for table_name in table_names:
|
|
314
|
+
metadata = await self.get_table_metadata(
|
|
315
|
+
schema_name, table_name, database_type, database_extractor_func
|
|
316
|
+
)
|
|
317
|
+
|
|
318
|
+
if metadata:
|
|
319
|
+
retrieved_tables.append(metadata)
|
|
320
|
+
else:
|
|
321
|
+
self.logger.warning(f"Could not retrieve metadata for {table_name}")
|
|
322
|
+
|
|
323
|
+
# Build comprehensive context
|
|
324
|
+
if retrieved_tables:
|
|
325
|
+
context_parts.append(f"# Database Schema Context ({database_type})")
|
|
326
|
+
context_parts.append(f"# Available tables: {len(retrieved_tables)}")
|
|
327
|
+
context_parts.append("")
|
|
328
|
+
|
|
329
|
+
# Add individual table schemas
|
|
330
|
+
for metadata in retrieved_tables:
|
|
331
|
+
context_parts.append(metadata.to_llm_context(format_type))
|
|
332
|
+
context_parts.append("")
|
|
333
|
+
|
|
334
|
+
# Add relationship information if multiple tables
|
|
335
|
+
if len(retrieved_tables) > 1:
|
|
336
|
+
relationships = self._analyze_table_relationships(retrieved_tables)
|
|
337
|
+
if relationships:
|
|
338
|
+
context_parts.append("# Table Relationships")
|
|
339
|
+
for rel in relationships:
|
|
340
|
+
context_parts.append(f"- {rel}")
|
|
341
|
+
context_parts.append("")
|
|
342
|
+
|
|
343
|
+
return "\n".join(context_parts)
|
|
344
|
+
|
|
345
|
+
def _record_access(self, cache_key: str):
|
|
346
|
+
"""Record access for usage pattern analysis."""
|
|
347
|
+
self.access_counts[cache_key] += 1
|
|
348
|
+
self.last_access[cache_key] = datetime.utcnow()
|
|
349
|
+
|
|
350
|
+
def _is_cache_valid(self, metadata: TableMetadata) -> bool:
|
|
351
|
+
"""Check if cached metadata is still valid."""
|
|
352
|
+
if not metadata.cache_timestamp:
|
|
353
|
+
return False
|
|
354
|
+
|
|
355
|
+
age = datetime.utcnow() - metadata.cache_timestamp
|
|
356
|
+
return age < self.cache_ttl
|
|
357
|
+
|
|
358
|
+
def _add_to_memory_cache(self, cache_key: str, metadata: TableMetadata):
|
|
359
|
+
"""Add metadata to in-memory cache with LRU eviction."""
|
|
360
|
+
# Remove if already exists (for LRU reordering)
|
|
361
|
+
if cache_key in self.memory_cache:
|
|
362
|
+
del self.memory_cache[cache_key]
|
|
363
|
+
|
|
364
|
+
# Add to front of cache
|
|
365
|
+
self.memory_cache[cache_key] = metadata
|
|
366
|
+
|
|
367
|
+
# Evict least recently used items if over capacity
|
|
368
|
+
while len(self.memory_cache) > self.memory_cache_size:
|
|
369
|
+
oldest_key = next(iter(self.memory_cache))
|
|
370
|
+
del self.memory_cache[oldest_key]
|
|
371
|
+
self.logger.debug(f"Evicted from memory cache: {oldest_key}")
|
|
372
|
+
|
|
373
|
+
def _move_to_front(self, cache_key: str):
|
|
374
|
+
"""Move cache item to front for LRU ordering."""
|
|
375
|
+
metadata = self.memory_cache.pop(cache_key)
|
|
376
|
+
self.memory_cache[cache_key] = metadata
|
|
377
|
+
|
|
378
|
+
async def _search_vector_store(
|
|
379
|
+
self,
|
|
380
|
+
schema_name: str,
|
|
381
|
+
table_name: str,
|
|
382
|
+
database_type: str
|
|
383
|
+
) -> Optional[TableMetadata]:
|
|
384
|
+
"""Search vector store for table metadata."""
|
|
385
|
+
if not self.vector_store:
|
|
386
|
+
return None
|
|
387
|
+
|
|
388
|
+
try:
|
|
389
|
+
# Create search query that combines exact and semantic matching
|
|
390
|
+
search_query = f"{schema_name}.{table_name} {database_type} table schema"
|
|
391
|
+
|
|
392
|
+
# Search the vector store
|
|
393
|
+
results = await self.vector_store.similarity_search(
|
|
394
|
+
query=search_query,
|
|
395
|
+
limit=1,
|
|
396
|
+
filter_metadata={"database_type": database_type}
|
|
397
|
+
)
|
|
398
|
+
|
|
399
|
+
if results and len(results) > 0:
|
|
400
|
+
# Deserialize the stored metadata
|
|
401
|
+
metadata_dict = results[0].get('metadata', {})
|
|
402
|
+
if 'table_metadata' in metadata_dict:
|
|
403
|
+
return TableMetadata(**metadata_dict['table_metadata'])
|
|
404
|
+
|
|
405
|
+
except Exception as e:
|
|
406
|
+
self.logger.warning(f"Vector store search failed: {e}")
|
|
407
|
+
|
|
408
|
+
return None
|
|
409
|
+
|
|
410
|
+
def _schedule_vector_store_update(self, cache_key: str, metadata: TableMetadata):
|
|
411
|
+
"""Schedule background update to vector store (non-blocking)."""
|
|
412
|
+
if cache_key not in self.pending_updates:
|
|
413
|
+
self.pending_updates.add(cache_key)
|
|
414
|
+
task = asyncio.create_task(self._update_vector_store(cache_key, metadata))
|
|
415
|
+
self.update_tasks.append(task)
|
|
416
|
+
|
|
417
|
+
async def _update_vector_store(self, cache_key: str, metadata: TableMetadata):
|
|
418
|
+
"""Update vector store with new metadata in background."""
|
|
419
|
+
try:
|
|
420
|
+
if not self.vector_store:
|
|
421
|
+
return
|
|
422
|
+
|
|
423
|
+
# Create searchable text content
|
|
424
|
+
content = metadata.to_llm_context()
|
|
425
|
+
|
|
426
|
+
# Create metadata for vector store
|
|
427
|
+
vector_metadata = {
|
|
428
|
+
"table_name": metadata.table_name,
|
|
429
|
+
"schema_name": metadata.schema_name,
|
|
430
|
+
"database_type": metadata.database_type,
|
|
431
|
+
"table_metadata": asdict(metadata),
|
|
432
|
+
"last_updated": datetime.utcnow().isoformat()
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
# Store in vector database
|
|
436
|
+
await self.vector_store.add_documents([{
|
|
437
|
+
"content": content,
|
|
438
|
+
"metadata": vector_metadata,
|
|
439
|
+
"id": cache_key
|
|
440
|
+
}])
|
|
441
|
+
|
|
442
|
+
self.logger.debug(f"Updated vector store: {cache_key}")
|
|
443
|
+
|
|
444
|
+
except Exception as e:
|
|
445
|
+
self.logger.error(f"Failed to update vector store for {cache_key}: {e}")
|
|
446
|
+
finally:
|
|
447
|
+
self.pending_updates.discard(cache_key)
|
|
448
|
+
|
|
449
|
+
def _analyze_table_relationships(self, tables: List[TableMetadata]) -> List[str]:
|
|
450
|
+
"""Analyze relationships between multiple tables."""
|
|
451
|
+
relationships = []
|
|
452
|
+
table_names = {t.table_name for t in tables}
|
|
453
|
+
|
|
454
|
+
for table in tables:
|
|
455
|
+
for fk in (table.foreign_keys or []):
|
|
456
|
+
ref_table = fk['references'].split('.')[0]
|
|
457
|
+
if ref_table in table_names:
|
|
458
|
+
relationships.append(
|
|
459
|
+
f"{table.table_name}.{fk['column']} → {fk['references']}"
|
|
460
|
+
)
|
|
461
|
+
|
|
462
|
+
return relationships
|
|
463
|
+
|
|
464
|
+
async def cleanup(self):
|
|
465
|
+
"""Clean up background tasks and resources."""
|
|
466
|
+
# Wait for pending vector store updates to complete
|
|
467
|
+
if self.update_tasks:
|
|
468
|
+
await asyncio.gather(*self.update_tasks, return_exceptions=True)
|
|
469
|
+
|
|
470
|
+
self.logger.info("Schema metadata cache cleanup completed")
|
|
471
|
+
|
|
472
|
+
def get_cache_stats(self) -> Dict[str, Any]:
|
|
473
|
+
"""Get caching statistics for monitoring and debugging."""
|
|
474
|
+
return {
|
|
475
|
+
"memory_cache_size": len(self.memory_cache),
|
|
476
|
+
"memory_cache_capacity": self.memory_cache_size,
|
|
477
|
+
"total_access_count": sum(self.access_counts.values()),
|
|
478
|
+
"unique_tables_accessed": len(self.access_counts),
|
|
479
|
+
"pending_vector_updates": len(self.pending_updates),
|
|
480
|
+
"most_accessed_tables": sorted(
|
|
481
|
+
self.access_counts.items(),
|
|
482
|
+
key=lambda x: x[1],
|
|
483
|
+
reverse=True
|
|
484
|
+
)[:10]
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
|
|
488
|
+
# Integration with the main DatabaseTool
|
|
489
|
+
class EnhancedDatabaseTool(AbstractTool):
    """
    Enhanced DatabaseTool with multi-tier schema caching.

    Owns a SchemaMetadataCache and uses it to build schema context text
    for natural language queries.
    """

    def __init__(self, vector_store: Optional[AbstractStore] = None, **kwargs):
        """
        Args:
            vector_store: Optional vector store handed to the schema cache
            **kwargs: Forwarded unchanged to the AbstractTool constructor
        """
        super().__init__(**kwargs)

        # Initialize the multi-tier caching system
        self.schema_cache = SchemaMetadataCache(
            vector_store=vector_store,
            memory_cache_size=100,
            cache_ttl_hours=24,
            background_update=True
        )

    async def _build_intelligent_context(
        self,
        natural_language_query: str,
        database_flavor: str,
        schema_names: Optional[List[str]] = None
    ) -> str:
        """
        Build schema context for a query using the multi-tier cache.

        Args:
            natural_language_query: User question used to guess table names
            database_flavor: Passed to the cache as the database type
            schema_names: Candidate schemas. Only the first entry is used;
                None or an empty list falls back to "public".

        Returns:
            The context text produced by the schema cache.
        """
        # Extract likely table names from natural language query
        # This is a simplified version - you'd want more sophisticated NLP here
        potential_tables = self._extract_table_names_from_query(natural_language_query)

        # Avoid a shared mutable default and an IndexError on an empty list
        schema_name = schema_names[0] if schema_names else "public"

        # Get metadata for each table using the caching system
        return await self.schema_cache.get_context_for_query(
            table_names=potential_tables,
            schema_name=schema_name,
            database_type=database_flavor,
            database_extractor_func=self._extract_table_metadata_from_database
        )

    def _extract_table_names_from_query(self, query: str) -> List[str]:
        """
        Extract likely table names from natural language query.

        This is a simplified implementation: a fixed list of keywords is
        matched as case-insensitive substrings of the query. In practice,
        you'd want:
        1. NLP-based entity extraction
        2. Similarity search against known table names
        3. Business glossary matching

        Returns:
            The matched keywords in list order, or ['sales'] when nothing
            matches.
        """
        # Simple keyword-based extraction for demonstration
        common_table_patterns = ['sales', 'orders', 'customers', 'products', 'users']
        query_lower = query.lower()

        found_tables = [
            pattern for pattern in common_table_patterns if pattern in query_lower
        ]

        return found_tables if found_tables else ['sales']  # Default fallback

    async def _extract_table_metadata_from_database(
        self,
        schema_name: str,
        table_name: str
    ) -> Optional[TableMetadata]:
        """
        Extract metadata directly from database.

        NOTE: placeholder implementation. No database is queried; a fixed
        mock description (id / name / created_at columns, database type
        "postgresql") is returned for whatever table is requested. Replace
        with the real schema extraction logic.
        """
        # This would integrate with your existing schema extraction logic
        # For demonstration, returning a mock metadata object
        return TableMetadata(
            table_name=table_name,
            schema_name=schema_name,
            database_type="postgresql",
            columns=[
                {"name": "id", "type": "integer", "nullable": False},
                {"name": "name", "type": "varchar", "nullable": True},
                {"name": "created_at", "type": "timestamp", "nullable": False}
            ],
            primary_keys=["id"],
            foreign_keys=[],
            indexes=[],
            last_updated=datetime.utcnow()
        )
|