ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentui/.prettierrc +15 -0
- agentui/QUICKSTART.md +272 -0
- agentui/README.md +59 -0
- agentui/env.example +16 -0
- agentui/jsconfig.json +14 -0
- agentui/package-lock.json +4242 -0
- agentui/package.json +34 -0
- agentui/scripts/postinstall/apply-patches.mjs +260 -0
- agentui/src/app.css +61 -0
- agentui/src/app.d.ts +13 -0
- agentui/src/app.html +12 -0
- agentui/src/components/LoadingSpinner.svelte +64 -0
- agentui/src/components/ThemeSwitcher.svelte +159 -0
- agentui/src/components/index.js +4 -0
- agentui/src/lib/api/bots.ts +60 -0
- agentui/src/lib/api/chat.ts +22 -0
- agentui/src/lib/api/http.ts +25 -0
- agentui/src/lib/components/BotCard.svelte +33 -0
- agentui/src/lib/components/ChatBubble.svelte +63 -0
- agentui/src/lib/components/Toast.svelte +21 -0
- agentui/src/lib/config.ts +20 -0
- agentui/src/lib/stores/auth.svelte.ts +73 -0
- agentui/src/lib/stores/theme.svelte.js +64 -0
- agentui/src/lib/stores/toast.svelte.ts +31 -0
- agentui/src/lib/utils/conversation.ts +39 -0
- agentui/src/routes/+layout.svelte +20 -0
- agentui/src/routes/+page.svelte +232 -0
- agentui/src/routes/login/+page.svelte +200 -0
- agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
- agentui/src/routes/talk/[agentId]/+page.ts +7 -0
- agentui/static/README.md +1 -0
- agentui/svelte.config.js +11 -0
- agentui/tailwind.config.ts +53 -0
- agentui/tsconfig.json +3 -0
- agentui/vite.config.ts +10 -0
- ai_parrot-0.17.2.dist-info/METADATA +472 -0
- ai_parrot-0.17.2.dist-info/RECORD +535 -0
- ai_parrot-0.17.2.dist-info/WHEEL +6 -0
- ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
- ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
- ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
- crew-builder/.prettierrc +15 -0
- crew-builder/QUICKSTART.md +259 -0
- crew-builder/README.md +113 -0
- crew-builder/env.example +17 -0
- crew-builder/jsconfig.json +14 -0
- crew-builder/package-lock.json +4182 -0
- crew-builder/package.json +37 -0
- crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
- crew-builder/src/app.css +62 -0
- crew-builder/src/app.d.ts +13 -0
- crew-builder/src/app.html +12 -0
- crew-builder/src/components/LoadingSpinner.svelte +64 -0
- crew-builder/src/components/ThemeSwitcher.svelte +149 -0
- crew-builder/src/components/index.js +9 -0
- crew-builder/src/lib/api/bots.ts +60 -0
- crew-builder/src/lib/api/chat.ts +80 -0
- crew-builder/src/lib/api/client.ts +56 -0
- crew-builder/src/lib/api/crew/crew.ts +136 -0
- crew-builder/src/lib/api/index.ts +5 -0
- crew-builder/src/lib/api/o365/auth.ts +65 -0
- crew-builder/src/lib/auth/auth.ts +54 -0
- crew-builder/src/lib/components/AgentNode.svelte +43 -0
- crew-builder/src/lib/components/BotCard.svelte +33 -0
- crew-builder/src/lib/components/ChatBubble.svelte +67 -0
- crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
- crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
- crew-builder/src/lib/components/JsonViewer.svelte +24 -0
- crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
- crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
- crew-builder/src/lib/components/Toast.svelte +67 -0
- crew-builder/src/lib/components/Toolbar.svelte +157 -0
- crew-builder/src/lib/components/index.ts +10 -0
- crew-builder/src/lib/config.ts +8 -0
- crew-builder/src/lib/stores/auth.svelte.ts +228 -0
- crew-builder/src/lib/stores/crewStore.ts +369 -0
- crew-builder/src/lib/stores/theme.svelte.js +145 -0
- crew-builder/src/lib/stores/toast.svelte.ts +69 -0
- crew-builder/src/lib/utils/conversation.ts +39 -0
- crew-builder/src/lib/utils/markdown.ts +122 -0
- crew-builder/src/lib/utils/talkHistory.ts +47 -0
- crew-builder/src/routes/+layout.svelte +20 -0
- crew-builder/src/routes/+page.svelte +539 -0
- crew-builder/src/routes/agents/+page.svelte +247 -0
- crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
- crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
- crew-builder/src/routes/builder/+page.svelte +204 -0
- crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
- crew-builder/src/routes/crew/ask/+page.ts +1 -0
- crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
- crew-builder/src/routes/login/+page.svelte +197 -0
- crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
- crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
- crew-builder/static/README.md +1 -0
- crew-builder/svelte.config.js +11 -0
- crew-builder/tailwind.config.ts +53 -0
- crew-builder/tsconfig.json +3 -0
- crew-builder/vite.config.ts +10 -0
- mcp_servers/calculator_server.py +309 -0
- parrot/__init__.py +27 -0
- parrot/__pycache__/__init__.cpython-310.pyc +0 -0
- parrot/__pycache__/version.cpython-310.pyc +0 -0
- parrot/_version.py +34 -0
- parrot/a2a/__init__.py +48 -0
- parrot/a2a/client.py +658 -0
- parrot/a2a/discovery.py +89 -0
- parrot/a2a/mixin.py +257 -0
- parrot/a2a/models.py +376 -0
- parrot/a2a/server.py +770 -0
- parrot/agents/__init__.py +29 -0
- parrot/bots/__init__.py +12 -0
- parrot/bots/a2a_agent.py +19 -0
- parrot/bots/abstract.py +3139 -0
- parrot/bots/agent.py +1129 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/chatbot.py +669 -0
- parrot/bots/data.py +1618 -0
- parrot/bots/database/__init__.py +5 -0
- parrot/bots/database/abstract.py +3071 -0
- parrot/bots/database/cache.py +286 -0
- parrot/bots/database/models.py +468 -0
- parrot/bots/database/prompts.py +154 -0
- parrot/bots/database/retries.py +98 -0
- parrot/bots/database/router.py +269 -0
- parrot/bots/database/sql.py +41 -0
- parrot/bots/db/__init__.py +6 -0
- parrot/bots/db/abstract.py +556 -0
- parrot/bots/db/bigquery.py +602 -0
- parrot/bots/db/cache.py +85 -0
- parrot/bots/db/documentdb.py +668 -0
- parrot/bots/db/elastic.py +1014 -0
- parrot/bots/db/influx.py +898 -0
- parrot/bots/db/mock.py +96 -0
- parrot/bots/db/multi.py +783 -0
- parrot/bots/db/prompts.py +185 -0
- parrot/bots/db/sql.py +1255 -0
- parrot/bots/db/tools.py +212 -0
- parrot/bots/document.py +680 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/kb.py +170 -0
- parrot/bots/mcp.py +36 -0
- parrot/bots/orchestration/README.md +463 -0
- parrot/bots/orchestration/__init__.py +1 -0
- parrot/bots/orchestration/agent.py +155 -0
- parrot/bots/orchestration/crew.py +3330 -0
- parrot/bots/orchestration/fsm.py +1179 -0
- parrot/bots/orchestration/hr.py +434 -0
- parrot/bots/orchestration/storage/__init__.py +4 -0
- parrot/bots/orchestration/storage/memory.py +100 -0
- parrot/bots/orchestration/storage/mixin.py +119 -0
- parrot/bots/orchestration/verify.py +202 -0
- parrot/bots/product.py +204 -0
- parrot/bots/prompts/__init__.py +96 -0
- parrot/bots/prompts/agents.py +155 -0
- parrot/bots/prompts/data.py +216 -0
- parrot/bots/prompts/output_generation.py +8 -0
- parrot/bots/scraper/__init__.py +3 -0
- parrot/bots/scraper/models.py +122 -0
- parrot/bots/scraper/scraper.py +1173 -0
- parrot/bots/scraper/templates.py +115 -0
- parrot/bots/stores/__init__.py +5 -0
- parrot/bots/stores/local.py +172 -0
- parrot/bots/webdev.py +81 -0
- parrot/cli.py +17 -0
- parrot/clients/__init__.py +16 -0
- parrot/clients/base.py +1491 -0
- parrot/clients/claude.py +1191 -0
- parrot/clients/factory.py +129 -0
- parrot/clients/google.py +4567 -0
- parrot/clients/gpt.py +1975 -0
- parrot/clients/grok.py +432 -0
- parrot/clients/groq.py +986 -0
- parrot/clients/hf.py +582 -0
- parrot/clients/models.py +18 -0
- parrot/conf.py +395 -0
- parrot/embeddings/__init__.py +9 -0
- parrot/embeddings/base.py +157 -0
- parrot/embeddings/google.py +98 -0
- parrot/embeddings/huggingface.py +74 -0
- parrot/embeddings/openai.py +84 -0
- parrot/embeddings/processor.py +88 -0
- parrot/exceptions.c +13868 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/exceptions.pxd +22 -0
- parrot/exceptions.pxi +15 -0
- parrot/exceptions.pyx +44 -0
- parrot/generators/__init__.py +29 -0
- parrot/generators/base.py +200 -0
- parrot/generators/html.py +293 -0
- parrot/generators/react.py +205 -0
- parrot/generators/streamlit.py +203 -0
- parrot/generators/template.py +105 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agent.py +861 -0
- parrot/handlers/agents/__init__.py +1 -0
- parrot/handlers/agents/abstract.py +900 -0
- parrot/handlers/bots.py +338 -0
- parrot/handlers/chat.py +915 -0
- parrot/handlers/creation.sql +192 -0
- parrot/handlers/crew/ARCHITECTURE.md +362 -0
- parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
- parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
- parrot/handlers/crew/__init__.py +0 -0
- parrot/handlers/crew/handler.py +801 -0
- parrot/handlers/crew/models.py +229 -0
- parrot/handlers/crew/redis_persistence.py +523 -0
- parrot/handlers/jobs/__init__.py +10 -0
- parrot/handlers/jobs/job.py +384 -0
- parrot/handlers/jobs/mixin.py +627 -0
- parrot/handlers/jobs/models.py +115 -0
- parrot/handlers/jobs/worker.py +31 -0
- parrot/handlers/models.py +596 -0
- parrot/handlers/o365_auth.py +105 -0
- parrot/handlers/stream.py +337 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/aws.py +143 -0
- parrot/interfaces/credentials.py +113 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/google.py +1123 -0
- parrot/interfaces/hierarchy.py +1227 -0
- parrot/interfaces/http.py +651 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +24 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/analisys.py +148 -0
- parrot/interfaces/images/plugins/classify.py +150 -0
- parrot/interfaces/images/plugins/classifybase.py +182 -0
- parrot/interfaces/images/plugins/detect.py +150 -0
- parrot/interfaces/images/plugins/exif.py +1103 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/interfaces/o365.py +978 -0
- parrot/interfaces/onedrive.py +822 -0
- parrot/interfaces/sharepoint.py +1435 -0
- parrot/interfaces/soap.py +257 -0
- parrot/loaders/__init__.py +8 -0
- parrot/loaders/abstract.py +1131 -0
- parrot/loaders/audio.py +199 -0
- parrot/loaders/basepdf.py +53 -0
- parrot/loaders/basevideo.py +1568 -0
- parrot/loaders/csv.py +409 -0
- parrot/loaders/docx.py +116 -0
- parrot/loaders/epubloader.py +316 -0
- parrot/loaders/excel.py +199 -0
- parrot/loaders/factory.py +55 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/html.py +26 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/html.py +152 -0
- parrot/loaders/markdown.py +442 -0
- parrot/loaders/pdf.py +373 -0
- parrot/loaders/pdfmark.py +320 -0
- parrot/loaders/pdftables.py +506 -0
- parrot/loaders/ppt.py +476 -0
- parrot/loaders/qa.py +63 -0
- parrot/loaders/splitters/__init__.py +10 -0
- parrot/loaders/splitters/base.py +138 -0
- parrot/loaders/splitters/md.py +228 -0
- parrot/loaders/splitters/token.py +143 -0
- parrot/loaders/txt.py +26 -0
- parrot/loaders/video.py +89 -0
- parrot/loaders/videolocal.py +218 -0
- parrot/loaders/videounderstanding.py +377 -0
- parrot/loaders/vimeo.py +167 -0
- parrot/loaders/web.py +599 -0
- parrot/loaders/youtube.py +504 -0
- parrot/manager/__init__.py +5 -0
- parrot/manager/manager.py +1030 -0
- parrot/mcp/__init__.py +28 -0
- parrot/mcp/adapter.py +105 -0
- parrot/mcp/cli.py +174 -0
- parrot/mcp/client.py +119 -0
- parrot/mcp/config.py +75 -0
- parrot/mcp/integration.py +842 -0
- parrot/mcp/oauth.py +933 -0
- parrot/mcp/server.py +225 -0
- parrot/mcp/transports/__init__.py +3 -0
- parrot/mcp/transports/base.py +279 -0
- parrot/mcp/transports/grpc_session.py +163 -0
- parrot/mcp/transports/http.py +312 -0
- parrot/mcp/transports/mcp.proto +108 -0
- parrot/mcp/transports/quic.py +1082 -0
- parrot/mcp/transports/sse.py +330 -0
- parrot/mcp/transports/stdio.py +309 -0
- parrot/mcp/transports/unix.py +395 -0
- parrot/mcp/transports/websocket.py +547 -0
- parrot/memory/__init__.py +16 -0
- parrot/memory/abstract.py +209 -0
- parrot/memory/agent.py +32 -0
- parrot/memory/cache.py +175 -0
- parrot/memory/core.py +555 -0
- parrot/memory/file.py +153 -0
- parrot/memory/mem.py +131 -0
- parrot/memory/redis.py +613 -0
- parrot/models/__init__.py +46 -0
- parrot/models/basic.py +118 -0
- parrot/models/compliance.py +208 -0
- parrot/models/crew.py +395 -0
- parrot/models/detections.py +654 -0
- parrot/models/generation.py +85 -0
- parrot/models/google.py +223 -0
- parrot/models/groq.py +23 -0
- parrot/models/openai.py +30 -0
- parrot/models/outputs.py +285 -0
- parrot/models/responses.py +938 -0
- parrot/notifications/__init__.py +743 -0
- parrot/openapi/__init__.py +3 -0
- parrot/openapi/components.yaml +641 -0
- parrot/openapi/config.py +322 -0
- parrot/outputs/__init__.py +32 -0
- parrot/outputs/formats/__init__.py +108 -0
- parrot/outputs/formats/altair.py +359 -0
- parrot/outputs/formats/application.py +122 -0
- parrot/outputs/formats/base.py +351 -0
- parrot/outputs/formats/bokeh.py +356 -0
- parrot/outputs/formats/card.py +424 -0
- parrot/outputs/formats/chart.py +436 -0
- parrot/outputs/formats/d3.py +255 -0
- parrot/outputs/formats/echarts.py +310 -0
- parrot/outputs/formats/generators/__init__.py +0 -0
- parrot/outputs/formats/generators/abstract.py +61 -0
- parrot/outputs/formats/generators/panel.py +145 -0
- parrot/outputs/formats/generators/streamlit.py +86 -0
- parrot/outputs/formats/generators/terminal.py +63 -0
- parrot/outputs/formats/holoviews.py +310 -0
- parrot/outputs/formats/html.py +147 -0
- parrot/outputs/formats/jinja2.py +46 -0
- parrot/outputs/formats/json.py +87 -0
- parrot/outputs/formats/map.py +933 -0
- parrot/outputs/formats/markdown.py +172 -0
- parrot/outputs/formats/matplotlib.py +237 -0
- parrot/outputs/formats/mixins/__init__.py +0 -0
- parrot/outputs/formats/mixins/emaps.py +855 -0
- parrot/outputs/formats/plotly.py +341 -0
- parrot/outputs/formats/seaborn.py +310 -0
- parrot/outputs/formats/table.py +397 -0
- parrot/outputs/formats/template_report.py +138 -0
- parrot/outputs/formats/yaml.py +125 -0
- parrot/outputs/formatter.py +152 -0
- parrot/outputs/templates/__init__.py +95 -0
- parrot/pipelines/__init__.py +0 -0
- parrot/pipelines/abstract.py +210 -0
- parrot/pipelines/detector.py +124 -0
- parrot/pipelines/models.py +90 -0
- parrot/pipelines/planogram.py +3002 -0
- parrot/pipelines/table.sql +97 -0
- parrot/plugins/__init__.py +106 -0
- parrot/plugins/importer.py +80 -0
- parrot/py.typed +0 -0
- parrot/registry/__init__.py +18 -0
- parrot/registry/registry.py +594 -0
- parrot/scheduler/__init__.py +1189 -0
- parrot/scheduler/models.py +60 -0
- parrot/security/__init__.py +16 -0
- parrot/security/prompt_injection.py +268 -0
- parrot/security/security_events.sql +25 -0
- parrot/services/__init__.py +1 -0
- parrot/services/mcp/__init__.py +8 -0
- parrot/services/mcp/config.py +13 -0
- parrot/services/mcp/server.py +295 -0
- parrot/services/o365_remote_auth.py +235 -0
- parrot/stores/__init__.py +7 -0
- parrot/stores/abstract.py +352 -0
- parrot/stores/arango.py +1090 -0
- parrot/stores/bigquery.py +1377 -0
- parrot/stores/cache.py +106 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss_store.py +1157 -0
- parrot/stores/kb/__init__.py +9 -0
- parrot/stores/kb/abstract.py +68 -0
- parrot/stores/kb/cache.py +165 -0
- parrot/stores/kb/doc.py +325 -0
- parrot/stores/kb/hierarchy.py +346 -0
- parrot/stores/kb/local.py +457 -0
- parrot/stores/kb/prompt.py +28 -0
- parrot/stores/kb/redis.py +659 -0
- parrot/stores/kb/store.py +115 -0
- parrot/stores/kb/user.py +374 -0
- parrot/stores/models.py +59 -0
- parrot/stores/pgvector.py +3 -0
- parrot/stores/postgres.py +2853 -0
- parrot/stores/utils/__init__.py +0 -0
- parrot/stores/utils/chunking.py +197 -0
- parrot/telemetry/__init__.py +3 -0
- parrot/telemetry/mixin.py +111 -0
- parrot/template/__init__.py +3 -0
- parrot/template/engine.py +259 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +644 -0
- parrot/tools/agent.py +363 -0
- parrot/tools/arangodbsearch.py +537 -0
- parrot/tools/arxiv_tool.py +188 -0
- parrot/tools/calculator/__init__.py +3 -0
- parrot/tools/calculator/operations/__init__.py +38 -0
- parrot/tools/calculator/operations/calculus.py +80 -0
- parrot/tools/calculator/operations/statistics.py +76 -0
- parrot/tools/calculator/tool.py +150 -0
- parrot/tools/cloudwatch.py +988 -0
- parrot/tools/codeinterpreter/__init__.py +127 -0
- parrot/tools/codeinterpreter/executor.py +371 -0
- parrot/tools/codeinterpreter/internals.py +473 -0
- parrot/tools/codeinterpreter/models.py +643 -0
- parrot/tools/codeinterpreter/prompts.py +224 -0
- parrot/tools/codeinterpreter/tool.py +664 -0
- parrot/tools/company_info/__init__.py +6 -0
- parrot/tools/company_info/tool.py +1138 -0
- parrot/tools/correlationanalysis.py +437 -0
- parrot/tools/database/abstract.py +286 -0
- parrot/tools/database/bq.py +115 -0
- parrot/tools/database/cache.py +284 -0
- parrot/tools/database/models.py +95 -0
- parrot/tools/database/pg.py +343 -0
- parrot/tools/databasequery.py +1159 -0
- parrot/tools/db.py +1800 -0
- parrot/tools/ddgo.py +370 -0
- parrot/tools/decorators.py +271 -0
- parrot/tools/dftohtml.py +282 -0
- parrot/tools/document.py +549 -0
- parrot/tools/ecs.py +819 -0
- parrot/tools/edareport.py +368 -0
- parrot/tools/elasticsearch.py +1049 -0
- parrot/tools/employees.py +462 -0
- parrot/tools/epson/__init__.py +96 -0
- parrot/tools/excel.py +683 -0
- parrot/tools/file/__init__.py +13 -0
- parrot/tools/file/abstract.py +76 -0
- parrot/tools/file/gcs.py +378 -0
- parrot/tools/file/local.py +284 -0
- parrot/tools/file/s3.py +511 -0
- parrot/tools/file/tmp.py +309 -0
- parrot/tools/file/tool.py +501 -0
- parrot/tools/file_reader.py +129 -0
- parrot/tools/flowtask/__init__.py +19 -0
- parrot/tools/flowtask/tool.py +761 -0
- parrot/tools/gittoolkit.py +508 -0
- parrot/tools/google/__init__.py +18 -0
- parrot/tools/google/base.py +169 -0
- parrot/tools/google/tools.py +1251 -0
- parrot/tools/googlelocation.py +5 -0
- parrot/tools/googleroutes.py +5 -0
- parrot/tools/googlesearch.py +5 -0
- parrot/tools/googlesitesearch.py +5 -0
- parrot/tools/googlevoice.py +2 -0
- parrot/tools/gvoice.py +695 -0
- parrot/tools/ibisworld/README.md +225 -0
- parrot/tools/ibisworld/__init__.py +11 -0
- parrot/tools/ibisworld/tool.py +366 -0
- parrot/tools/jiratoolkit.py +1718 -0
- parrot/tools/manager.py +1098 -0
- parrot/tools/math.py +152 -0
- parrot/tools/metadata.py +476 -0
- parrot/tools/msteams.py +1621 -0
- parrot/tools/msword.py +635 -0
- parrot/tools/multidb.py +580 -0
- parrot/tools/multistoresearch.py +369 -0
- parrot/tools/networkninja.py +167 -0
- parrot/tools/nextstop/__init__.py +4 -0
- parrot/tools/nextstop/base.py +286 -0
- parrot/tools/nextstop/employee.py +733 -0
- parrot/tools/nextstop/store.py +462 -0
- parrot/tools/notification.py +435 -0
- parrot/tools/o365/__init__.py +42 -0
- parrot/tools/o365/base.py +295 -0
- parrot/tools/o365/bundle.py +522 -0
- parrot/tools/o365/events.py +554 -0
- parrot/tools/o365/mail.py +992 -0
- parrot/tools/o365/onedrive.py +497 -0
- parrot/tools/o365/sharepoint.py +641 -0
- parrot/tools/openapi_toolkit.py +904 -0
- parrot/tools/openweather.py +527 -0
- parrot/tools/pdfprint.py +1001 -0
- parrot/tools/powerbi.py +518 -0
- parrot/tools/powerpoint.py +1113 -0
- parrot/tools/pricestool.py +146 -0
- parrot/tools/products/__init__.py +246 -0
- parrot/tools/prophet_tool.py +171 -0
- parrot/tools/pythonpandas.py +630 -0
- parrot/tools/pythonrepl.py +910 -0
- parrot/tools/qsource.py +436 -0
- parrot/tools/querytoolkit.py +395 -0
- parrot/tools/quickeda.py +827 -0
- parrot/tools/resttool.py +553 -0
- parrot/tools/retail/__init__.py +0 -0
- parrot/tools/retail/bby.py +528 -0
- parrot/tools/sandboxtool.py +703 -0
- parrot/tools/sassie/__init__.py +352 -0
- parrot/tools/scraping/__init__.py +7 -0
- parrot/tools/scraping/docs/select.md +466 -0
- parrot/tools/scraping/documentation.md +1278 -0
- parrot/tools/scraping/driver.py +436 -0
- parrot/tools/scraping/models.py +576 -0
- parrot/tools/scraping/options.py +85 -0
- parrot/tools/scraping/orchestrator.py +517 -0
- parrot/tools/scraping/readme.md +740 -0
- parrot/tools/scraping/tool.py +3115 -0
- parrot/tools/seasonaldetection.py +642 -0
- parrot/tools/shell_tool/__init__.py +5 -0
- parrot/tools/shell_tool/actions.py +408 -0
- parrot/tools/shell_tool/engine.py +155 -0
- parrot/tools/shell_tool/models.py +322 -0
- parrot/tools/shell_tool/tool.py +442 -0
- parrot/tools/site_search.py +214 -0
- parrot/tools/textfile.py +418 -0
- parrot/tools/think.py +378 -0
- parrot/tools/toolkit.py +298 -0
- parrot/tools/webapp_tool.py +187 -0
- parrot/tools/whatif.py +1279 -0
- parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
- parrot/tools/workday/__init__.py +6 -0
- parrot/tools/workday/models.py +1389 -0
- parrot/tools/workday/tool.py +1293 -0
- parrot/tools/yfinance_tool.py +306 -0
- parrot/tools/zipcode.py +217 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/helpers.py +73 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.c +12078 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/parsers/toml.pyx +21 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpp +20936 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/types.pyx +213 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- parrot/yaml-rs/Cargo.lock +350 -0
- parrot/yaml-rs/Cargo.toml +19 -0
- parrot/yaml-rs/pyproject.toml +19 -0
- parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
- parrot/yaml-rs/src/lib.rs +222 -0
- requirements/docker-compose.yml +24 -0
- requirements/requirements-dev.txt +21 -0
parrot/stores/arango.py
ADDED
|
@@ -0,0 +1,1090 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ArangoDBStore: Vector Store implementation for ArangoDB.
|
|
3
|
+
|
|
4
|
+
Provides comprehensive vector storage with graph support, including:
|
|
5
|
+
- Database and collection management
|
|
6
|
+
- Graph creation and management
|
|
7
|
+
- Document operations with upsert support
|
|
8
|
+
- Vector similarity search
|
|
9
|
+
- Full-text search (BM25)
|
|
10
|
+
- Hybrid search (vector + text)
|
|
11
|
+
- Graph-enhanced retrieval
|
|
12
|
+
"""
|
|
13
|
+
from typing import Any, Dict, List, Optional, Union, Callable
|
|
14
|
+
import uuid
|
|
15
|
+
import asyncio
|
|
16
|
+
from collections.abc import Callable as CallableType
|
|
17
|
+
|
|
18
|
+
from navconfig.logging import logging
|
|
19
|
+
from asyncdb import AsyncDB
|
|
20
|
+
|
|
21
|
+
from .abstract import AbstractStore
|
|
22
|
+
from .models import Document, SearchResult, DistanceStrategy
|
|
23
|
+
from ..conf import EMBEDDING_DEFAULT_MODEL
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ArangoDBStore(AbstractStore):
|
|
27
|
+
"""
|
|
28
|
+
ArangoDB Vector Store with native graph support.
|
|
29
|
+
|
|
30
|
+
Features:
|
|
31
|
+
- Multi-model database (documents, graphs, key-value)
|
|
32
|
+
- Native graph operations for knowledge graphs
|
|
33
|
+
- ArangoSearch for full-text and vector search
|
|
34
|
+
- Hybrid search combining semantic and keyword
|
|
35
|
+
- Graph-enhanced RAG with relationship context
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def __init__(
    self,
    database: str = "_system",
    collection_name: str = "documents",
    embedding_column: str = "embedding",
    text_column: str = "content",
    metadata_column: str = "metadata",
    id_column: str = "_key",
    embedding_model: Union[dict, str] = None,
    embedding: Optional[Callable] = None,
    distance_strategy: DistanceStrategy = DistanceStrategy.COSINE,
    # ArangoDB specific
    host: str = "127.0.0.1",
    port: int = 8529,
    protocol: str = "http",
    username: str = "root",
    password: str = "",
    default_graph: Optional[str] = None,
    default_view: Optional[str] = None,
    # Collection options
    edge_collection: str = "relationships",
    auto_create_view: bool = True,
    view_analyzer: str = "text_en",
    **kwargs
):
    """
    Configure an ArangoDB-backed vector store.

    Only configuration is recorded here: no connection is opened, and the
    driver handle (``_db``) and raw connection (``_connection``) start out
    as ``None``.

    Args:
        database: Database name; also placed in the connection parameters
        collection_name: Default collection for documents
        embedding_column: Field name for embeddings
        text_column: Field name for text content
        metadata_column: Field name for metadata
        id_column: Field name for document ID (_key)
        embedding_model: Embedding model configuration, forwarded to the
            parent initializer
        embedding: Custom embedding function, forwarded to the parent
            initializer
        distance_strategy: Distance metric (COSINE, L2, DOT_PRODUCT)
        host: ArangoDB host
        port: ArangoDB port
        protocol: Connection protocol (http/https)
        username: Database username
        password: Database password
        default_graph: Default graph name
        default_view: Default ArangoSearch view name; when not given (or
            falsy) it becomes ``"<collection_name>_view"``
        edge_collection: Default edge collection name
        auto_create_view: Auto-create ArangoSearch view
        view_analyzer: Text analyzer for views
        **kwargs: Extra keyword arguments forwarded to the parent
            initializer
    """
    # The base store receives the embedding configuration and any extras.
    super().__init__(embedding_model=embedding_model, embedding=embedding, **kwargs)

    # Collection layout: which database/collection and which fields are used.
    self.database = database
    self.collection_name = collection_name
    self.embedding_column = embedding_column
    self.text_column = text_column
    self.metadata_column = metadata_column
    self.id_column = id_column
    self.distance_strategy = distance_strategy

    # Parameters later handed to the AsyncDB "arangodb" driver.
    self.connection_params = dict(
        host=host,
        port=port,
        protocol=protocol,
        username=username,
        password=password,
        database=database,
    )

    # Graph and search settings; the view name is derived from the
    # collection name when the caller does not supply one.
    self.default_graph = default_graph
    if default_view:
        self.default_view = default_view
    else:
        self.default_view = f"{collection_name}_view"
    self.edge_collection = edge_collection
    self.auto_create_view = auto_create_view
    self.view_analyzer = view_analyzer

    # Populated lazily once a connection is established.
    self._db: Optional[AsyncDB] = None
    self._connection = None

    self.logger = logging.getLogger("ArangoDBStore")
|
|
125
|
+
|
|
126
|
+
async def connection(self) -> tuple:
    """
    Establish connection to ArangoDB.

    Builds an AsyncDB ``"arangodb"`` driver from ``self.connection_params``
    when no driver is held or the held driver's ``_connected`` flag is
    falsy, then caches the driver's underlying connection handle and marks
    this store as connected. Collection and search-view bootstrap are
    driven by ``self.collection_name`` and ``self.auto_create_view``.

    Any exception is caught, logged with its traceback, and reported
    through the return value instead of being raised.

    NOTE(review): this block was recovered from a flattened diff view in
    which indentation was lost. The nesting of the bootstrap steps and of
    the "Connected" log line under the reconnect check is a reconstruction
    -- confirm against the packaged file.

    Returns:
        Tuple of (connection, None) or (None, error), where error is the
        exception message as a string
    """
    try:
        # `_connected` is read directly from the driver object.
        if self._db is None or not self._db._connected:
            self._db = AsyncDB("arangodb", params=self.connection_params)
            await self._db.connection()
            self._connection = self._db._connection
            self._connected = True

            # Auto-create collection and view if needed
            if self.collection_name:
                await self.create_collection(self.collection_name)

            if self.auto_create_view:
                await self._ensure_search_view()

            self.logger.info(
                f"Connected to ArangoDB: {self.database}/{self.collection_name}"
            )

        return (self._connection, None)

    except Exception as e:
        # Failures are surfaced to the caller as the second tuple element.
        self.logger.error(f"Connection error: {e}", exc_info=True)
        return (None, str(e))
|
|
156
|
+
|
|
157
|
+
async def disconnect(self) -> None:
    """
    Close the ArangoDB connection and reset this store's connection state.

    Does nothing when no driver handle is held. Errors raised while
    closing are logged and swallowed (best-effort shutdown). Whether or
    not the close succeeds, the driver handle, the cached connection and
    the ``_connected`` flag are all cleared together, so the store never
    reports itself as connected while holding no driver.
    """
    if not self._db:
        return

    try:
        await self._db.close()
        self.logger.info("Disconnected from ArangoDB")
    except Exception as e:
        self.logger.error(f"Disconnect error: {e}")
    finally:
        # The handle is dropped unconditionally, so the flag must follow it;
        # clearing it only on success left `_connected` True with `_db` None.
        self._db = None
        self._connection = None
        self._connected = False
|
|
169
|
+
|
|
170
|
+
def get_vector(self, metric_type: str = None, **kwargs):
    """
    Return the vector store instance, which is this object itself.

    ``metric_type`` and any extra keyword arguments are accepted but not
    used by this implementation.
    """
    return self
|
|
173
|
+
|
|
174
|
+
# Database Management
|
|
175
|
+
|
|
176
|
+
async def create_database(self, database_name: str) -> bool:
    """
    Ask the driver to create a database.

    Args:
        database_name: Name of database to create

    Returns:
        True once the driver call has completed

    Raises:
        Exception: whatever the driver call raises; it is logged and then
            re-raised unchanged
    """
    try:
        await self._db.create_database(database_name)
        self.logger.info(f"Created database: {database_name}")
    except Exception as e:
        self.logger.error(f"Error creating database {database_name}: {e}")
        raise
    return True
|
|
193
|
+
|
|
194
|
+
async def drop_database(self, database_name: str) -> bool:
    """
    Ask the driver to drop a database.

    Args:
        database_name: Name of database to drop

    Returns:
        True once the driver call has completed

    Raises:
        Exception: whatever the driver call raises; it is logged and then
            re-raised unchanged
    """
    try:
        await self._db.drop_database(database_name)
        self.logger.info(f"Dropped database: {database_name}")
    except Exception as e:
        self.logger.error(f"Error dropping database {database_name}: {e}")
        raise
    return True
|
|
211
|
+
|
|
212
|
+
async def use_database(self, database_name: str) -> None:
    """
    Switch to a different database.

    The driver is switched first; only if that succeeds are the store's
    own records updated. Both ``self.database`` and the ``"database"``
    entry of ``self.connection_params`` are updated, because the latter is
    what a later reconnect builds its driver from -- leaving it stale
    would make a reconnect silently return to the previous database.

    Args:
        database_name: Database to switch to

    Raises:
        Exception: whatever the driver's ``use`` call raises; in that case
            no store state is modified
    """
    await self._db.use(database_name)
    self.database = database_name
    # Keep reconnect parameters in step with the active database.
    self.connection_params["database"] = database_name
    self.logger.info(f"Switched to database: {database_name}")
|
|
222
|
+
|
|
223
|
+
# Collection Management
|
|
224
|
+
|
|
225
|
+
async def create_collection(
    self,
    collection: str,
    edge: bool = False,
    **kwargs
) -> bool:
    """
    Make sure a collection (document or edge) exists.

    The driver is asked whether the collection exists; it is created only
    when the answer is falsy.

    Args:
        collection: Collection name
        edge: If True, create edge collection
        **kwargs: Additional collection properties, passed to the driver's
            create call

    Returns:
        True if created or already exists

    Raises:
        Exception: whatever the driver raises; it is logged and then
            re-raised unchanged
    """
    try:
        already_present = await self._db.collection_exists(collection)
        if not already_present:
            await self._db.create_collection(collection, edge=edge, **kwargs)
            self.logger.info(
                f"Created collection: {collection} (edge={edge})"
            )
        else:
            self.logger.debug(f"Collection {collection} already exists")
    except Exception as e:
        self.logger.error(f"Error creating collection {collection}: {e}")
        raise
    return True
|
|
256
|
+
|
|
257
|
+
async def delete_collection(self, collection: str) -> bool:
    """
    Ask the driver to drop a collection.

    Args:
        collection: Collection name to drop

    Returns:
        True once the driver call has completed

    Raises:
        Exception: whatever the driver call raises; it is logged and then
            re-raised unchanged
    """
    try:
        await self._db.drop_collection(collection)
        self.logger.info(f"Dropped collection: {collection}")
    except Exception as e:
        self.logger.error(f"Error dropping collection {collection}: {e}")
        raise
    return True
|
|
274
|
+
|
|
275
|
+
async def collection_exists(self, collection: str) -> bool:
    """
    Report whether a collection is present.

    Args:
        collection: Name of the collection to look for

    Returns:
        Whatever the driver's ``collection_exists`` call returns
    """
    found = await self._db.collection_exists(collection)
    return found
|
|
286
|
+
|
|
287
|
+
# Graph Management
|
|
288
|
+
|
|
289
|
+
async def create_graph(
    self,
    graph_name: str,
    vertex_collections: List[str],
    edge_collection: str = None,
    orphan_collections: List[str] = None
) -> bool:
    """
    Build a named graph together with the collections it relies on.

    The edge collection and every vertex collection are ensured through
    ``create_collection`` before the graph itself is created. A single
    edge definition is used, in which every vertex collection may appear
    on both the "from" and the "to" side.

    Args:
        graph_name: Name of the graph
        vertex_collections: Names of the vertex collections
        edge_collection: Edge collection name; falls back to
            ``self.edge_collection`` when falsy
        orphan_collections: Vertex collections without edges; an empty
            list is sent when falsy

    Returns:
        True once the graph has been created

    Raises:
        Exception: Any error is logged and re-raised unchanged
    """
    try:
        edges_name = edge_collection or self.edge_collection

        # The edge collection is ensured first, then each vertex collection.
        await self.create_collection(edges_name, edge=True)
        for vertex_name in vertex_collections:
            await self.create_collection(vertex_name, edge=False)

        definition = {
            'edge_collection': edges_name,
            'from_vertex_collections': vertex_collections,
            'to_vertex_collections': vertex_collections,
        }

        await self._db.create_graph(
            graph_name,
            edge_definitions=[definition],
            orphan_collections=orphan_collections or []
        )
        self.logger.info(f"Created graph: {graph_name}")
        return True
    except Exception as exc:
        self.logger.error(f"Error creating graph {graph_name}: {exc}")
        raise
|
|
338
|
+
|
|
339
|
+
async def drop_graph(
    self,
    graph_name: str,
    drop_collections: bool = False
) -> bool:
    """
    Remove a named graph.

    Args:
        graph_name: Name of the graph to drop
        drop_collections: Forwarded to the driver; when True the
            collections associated with the graph are dropped as well

    Returns:
        True once the driver call has completed

    Raises:
        Exception: Any driver error is logged and re-raised unchanged
    """
    try:
        await self._db.drop_graph(
            graph_name,
            drop_collections=drop_collections
        )
        self.logger.info(f"Dropped graph: {graph_name}")
    except Exception as exc:
        self.logger.error(f"Error dropping graph {graph_name}: {exc}")
        raise
    else:
        return True
|
|
361
|
+
|
|
362
|
+
async def graph_exists(self, graph_name: str) -> bool:
    """Report whether a named graph is present, as answered by the driver."""
    found = await self._db.graph_exists(graph_name)
    return found
|
|
365
|
+
|
|
366
|
+
# ArangoSearch View Management
|
|
367
|
+
|
|
368
|
+
async def _ensure_search_view(self) -> None:
    """
    Best-effort creation of the default ArangoSearch view.

    The view is named ``self.default_view`` and links
    ``self.collection_name``, indexing the text column with
    ``self.view_analyzer`` and the embedding column with the
    ``identity`` analyzer.

    Any exception raised along the way is logged at debug level and
    swallowed, because the view might already exist.
    """
    try:
        # One link entry: text analyzed for search, embedding kept verbatim.
        indexed_fields = {
            self.text_column: {'analyzers': [self.view_analyzer]},
            self.embedding_column: {'analyzers': ['identity']},
        }
        links = {self.collection_name: {'fields': indexed_fields}}

        await self._db.create_arangosearch_view(
            view_name=self.default_view,
            links=links
        )
        self.logger.info(f"Created ArangoSearch view: {self.default_view}")
    except Exception as exc:
        # View might already exist
        self.logger.debug(f"View creation: {exc}")
|
|
395
|
+
|
|
396
|
+
async def create_view(
    self,
    view_name: str,
    collections: List[str],
    text_fields: List[str] = None,
    vector_field: str = None,
    analyzer: str = None,
    **kwargs
) -> bool:
    """
    Create an ArangoSearch view spanning one or more collections.

    Every collection is linked with the same field layout: each text
    field uses the chosen analyzer and the vector field uses the
    ``identity`` analyzer.

    Args:
        view_name: Name for the view
        collections: Collections to link into the view
        text_fields: Text fields to index; defaults to
            ``[self.text_column]`` when falsy
        vector_field: Vector field to index; defaults to
            ``self.embedding_column`` when falsy
        analyzer: Text analyzer; defaults to ``self.view_analyzer``
            when falsy
        **kwargs: Extra view properties forwarded to the driver

    Returns:
        True once the view has been created

    Raises:
        Exception: Any error is logged and re-raised unchanged
    """
    try:
        chosen_analyzer = analyzer or self.view_analyzer
        searchable = text_fields or [self.text_column]
        vector_attr = vector_field or self.embedding_column

        links = {}
        for linked in collections:
            # A fresh field map per collection so the links share no state.
            layout = {
                name: {'analyzers': [chosen_analyzer]} for name in searchable
            }
            if vector_attr:
                layout[vector_attr] = {'analyzers': ['identity']}
            links[linked] = {'fields': layout}

        await self._db.create_arangosearch_view(
            view_name=view_name,
            links=links,
            **kwargs
        )
        self.logger.info(f"Created view: {view_name}")
        return True
    except Exception as exc:
        self.logger.error(f"Error creating view {view_name}: {exc}")
        raise
|
|
450
|
+
|
|
451
|
+
async def drop_view(self, view_name: str) -> bool:
    """
    Remove an ArangoSearch view.

    Args:
        view_name: Name of the view to drop

    Returns:
        True once the driver call has completed

    Raises:
        Exception: Any driver error is logged and re-raised unchanged
    """
    try:
        await self._db.drop_arangosearch_view(view_name)
        self.logger.info(f"Dropped view: {view_name}")
    except Exception as exc:
        self.logger.error(f"Error dropping view {view_name}: {exc}")
        raise
    else:
        return True
|
|
460
|
+
|
|
461
|
+
# Document Operations
|
|
462
|
+
|
|
463
|
+
async def add_document(
    self,
    document: Union[Document, dict],
    collection: str = None,
    upsert: bool = True,
    upsert_key: Optional[str] = None,
    upsert_metadata_keys: Optional[List[str]] = None,
    **kwargs
) -> Dict[str, Any]:
    """
    Store one document, updating a matching one when upsert is enabled.

    A ``Document`` is converted with ``_document_to_dict``; anything else
    is shallow-copied with ``.copy()`` so the caller's object is not
    modified. When the record has text but no embedding, an embedding is
    generated before anything is written.

    Args:
        document: Document to add (Document object or dict)
        collection: Collection name (defaults to self.collection_name)
        upsert: When True, look for an existing document first and
            update it if one is found
        upsert_key: Field whose value identifies an existing document
        upsert_metadata_keys: Metadata fields used to identify an
            existing document
        **kwargs: Accepted for interface compatibility; not read here

    Returns:
        The driver's result for the update or insert call (requested
        with ``return_new=True``)

    Raises:
        Exception: Any error during lookup or write is logged and
            re-raised unchanged
    """
    collection = collection or self.collection_name

    # Normalise the input to a plain dict we are free to modify.
    doc_dict = (
        self._document_to_dict(document)
        if isinstance(document, Document)
        else document.copy()
    )

    # Only embed when there is text and no embedding was supplied.
    lacks_embedding = self.embedding_column not in doc_dict
    if lacks_embedding and self.text_column in doc_dict:
        doc_dict[self.embedding_column] = await self._generate_embedding(
            doc_dict[self.text_column]
        )

    try:
        match = None
        if upsert:
            match = await self._find_existing_document(
                doc_dict,
                collection,
                upsert_key,
                upsert_metadata_keys
            )

        if match:
            # Reuse the stored key so the update hits the matched document.
            doc_dict['_key'] = match['_key']
            outcome = await self._db.update_document(
                collection,
                doc_dict,
                return_new=True
            )
            self.logger.debug(f"Updated document: {doc_dict['_key']}")
            return outcome

        # No match (or upsert disabled): insert, generating a key if needed.
        if '_key' not in doc_dict:
            doc_dict['_key'] = str(uuid.uuid4())

        outcome = await self._db.insert_document(
            collection,
            doc_dict,
            return_new=True
        )
        self.logger.debug(f"Inserted document: {doc_dict['_key']}")
        return outcome
    except Exception as exc:
        self.logger.error(f"Error adding document: {exc}")
        raise
|
|
536
|
+
|
|
537
|
+
async def add_documents(
    self,
    documents: List[Union[Document, dict]],
    collection: str = None,
    upsert: bool = True,
    batch_size: int = 100,
    **kwargs
) -> int:
    """
    Store several documents, one ``add_document`` call per document.

    The input is walked in slices of ``batch_size``, but each document
    is still written individually. A failure on one document is logged
    and skipped; it does not stop the remaining documents.

    Args:
        documents: Documents to add
        collection: Collection name (defaults to self.collection_name)
        upsert: Forwarded to ``add_document``
        batch_size: Size of the slices the input is walked in
        **kwargs: Forwarded to ``add_document``

    Returns:
        Number of documents that were added without raising
    """
    target = collection or self.collection_name
    added = 0

    for offset in range(0, len(documents), batch_size):
        for item in documents[offset:offset + batch_size]:
            try:
                await self.add_document(
                    item,
                    collection=target,
                    upsert=upsert,
                    **kwargs
                )
            except Exception as exc:
                self.logger.error(f"Error adding document in batch: {exc}")
            else:
                added += 1

    self.logger.info(f"Added {added} documents to {target}")
    return added
|
|
579
|
+
|
|
580
|
+
async def save_documents(
    self,
    documents: List[Union[Document, dict]],
    collection: str = None,
    **kwargs
) -> int:
    """
    Upsert several documents.

    Thin wrapper that calls ``add_documents`` with ``upsert=True``.

    Args:
        documents: Documents to save
        collection: Collection name
        **kwargs: Forwarded to ``add_documents``

    Returns:
        The count returned by ``add_documents``
    """
    saved = await self.add_documents(
        documents,
        collection=collection,
        upsert=True,
        **kwargs
    )
    return saved
|
|
603
|
+
|
|
604
|
+
async def delete_documents_by_filter(
    self,
    filter_dict: Dict[str, Any],
    collection: str = None,
    **kwargs
) -> int:
    """
    Delete documents matching filter conditions.

    Each entry of ``filter_dict`` becomes an equality test on
    ``doc.<key>``, and all tests are combined with ``AND``. Values are
    passed as bind variables; keys and the collection name are
    interpolated into the query text, so they must come from trusted
    code.

    Args:
        filter_dict: Filter conditions (e.g., {'category': 'test'}).
            Dotted keys such as ``'metadata.category'`` address nested
            attributes.
        collection: Collection name (defaults to self.collection_name)
        **kwargs: Accepted for interface compatibility; not read here

    Returns:
        Number of documents deleted

    Raises:
        ValueError: If ``filter_dict`` is empty, because an empty FILTER
            clause is not valid AQL
        Exception: Any query error is logged and re-raised
    """
    collection = collection or self.collection_name

    if not filter_dict:
        raise ValueError("filter_dict must contain at least one condition")

    # Build AQL filter conditions
    filter_conditions = []
    bind_vars = {}

    for position, (key, value) in enumerate(filter_dict.items()):
        # Bind names are positional rather than derived from the key:
        # a dotted key would otherwise yield "@filter_a.b", which AQL
        # reads as bind parameter "filter_a" followed by ".b".
        var_name = f"filter_{position}"
        filter_conditions.append(f"doc.{key} == @{var_name}")
        bind_vars[var_name] = value

    filter_clause = " AND ".join(filter_conditions)

    # Delete query
    query = f"""
        FOR doc IN {collection}
            FILTER {filter_clause}
            REMOVE doc IN {collection}
            RETURN OLD
    """

    try:
        results, error = await self._db.query(query, bind_vars=bind_vars)

        if error:
            raise Exception(error)

        count = len(results) if results else 0
        self.logger.info(f"Deleted {count} documents from {collection}")
        return count

    except Exception as e:
        self.logger.error(f"Error deleting documents: {e}")
        raise
|
|
655
|
+
|
|
656
|
+
# Search Methods
|
|
657
|
+
|
|
658
|
+
async def similarity_search(
    self,
    query: str,
    collection: str = None,
    limit: int = 10,
    similarity_threshold: float = 0.0,
    search_strategy: str = "auto",
    metadata_filters: Union[dict, None] = None,
    include_graph_context: bool = False,
    **kwargs
) -> List[SearchResult]:
    """
    Run a vector similarity search for a text query.

    The query is embedded, the driver's ``vector_search`` is called,
    hits below ``similarity_threshold`` are dropped, and the rest are
    converted with ``_to_search_results``.

    Args:
        query: Query text
        collection: Collection to search (defaults to
            self.collection_name)
        limit: Maximum results requested from the driver (``top_k``)
        similarity_threshold: Minimum ``similarity`` a hit must have;
            hits without that key count as 0
        search_strategy: Accepted for interface compatibility; not
            read by this method
        metadata_filters: Metadata filter conditions, translated by
            ``_build_filter_conditions``
        include_graph_context: When True and ``self.default_graph`` is
            set, results are enriched with graph neighbors
        **kwargs: ``view_name`` overrides ``self.default_view``

    Returns:
        List of SearchResult objects

    Raises:
        Exception: Errors from the search or conversion are logged and
            re-raised; embedding and filter-building errors propagate
            without being logged here
    """
    collection = collection or self.collection_name
    view_name = kwargs.get('view_name', self.default_view)

    query_vector = await self._generate_embedding(query)
    conditions = self._build_filter_conditions(metadata_filters)

    try:
        raw_hits = await self._db.vector_search(
            view_name=view_name,
            collection=collection,
            query_vector=query_vector,
            vector_field=self.embedding_column,
            top_k=limit,
            filter_conditions=conditions,
            include_similarity=True
        )

        # Keep only hits that reach the requested similarity.
        kept = []
        for hit in raw_hits:
            if hit.get('similarity', 0) >= similarity_threshold:
                kept.append(hit)

        search_results = self._to_search_results(kept)

        if include_graph_context and self.default_graph:
            search_results = await self._enrich_with_graph_context(
                search_results
            )

        return search_results
    except Exception as exc:
        self.logger.error(f"Similarity search error: {exc}")
        raise
|
|
725
|
+
|
|
726
|
+
async def fulltext_search(
    self,
    query: str,
    collection: str = None,
    text_fields: List[str] = None,
    limit: int = 10,
    min_score: float = 0.0,
    analyzer: str = None,
    metadata_filters: dict = None,
    **kwargs
) -> List[SearchResult]:
    """
    Run a full-text search against an ArangoSearch view.

    Args:
        query: Search query text
        collection: Accepted for interface compatibility; not
            forwarded to the driver by this method
        text_fields: Fields to search; defaults to
            ``[self.text_column]`` when falsy
        limit: Maximum results requested from the driver (``top_k``)
        min_score: Minimum score, forwarded to the driver
        analyzer: Text analyzer; defaults to ``self.view_analyzer``
            when falsy
        metadata_filters: Accepted for interface compatibility; not
            forwarded to the driver by this method
        **kwargs: ``view_name`` overrides ``self.default_view``

    Returns:
        List of SearchResult objects, scored from each hit's ``score``

    Raises:
        Exception: Any error is logged and re-raised unchanged
    """
    view_name = kwargs.get('view_name', self.default_view)
    search_fields = text_fields or [self.text_column]
    chosen_analyzer = analyzer or self.view_analyzer

    try:
        raw_hits = await self._db.fulltext_search(
            view_name=view_name,
            query_text=query,
            fields=search_fields,
            analyzer=chosen_analyzer,
            top_k=limit,
            min_score=min_score
        )
        converted = self._to_search_results(raw_hits, score_field='score')
        return converted
    except Exception as exc:
        self.logger.error(f"Full-text search error: {exc}")
        raise
|
|
772
|
+
|
|
773
|
+
async def hybrid_search(
    self,
    query: str,
    collection: str = None,
    limit: int = 10,
    text_weight: float = 0.5,
    vector_weight: float = 0.5,
    text_fields: List[str] = None,
    analyzer: str = None,
    metadata_filters: dict = None,
    **kwargs
) -> List[SearchResult]:
    """
    Run a combined text and vector search.

    The query is embedded and both the raw text and the embedding are
    handed to the driver's ``hybrid_search``.

    Args:
        query: Search query
        collection: Collection to search (defaults to
            self.collection_name)
        limit: Maximum results requested from the driver (``top_k``)
        text_weight: Weight for text score (0-1)
        vector_weight: Weight for vector score (0-1)
        text_fields: Text fields to search; defaults to
            ``[self.text_column]`` when falsy
        analyzer: Text analyzer; defaults to ``self.view_analyzer``
            when falsy
        metadata_filters: Accepted for interface compatibility; not
            forwarded to the driver by this method
        **kwargs: ``view_name`` overrides ``self.default_view``

    Returns:
        List of SearchResult objects, scored from each hit's
        ``combined_score``

    Raises:
        Exception: Errors from the search or conversion are logged and
            re-raised; embedding errors propagate without being logged
            here
    """
    collection = collection or self.collection_name
    view_name = kwargs.get('view_name', self.default_view)
    search_fields = text_fields or [self.text_column]
    chosen_analyzer = analyzer or self.view_analyzer

    query_vector = await self._generate_embedding(query)

    try:
        raw_hits = await self._db.hybrid_search(
            view_name=view_name,
            collection=collection,
            query_text=query,
            query_vector=query_vector,
            text_fields=search_fields,
            vector_field=self.embedding_column,
            text_weight=text_weight,
            vector_weight=vector_weight,
            analyzer=chosen_analyzer,
            top_k=limit
        )
        converted = self._to_search_results(
            raw_hits,
            score_field='combined_score'
        )
        return converted
    except Exception as exc:
        self.logger.error(f"Hybrid search error: {exc}")
        raise
|
|
829
|
+
|
|
830
|
+
async def document_search(
    self,
    query: str,
    search_type: str = "similarity",
    collection: str = None,
    limit: int = 10,
    **kwargs
) -> List[SearchResult]:
    """
    Single entry point that routes to one of the search methods.

    Args:
        query: Search query
        search_type: One of ``"similarity"``, ``"fulltext"`` or
            ``"hybrid"``
        collection: Collection to search
        limit: Maximum results
        **kwargs: Forwarded to the selected search method

    Returns:
        List of SearchResult objects

    Raises:
        ValueError: If ``search_type`` is not one of the known values
    """
    # Pick the handler first, then make one shared call.
    if search_type == "similarity":
        handler = self.similarity_search
    elif search_type == "fulltext":
        handler = self.fulltext_search
    elif search_type == "hybrid":
        handler = self.hybrid_search
    else:
        raise ValueError(f"Unknown search type: {search_type}")

    return await handler(
        query, collection=collection, limit=limit, **kwargs
    )
|
|
865
|
+
|
|
866
|
+
# Required AbstractStore methods
|
|
867
|
+
|
|
868
|
+
async def from_documents(
    self,
    documents: List[Any],
    collection: Union[str, None] = None,
    **kwargs
) -> 'ArangoDBStore':
    """
    Load documents into a collection, creating the collection if needed.

    Args:
        documents: List of Document objects
        collection: Collection name (defaults to self.collection_name)
        **kwargs: Forwarded to ``add_documents``

    Returns:
        This store instance, so calls can be chained
    """
    target = collection or self.collection_name

    # The collection has to be there before documents are written.
    await self.create_collection(target)
    await self.add_documents(documents, collection=target, **kwargs)

    return self
|
|
894
|
+
|
|
895
|
+
# Helper Methods
|
|
896
|
+
|
|
897
|
+
def _document_to_dict(self, document: Document) -> dict:
    """
    Flatten a Document into the dict layout stored in the collection.

    The text goes under ``self.text_column`` and the metadata (or an
    empty dict when it is falsy) under ``self.metadata_column``. A
    truthy ``embedding`` attribute, if the document has one, is copied
    under ``self.embedding_column``.
    """
    doc_dict = {}
    doc_dict[self.text_column] = document.page_content
    doc_dict[self.metadata_column] = document.metadata or {}

    # Carry over an embedding only when one is actually present.
    existing_vector = getattr(document, 'embedding', None)
    if existing_vector:
        doc_dict[self.embedding_column] = existing_vector

    return doc_dict
|
|
909
|
+
|
|
910
|
+
async def _generate_embedding(self, text: str) -> List[float]:
    """
    Generate an embedding for text with the configured model.

    Both coroutine and plain ``embed_query`` implementations are
    supported: a coroutine function is awaited directly, anything else
    is run in the event loop's default executor so it does not block
    the loop.

    Args:
        text: Text to embed

    Returns:
        Whatever the model's ``embed_query`` returns for ``text``

    Raises:
        ValueError: If no embedding model is configured
            (``self._embed_`` is None)
    """
    if self._embed_ is None:
        raise ValueError("No embedding model configured")

    # Handle both sync and async embeddings
    import inspect

    embed_query = self._embed_.embed_query
    if inspect.iscoroutinefunction(embed_query):
        return await embed_query(text)

    # This code is always inside a running coroutine, so ask for the
    # running loop explicitly instead of the legacy get_event_loop().
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, embed_query, text)
|
|
923
|
+
|
|
924
|
+
async def _find_existing_document(
    self,
    doc_dict: dict,
    collection: str,
    upsert_key: Optional[str],
    upsert_metadata_keys: Optional[List[str]]
) -> Optional[dict]:
    """
    Look up the stored document an upsert should update.

    Two lookups are tried in order, and the first hit wins:

    1. By ``upsert_key``, when it is given and present in ``doc_dict``.
    2. By the listed metadata keys, using only those actually present
       in the document's metadata and requiring all of them to match.

    Args:
        doc_dict: The document about to be written
        collection: Collection to search
        upsert_key: Field whose value identifies an existing document
        upsert_metadata_keys: Metadata fields used for matching

    Returns:
        The first matching stored document, or None when nothing matches
    """
    # Lookup 1: explicit key field.
    if upsert_key and upsert_key in doc_dict:
        by_key_query = f"""
            FOR doc IN {collection}
                FILTER doc.{upsert_key} == @key_value
                LIMIT 1
                RETURN doc
        """
        found = await self._db.fetch_one(
            by_key_query,
            bind_vars={'key_value': doc_dict[upsert_key]}
        )
        if found:
            return found

    # Lookup 2: metadata fields.
    if upsert_metadata_keys and self.metadata_column in doc_dict:
        metadata = doc_dict[self.metadata_column]
        clauses = []
        bind_vars = {}

        for name in upsert_metadata_keys:
            if name not in metadata:
                continue
            placeholder = f"meta_{name}"
            clauses.append(
                f"doc.{self.metadata_column}.{name} == @{placeholder}"
            )
            bind_vars[placeholder] = metadata[name]

        if clauses:
            filter_clause = " AND ".join(clauses)
            by_meta_query = f"""
                FOR doc IN {collection}
                    FILTER {filter_clause}
                    LIMIT 1
                    RETURN doc
            """
            found = await self._db.fetch_one(
                by_meta_query,
                bind_vars=bind_vars
            )
            if found:
                return found

    return None
|
|
972
|
+
|
|
973
|
+
def _build_filter_conditions(
    self,
    metadata_filters: Optional[dict]
) -> Optional[List[str]]:
    """
    Build AQL filter conditions from metadata filters.

    Each entry becomes one condition on
    ``doc.<metadata_column>.<key>``: a list or tuple value produces an
    ``IN [...]`` test, anything else an ``==`` test.

    Values are rendered as JSON literals. Numbers, booleans and None
    therefore keep their type instead of being compared as quoted
    strings, and quotes or backslashes inside strings are escaped
    rather than terminating the literal. Values JSON cannot encode
    fall back to their ``str()`` form. Keys are interpolated as-is, so
    they must come from trusted code.

    Args:
        metadata_filters: Mapping of metadata key to wanted value(s)

    Returns:
        List of AQL condition strings, or None when there are no
        filters
    """
    if not metadata_filters:
        return None

    # Imported locally, like ``inspect`` elsewhere in this class, so the
    # block does not depend on the module-level import list.
    import json

    def _literal(value: Any) -> str:
        """Render one Python value as an AQL-compatible JSON literal."""
        return json.dumps(value, ensure_ascii=False, default=str)

    conditions = []
    for key, value in metadata_filters.items():
        attribute = f"doc.{self.metadata_column}.{key}"
        if isinstance(value, (list, tuple)):
            # IN condition
            values_str = ", ".join(_literal(item) for item in value)
            conditions.append(f"{attribute} IN [{values_str}]")
        else:
            # Equality condition
            conditions.append(f"{attribute} == {_literal(value)}")

    return conditions
|
|
992
|
+
|
|
993
|
+
def _to_search_results(
    self,
    results: List[dict],
    score_field: str = 'similarity'
) -> List[SearchResult]:
    """
    Turn raw driver hits into SearchResult objects.

    A hit may wrap the stored document under a ``document`` key; if it
    does not, the hit itself is treated as the document. The score is
    read from the hit's ``score_field`` and defaults to 0.0.

    Args:
        results: Raw hits as returned by the driver
        score_field: Name of the key holding the score on each hit

    Returns:
        One SearchResult per hit, in the same order. Its metadata holds
        the document's ``_id`` and ``_key`` plus the stored metadata
        entries (stored entries win on a name clash).
    """
    converted = []

    for hit in results:
        stored = hit.get('document', hit)
        score = hit.get(score_field, 0.0)
        stored_meta = stored.get(self.metadata_column, {})

        document = Document(
            page_content=stored.get(self.text_column, ""),
            metadata=stored_meta
        )

        # Identity fields first, so stored metadata can override them.
        result_meta = {'_id': stored.get('_id'), '_key': stored.get('_key')}
        result_meta.update(stored_meta)

        converted.append(
            SearchResult(
                document=document,
                score=score,
                metadata=result_meta
            )
        )

    return converted
|
|
1028
|
+
|
|
1029
|
+
async def _enrich_with_graph_context(
    self,
    search_results: List[SearchResult]
) -> List[SearchResult]:
    """
    Attach graph neighbors to each search result.

    For every result whose metadata has a truthy ``_id``, up to 5
    related nodes at depth 1 are fetched from ``self.default_graph``
    and stored under ``metadata['graph_context']``. A failed lookup is
    logged at debug level and leaves that result unchanged.

    Args:
        search_results: Results to enrich; their metadata dicts are
            modified in place

    Returns:
        The input list itself when no default graph is configured,
        otherwise a new list holding the same result objects
    """
    if not self.default_graph:
        return search_results

    enriched = []
    for hit in search_results:
        node_id = hit.metadata.get('_id')

        if node_id:
            try:
                # Direct neighbors only, capped at five per result.
                hit.metadata['graph_context'] = (
                    await self._db.find_related_nodes(
                        node_id=node_id,
                        max_depth=1,
                        limit=5,
                        graph_name=self.default_graph
                    )
                )
            except Exception as exc:
                self.logger.debug(f"Could not get graph context: {exc}")

        enriched.append(hit)

    return enriched
|
|
1059
|
+
|
|
1060
|
+
async def prepare_embedding_table(
    self,
    collection: str = None,
    recreate: bool = False,
    **kwargs
) -> bool:
    """
    Get a collection ready for vector storage.

    Optionally drops the collection, then ensures the collection and
    the search view exist.

    NOTE(review): the view step calls ``_ensure_search_view()`` without
    arguments, so it does not receive the ``collection`` passed here.
    Confirm this is intended when preparing a non-default collection.

    Args:
        collection: Collection name (defaults to self.collection_name)
        recreate: When True, an existing collection is dropped before
            it is created again
        **kwargs: Accepted for interface compatibility; not read here

    Returns:
        True once all steps have completed
    """
    target = collection or self.collection_name

    # The existence check only runs when a rebuild was requested.
    if recreate:
        if await self.collection_exists(target):
            await self.delete_collection(target)

    await self.create_collection(target)
    await self._ensure_search_view()

    self.logger.info(f"Prepared collection: {target}")
    return True
|