PraisonAI 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- praisonai/__init__.py +54 -0
- praisonai/__main__.py +15 -0
- praisonai/acp/__init__.py +54 -0
- praisonai/acp/config.py +159 -0
- praisonai/acp/server.py +587 -0
- praisonai/acp/session.py +219 -0
- praisonai/adapters/__init__.py +50 -0
- praisonai/adapters/readers.py +395 -0
- praisonai/adapters/rerankers.py +315 -0
- praisonai/adapters/retrievers.py +394 -0
- praisonai/adapters/vector_stores.py +409 -0
- praisonai/agent_scheduler.py +337 -0
- praisonai/agents_generator.py +903 -0
- praisonai/api/call.py +292 -0
- praisonai/auto.py +1197 -0
- praisonai/capabilities/__init__.py +275 -0
- praisonai/capabilities/a2a.py +140 -0
- praisonai/capabilities/assistants.py +283 -0
- praisonai/capabilities/audio.py +320 -0
- praisonai/capabilities/batches.py +469 -0
- praisonai/capabilities/completions.py +336 -0
- praisonai/capabilities/container_files.py +155 -0
- praisonai/capabilities/containers.py +93 -0
- praisonai/capabilities/embeddings.py +158 -0
- praisonai/capabilities/files.py +467 -0
- praisonai/capabilities/fine_tuning.py +293 -0
- praisonai/capabilities/guardrails.py +182 -0
- praisonai/capabilities/images.py +330 -0
- praisonai/capabilities/mcp.py +190 -0
- praisonai/capabilities/messages.py +270 -0
- praisonai/capabilities/moderations.py +154 -0
- praisonai/capabilities/ocr.py +217 -0
- praisonai/capabilities/passthrough.py +204 -0
- praisonai/capabilities/rag.py +207 -0
- praisonai/capabilities/realtime.py +160 -0
- praisonai/capabilities/rerank.py +165 -0
- praisonai/capabilities/responses.py +266 -0
- praisonai/capabilities/search.py +109 -0
- praisonai/capabilities/skills.py +133 -0
- praisonai/capabilities/vector_store_files.py +334 -0
- praisonai/capabilities/vector_stores.py +304 -0
- praisonai/capabilities/videos.py +141 -0
- praisonai/chainlit_ui.py +304 -0
- praisonai/chat/__init__.py +106 -0
- praisonai/chat/app.py +125 -0
- praisonai/cli/__init__.py +26 -0
- praisonai/cli/app.py +213 -0
- praisonai/cli/commands/__init__.py +75 -0
- praisonai/cli/commands/acp.py +70 -0
- praisonai/cli/commands/completion.py +333 -0
- praisonai/cli/commands/config.py +166 -0
- praisonai/cli/commands/debug.py +142 -0
- praisonai/cli/commands/diag.py +55 -0
- praisonai/cli/commands/doctor.py +166 -0
- praisonai/cli/commands/environment.py +179 -0
- praisonai/cli/commands/lsp.py +112 -0
- praisonai/cli/commands/mcp.py +210 -0
- praisonai/cli/commands/profile.py +457 -0
- praisonai/cli/commands/run.py +228 -0
- praisonai/cli/commands/schedule.py +150 -0
- praisonai/cli/commands/serve.py +97 -0
- praisonai/cli/commands/session.py +212 -0
- praisonai/cli/commands/traces.py +145 -0
- praisonai/cli/commands/version.py +101 -0
- praisonai/cli/configuration/__init__.py +18 -0
- praisonai/cli/configuration/loader.py +353 -0
- praisonai/cli/configuration/paths.py +114 -0
- praisonai/cli/configuration/schema.py +164 -0
- praisonai/cli/features/__init__.py +268 -0
- praisonai/cli/features/acp.py +236 -0
- praisonai/cli/features/action_orchestrator.py +546 -0
- praisonai/cli/features/agent_scheduler.py +773 -0
- praisonai/cli/features/agent_tools.py +474 -0
- praisonai/cli/features/agents.py +375 -0
- praisonai/cli/features/at_mentions.py +471 -0
- praisonai/cli/features/auto_memory.py +182 -0
- praisonai/cli/features/autonomy_mode.py +490 -0
- praisonai/cli/features/background.py +356 -0
- praisonai/cli/features/base.py +168 -0
- praisonai/cli/features/capabilities.py +1326 -0
- praisonai/cli/features/checkpoints.py +338 -0
- praisonai/cli/features/code_intelligence.py +652 -0
- praisonai/cli/features/compaction.py +294 -0
- praisonai/cli/features/compare.py +534 -0
- praisonai/cli/features/cost_tracker.py +514 -0
- praisonai/cli/features/debug.py +810 -0
- praisonai/cli/features/deploy.py +517 -0
- praisonai/cli/features/diag.py +289 -0
- praisonai/cli/features/doctor/__init__.py +63 -0
- praisonai/cli/features/doctor/checks/__init__.py +24 -0
- praisonai/cli/features/doctor/checks/acp_checks.py +240 -0
- praisonai/cli/features/doctor/checks/config_checks.py +366 -0
- praisonai/cli/features/doctor/checks/db_checks.py +366 -0
- praisonai/cli/features/doctor/checks/env_checks.py +543 -0
- praisonai/cli/features/doctor/checks/lsp_checks.py +199 -0
- praisonai/cli/features/doctor/checks/mcp_checks.py +349 -0
- praisonai/cli/features/doctor/checks/memory_checks.py +268 -0
- praisonai/cli/features/doctor/checks/network_checks.py +251 -0
- praisonai/cli/features/doctor/checks/obs_checks.py +328 -0
- praisonai/cli/features/doctor/checks/performance_checks.py +235 -0
- praisonai/cli/features/doctor/checks/permissions_checks.py +259 -0
- praisonai/cli/features/doctor/checks/selftest_checks.py +322 -0
- praisonai/cli/features/doctor/checks/serve_checks.py +426 -0
- praisonai/cli/features/doctor/checks/skills_checks.py +231 -0
- praisonai/cli/features/doctor/checks/tools_checks.py +371 -0
- praisonai/cli/features/doctor/engine.py +266 -0
- praisonai/cli/features/doctor/formatters.py +310 -0
- praisonai/cli/features/doctor/handler.py +397 -0
- praisonai/cli/features/doctor/models.py +264 -0
- praisonai/cli/features/doctor/registry.py +239 -0
- praisonai/cli/features/endpoints.py +1019 -0
- praisonai/cli/features/eval.py +560 -0
- praisonai/cli/features/external_agents.py +231 -0
- praisonai/cli/features/fast_context.py +410 -0
- praisonai/cli/features/flow_display.py +566 -0
- praisonai/cli/features/git_integration.py +651 -0
- praisonai/cli/features/guardrail.py +171 -0
- praisonai/cli/features/handoff.py +185 -0
- praisonai/cli/features/hooks.py +583 -0
- praisonai/cli/features/image.py +384 -0
- praisonai/cli/features/interactive_runtime.py +585 -0
- praisonai/cli/features/interactive_tools.py +380 -0
- praisonai/cli/features/interactive_tui.py +603 -0
- praisonai/cli/features/jobs.py +632 -0
- praisonai/cli/features/knowledge.py +531 -0
- praisonai/cli/features/lite.py +244 -0
- praisonai/cli/features/lsp_cli.py +225 -0
- praisonai/cli/features/mcp.py +169 -0
- praisonai/cli/features/message_queue.py +587 -0
- praisonai/cli/features/metrics.py +211 -0
- praisonai/cli/features/n8n.py +673 -0
- praisonai/cli/features/observability.py +293 -0
- praisonai/cli/features/ollama.py +361 -0
- praisonai/cli/features/output_style.py +273 -0
- praisonai/cli/features/package.py +631 -0
- praisonai/cli/features/performance.py +308 -0
- praisonai/cli/features/persistence.py +636 -0
- praisonai/cli/features/profile.py +226 -0
- praisonai/cli/features/profiler/__init__.py +81 -0
- praisonai/cli/features/profiler/core.py +558 -0
- praisonai/cli/features/profiler/optimizations.py +652 -0
- praisonai/cli/features/profiler/suite.py +386 -0
- praisonai/cli/features/profiling.py +350 -0
- praisonai/cli/features/queue/__init__.py +73 -0
- praisonai/cli/features/queue/manager.py +395 -0
- praisonai/cli/features/queue/models.py +286 -0
- praisonai/cli/features/queue/persistence.py +564 -0
- praisonai/cli/features/queue/scheduler.py +484 -0
- praisonai/cli/features/queue/worker.py +372 -0
- praisonai/cli/features/recipe.py +1723 -0
- praisonai/cli/features/recipes.py +449 -0
- praisonai/cli/features/registry.py +229 -0
- praisonai/cli/features/repo_map.py +860 -0
- praisonai/cli/features/router.py +466 -0
- praisonai/cli/features/sandbox_executor.py +515 -0
- praisonai/cli/features/serve.py +829 -0
- praisonai/cli/features/session.py +222 -0
- praisonai/cli/features/skills.py +856 -0
- praisonai/cli/features/slash_commands.py +650 -0
- praisonai/cli/features/telemetry.py +179 -0
- praisonai/cli/features/templates.py +1384 -0
- praisonai/cli/features/thinking.py +305 -0
- praisonai/cli/features/todo.py +334 -0
- praisonai/cli/features/tools.py +680 -0
- praisonai/cli/features/tui/__init__.py +83 -0
- praisonai/cli/features/tui/app.py +580 -0
- praisonai/cli/features/tui/cli.py +566 -0
- praisonai/cli/features/tui/debug.py +511 -0
- praisonai/cli/features/tui/events.py +99 -0
- praisonai/cli/features/tui/mock_provider.py +328 -0
- praisonai/cli/features/tui/orchestrator.py +652 -0
- praisonai/cli/features/tui/screens/__init__.py +50 -0
- praisonai/cli/features/tui/screens/main.py +245 -0
- praisonai/cli/features/tui/screens/queue.py +174 -0
- praisonai/cli/features/tui/screens/session.py +124 -0
- praisonai/cli/features/tui/screens/settings.py +148 -0
- praisonai/cli/features/tui/widgets/__init__.py +56 -0
- praisonai/cli/features/tui/widgets/chat.py +261 -0
- praisonai/cli/features/tui/widgets/composer.py +224 -0
- praisonai/cli/features/tui/widgets/queue_panel.py +200 -0
- praisonai/cli/features/tui/widgets/status.py +167 -0
- praisonai/cli/features/tui/widgets/tool_panel.py +248 -0
- praisonai/cli/features/workflow.py +720 -0
- praisonai/cli/legacy.py +236 -0
- praisonai/cli/main.py +5559 -0
- praisonai/cli/schedule_cli.py +54 -0
- praisonai/cli/state/__init__.py +31 -0
- praisonai/cli/state/identifiers.py +161 -0
- praisonai/cli/state/sessions.py +313 -0
- praisonai/code/__init__.py +93 -0
- praisonai/code/agent_tools.py +344 -0
- praisonai/code/diff/__init__.py +21 -0
- praisonai/code/diff/diff_strategy.py +432 -0
- praisonai/code/tools/__init__.py +27 -0
- praisonai/code/tools/apply_diff.py +221 -0
- praisonai/code/tools/execute_command.py +275 -0
- praisonai/code/tools/list_files.py +274 -0
- praisonai/code/tools/read_file.py +206 -0
- praisonai/code/tools/search_replace.py +248 -0
- praisonai/code/tools/write_file.py +217 -0
- praisonai/code/utils/__init__.py +46 -0
- praisonai/code/utils/file_utils.py +307 -0
- praisonai/code/utils/ignore_utils.py +308 -0
- praisonai/code/utils/text_utils.py +276 -0
- praisonai/db/__init__.py +64 -0
- praisonai/db/adapter.py +531 -0
- praisonai/deploy/__init__.py +62 -0
- praisonai/deploy/api.py +231 -0
- praisonai/deploy/docker.py +454 -0
- praisonai/deploy/doctor.py +367 -0
- praisonai/deploy/main.py +327 -0
- praisonai/deploy/models.py +179 -0
- praisonai/deploy/providers/__init__.py +33 -0
- praisonai/deploy/providers/aws.py +331 -0
- praisonai/deploy/providers/azure.py +358 -0
- praisonai/deploy/providers/base.py +101 -0
- praisonai/deploy/providers/gcp.py +314 -0
- praisonai/deploy/schema.py +208 -0
- praisonai/deploy.py +185 -0
- praisonai/endpoints/__init__.py +53 -0
- praisonai/endpoints/a2u_server.py +410 -0
- praisonai/endpoints/discovery.py +165 -0
- praisonai/endpoints/providers/__init__.py +28 -0
- praisonai/endpoints/providers/a2a.py +253 -0
- praisonai/endpoints/providers/a2u.py +208 -0
- praisonai/endpoints/providers/agents_api.py +171 -0
- praisonai/endpoints/providers/base.py +231 -0
- praisonai/endpoints/providers/mcp.py +263 -0
- praisonai/endpoints/providers/recipe.py +206 -0
- praisonai/endpoints/providers/tools_mcp.py +150 -0
- praisonai/endpoints/registry.py +131 -0
- praisonai/endpoints/server.py +161 -0
- praisonai/inbuilt_tools/__init__.py +24 -0
- praisonai/inbuilt_tools/autogen_tools.py +117 -0
- praisonai/inc/__init__.py +2 -0
- praisonai/inc/config.py +96 -0
- praisonai/inc/models.py +155 -0
- praisonai/integrations/__init__.py +56 -0
- praisonai/integrations/base.py +303 -0
- praisonai/integrations/claude_code.py +270 -0
- praisonai/integrations/codex_cli.py +255 -0
- praisonai/integrations/cursor_cli.py +195 -0
- praisonai/integrations/gemini_cli.py +222 -0
- praisonai/jobs/__init__.py +67 -0
- praisonai/jobs/executor.py +425 -0
- praisonai/jobs/models.py +230 -0
- praisonai/jobs/router.py +314 -0
- praisonai/jobs/server.py +186 -0
- praisonai/jobs/store.py +203 -0
- praisonai/llm/__init__.py +66 -0
- praisonai/llm/registry.py +382 -0
- praisonai/mcp_server/__init__.py +152 -0
- praisonai/mcp_server/adapters/__init__.py +74 -0
- praisonai/mcp_server/adapters/agents.py +128 -0
- praisonai/mcp_server/adapters/capabilities.py +168 -0
- praisonai/mcp_server/adapters/cli_tools.py +568 -0
- praisonai/mcp_server/adapters/extended_capabilities.py +462 -0
- praisonai/mcp_server/adapters/knowledge.py +93 -0
- praisonai/mcp_server/adapters/memory.py +104 -0
- praisonai/mcp_server/adapters/prompts.py +306 -0
- praisonai/mcp_server/adapters/resources.py +124 -0
- praisonai/mcp_server/adapters/tools_bridge.py +280 -0
- praisonai/mcp_server/auth/__init__.py +48 -0
- praisonai/mcp_server/auth/api_key.py +291 -0
- praisonai/mcp_server/auth/oauth.py +460 -0
- praisonai/mcp_server/auth/oidc.py +289 -0
- praisonai/mcp_server/auth/scopes.py +260 -0
- praisonai/mcp_server/cli.py +852 -0
- praisonai/mcp_server/elicitation.py +445 -0
- praisonai/mcp_server/icons.py +302 -0
- praisonai/mcp_server/recipe_adapter.py +573 -0
- praisonai/mcp_server/recipe_cli.py +824 -0
- praisonai/mcp_server/registry.py +703 -0
- praisonai/mcp_server/sampling.py +422 -0
- praisonai/mcp_server/server.py +490 -0
- praisonai/mcp_server/tasks.py +443 -0
- praisonai/mcp_server/transports/__init__.py +18 -0
- praisonai/mcp_server/transports/http_stream.py +376 -0
- praisonai/mcp_server/transports/stdio.py +132 -0
- praisonai/persistence/__init__.py +84 -0
- praisonai/persistence/config.py +238 -0
- praisonai/persistence/conversation/__init__.py +25 -0
- praisonai/persistence/conversation/async_mysql.py +427 -0
- praisonai/persistence/conversation/async_postgres.py +410 -0
- praisonai/persistence/conversation/async_sqlite.py +371 -0
- praisonai/persistence/conversation/base.py +151 -0
- praisonai/persistence/conversation/json_store.py +250 -0
- praisonai/persistence/conversation/mysql.py +387 -0
- praisonai/persistence/conversation/postgres.py +401 -0
- praisonai/persistence/conversation/singlestore.py +240 -0
- praisonai/persistence/conversation/sqlite.py +341 -0
- praisonai/persistence/conversation/supabase.py +203 -0
- praisonai/persistence/conversation/surrealdb.py +287 -0
- praisonai/persistence/factory.py +301 -0
- praisonai/persistence/hooks/__init__.py +18 -0
- praisonai/persistence/hooks/agent_hooks.py +297 -0
- praisonai/persistence/knowledge/__init__.py +26 -0
- praisonai/persistence/knowledge/base.py +144 -0
- praisonai/persistence/knowledge/cassandra.py +232 -0
- praisonai/persistence/knowledge/chroma.py +295 -0
- praisonai/persistence/knowledge/clickhouse.py +242 -0
- praisonai/persistence/knowledge/cosmosdb_vector.py +438 -0
- praisonai/persistence/knowledge/couchbase.py +286 -0
- praisonai/persistence/knowledge/lancedb.py +216 -0
- praisonai/persistence/knowledge/langchain_adapter.py +291 -0
- praisonai/persistence/knowledge/lightrag_adapter.py +212 -0
- praisonai/persistence/knowledge/llamaindex_adapter.py +256 -0
- praisonai/persistence/knowledge/milvus.py +277 -0
- praisonai/persistence/knowledge/mongodb_vector.py +306 -0
- praisonai/persistence/knowledge/pgvector.py +335 -0
- praisonai/persistence/knowledge/pinecone.py +253 -0
- praisonai/persistence/knowledge/qdrant.py +301 -0
- praisonai/persistence/knowledge/redis_vector.py +291 -0
- praisonai/persistence/knowledge/singlestore_vector.py +299 -0
- praisonai/persistence/knowledge/surrealdb_vector.py +309 -0
- praisonai/persistence/knowledge/upstash_vector.py +266 -0
- praisonai/persistence/knowledge/weaviate.py +223 -0
- praisonai/persistence/migrations/__init__.py +10 -0
- praisonai/persistence/migrations/manager.py +251 -0
- praisonai/persistence/orchestrator.py +406 -0
- praisonai/persistence/state/__init__.py +21 -0
- praisonai/persistence/state/async_mongodb.py +200 -0
- praisonai/persistence/state/base.py +107 -0
- praisonai/persistence/state/dynamodb.py +226 -0
- praisonai/persistence/state/firestore.py +175 -0
- praisonai/persistence/state/gcs.py +155 -0
- praisonai/persistence/state/memory.py +245 -0
- praisonai/persistence/state/mongodb.py +158 -0
- praisonai/persistence/state/redis.py +190 -0
- praisonai/persistence/state/upstash.py +144 -0
- praisonai/persistence/tests/__init__.py +3 -0
- praisonai/persistence/tests/test_all_backends.py +633 -0
- praisonai/profiler.py +1214 -0
- praisonai/recipe/__init__.py +134 -0
- praisonai/recipe/bridge.py +278 -0
- praisonai/recipe/core.py +893 -0
- praisonai/recipe/exceptions.py +54 -0
- praisonai/recipe/history.py +402 -0
- praisonai/recipe/models.py +266 -0
- praisonai/recipe/operations.py +440 -0
- praisonai/recipe/policy.py +422 -0
- praisonai/recipe/registry.py +849 -0
- praisonai/recipe/runtime.py +214 -0
- praisonai/recipe/security.py +711 -0
- praisonai/recipe/serve.py +859 -0
- praisonai/recipe/server.py +613 -0
- praisonai/scheduler/__init__.py +45 -0
- praisonai/scheduler/agent_scheduler.py +552 -0
- praisonai/scheduler/base.py +124 -0
- praisonai/scheduler/daemon_manager.py +225 -0
- praisonai/scheduler/state_manager.py +155 -0
- praisonai/scheduler/yaml_loader.py +193 -0
- praisonai/scheduler.py +194 -0
- praisonai/setup/__init__.py +1 -0
- praisonai/setup/build.py +21 -0
- praisonai/setup/post_install.py +23 -0
- praisonai/setup/setup_conda_env.py +25 -0
- praisonai/setup.py +16 -0
- praisonai/templates/__init__.py +116 -0
- praisonai/templates/cache.py +364 -0
- praisonai/templates/dependency_checker.py +358 -0
- praisonai/templates/discovery.py +391 -0
- praisonai/templates/loader.py +564 -0
- praisonai/templates/registry.py +511 -0
- praisonai/templates/resolver.py +206 -0
- praisonai/templates/security.py +327 -0
- praisonai/templates/tool_override.py +498 -0
- praisonai/templates/tools_doctor.py +256 -0
- praisonai/test.py +105 -0
- praisonai/train.py +562 -0
- praisonai/train_vision.py +306 -0
- praisonai/ui/agents.py +824 -0
- praisonai/ui/callbacks.py +57 -0
- praisonai/ui/chainlit_compat.py +246 -0
- praisonai/ui/chat.py +532 -0
- praisonai/ui/code.py +717 -0
- praisonai/ui/colab.py +474 -0
- praisonai/ui/colab_chainlit.py +81 -0
- praisonai/ui/components/aicoder.py +284 -0
- praisonai/ui/context.py +283 -0
- praisonai/ui/database_config.py +56 -0
- praisonai/ui/db.py +294 -0
- praisonai/ui/realtime.py +488 -0
- praisonai/ui/realtimeclient/__init__.py +756 -0
- praisonai/ui/realtimeclient/tools.py +242 -0
- praisonai/ui/sql_alchemy.py +710 -0
- praisonai/upload_vision.py +140 -0
- praisonai/version.py +1 -0
- praisonai-3.0.0.dist-info/METADATA +3493 -0
- praisonai-3.0.0.dist-info/RECORD +393 -0
- praisonai-3.0.0.dist-info/WHEEL +5 -0
- praisonai-3.0.0.dist-info/entry_points.txt +4 -0
- praisonai-3.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MongoDB Vector Search implementation of KnowledgeStore.
|
|
3
|
+
|
|
4
|
+
Requires: pymongo
|
|
5
|
+
Install: pip install pymongo
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any, Dict, List, Optional
|
|
10
|
+
|
|
11
|
+
from .base import KnowledgeStore, KnowledgeDocument
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class MongoDBVectorKnowledgeStore(KnowledgeStore):
    """
    MongoDB Atlas Vector Search store for knowledge/RAG.

    Uses MongoDB Atlas Vector Search capabilities. The pymongo client is
    created lazily on first use, so constructing the store performs no I/O.

    Example:
        store = MongoDBVectorKnowledgeStore(
            url="mongodb+srv://...",
            database="praisonai",
            collection="vectors"
        )
    """

    # Atlas rejects $vectorSearch stages whose numCandidates exceeds this.
    _MAX_NUM_CANDIDATES = 10000

    def __init__(
        self,
        url: str = "mongodb://localhost:27017",
        database: str = "praisonai",
        collection: str = "vectors",
        index_name: str = "vector_index",
        embedding_field: str = "embedding",
        text_field: str = "content",
        embedding_dim: int = 1536,
    ):
        """
        Initialize MongoDB Vector store.

        Args:
            url: MongoDB connection URL
            database: Database name
            collection: Collection name
            index_name: Vector search index name
            embedding_field: Field name for embeddings
            text_field: Field name for text content
            embedding_dim: Embedding dimension
        """
        self.url = url
        self.database_name = database
        self.collection_name = collection
        self.index_name = index_name
        self.embedding_field = embedding_field
        self.text_field = text_field
        self.embedding_dim = embedding_dim

        # Connection handles are populated by _init_client() on first use.
        self._client = None
        self._db = None
        self._collection = None
        self._initialized = False

    def _init_client(self):
        """Initialize MongoDB client lazily.

        Raises:
            ImportError: If pymongo is not installed.
        """
        if self._initialized:
            return

        try:
            from pymongo import MongoClient
        except ImportError as err:
            raise ImportError(
                "pymongo is required for MongoDB Vector support. "
                "Install with: pip install pymongo"
            ) from err

        self._client = MongoClient(self.url)
        self._db = self._client[self.database_name]
        self._collection = self._db[self.collection_name]
        self._initialized = True

    def _to_mongo_doc(self, doc: KnowledgeDocument) -> Dict[str, Any]:
        """Build the MongoDB document (including ``_id``) for *doc*."""
        mongo_doc = {
            "_id": doc.id,
            self.text_field: doc.content,
            "metadata": doc.metadata or {},
            "content_hash": doc.content_hash,
            "created_at": doc.created_at,
        }

        # Documents without an embedding are stored without the field.
        if doc.embedding:
            mongo_doc[self.embedding_field] = doc.embedding

        return mongo_doc

    def create_collection(
        self,
        name: str,
        dimension: int = 1536,
        distance: str = "cosine",
        metadata: Optional[Dict[str, Any]] = None
    ) -> None:
        """Create collection (MongoDB creates on first insert).

        Only connects and logs; ``dimension``, ``distance`` and ``metadata``
        are accepted for interface compatibility and are not used here.
        """
        self._init_client()
        # MongoDB creates collections automatically on first insert
        # Vector search index must be created via Atlas UI or API
        logger.info(
            f"Collection '{name}' will be created on first insert. "
            "Note: Vector search index must be created via MongoDB Atlas."
        )

    def delete_collection(self, name: str) -> bool:
        """Delete a collection.

        Returns:
            True on success, False if dropping raised (the error is logged).
        """
        self._init_client()
        try:
            self._db.drop_collection(name)
            return True
        except Exception as e:
            logger.error(f"Failed to delete collection {name}: {e}")
            return False

    def collection_exists(self, name: str) -> bool:
        """Check if collection exists."""
        self._init_client()
        return name in self._db.list_collection_names()

    def list_collections(self) -> List[str]:
        """List all collections."""
        self._init_client()
        return self._db.list_collection_names()

    def insert(
        self,
        collection: str,
        documents: List[KnowledgeDocument]
    ) -> List[str]:
        """Insert documents into collection.

        Documents are written one at a time; a failed write is logged and
        skipped rather than aborting the batch.

        Returns:
            IDs of the documents that were inserted successfully.
        """
        self._init_client()

        coll = self._db[collection]
        ids = []

        for doc in documents:
            mongo_doc = self._to_mongo_doc(doc)

            try:
                coll.insert_one(mongo_doc)
                ids.append(doc.id)
            except Exception as e:
                logger.warning(f"Failed to insert document {doc.id}: {e}")

        return ids

    def upsert(
        self,
        collection: str,
        documents: List[KnowledgeDocument]
    ) -> List[str]:
        """Upsert documents into collection.

        Each document fully replaces any existing document with the same
        ``_id``; a failed write is logged and skipped.

        Returns:
            IDs of the documents that were written successfully.
        """
        self._init_client()

        coll = self._db[collection]
        ids = []

        for doc in documents:
            mongo_doc = self._to_mongo_doc(doc)

            try:
                coll.replace_one(
                    {"_id": doc.id},
                    mongo_doc,
                    upsert=True
                )
                ids.append(doc.id)
            except Exception as e:
                logger.warning(f"Failed to upsert document {doc.id}: {e}")

        return ids

    def search(
        self,
        collection: str,
        query_embedding: List[float],
        limit: int = 5,
        filters: Optional[Dict[str, Any]] = None,
        score_threshold: Optional[float] = None
    ) -> List[KnowledgeDocument]:
        """Search for similar documents using vector search.

        Args:
            collection: Collection to query.
            query_embedding: Query vector.
            limit: Maximum number of documents to return.
            filters: Optional filter passed to ``$vectorSearch``.
            score_threshold: If given, results scoring below it are dropped.

        Returns:
            Matching documents with the similarity score stored under
            ``metadata["score"]``; an empty list if the query fails (the
            error is logged).
        """
        self._init_client()

        coll = self._db[collection]

        # Atlas requires limit <= numCandidates <= 10000; the plain
        # ``limit * 10`` heuristic violates the upper bound for limit > 1000.
        num_candidates = max(limit, min(limit * 10, self._MAX_NUM_CANDIDATES))

        vector_stage = {
            "index": self.index_name,
            "path": self.embedding_field,
            "queryVector": query_embedding,
            "numCandidates": num_candidates,
            "limit": limit
        }

        # Add filter if provided
        if filters:
            vector_stage["filter"] = filters

        # Build aggregation pipeline for vector search
        pipeline = [
            {"$vectorSearch": vector_stage},
            {
                "$project": {
                    "_id": 1,
                    self.text_field: 1,
                    "metadata": 1,
                    "content_hash": 1,
                    "created_at": 1,
                    "score": {"$meta": "vectorSearchScore"}
                }
            }
        ]

        try:
            results = list(coll.aggregate(pipeline))

            documents = []
            for doc in results:
                score = doc.get("score", 0)
                # Compare against None so an explicit 0.0 threshold is honoured.
                if score_threshold is not None and score < score_threshold:
                    continue

                documents.append(KnowledgeDocument(
                    id=str(doc["_id"]),
                    content=doc.get(self.text_field, ""),
                    metadata={**(doc.get("metadata") or {}), "score": score},
                    content_hash=doc.get("content_hash"),
                    created_at=doc.get("created_at", 0)
                ))

            return documents
        except Exception as e:
            logger.error(f"Vector search failed: {e}")
            return []

    def get(
        self,
        collection: str,
        ids: List[str]
    ) -> List[KnowledgeDocument]:
        """Get documents by IDs.

        IDs that are missing, or whose lookup raises, are skipped (failures
        are logged), so the result may be shorter than ``ids``.
        """
        self._init_client()

        coll = self._db[collection]
        documents = []

        for doc_id in ids:
            try:
                doc = coll.find_one({"_id": doc_id})
                if doc:
                    documents.append(KnowledgeDocument(
                        id=str(doc["_id"]),
                        content=doc.get(self.text_field, ""),
                        embedding=doc.get(self.embedding_field),
                        metadata=doc.get("metadata"),
                        content_hash=doc.get("content_hash"),
                        created_at=doc.get("created_at", 0)
                    ))
            except Exception as e:
                logger.warning(f"Failed to get document {doc_id}: {e}")

        return documents

    def delete(
        self,
        collection: str,
        ids: Optional[List[str]] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> int:
        """Delete documents by IDs or filters.

        ``ids`` takes precedence over ``filters``. When both are empty or
        missing nothing is deleted, so an empty filter can never wipe the
        collection.

        Returns:
            Number of documents deleted.
        """
        self._init_client()

        coll = self._db[collection]

        if ids:
            result = coll.delete_many({"_id": {"$in": ids}})
            return result.deleted_count
        elif filters:
            result = coll.delete_many(filters)
            return result.deleted_count
        else:
            return 0

    def count(self, collection: str) -> int:
        """Count documents in collection."""
        self._init_client()
        return self._db[collection].count_documents({})

    def close(self) -> None:
        """Close the connection and reset state so the store can reconnect."""
        if self._client:
            self._client.close()
            self._client = None
            self._db = None
            self._collection = None
            self._initialized = False
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
"""
|
|
2
|
+
PGVector implementation of KnowledgeStore.
|
|
3
|
+
|
|
4
|
+
Requires: pgvector, psycopg2-binary
|
|
5
|
+
Install: pip install pgvector psycopg2-binary
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from .base import KnowledgeStore, KnowledgeDocument
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PGVectorKnowledgeStore(KnowledgeStore):
|
|
18
|
+
"""
|
|
19
|
+
PGVector-based knowledge store for vector search.
|
|
20
|
+
|
|
21
|
+
Uses PostgreSQL with pgvector extension.
|
|
22
|
+
|
|
23
|
+
Example:
|
|
24
|
+
store = PGVectorKnowledgeStore(
|
|
25
|
+
url="postgresql://localhost:5432/praisonai"
|
|
26
|
+
)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(
    self,
    url: Optional[str] = None,
    host: str = "localhost",
    port: int = 5432,
    database: str = "praisonai",
    user: str = "postgres",
    password: str = "",
    schema: str = "public",
    auto_create_extension: bool = True,
):
    """
    Initialize the PGVector store and open its connection pool.

    Args:
        url: Connection string; when given it is used instead of the
            individual host/port/database/user/password settings.
        host: Server host (used only when ``url`` is not given).
        port: Server port (used only when ``url`` is not given).
        database: Database name (used only when ``url`` is not given).
        user: User name (used only when ``url`` is not given).
        password: Password (used only when ``url`` is not given).
        schema: Schema that prefixes the collection tables.
        auto_create_extension: Run ``CREATE EXTENSION IF NOT EXISTS vector``
            during construction.

    Raises:
        ImportError: If psycopg2 is not installed.
    """
    try:
        import psycopg2
        from psycopg2 import pool as pg_pool
        from psycopg2.extras import RealDictCursor
    except ImportError as err:
        raise ImportError(
            "psycopg2 is required for PGVector support. "
            "Install with: pip install psycopg2-binary"
        ) from err

    self._psycopg2 = psycopg2
    self._RealDictCursor = RealDictCursor
    self.schema = schema

    if url:
        self._pool = pg_pool.ThreadedConnectionPool(1, 5, url)
    else:
        self._pool = pg_pool.ThreadedConnectionPool(
            1, 5,
            host=host, port=port, database=database,
            user=user, password=password,
        )

    if auto_create_extension:
        try:
            self._create_extension()
        except Exception:
            # The caller never receives the instance when construction
            # fails, so release the pooled connections here before
            # propagating the error.
            self._pool.closeall()
            raise
|
|
65
|
+
|
|
66
|
+
def _get_conn(self):
    """Borrow a connection from the pool; hand it back with ``_put_conn``."""
    pool = self._pool
    return pool.getconn()
|
|
68
|
+
|
|
69
|
+
def _put_conn(self, conn):
    """Hand a connection obtained from ``_get_conn`` back to the pool."""
    pool = self._pool
    pool.putconn(conn)
|
|
71
|
+
|
|
72
|
+
def _create_extension(self):
    """Install the ``vector`` extension in the database if it is missing."""
    connection = self._get_conn()
    try:
        with connection.cursor() as cursor:
            cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
        connection.commit()
    finally:
        # Always hand the connection back, even when the statement fails.
        self._put_conn(connection)
|
|
81
|
+
|
|
82
|
+
def _table_name(self, collection: str) -> str:
|
|
83
|
+
return f"{self.schema}.praison_vec_{collection}"
|
|
84
|
+
|
|
85
|
+
def create_collection(
    self,
    name: str,
    dimension: int,
    distance: str = "cosine",
    metadata: Optional[Dict[str, Any]] = None
) -> None:
    """Create a new collection table.

    Creates the table (if missing) together with an HNSW index on its
    ``embedding`` column.

    Args:
        name: Collection name; becomes part of the table and index names,
            which are spliced into the SQL text unquoted.
        dimension: Embedding dimension of the ``vector`` column.
        distance: ``"cosine"``, ``"euclidean"`` or ``"dot"``; any other
            value falls back to the cosine operator class.
        metadata: Accepted for interface compatibility; not used here.

    Raises:
        TypeError, ValueError: If ``dimension`` cannot be converted to int.
    """
    # The dimension ends up inside DDL text, so make sure only a plain
    # integer can ever be formatted into the statement.
    dimension = int(dimension)

    table = self._table_name(name)
    conn = self._get_conn()
    try:
        with conn.cursor() as cur:
            cur.execute(f"""
                CREATE TABLE IF NOT EXISTS {table} (
                    id VARCHAR(255) PRIMARY KEY,
                    content TEXT,
                    content_hash VARCHAR(64),
                    created_at DOUBLE PRECISION,
                    metadata JSONB,
                    embedding vector({dimension})
                )
            """)

            # Create index based on distance metric
            op_map = {"cosine": "vector_cosine_ops", "euclidean": "vector_l2_ops", "dot": "vector_ip_ops"}
            ops = op_map.get(distance, "vector_cosine_ops")

            cur.execute(f"""
                CREATE INDEX IF NOT EXISTS idx_{name}_embedding
                ON {table} USING hnsw (embedding {ops})
            """)
        conn.commit()
        logger.info(f"Created PGVector table: {table}")
    finally:
        self._put_conn(conn)
|
|
120
|
+
|
|
121
|
+
def delete_collection(self, name: str) -> bool:
    """Drop the table backing a collection.

    Returns:
        ``True`` once ``DROP TABLE IF EXISTS`` has been committed, ``False``
        if any exception was raised along the way (logged as a warning).
    """
    table = self._table_name(name)
    conn = self._get_conn()
    try:
        with conn.cursor() as cur:
            cur.execute(f"DROP TABLE IF EXISTS {table}")
        conn.commit()
    except Exception as e:
        logger.warning(f"Failed to delete table {table}: {e}")
        return False
    else:
        return True
    finally:
        # Runs on both the success and failure paths.
        self._put_conn(conn)
|
|
135
|
+
|
|
136
|
+
def collection_exists(self, name: str) -> bool:
    """Check whether the table backing a collection exists.

    The table is looked up in ``information_schema.tables`` by schema and
    table name.

    Tables are created through unquoted identifiers, which PostgreSQL folds
    to lower case, while the catalog comparison is case-sensitive. The
    lookup values are therefore lower-cased so that a collection created as
    ``"Docs"`` is still found.

    Args:
        name: Collection name (without the ``praison_vec_`` prefix).

    Returns:
        The value of the ``SELECT EXISTS (...)`` query.
    """
    schema = self.schema.lower()
    table_name = f"praison_vec_{name}".lower()
    conn = self._get_conn()
    try:
        with conn.cursor() as cur:
            cur.execute("""
                SELECT EXISTS (
                    SELECT FROM information_schema.tables
                    WHERE table_schema = %s AND table_name = %s
                )
            """, (schema, table_name))
            return cur.fetchone()[0]
    finally:
        self._put_conn(conn)
|
|
151
|
+
|
|
152
|
+
def list_collections(self) -> List[str]:
    """List the collection names that have a table in ``self.schema``.

    Tables are selected with ``LIKE 'praison_vec_%'``. Because ``_`` is a
    single-character wildcard in ``LIKE``, the rows are filtered again in
    Python so only names that literally start with ``praison_vec_`` are
    kept. Only that leading prefix is stripped, so a collection whose own
    name contains ``praison_vec_`` is returned intact.

    Returns:
        Collection names with the ``praison_vec_`` prefix removed.
    """
    prefix = "praison_vec_"
    conn = self._get_conn()
    try:
        with conn.cursor() as cur:
            cur.execute("""
                SELECT table_name FROM information_schema.tables
                WHERE table_schema = %s AND table_name LIKE 'praison_vec_%%'
            """, (self.schema,))
            table_names = [row[0] for row in cur.fetchall()]
    finally:
        self._put_conn(conn)
    return [t[len(prefix):] for t in table_names if t.startswith(prefix)]
|
|
164
|
+
|
|
165
|
+
def insert(
    self,
    collection: str,
    documents: List[KnowledgeDocument]
) -> List[str]:
    """Insert documents into a collection's table.

    One ``INSERT`` is executed per document, followed by a single commit.
    Metadata is stored as JSON text, or ``NULL`` when it is empty or missing.

    Args:
        collection: Target collection name.
        documents: Documents to insert; each must carry an embedding.

    Returns:
        The ids of ``documents``, in order.

    Raises:
        ValueError: If a document's ``embedding`` is ``None``. Nothing is
            committed by this method in that case.
    """
    table = self._table_name(collection)
    conn = self._get_conn()
    try:
        statement = f"""
            INSERT INTO {table} (id, content, content_hash, created_at, metadata, embedding)
            VALUES (%s, %s, %s, %s, %s, %s)
        """
        with conn.cursor() as cur:
            for doc in documents:
                if doc.embedding is None:
                    raise ValueError(f"Document {doc.id} has no embedding")
                values = (
                    doc.id,
                    doc.content,
                    doc.content_hash,
                    doc.created_at,
                    json.dumps(doc.metadata) if doc.metadata else None,
                    doc.embedding,
                )
                cur.execute(statement, values)
        conn.commit()
        inserted_ids = [doc.id for doc in documents]
        return inserted_ids
    finally:
        self._put_conn(conn)
|
|
190
|
+
|
|
191
|
+
def upsert(
    self,
    collection: str,
    documents: List[KnowledgeDocument]
) -> List[str]:
    """Insert documents, overwriting rows whose id already exists.

    One ``INSERT ... ON CONFLICT (id) DO UPDATE`` is executed per document,
    followed by a single commit. On conflict every non-id column is replaced
    with the incoming value.

    Args:
        collection: Target collection name.
        documents: Documents to write; each must carry an embedding.

    Returns:
        The ids of ``documents``, in order.

    Raises:
        ValueError: If a document's ``embedding`` is ``None``. Nothing is
            committed by this method in that case.
    """
    table = self._table_name(collection)
    conn = self._get_conn()
    try:
        statement = f"""
            INSERT INTO {table} (id, content, content_hash, created_at, metadata, embedding)
            VALUES (%s, %s, %s, %s, %s, %s)
            ON CONFLICT (id) DO UPDATE SET
                content = EXCLUDED.content,
                content_hash = EXCLUDED.content_hash,
                created_at = EXCLUDED.created_at,
                metadata = EXCLUDED.metadata,
                embedding = EXCLUDED.embedding
        """
        with conn.cursor() as cur:
            for doc in documents:
                if doc.embedding is None:
                    raise ValueError(f"Document {doc.id} has no embedding")
                values = (
                    doc.id,
                    doc.content,
                    doc.content_hash,
                    doc.created_at,
                    json.dumps(doc.metadata) if doc.metadata else None,
                    doc.embedding,
                )
                cur.execute(statement, values)
        conn.commit()
        written_ids = [doc.id for doc in documents]
        return written_ids
    finally:
        self._put_conn(conn)
|
|
222
|
+
|
|
223
|
+
def search(
    self,
    collection: str,
    query_embedding: List[float],
    limit: int = 5,
    filters: Optional[Dict[str, Any]] = None,
    score_threshold: Optional[float] = None
) -> List[KnowledgeDocument]:
    """Return the documents most similar to ``query_embedding``.

    Similarity is computed with the ``<=>`` (cosine distance) operator and
    reported as ``1 - distance``. Rows are ordered by ascending distance.

    Args:
        collection: Collection to search.
        query_embedding: Query vector, cast to ``vector`` in SQL.
        limit: Maximum number of rows to return.
        filters: Accepted for interface compatibility. NOTE: not applied by
            this implementation.
        score_threshold: When not ``None``, only rows whose score is
            ``>= score_threshold`` are returned. ``0.0`` is a valid
            threshold and is honoured.

    Returns:
        Matching documents with ``embedding`` set to ``None``. The computed
        score is selected but not copied onto the returned documents.
    """
    table = self._table_name(collection)
    conn = self._get_conn()
    try:
        with conn.cursor(cursor_factory=self._RealDictCursor) as cur:
            # Use cosine distance operator <=>
            query = f"""
                SELECT id, content, content_hash, created_at, metadata,
                       1 - (embedding <=> %s::vector) as score
                FROM {table}
            """
            params = [query_embedding]

            # Compare against None explicitly: a plain truthiness test would
            # silently drop a threshold of 0.0.
            if score_threshold is not None:
                query += " WHERE 1 - (embedding <=> %s::vector) >= %s"
                params.extend([query_embedding, score_threshold])

            query += " ORDER BY embedding <=> %s::vector LIMIT %s"
            params.extend([query_embedding, limit])

            cur.execute(query, params)

            return [
                KnowledgeDocument(
                    id=row["id"],
                    content=row["content"],
                    embedding=None,
                    metadata=row["metadata"] or {},
                    content_hash=row["content_hash"],
                    created_at=row["created_at"],
                )
                for row in cur.fetchall()
            ]
    finally:
        self._put_conn(conn)
|
|
267
|
+
|
|
268
|
+
def get(
    self,
    collection: str,
    ids: List[str]
) -> List[KnowledgeDocument]:
    """Fetch documents by id.

    Args:
        collection: Collection to read from.
        ids: Ids to look up. An empty (or ``None``) value returns ``[]``
            without touching the database, because ``WHERE id IN ()`` is
            not valid SQL.

    Returns:
        The rows found, with the embedding selected as ``float[]``. Ids with
        no matching row are simply absent from the result.
    """
    if not ids:
        return []

    table = self._table_name(collection)
    conn = self._get_conn()
    try:
        with conn.cursor(cursor_factory=self._RealDictCursor) as cur:
            # One placeholder per id keeps the values parameterized.
            placeholders = ",".join(["%s"] * len(ids))
            cur.execute(f"""
                SELECT id, content, content_hash, created_at, metadata, embedding::float[]
                FROM {table} WHERE id IN ({placeholders})
            """, ids)

            return [
                KnowledgeDocument(
                    id=row["id"],
                    content=row["content"],
                    embedding=row["embedding"],
                    metadata=row["metadata"] or {},
                    content_hash=row["content_hash"],
                    created_at=row["created_at"],
                )
                for row in cur.fetchall()
            ]
    finally:
        self._put_conn(conn)
|
|
298
|
+
|
|
299
|
+
def delete(
    self,
    collection: str,
    ids: Optional[List[str]] = None,
    filters: Optional[Dict[str, Any]] = None
) -> int:
    """Delete documents by id.

    Args:
        collection: Collection to delete from.
        ids: Ids to remove. When empty or ``None`` nothing is deleted.
        filters: Accepted but not used by this method.

    Returns:
        The cursor's ``rowcount`` after the ``DELETE``, or ``0`` when no ids
        were given.
    """
    table = self._table_name(collection)
    conn = self._get_conn()
    try:
        removed = 0
        with conn.cursor() as cur:
            if ids:
                # One placeholder per id keeps the values parameterized.
                marks = ",".join(["%s"] * len(ids))
                cur.execute(f"DELETE FROM {table} WHERE id IN ({marks})", ids)
                removed = cur.rowcount
        conn.commit()
        return removed
    finally:
        self._put_conn(conn)
|
|
320
|
+
|
|
321
|
+
def count(self, collection: str) -> int:
    """Return the number of rows in a collection's table.

    Runs ``SELECT COUNT(*)`` against the table and returns the first column
    of the single result row.
    """
    table = self._table_name(collection)
    conn = self._get_conn()
    try:
        with conn.cursor() as cur:
            cur.execute(f"SELECT COUNT(*) FROM {table}")
            first_row = cur.fetchone()
            return first_row[0]
    finally:
        self._put_conn(conn)
|
|
331
|
+
|
|
332
|
+
def close(self) -> None:
    """Close all pooled connections; a no-op when ``self._pool`` is falsy."""
    pool = self._pool
    if not pool:
        return
    pool.closeall()
|