agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
|
@@ -28,7 +28,7 @@ from agno.filters import FilterExpr
|
|
|
28
28
|
from agno.knowledge.document import Document
|
|
29
29
|
from agno.knowledge.embedder import Embedder
|
|
30
30
|
from agno.knowledge.reranker.base import Reranker
|
|
31
|
-
from agno.utils.log import log_debug, log_info,
|
|
31
|
+
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
32
32
|
from agno.vectordb.base import VectorDb
|
|
33
33
|
from agno.vectordb.distance import Distance
|
|
34
34
|
from agno.vectordb.pgvector.index import HNSW, Ivfflat
|
|
@@ -62,6 +62,7 @@ class PgVector(VectorDb):
|
|
|
62
62
|
schema_version: int = 1,
|
|
63
63
|
auto_upgrade_schema: bool = False,
|
|
64
64
|
reranker: Optional[Reranker] = None,
|
|
65
|
+
create_schema: bool = True,
|
|
65
66
|
):
|
|
66
67
|
"""
|
|
67
68
|
Initialize the PgVector instance.
|
|
@@ -82,6 +83,8 @@ class PgVector(VectorDb):
|
|
|
82
83
|
content_language (str): Language for full-text search.
|
|
83
84
|
schema_version (int): Version of the database schema.
|
|
84
85
|
auto_upgrade_schema (bool): Automatically upgrade schema if True.
|
|
86
|
+
create_schema (bool): Whether to automatically create the database schema if it doesn't exist.
|
|
87
|
+
Set to False if schema is managed externally (e.g., via migrations). Defaults to True.
|
|
85
88
|
"""
|
|
86
89
|
if not table_name:
|
|
87
90
|
raise ValueError("Table name must be provided.")
|
|
@@ -104,7 +107,7 @@ class PgVector(VectorDb):
|
|
|
104
107
|
try:
|
|
105
108
|
db_engine = create_engine(db_url)
|
|
106
109
|
except Exception as e:
|
|
107
|
-
|
|
110
|
+
log_error(f"Failed to create engine from 'db_url': {e}")
|
|
108
111
|
raise
|
|
109
112
|
|
|
110
113
|
# Database settings
|
|
@@ -119,7 +122,7 @@ class PgVector(VectorDb):
|
|
|
119
122
|
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
120
123
|
|
|
121
124
|
embedder = OpenAIEmbedder()
|
|
122
|
-
|
|
125
|
+
log_debug("Embedder not provided, using OpenAIEmbedder as default.")
|
|
123
126
|
self.embedder: Embedder = embedder
|
|
124
127
|
self.dimensions: Optional[int] = self.embedder.dimensions
|
|
125
128
|
|
|
@@ -147,6 +150,9 @@ class PgVector(VectorDb):
|
|
|
147
150
|
# Reranker instance
|
|
148
151
|
self.reranker: Optional[Reranker] = reranker
|
|
149
152
|
|
|
153
|
+
# Schema creation flag
|
|
154
|
+
self.create_schema: bool = create_schema
|
|
155
|
+
|
|
150
156
|
# Database session
|
|
151
157
|
self.Session: scoped_session = scoped_session(sessionmaker(bind=self.db_engine))
|
|
152
158
|
# Database table
|
|
@@ -209,7 +215,7 @@ class PgVector(VectorDb):
|
|
|
209
215
|
try:
|
|
210
216
|
return inspect(self.db_engine).has_table(self.table_name, schema=self.schema)
|
|
211
217
|
except Exception as e:
|
|
212
|
-
|
|
218
|
+
log_error(f"Error checking if table exists: {e}")
|
|
213
219
|
return False
|
|
214
220
|
|
|
215
221
|
def create(self) -> None:
|
|
@@ -220,7 +226,7 @@ class PgVector(VectorDb):
|
|
|
220
226
|
with self.Session() as sess, sess.begin():
|
|
221
227
|
log_debug("Creating extension: vector")
|
|
222
228
|
sess.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
|
|
223
|
-
if self.schema is not None:
|
|
229
|
+
if self.create_schema and self.schema is not None:
|
|
224
230
|
log_debug(f"Creating schema: {self.schema}")
|
|
225
231
|
sess.execute(text(f"CREATE SCHEMA IF NOT EXISTS {self.schema};"))
|
|
226
232
|
log_debug(f"Creating table: {self.table_name}")
|
|
@@ -247,7 +253,7 @@ class PgVector(VectorDb):
|
|
|
247
253
|
result = sess.execute(stmt).first()
|
|
248
254
|
return result is not None
|
|
249
255
|
except Exception as e:
|
|
250
|
-
|
|
256
|
+
log_error(f"Error checking if record exists: {e}")
|
|
251
257
|
return False
|
|
252
258
|
|
|
253
259
|
def name_exists(self, name: str) -> bool:
|
|
@@ -324,7 +330,7 @@ class PgVector(VectorDb):
|
|
|
324
330
|
try:
|
|
325
331
|
batch_records.append(self._get_document_record(doc, filters, content_hash))
|
|
326
332
|
except Exception as e:
|
|
327
|
-
|
|
333
|
+
log_error(f"Error processing document '{doc.name}': {e}")
|
|
328
334
|
|
|
329
335
|
# Insert the batch of records
|
|
330
336
|
insert_stmt = postgresql.insert(self.table)
|
|
@@ -332,11 +338,11 @@ class PgVector(VectorDb):
|
|
|
332
338
|
sess.commit() # Commit batch independently
|
|
333
339
|
log_info(f"Inserted batch of {len(batch_records)} documents.")
|
|
334
340
|
except Exception as e:
|
|
335
|
-
|
|
341
|
+
log_error(f"Error with batch starting at index {i}: {e}")
|
|
336
342
|
sess.rollback() # Rollback the current batch if there's an error
|
|
337
343
|
raise
|
|
338
344
|
except Exception as e:
|
|
339
|
-
|
|
345
|
+
log_error(f"Error inserting documents: {e}")
|
|
340
346
|
raise
|
|
341
347
|
|
|
342
348
|
async def async_insert(
|
|
@@ -361,7 +367,10 @@ class PgVector(VectorDb):
|
|
|
361
367
|
for doc in batch_docs:
|
|
362
368
|
try:
|
|
363
369
|
cleaned_content = self._clean_content(doc.content)
|
|
364
|
-
|
|
370
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
371
|
+
# This allows the same URL/content to be inserted with different descriptions
|
|
372
|
+
base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
|
|
373
|
+
record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
365
374
|
|
|
366
375
|
meta_data = doc.meta_data or {}
|
|
367
376
|
if filters:
|
|
@@ -370,7 +379,7 @@ class PgVector(VectorDb):
|
|
|
370
379
|
record = {
|
|
371
380
|
"id": record_id,
|
|
372
381
|
"name": doc.name,
|
|
373
|
-
"meta_data":
|
|
382
|
+
"meta_data": meta_data,
|
|
374
383
|
"filters": filters,
|
|
375
384
|
"content": cleaned_content,
|
|
376
385
|
"embedding": doc.embedding,
|
|
@@ -380,7 +389,7 @@ class PgVector(VectorDb):
|
|
|
380
389
|
}
|
|
381
390
|
batch_records.append(record)
|
|
382
391
|
except Exception as e:
|
|
383
|
-
|
|
392
|
+
log_error(f"Error processing document '{doc.name}': {e}")
|
|
384
393
|
|
|
385
394
|
# Insert the batch of records
|
|
386
395
|
if batch_records:
|
|
@@ -389,11 +398,11 @@ class PgVector(VectorDb):
|
|
|
389
398
|
sess.commit() # Commit batch independently
|
|
390
399
|
log_info(f"Inserted batch of {len(batch_records)} documents.")
|
|
391
400
|
except Exception as e:
|
|
392
|
-
|
|
401
|
+
log_error(f"Error with batch starting at index {i}: {e}")
|
|
393
402
|
sess.rollback() # Rollback the current batch if there's an error
|
|
394
403
|
raise
|
|
395
404
|
except Exception as e:
|
|
396
|
-
|
|
405
|
+
log_error(f"Error inserting documents: {e}")
|
|
397
406
|
raise
|
|
398
407
|
|
|
399
408
|
def upsert_available(self) -> bool:
|
|
@@ -422,7 +431,7 @@ class PgVector(VectorDb):
|
|
|
422
431
|
self._delete_by_content_hash(content_hash)
|
|
423
432
|
self._upsert(content_hash, documents, filters, batch_size)
|
|
424
433
|
except Exception as e:
|
|
425
|
-
|
|
434
|
+
log_error(f"Error upserting documents by content hash: {e}")
|
|
426
435
|
raise
|
|
427
436
|
|
|
428
437
|
def _upsert(
|
|
@@ -450,9 +459,11 @@ class PgVector(VectorDb):
|
|
|
450
459
|
batch_records_dict: Dict[str, Dict[str, Any]] = {} # Use dict to deduplicate by ID
|
|
451
460
|
for doc in batch_docs:
|
|
452
461
|
try:
|
|
453
|
-
|
|
462
|
+
record = self._get_document_record(doc, filters, content_hash)
|
|
463
|
+
# Use the generated record ID (which includes content_hash) for deduplication
|
|
464
|
+
batch_records_dict[record["id"]] = record
|
|
454
465
|
except Exception as e:
|
|
455
|
-
|
|
466
|
+
log_error(f"Error processing document '{doc.name}': {e}")
|
|
456
467
|
|
|
457
468
|
# Convert dict to list for upsert
|
|
458
469
|
batch_records = list(batch_records_dict.values())
|
|
@@ -479,11 +490,11 @@ class PgVector(VectorDb):
|
|
|
479
490
|
sess.commit() # Commit batch independently
|
|
480
491
|
log_info(f"Upserted batch of {len(batch_records)} documents.")
|
|
481
492
|
except Exception as e:
|
|
482
|
-
|
|
493
|
+
log_error(f"Error with batch starting at index {i}: {e}")
|
|
483
494
|
sess.rollback() # Rollback the current batch if there's an error
|
|
484
495
|
raise
|
|
485
496
|
except Exception as e:
|
|
486
|
-
|
|
497
|
+
log_error(f"Error upserting documents: {e}")
|
|
487
498
|
raise
|
|
488
499
|
|
|
489
500
|
def _get_document_record(
|
|
@@ -491,7 +502,10 @@ class PgVector(VectorDb):
|
|
|
491
502
|
) -> Dict[str, Any]:
|
|
492
503
|
doc.embed(embedder=self.embedder)
|
|
493
504
|
cleaned_content = self._clean_content(doc.content)
|
|
494
|
-
|
|
505
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
506
|
+
# This allows the same URL/content to be inserted with different descriptions
|
|
507
|
+
base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
|
|
508
|
+
record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
495
509
|
|
|
496
510
|
meta_data = doc.meta_data or {}
|
|
497
511
|
if filters:
|
|
@@ -500,7 +514,7 @@ class PgVector(VectorDb):
|
|
|
500
514
|
return {
|
|
501
515
|
"id": record_id,
|
|
502
516
|
"name": doc.name,
|
|
503
|
-
"meta_data":
|
|
517
|
+
"meta_data": meta_data,
|
|
504
518
|
"filters": filters,
|
|
505
519
|
"content": cleaned_content,
|
|
506
520
|
"embedding": doc.embedding,
|
|
@@ -532,7 +546,7 @@ class PgVector(VectorDb):
|
|
|
532
546
|
doc.embedding = embeddings[j]
|
|
533
547
|
doc.usage = usages[j] if j < len(usages) else None
|
|
534
548
|
except Exception as e:
|
|
535
|
-
|
|
549
|
+
log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
536
550
|
|
|
537
551
|
except Exception as e:
|
|
538
552
|
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
@@ -543,17 +557,41 @@ class PgVector(VectorDb):
|
|
|
543
557
|
)
|
|
544
558
|
|
|
545
559
|
if is_rate_limit:
|
|
546
|
-
|
|
560
|
+
log_error(f"Rate limit detected during batch embedding. {e}")
|
|
547
561
|
raise e
|
|
548
562
|
else:
|
|
549
|
-
|
|
563
|
+
log_warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
550
564
|
# Fall back to individual embedding
|
|
551
565
|
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
|
|
552
|
-
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
566
|
+
results = await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
567
|
+
|
|
568
|
+
# Check for exceptions and handle them
|
|
569
|
+
for i, result in enumerate(results):
|
|
570
|
+
if isinstance(result, Exception):
|
|
571
|
+
error_msg = str(result)
|
|
572
|
+
# If it's an event loop closure error, log it but don't fail
|
|
573
|
+
if "Event loop is closed" in error_msg or "RuntimeError" in type(result).__name__:
|
|
574
|
+
log_warning(
|
|
575
|
+
f"Event loop closure during embedding for document {i}, but operation may have succeeded: {result}"
|
|
576
|
+
)
|
|
577
|
+
else:
|
|
578
|
+
log_error(f"Error embedding document {i}: {result}")
|
|
553
579
|
else:
|
|
554
580
|
# Use individual embedding
|
|
555
581
|
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
|
|
556
|
-
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
582
|
+
results = await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
583
|
+
|
|
584
|
+
# Check for exceptions and handle them
|
|
585
|
+
for i, result in enumerate(results):
|
|
586
|
+
if isinstance(result, Exception):
|
|
587
|
+
error_msg = str(result)
|
|
588
|
+
# If it's an event loop closure error, log it but don't fail
|
|
589
|
+
if "Event loop is closed" in error_msg or "RuntimeError" in type(result).__name__:
|
|
590
|
+
log_warning(
|
|
591
|
+
f"Event loop closure during embedding for document {i}, but operation may have succeeded: {result}"
|
|
592
|
+
)
|
|
593
|
+
else:
|
|
594
|
+
log_error(f"Error embedding document {i}: {result}")
|
|
557
595
|
|
|
558
596
|
async def async_upsert(
|
|
559
597
|
self,
|
|
@@ -568,7 +606,7 @@ class PgVector(VectorDb):
|
|
|
568
606
|
self._delete_by_content_hash(content_hash)
|
|
569
607
|
await self._async_upsert(content_hash, documents, filters, batch_size)
|
|
570
608
|
except Exception as e:
|
|
571
|
-
|
|
609
|
+
log_error(f"Error upserting documents by content hash: {e}")
|
|
572
610
|
raise
|
|
573
611
|
|
|
574
612
|
async def _async_upsert(
|
|
@@ -597,10 +635,27 @@ class PgVector(VectorDb):
|
|
|
597
635
|
|
|
598
636
|
# Prepare documents for upserting
|
|
599
637
|
batch_records_dict = {} # Use dict to deduplicate by ID
|
|
600
|
-
for doc in batch_docs:
|
|
638
|
+
for idx, doc in enumerate(batch_docs):
|
|
601
639
|
try:
|
|
602
640
|
cleaned_content = self._clean_content(doc.content)
|
|
603
|
-
|
|
641
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
642
|
+
# This allows the same URL/content to be inserted with different descriptions
|
|
643
|
+
base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
|
|
644
|
+
record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
645
|
+
|
|
646
|
+
if (
|
|
647
|
+
doc.embedding is not None
|
|
648
|
+
and isinstance(doc.embedding, list)
|
|
649
|
+
and len(doc.embedding) == 0
|
|
650
|
+
):
|
|
651
|
+
log_warning(f"Document {idx} '{doc.name}' has empty embedding (length 0)")
|
|
652
|
+
|
|
653
|
+
if (
|
|
654
|
+
doc.embedding is not None
|
|
655
|
+
and isinstance(doc.embedding, list)
|
|
656
|
+
and len(doc.embedding) == 0
|
|
657
|
+
):
|
|
658
|
+
log_warning(f"Document {idx} '{doc.name}' has empty embedding (length 0)")
|
|
604
659
|
|
|
605
660
|
meta_data = doc.meta_data or {}
|
|
606
661
|
if filters:
|
|
@@ -609,7 +664,7 @@ class PgVector(VectorDb):
|
|
|
609
664
|
record = {
|
|
610
665
|
"id": record_id, # use record_id as a reproducible id to avoid duplicates while upsert
|
|
611
666
|
"name": doc.name,
|
|
612
|
-
"meta_data":
|
|
667
|
+
"meta_data": meta_data,
|
|
613
668
|
"filters": filters,
|
|
614
669
|
"content": cleaned_content,
|
|
615
670
|
"embedding": doc.embedding,
|
|
@@ -619,7 +674,7 @@ class PgVector(VectorDb):
|
|
|
619
674
|
}
|
|
620
675
|
batch_records_dict[record_id] = record # This deduplicates by ID
|
|
621
676
|
except Exception as e:
|
|
622
|
-
|
|
677
|
+
log_error(f"Error processing document '{doc.name}': {e}")
|
|
623
678
|
|
|
624
679
|
# Convert dict to list for upsert
|
|
625
680
|
batch_records = list(batch_records_dict.values())
|
|
@@ -646,11 +701,11 @@ class PgVector(VectorDb):
|
|
|
646
701
|
sess.commit() # Commit batch independently
|
|
647
702
|
log_info(f"Upserted batch of {len(batch_records)} documents.")
|
|
648
703
|
except Exception as e:
|
|
649
|
-
|
|
704
|
+
log_error(f"Error with batch starting at index {i}: {e}")
|
|
650
705
|
sess.rollback() # Rollback the current batch if there's an error
|
|
651
706
|
raise
|
|
652
707
|
except Exception as e:
|
|
653
|
-
|
|
708
|
+
log_error(f"Error upserting documents: {e}")
|
|
654
709
|
raise
|
|
655
710
|
|
|
656
711
|
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
|
|
@@ -658,28 +713,26 @@ class PgVector(VectorDb):
|
|
|
658
713
|
Update the metadata for a document.
|
|
659
714
|
|
|
660
715
|
Args:
|
|
661
|
-
|
|
716
|
+
content_id (str): The ID of the document.
|
|
662
717
|
metadata (Dict[str, Any]): The metadata to update.
|
|
663
718
|
"""
|
|
664
719
|
try:
|
|
665
720
|
with self.Session() as sess:
|
|
666
|
-
# Merge JSONB
|
|
721
|
+
# Merge JSONB for metadata, but replace filters entirely (absolute value)
|
|
667
722
|
stmt = (
|
|
668
723
|
update(self.table)
|
|
669
724
|
.where(self.table.c.content_id == content_id)
|
|
670
725
|
.values(
|
|
671
726
|
meta_data=func.coalesce(self.table.c.meta_data, text("'{}'::jsonb")).op("||")(
|
|
672
|
-
bindparam("md",
|
|
673
|
-
),
|
|
674
|
-
filters=func.coalesce(self.table.c.filters, text("'{}'::jsonb")).op("||")(
|
|
675
|
-
bindparam("ft", metadata, type_=postgresql.JSONB)
|
|
727
|
+
bindparam("md", type_=postgresql.JSONB)
|
|
676
728
|
),
|
|
729
|
+
filters=bindparam("ft", type_=postgresql.JSONB),
|
|
677
730
|
)
|
|
678
731
|
)
|
|
679
|
-
sess.execute(stmt)
|
|
732
|
+
sess.execute(stmt, {"md": metadata, "ft": metadata})
|
|
680
733
|
sess.commit()
|
|
681
734
|
except Exception as e:
|
|
682
|
-
|
|
735
|
+
log_error(f"Error updating metadata for document {content_id}: {e}")
|
|
683
736
|
raise
|
|
684
737
|
|
|
685
738
|
def search(
|
|
@@ -703,7 +756,7 @@ class PgVector(VectorDb):
|
|
|
703
756
|
elif self.search_type == SearchType.hybrid:
|
|
704
757
|
return self.hybrid_search(query=query, limit=limit, filters=filters)
|
|
705
758
|
else:
|
|
706
|
-
|
|
759
|
+
log_error(f"Invalid search type '{self.search_type}'.")
|
|
707
760
|
return []
|
|
708
761
|
|
|
709
762
|
async def async_search(
|
|
@@ -751,7 +804,7 @@ class PgVector(VectorDb):
|
|
|
751
804
|
# Get the embedding for the query string
|
|
752
805
|
query_embedding = self.embedder.get_embedding(query)
|
|
753
806
|
if query_embedding is None:
|
|
754
|
-
|
|
807
|
+
log_error(f"Error getting embedding for Query: {query}")
|
|
755
808
|
return []
|
|
756
809
|
|
|
757
810
|
# Define the columns to select
|
|
@@ -789,7 +842,7 @@ class PgVector(VectorDb):
|
|
|
789
842
|
elif self.distance == Distance.max_inner_product:
|
|
790
843
|
stmt = stmt.order_by(self.table.c.embedding.max_inner_product(query_embedding))
|
|
791
844
|
else:
|
|
792
|
-
|
|
845
|
+
log_error(f"Unknown distance metric: {self.distance}")
|
|
793
846
|
return []
|
|
794
847
|
|
|
795
848
|
# Limit the number of results
|
|
@@ -808,8 +861,8 @@ class PgVector(VectorDb):
|
|
|
808
861
|
sess.execute(text(f"SET LOCAL hnsw.ef_search = {self.vector_index.ef_search}"))
|
|
809
862
|
results = sess.execute(stmt).fetchall()
|
|
810
863
|
except Exception as e:
|
|
811
|
-
|
|
812
|
-
|
|
864
|
+
log_error(f"Error performing semantic search: {e}")
|
|
865
|
+
log_error("Table might not exist, creating for future use")
|
|
813
866
|
self.create()
|
|
814
867
|
return []
|
|
815
868
|
|
|
@@ -834,7 +887,7 @@ class PgVector(VectorDb):
|
|
|
834
887
|
log_info(f"Found {len(search_results)} documents")
|
|
835
888
|
return search_results
|
|
836
889
|
except Exception as e:
|
|
837
|
-
|
|
890
|
+
log_error(f"Error during vector search: {e}")
|
|
838
891
|
return []
|
|
839
892
|
|
|
840
893
|
def enable_prefix_matching(self, query: str) -> str:
|
|
@@ -916,8 +969,8 @@ class PgVector(VectorDb):
|
|
|
916
969
|
with self.Session() as sess, sess.begin():
|
|
917
970
|
results = sess.execute(stmt).fetchall()
|
|
918
971
|
except Exception as e:
|
|
919
|
-
|
|
920
|
-
|
|
972
|
+
log_error(f"Error performing keyword search: {e}")
|
|
973
|
+
log_error("Table might not exist, creating for future use")
|
|
921
974
|
self.create()
|
|
922
975
|
return []
|
|
923
976
|
|
|
@@ -939,7 +992,7 @@ class PgVector(VectorDb):
|
|
|
939
992
|
log_info(f"Found {len(search_results)} documents")
|
|
940
993
|
return search_results
|
|
941
994
|
except Exception as e:
|
|
942
|
-
|
|
995
|
+
log_error(f"Error during keyword search: {e}")
|
|
943
996
|
return []
|
|
944
997
|
|
|
945
998
|
def hybrid_search(
|
|
@@ -963,7 +1016,7 @@ class PgVector(VectorDb):
|
|
|
963
1016
|
# Get the embedding for the query string
|
|
964
1017
|
query_embedding = self.embedder.get_embedding(query)
|
|
965
1018
|
if query_embedding is None:
|
|
966
|
-
|
|
1019
|
+
log_error(f"Error getting embedding for Query: {query}")
|
|
967
1020
|
return []
|
|
968
1021
|
|
|
969
1022
|
# Define the columns to select
|
|
@@ -1001,7 +1054,7 @@ class PgVector(VectorDb):
|
|
|
1001
1054
|
# Normalize to range [0, 1]
|
|
1002
1055
|
vector_score = (raw_vector_score + 1) / 2
|
|
1003
1056
|
else:
|
|
1004
|
-
|
|
1057
|
+
log_error(f"Unknown distance metric: {self.distance}")
|
|
1005
1058
|
return []
|
|
1006
1059
|
|
|
1007
1060
|
# Apply weights to control the influence of each score
|
|
@@ -1052,7 +1105,7 @@ class PgVector(VectorDb):
|
|
|
1052
1105
|
sess.execute(text(f"SET LOCAL hnsw.ef_search = {self.vector_index.ef_search}"))
|
|
1053
1106
|
results = sess.execute(stmt).fetchall()
|
|
1054
1107
|
except Exception as e:
|
|
1055
|
-
|
|
1108
|
+
log_error(f"Error performing hybrid search: {e}")
|
|
1056
1109
|
return []
|
|
1057
1110
|
|
|
1058
1111
|
# Process the results and convert to Document objects
|
|
@@ -1074,9 +1127,10 @@ class PgVector(VectorDb):
|
|
|
1074
1127
|
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
1075
1128
|
|
|
1076
1129
|
log_info(f"Found {len(search_results)} documents")
|
|
1130
|
+
|
|
1077
1131
|
return search_results
|
|
1078
1132
|
except Exception as e:
|
|
1079
|
-
|
|
1133
|
+
log_error(f"Error during hybrid search: {e}")
|
|
1080
1134
|
return []
|
|
1081
1135
|
|
|
1082
1136
|
def drop(self) -> None:
|
|
@@ -1089,7 +1143,7 @@ class PgVector(VectorDb):
|
|
|
1089
1143
|
self.table.drop(self.db_engine)
|
|
1090
1144
|
log_info(f"Table '{self.table.fullname}' dropped successfully.")
|
|
1091
1145
|
except Exception as e:
|
|
1092
|
-
|
|
1146
|
+
log_error(f"Error dropping table '{self.table.fullname}': {e}")
|
|
1093
1147
|
raise
|
|
1094
1148
|
else:
|
|
1095
1149
|
log_info(f"Table '{self.table.fullname}' does not exist.")
|
|
@@ -1124,7 +1178,7 @@ class PgVector(VectorDb):
|
|
|
1124
1178
|
result = sess.execute(stmt).scalar()
|
|
1125
1179
|
return int(result) if result is not None else 0
|
|
1126
1180
|
except Exception as e:
|
|
1127
|
-
|
|
1181
|
+
log_error(f"Error getting count from table '{self.table.fullname}': {e}")
|
|
1128
1182
|
return 0
|
|
1129
1183
|
|
|
1130
1184
|
def optimize(self, force_recreate: bool = False) -> None:
|
|
@@ -1165,7 +1219,7 @@ class PgVector(VectorDb):
|
|
|
1165
1219
|
drop_index_sql = f'DROP INDEX IF EXISTS "{self.schema}"."{index_name}";'
|
|
1166
1220
|
sess.execute(text(drop_index_sql))
|
|
1167
1221
|
except Exception as e:
|
|
1168
|
-
|
|
1222
|
+
log_error(f"Error dropping index '{index_name}': {e}")
|
|
1169
1223
|
raise
|
|
1170
1224
|
|
|
1171
1225
|
def _create_vector_index(self, force_recreate: bool = False) -> None:
|
|
@@ -1220,10 +1274,10 @@ class PgVector(VectorDb):
|
|
|
1220
1274
|
elif isinstance(self.vector_index, HNSW):
|
|
1221
1275
|
self._create_hnsw_index(sess, table_fullname, index_distance)
|
|
1222
1276
|
else:
|
|
1223
|
-
|
|
1277
|
+
log_error(f"Unknown index type: {type(self.vector_index)}")
|
|
1224
1278
|
return
|
|
1225
1279
|
except Exception as e:
|
|
1226
|
-
|
|
1280
|
+
log_error(f"Error creating vector index '{self.vector_index.name}': {e}")
|
|
1227
1281
|
raise
|
|
1228
1282
|
|
|
1229
1283
|
def _create_ivfflat_index(self, sess: Session, table_fullname: str, index_distance: str) -> None:
|
|
@@ -1322,7 +1376,7 @@ class PgVector(VectorDb):
|
|
|
1322
1376
|
)
|
|
1323
1377
|
sess.execute(create_gin_index_sql)
|
|
1324
1378
|
except Exception as e:
|
|
1325
|
-
|
|
1379
|
+
log_error(f"Error creating GIN index '{gin_index_name}': {e}")
|
|
1326
1380
|
raise
|
|
1327
1381
|
|
|
1328
1382
|
def delete(self) -> bool:
|
|
@@ -1341,7 +1395,7 @@ class PgVector(VectorDb):
|
|
|
1341
1395
|
log_info(f"Deleted all records from table '{self.table.fullname}'.")
|
|
1342
1396
|
return True
|
|
1343
1397
|
except Exception as e:
|
|
1344
|
-
|
|
1398
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1345
1399
|
sess.rollback()
|
|
1346
1400
|
return False
|
|
1347
1401
|
|
|
@@ -1357,7 +1411,7 @@ class PgVector(VectorDb):
|
|
|
1357
1411
|
log_info(f"Deleted records with id '{id}' from table '{self.table.fullname}'.")
|
|
1358
1412
|
return True
|
|
1359
1413
|
except Exception as e:
|
|
1360
|
-
|
|
1414
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1361
1415
|
sess.rollback()
|
|
1362
1416
|
return False
|
|
1363
1417
|
|
|
@@ -1373,7 +1427,7 @@ class PgVector(VectorDb):
|
|
|
1373
1427
|
log_info(f"Deleted records with name '{name}' from table '{self.table.fullname}'.")
|
|
1374
1428
|
return True
|
|
1375
1429
|
except Exception as e:
|
|
1376
|
-
|
|
1430
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1377
1431
|
sess.rollback()
|
|
1378
1432
|
return False
|
|
1379
1433
|
|
|
@@ -1389,7 +1443,7 @@ class PgVector(VectorDb):
|
|
|
1389
1443
|
log_info(f"Deleted records with metadata '{metadata}' from table '{self.table.fullname}'.")
|
|
1390
1444
|
return True
|
|
1391
1445
|
except Exception as e:
|
|
1392
|
-
|
|
1446
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1393
1447
|
sess.rollback()
|
|
1394
1448
|
return False
|
|
1395
1449
|
|
|
@@ -1405,7 +1459,7 @@ class PgVector(VectorDb):
|
|
|
1405
1459
|
log_info(f"Deleted records with content ID '{content_id}' from table '{self.table.fullname}'.")
|
|
1406
1460
|
return True
|
|
1407
1461
|
except Exception as e:
|
|
1408
|
-
|
|
1462
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1409
1463
|
sess.rollback()
|
|
1410
1464
|
return False
|
|
1411
1465
|
|
|
@@ -1421,7 +1475,7 @@ class PgVector(VectorDb):
|
|
|
1421
1475
|
log_info(f"Deleted records with content hash '{content_hash}' from table '{self.table.fullname}'.")
|
|
1422
1476
|
return True
|
|
1423
1477
|
except Exception as e:
|
|
1424
|
-
|
|
1478
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1425
1479
|
sess.rollback()
|
|
1426
1480
|
return False
|
|
1427
1481
|
|
|
@@ -26,7 +26,7 @@ from agno.filters import FilterExpr
|
|
|
26
26
|
from agno.knowledge.document import Document
|
|
27
27
|
from agno.knowledge.embedder import Embedder
|
|
28
28
|
from agno.knowledge.reranker.base import Reranker
|
|
29
|
-
from agno.utils.log import log_debug,
|
|
29
|
+
from agno.utils.log import log_debug, log_warning, logger
|
|
30
30
|
from agno.vectordb.base import VectorDb
|
|
31
31
|
|
|
32
32
|
|
|
@@ -136,7 +136,7 @@ class PineconeDb(VectorDb):
|
|
|
136
136
|
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
137
137
|
|
|
138
138
|
_embedder = OpenAIEmbedder()
|
|
139
|
-
|
|
139
|
+
log_debug("Embedder not provided, using OpenAIEmbedder as default.")
|
|
140
140
|
self.embedder: Embedder = _embedder
|
|
141
141
|
self.reranker: Optional[Reranker] = reranker
|
|
142
142
|
|
|
@@ -217,23 +217,6 @@ class PineconeDb(VectorDb):
|
|
|
217
217
|
log_debug(f"Deleting index: {self.name}")
|
|
218
218
|
self.client.delete_index(name=self.name, timeout=self.timeout)
|
|
219
219
|
|
|
220
|
-
def doc_exists(self, document: Document) -> bool:
|
|
221
|
-
"""Check if a document exists in the index.
|
|
222
|
-
|
|
223
|
-
Args:
|
|
224
|
-
document (Document): The document to check.
|
|
225
|
-
|
|
226
|
-
Returns:
|
|
227
|
-
bool: True if the document exists, False otherwise.
|
|
228
|
-
|
|
229
|
-
"""
|
|
230
|
-
response = self.index.fetch(ids=[document.id], namespace=self.namespace)
|
|
231
|
-
return len(response.vectors) > 0
|
|
232
|
-
|
|
233
|
-
async def async_doc_exists(self, document: Document) -> bool:
|
|
234
|
-
"""Check if a document exists in the index asynchronously."""
|
|
235
|
-
return await asyncio.to_thread(self.doc_exists, document)
|
|
236
|
-
|
|
237
220
|
def name_exists(self, name: str) -> bool:
|
|
238
221
|
"""Check if an index with the given name exists.
|
|
239
222
|
|