agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
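Of the files above, this page expands one diff in full: agno/vectordb/pgvector/pgvector.py (+201 -77). Deleted lines whose old content the viewer did not render appear as bare `-` markers.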
--- agno/vectordb/pgvector/pgvector.py (agno 2.1.2)
+++ agno/vectordb/pgvector/pgvector.py (agno 2.3.13)
@@ -3,15 +3,18 @@ from hashlib import md5
 from math import sqrt
 from typing import Any, Dict, List, Optional, Union, cast

+from agno.utils.string import generate_id
+
 try:
-    from sqlalchemy import update
+    from sqlalchemy import and_, not_, or_, update
     from sqlalchemy.dialects import postgresql
     from sqlalchemy.engine import Engine, create_engine
     from sqlalchemy.inspection import inspect
     from sqlalchemy.orm import Session, scoped_session, sessionmaker
     from sqlalchemy.schema import Column, Index, MetaData, Table
+    from sqlalchemy.sql.elements import ColumnElement
     from sqlalchemy.sql.expression import bindparam, desc, func, select, text
-    from sqlalchemy.types import DateTime, String
+    from sqlalchemy.types import DateTime, Integer, String

 except ImportError:
     raise ImportError("`sqlalchemy` not installed. Please install using `pip install sqlalchemy psycopg`")
@@ -21,10 +24,11 @@ try:
 except ImportError:
     raise ImportError("`pgvector` not installed. Please install using `pip install pgvector`")

+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.knowledge.embedder import Embedder
 from agno.knowledge.reranker.base import Reranker
-from agno.utils.log import log_debug, log_info, log_warning
+from agno.utils.log import log_debug, log_error, log_info, log_warning
 from agno.vectordb.base import VectorDb
 from agno.vectordb.distance import Distance
 from agno.vectordb.pgvector.index import HNSW, Ivfflat
@@ -43,6 +47,9 @@ class PgVector(VectorDb):
         self,
         table_name: str,
         schema: str = "ai",
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        id: Optional[str] = None,
         db_url: Optional[str] = None,
         db_engine: Optional[Engine] = None,
         embedder: Optional[Embedder] = None,
@@ -55,6 +62,7 @@ class PgVector(VectorDb):
         schema_version: int = 1,
         auto_upgrade_schema: bool = False,
         reranker: Optional[Reranker] = None,
+        create_schema: bool = True,
     ):
         """
         Initialize the PgVector instance.
@@ -62,6 +70,8 @@ class PgVector(VectorDb):
         Args:
             table_name (str): Name of the table to store vector data.
             schema (str): Database schema name.
+            name (Optional[str]): Name of the vector database.
+            description (Optional[str]): Description of the vector database.
             db_url (Optional[str]): Database connection URL.
             db_engine (Optional[Engine]): SQLAlchemy database engine.
             embedder (Optional[Embedder]): Embedder instance for creating embeddings.
@@ -73,6 +83,8 @@ class PgVector(VectorDb):
             content_language (str): Language for full-text search.
             schema_version (int): Version of the database schema.
             auto_upgrade_schema (bool): Automatically upgrade schema if True.
+            create_schema (bool): Whether to automatically create the database schema if it doesn't exist.
+                Set to False if schema is managed externally (e.g., via migrations). Defaults to True.
         """
         if not table_name:
             raise ValueError("Table name must be provided.")
@@ -80,13 +92,22 @@ class PgVector(VectorDb):
         if db_engine is None and db_url is None:
             raise ValueError("Either 'db_url' or 'db_engine' must be provided.")

+        if id is None:
+            base_seed = db_url or str(db_engine.url)  # type: ignore
+            schema_suffix = table_name if table_name is not None else "ai"
+            seed = f"{base_seed}#{schema_suffix}"
+            id = generate_id(seed)
+
+        # Initialize base class with name and description
+        super().__init__(id=id, name=name, description=description)
+
         if db_engine is None:
             if db_url is None:
                 raise ValueError("Must provide 'db_url' if 'db_engine' is None.")
             try:
                 db_engine = create_engine(db_url)
             except Exception as e:
-
+                log_error(f"Failed to create engine from 'db_url': {e}")
                 raise

         # Database settings
@@ -129,6 +150,9 @@ class PgVector(VectorDb):
         # Reranker instance
         self.reranker: Optional[Reranker] = reranker

+        # Schema creation flag
+        self.create_schema: bool = create_schema
+
         # Database session
         self.Session: scoped_session = scoped_session(sessionmaker(bind=self.db_engine))
         # Database table
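Taken together, the constructor hunks give each PgVector instance a stable identity (a name, a description, and a deterministic id seeded from the connection URL plus the table name) and a create_schema switch for deployments where migrations own the DDL. A minimal usage sketch; the connection URL and table name below are illustrative placeholders, not values from this diff:

from agno.vectordb.pgvector import PgVector

vector_db = PgVector(
    table_name="recipes",  # hypothetical table
    db_url="postgresql+psycopg://ai:ai@localhost:5432/ai",  # hypothetical URL
    name="Recipe knowledge",  # new in 2.3.x
    description="Embeddings for recipe documents",  # new in 2.3.x
    create_schema=False,  # new in 2.3.x: skip CREATE SCHEMA IF NOT EXISTS in create()
)
# With `id` omitted, the constructor derives it from f"{db_url}#{table_name}"
# via generate_id(), so the same database and table always yield the same id.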
@@ -191,7 +215,7 @@ class PgVector(VectorDb):
         try:
             return inspect(self.db_engine).has_table(self.table_name, schema=self.schema)
         except Exception as e:
-
+            log_error(f"Error checking if table exists: {e}")
             return False

     def create(self) -> None:
@@ -202,7 +226,7 @@ class PgVector(VectorDb):
             with self.Session() as sess, sess.begin():
                 log_debug("Creating extension: vector")
                 sess.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
-                if self.schema is not None:
+                if self.create_schema and self.schema is not None:
                     log_debug(f"Creating schema: {self.schema}")
                     sess.execute(text(f"CREATE SCHEMA IF NOT EXISTS {self.schema};"))
                 log_debug(f"Creating table: {self.table_name}")
@@ -229,7 +253,7 @@ class PgVector(VectorDb):
                 result = sess.execute(stmt).first()
                 return result is not None
         except Exception as e:
-
+            log_error(f"Error checking if record exists: {e}")
             return False

     def name_exists(self, name: str) -> bool:
@@ -306,7 +330,7 @@ class PgVector(VectorDb):
                             try:
                                 batch_records.append(self._get_document_record(doc, filters, content_hash))
                             except Exception as e:
-
+                                log_error(f"Error processing document '{doc.name}': {e}")

                         # Insert the batch of records
                         insert_stmt = postgresql.insert(self.table)
@@ -314,11 +338,11 @@ class PgVector(VectorDb):
                         sess.commit()  # Commit batch independently
                         log_info(f"Inserted batch of {len(batch_records)} documents.")
                     except Exception as e:
-
+                        log_error(f"Error with batch starting at index {i}: {e}")
                         sess.rollback()  # Rollback the current batch if there's an error
                         raise
         except Exception as e:
-
+            log_error(f"Error inserting documents: {e}")
             raise

     async def async_insert(
@@ -343,7 +367,10 @@ class PgVector(VectorDb):
                         for doc in batch_docs:
                             try:
                                 cleaned_content = self._clean_content(doc.content)
-
+                                # Include content_hash in ID to ensure uniqueness across different content hashes
+                                # This allows the same URL/content to be inserted with different descriptions
+                                base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+                                record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()

                                 meta_data = doc.meta_data or {}
                                 if filters:
@@ -362,7 +389,7 @@ class PgVector(VectorDb):
                                 }
                                 batch_records.append(record)
                             except Exception as e:
-
+                                log_error(f"Error processing document '{doc.name}': {e}")

                         # Insert the batch of records
                         if batch_records:
@@ -371,11 +398,11 @@ class PgVector(VectorDb):
                             sess.commit()  # Commit batch independently
                             log_info(f"Inserted batch of {len(batch_records)} documents.")
                     except Exception as e:
-
+                        log_error(f"Error with batch starting at index {i}: {e}")
                         sess.rollback()  # Rollback the current batch if there's an error
                         raise
         except Exception as e:
-
+            log_error(f"Error inserting documents: {e}")
             raise

     def upsert_available(self) -> bool:
@@ -404,7 +431,7 @@ class PgVector(VectorDb):
             self._delete_by_content_hash(content_hash)
             self._upsert(content_hash, documents, filters, batch_size)
         except Exception as e:
-
+            log_error(f"Error upserting documents by content hash: {e}")
             raise

     def _upsert(
@@ -432,9 +459,11 @@ class PgVector(VectorDb):
                         batch_records_dict: Dict[str, Dict[str, Any]] = {}  # Use dict to deduplicate by ID
                         for doc in batch_docs:
                             try:
-
+                                record = self._get_document_record(doc, filters, content_hash)
+                                # Use the generated record ID (which includes content_hash) for deduplication
+                                batch_records_dict[record["id"]] = record
                             except Exception as e:
-
+                                log_error(f"Error processing document '{doc.name}': {e}")

                         # Convert dict to list for upsert
                         batch_records = list(batch_records_dict.values())
@@ -461,11 +490,11 @@ class PgVector(VectorDb):
                             sess.commit()  # Commit batch independently
                             log_info(f"Upserted batch of {len(batch_records)} documents.")
                     except Exception as e:
-
+                        log_error(f"Error with batch starting at index {i}: {e}")
                         sess.rollback()  # Rollback the current batch if there's an error
                         raise
         except Exception as e:
-
+            log_error(f"Error upserting documents: {e}")
             raise

     def _get_document_record(
@@ -473,7 +502,10 @@ class PgVector(VectorDb):
     ) -> Dict[str, Any]:
         doc.embed(embedder=self.embedder)
         cleaned_content = self._clean_content(doc.content)
-
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        # This allows the same URL/content to be inserted with different descriptions
+        base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+        record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()

         meta_data = doc.meta_data or {}
         if filters:
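The record-ID comments above recur in insert, _get_document_record, and both async paths: a row's ID is now md5 of the document's base ID joined with the content hash, so the same content ingested under different content hashes no longer collides. A standalone sketch of the derivation, mirroring the diff with nothing but hashlib:

from hashlib import md5
from typing import Optional

def record_id_for(doc_id: Optional[str], cleaned_content: str, content_hash: str) -> str:
    # Fall back to hashing the cleaned content when the document has no explicit ID
    base_id = doc_id or md5(cleaned_content.encode()).hexdigest()
    # Mixing in content_hash keeps rows distinct across re-ingestions of the same content
    return md5(f"{base_id}_{content_hash}".encode()).hexdigest()

# Same content under two content hashes yields two distinct row IDs
assert record_id_for(None, "pad thai", "hash-a") != record_id_for(None, "pad thai", "hash-b")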
@@ -514,7 +546,7 @@ class PgVector(VectorDb):
                     doc.embedding = embeddings[j]
                     doc.usage = usages[j] if j < len(usages) else None
             except Exception as e:
-
+                log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")

         except Exception as e:
             # Check if this is a rate limit error - don't fall back as it would make things worse
@@ -525,17 +557,41 @@ class PgVector(VectorDb):
             )

             if is_rate_limit:
-
+                log_error(f"Rate limit detected during batch embedding. {e}")
                 raise e
             else:
-
+                log_warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                 # Fall back to individual embedding
                 embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
-                await asyncio.gather(*embed_tasks, return_exceptions=True)
+                results = await asyncio.gather(*embed_tasks, return_exceptions=True)
+
+                # Check for exceptions and handle them
+                for i, result in enumerate(results):
+                    if isinstance(result, Exception):
+                        error_msg = str(result)
+                        # If it's an event loop closure error, log it but don't fail
+                        if "Event loop is closed" in error_msg or "RuntimeError" in type(result).__name__:
+                            log_warning(
+                                f"Event loop closure during embedding for document {i}, but operation may have succeeded: {result}"
+                            )
+                        else:
+                            log_error(f"Error embedding document {i}: {result}")
         else:
             # Use individual embedding
             embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
-            await asyncio.gather(*embed_tasks, return_exceptions=True)
+            results = await asyncio.gather(*embed_tasks, return_exceptions=True)
+
+            # Check for exceptions and handle them
+            for i, result in enumerate(results):
+                if isinstance(result, Exception):
+                    error_msg = str(result)
+                    # If it's an event loop closure error, log it but don't fail
+                    if "Event loop is closed" in error_msg or "RuntimeError" in type(result).__name__:
+                        log_warning(
+                            f"Event loop closure during embedding for document {i}, but operation may have succeeded: {result}"
+                        )
+                    else:
+                        log_error(f"Error embedding document {i}: {result}")

     async def async_upsert(
         self,
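The fallback path above leans on asyncio.gather(..., return_exceptions=True), which never raises: failures come back inside the results list, so discarding the return value (as 2.1.2 did) silently swallowed embedding errors. A runnable sketch of the triage pattern the new code applies; fake_embed is illustrative, not an agno API:

import asyncio

async def fake_embed(i: int) -> int:
    if i == 2:
        raise RuntimeError("Event loop is closed")  # the error text the diff special-cases
    return i * i

async def main() -> None:
    results = await asyncio.gather(*(fake_embed(i) for i in range(4)), return_exceptions=True)
    for i, result in enumerate(results):
        if isinstance(result, Exception):
            print(f"document {i} failed: {result}")  # triage instead of failing the whole batch
        else:
            print(f"document {i} embedded: {result}")

asyncio.run(main())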
@@ -550,7 +606,7 @@ class PgVector(VectorDb):
             self._delete_by_content_hash(content_hash)
             await self._async_upsert(content_hash, documents, filters, batch_size)
         except Exception as e:
-
+            log_error(f"Error upserting documents by content hash: {e}")
             raise

     async def _async_upsert(
@@ -579,10 +635,20 @@ class PgVector(VectorDb):

                         # Prepare documents for upserting
                         batch_records_dict = {}  # Use dict to deduplicate by ID
-                        for doc in batch_docs:
+                        for idx, doc in enumerate(batch_docs):
                             try:
                                 cleaned_content = self._clean_content(doc.content)
-
+                                # Include content_hash in ID to ensure uniqueness across different content hashes
+                                # This allows the same URL/content to be inserted with different descriptions
+                                base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+                                record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+
+                                if (
+                                    doc.embedding is not None
+                                    and isinstance(doc.embedding, list)
+                                    and len(doc.embedding) == 0
+                                ):
+                                    log_warning(f"Document {idx} '{doc.name}' has empty embedding (length 0)")

                                 meta_data = doc.meta_data or {}
                                 if filters:
@@ -601,7 +667,7 @@ class PgVector(VectorDb):
                                 }
                                 batch_records_dict[record_id] = record  # This deduplicates by ID
                             except Exception as e:
-
+                                log_error(f"Error processing document '{doc.name}': {e}")

                         # Convert dict to list for upsert
                         batch_records = list(batch_records_dict.values())
@@ -628,11 +694,11 @@ class PgVector(VectorDb):
                             sess.commit()  # Commit batch independently
                             log_info(f"Upserted batch of {len(batch_records)} documents.")
                     except Exception as e:
-
+                        log_error(f"Error with batch starting at index {i}: {e}")
                         sess.rollback()  # Rollback the current batch if there's an error
                         raise
         except Exception as e:
-
+            log_error(f"Error upserting documents: {e}")
             raise

     def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
@@ -640,38 +706,38 @@ class PgVector(VectorDb):
         Update the metadata for a document.

         Args:
-
+            content_id (str): The ID of the document.
             metadata (Dict[str, Any]): The metadata to update.
         """
         try:
             with self.Session() as sess:
-                # Merge JSONB
+                # Merge JSONB for metadata, but replace filters entirely (absolute value)
                 stmt = (
                     update(self.table)
                     .where(self.table.c.content_id == content_id)
                     .values(
                         meta_data=func.coalesce(self.table.c.meta_data, text("'{}'::jsonb")).op("||")(
-                            bindparam("md",
-                        ),
-                        filters=func.coalesce(self.table.c.filters, text("'{}'::jsonb")).op("||")(
-                            bindparam("ft", metadata, type_=postgresql.JSONB)
+                            bindparam("md", type_=postgresql.JSONB)
                         ),
+                        filters=bindparam("ft", type_=postgresql.JSONB),
                     )
                 )
-                sess.execute(stmt)
+                sess.execute(stmt, {"md": metadata, "ft": metadata})
                 sess.commit()
         except Exception as e:
-
+            log_error(f"Error updating metadata for document {content_id}: {e}")
             raise

-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """
         Perform a search based on the configured search type.

         Args:
             query (str): The search query.
             limit (int): Maximum number of results to return.
-            filters (Optional[Dict[str, Any]]): Filters to apply to the search.
+            filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply to the search.

         Returns:
             List[Document]: List of matching documents.
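The update_metadata hunk fixes two things at once: the filters column is now replaced wholesale instead of JSONB-merged (2.1.2 also merged the metadata argument into filters), and values move out of bindparam() into the execute() call, the conventional SQLAlchemy way to bind named parameters. A minimal sketch of execute-time binding, using SQLite and the generic JSON type so it runs anywhere (the real code targets Postgres JSONB):

from sqlalchemy import JSON, Column, MetaData, String, Table, bindparam, create_engine, update

md = MetaData()
docs = Table(
    "docs",
    md,
    Column("content_id", String, primary_key=True),
    Column("filters", JSON),
)

engine = create_engine("sqlite://")  # in-memory stand-in for Postgres
md.create_all(engine)

# Declare the parameters in the statement; supply the values at execution time
stmt = update(docs).where(docs.c.content_id == bindparam("cid")).values(filters=bindparam("ft"))
with engine.begin() as conn:
    conn.execute(docs.insert(), {"content_id": "doc-1", "filters": {"old": True}})
    conn.execute(stmt, {"cid": "doc-1", "ft": {"cuisine": "thai"}})  # replaces filters wholesale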
@@ -683,23 +749,46 @@ class PgVector(VectorDb):
         elif self.search_type == SearchType.hybrid:
             return self.hybrid_search(query=query, limit=limit, filters=filters)
         else:
-
+            log_error(f"Invalid search type '{self.search_type}'.")
             return []

     async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
         """Search asynchronously by running in a thread."""
         return await asyncio.to_thread(self.search, query, limit, filters)

-    def vector_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def _dsl_to_sqlalchemy(self, filter_expr, table) -> ColumnElement[bool]:
+        op = filter_expr["op"]
+
+        if op == "EQ":
+            return table.c.meta_data[filter_expr["key"]].astext == str(filter_expr["value"])
+        elif op == "IN":
+            # Postgres JSONB array containment
+            return table.c.meta_data[filter_expr["key"]].astext.in_([str(v) for v in filter_expr["values"]])
+        elif op == "GT":
+            return table.c.meta_data[filter_expr["key"]].astext.cast(Integer) > filter_expr["value"]
+        elif op == "LT":
+            return table.c.meta_data[filter_expr["key"]].astext.cast(Integer) < filter_expr["value"]
+        elif op == "NOT":
+            return not_(self._dsl_to_sqlalchemy(filter_expr["condition"], table))
+        elif op == "AND":
+            return and_(*[self._dsl_to_sqlalchemy(cond, table) for cond in filter_expr["conditions"]])
+        elif op == "OR":
+            return or_(*[self._dsl_to_sqlalchemy(cond, table) for cond in filter_expr["conditions"]])
+        else:
+            raise ValueError(f"Unknown filter operator: {op}")
+
+    def vector_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """
         Perform a vector similarity search.

         Args:
             query (str): The search query.
             limit (int): Maximum number of results to return.
-            filters (Optional[Dict[str, Any]]): Filters to apply to the search.
+            filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply to the search.

         Returns:
             List[Document]: List of matching documents.
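_dsl_to_sqlalchemy accepts each list entry either as a FilterExpr object (via its to_dict()) or as a plain dict already shaped like the branches above ({op, key, value/values/condition(s)}). A hedged sketch using raw dicts, since the FilterExpr constructor names live in the new agno/filters.py and are not visible in this diff; the search methods AND the top-level list together, and a plain dict (not a list) still takes the old JSONB-containment path:

# Reusing the hypothetical vector_db instance from the constructor sketch above.
filters = [
    {"op": "EQ", "key": "cuisine", "value": "thai"},
    {
        "op": "OR",
        "conditions": [
            {"op": "GT", "key": "rating", "value": 4},
            {"op": "IN", "key": "source", "values": ["blog", "book"]},
        ],
    },
    {"op": "NOT", "condition": {"op": "EQ", "key": "archived", "value": "true"}},
]

results = vector_db.search("noodle soup", limit=5, filters=filters)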
@@ -708,7 +797,7 @@ class PgVector(VectorDb):
         # Get the embedding for the query string
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
-
+            log_error(f"Error getting embedding for Query: {query}")
             return []

         # Define the columns to select
@@ -726,7 +815,17 @@ class PgVector(VectorDb):

         # Apply filters if provided
         if filters is not None:
-
+            # Handle dict filters
+            if isinstance(filters, dict):
+                stmt = stmt.where(self.table.c.meta_data.contains(filters))
+            # Handle FilterExpr DSL
+            else:
+                # Convert each DSL expression to SQLAlchemy and AND them together
+                sqlalchemy_conditions = [
+                    self._dsl_to_sqlalchemy(f.to_dict() if hasattr(f, "to_dict") else f, self.table)
+                    for f in filters
+                ]
+                stmt = stmt.where(and_(*sqlalchemy_conditions))

         # Order the results based on the distance metric
         if self.distance == Distance.l2:
@@ -736,7 +835,7 @@ class PgVector(VectorDb):
         elif self.distance == Distance.max_inner_product:
             stmt = stmt.order_by(self.table.c.embedding.max_inner_product(query_embedding))
         else:
-
+            log_error(f"Unknown distance metric: {self.distance}")
             return []

         # Limit the number of results
@@ -755,8 +854,8 @@ class PgVector(VectorDb):
                     sess.execute(text(f"SET LOCAL hnsw.ef_search = {self.vector_index.ef_search}"))
                 results = sess.execute(stmt).fetchall()
         except Exception as e:
-
-
+            log_error(f"Error performing semantic search: {e}")
+            log_error("Table might not exist, creating for future use")
             self.create()
             return []
@@ -781,7 +880,7 @@ class PgVector(VectorDb):
             log_info(f"Found {len(search_results)} documents")
             return search_results
         except Exception as e:
-
+            log_error(f"Error during vector search: {e}")
             return []

     def enable_prefix_matching(self, query: str) -> str:
@@ -799,14 +898,16 @@ class PgVector(VectorDb):
         processed_words = [word + "*" for word in words]
         return " ".join(processed_words)

-    def keyword_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def keyword_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """
         Perform a keyword search on the 'content' column.

         Args:
             query (str): The search query.
             limit (int): Maximum number of results to return.
-            filters (Optional[Dict[str, Any]]): Filters to apply to the search.
+            filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply to the search.

         Returns:
             List[Document]: List of matching documents.
@@ -835,8 +936,17 @@ class PgVector(VectorDb):

         # Apply filters if provided
         if filters is not None:
-            #
-
+            # Handle dict filters
+            if isinstance(filters, dict):
+                stmt = stmt.where(self.table.c.meta_data.contains(filters))
+            # Handle FilterExpr DSL
+            else:
+                # Convert each DSL expression to SQLAlchemy and AND them together
+                sqlalchemy_conditions = [
+                    self._dsl_to_sqlalchemy(f.to_dict() if hasattr(f, "to_dict") else f, self.table)
+                    for f in filters
+                ]
+                stmt = stmt.where(and_(*sqlalchemy_conditions))

         # Order by the relevance rank
         stmt = stmt.order_by(text_rank.desc())
@@ -852,8 +962,8 @@ class PgVector(VectorDb):
             with self.Session() as sess, sess.begin():
                 results = sess.execute(stmt).fetchall()
         except Exception as e:
-
-
+            log_error(f"Error performing keyword search: {e}")
+            log_error("Table might not exist, creating for future use")
             self.create()
             return []
@@ -875,14 +985,14 @@ class PgVector(VectorDb):
             log_info(f"Found {len(search_results)} documents")
             return search_results
         except Exception as e:
-
+            log_error(f"Error during keyword search: {e}")
             return []

     def hybrid_search(
         self,
         query: str,
         limit: int = 5,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
     ) -> List[Document]:
         """
         Perform a hybrid search combining vector similarity and full-text search.
@@ -890,7 +1000,7 @@ class PgVector(VectorDb):
         Args:
             query (str): The search query.
             limit (int): Maximum number of results to return.
-            filters (Optional[Dict[str, Any]]): Filters to apply to the search.
+            filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply to the search.

         Returns:
             List[Document]: List of matching documents.
@@ -899,7 +1009,7 @@ class PgVector(VectorDb):
         # Get the embedding for the query string
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
-
+            log_error(f"Error getting embedding for Query: {query}")
             return []

         # Define the columns to select
@@ -937,7 +1047,7 @@ class PgVector(VectorDb):
             # Normalize to range [0, 1]
             vector_score = (raw_vector_score + 1) / 2
         else:
-
+            log_error(f"Unknown distance metric: {self.distance}")
             return []

         # Apply weights to control the influence of each score
@@ -957,7 +1067,17 @@ class PgVector(VectorDb):

         # Apply filters if provided
         if filters is not None:
-
+            # Handle dict filters
+            if isinstance(filters, dict):
+                stmt = stmt.where(self.table.c.meta_data.contains(filters))
+            # Handle FilterExpr DSL
+            else:
+                # Convert each DSL expression to SQLAlchemy and AND them together
+                sqlalchemy_conditions = [
+                    self._dsl_to_sqlalchemy(f.to_dict() if hasattr(f, "to_dict") else f, self.table)
+                    for f in filters
+                ]
+                stmt = stmt.where(and_(*sqlalchemy_conditions))

         # Order the results by the hybrid score in descending order
         stmt = stmt.order_by(desc("hybrid_score"))
@@ -978,7 +1098,7 @@ class PgVector(VectorDb):
                     sess.execute(text(f"SET LOCAL hnsw.ef_search = {self.vector_index.ef_search}"))
                 results = sess.execute(stmt).fetchall()
         except Exception as e:
-
+            log_error(f"Error performing hybrid search: {e}")
             return []

         # Process the results and convert to Document objects
@@ -1000,9 +1120,10 @@ class PgVector(VectorDb):
                 search_results = self.reranker.rerank(query=query, documents=search_results)

             log_info(f"Found {len(search_results)} documents")
+
             return search_results
         except Exception as e:
-
+            log_error(f"Error during hybrid search: {e}")
             return []

     def drop(self) -> None:
@@ -1015,7 +1136,7 @@ class PgVector(VectorDb):
                 self.table.drop(self.db_engine)
                 log_info(f"Table '{self.table.fullname}' dropped successfully.")
             except Exception as e:
-
+                log_error(f"Error dropping table '{self.table.fullname}': {e}")
                 raise
         else:
             log_info(f"Table '{self.table.fullname}' does not exist.")
@@ -1050,7 +1171,7 @@ class PgVector(VectorDb):
                 result = sess.execute(stmt).scalar()
                 return int(result) if result is not None else 0
         except Exception as e:
-
+            log_error(f"Error getting count from table '{self.table.fullname}': {e}")
             return 0

     def optimize(self, force_recreate: bool = False) -> None:
@@ -1091,7 +1212,7 @@ class PgVector(VectorDb):
                     drop_index_sql = f'DROP INDEX IF EXISTS "{self.schema}"."{index_name}";'
                     sess.execute(text(drop_index_sql))
                 except Exception as e:
-
+                    log_error(f"Error dropping index '{index_name}': {e}")
                     raise

     def _create_vector_index(self, force_recreate: bool = False) -> None:
@@ -1146,10 +1267,10 @@ class PgVector(VectorDb):
                 elif isinstance(self.vector_index, HNSW):
                     self._create_hnsw_index(sess, table_fullname, index_distance)
                 else:
-
+                    log_error(f"Unknown index type: {type(self.vector_index)}")
                     return
         except Exception as e:
-
+            log_error(f"Error creating vector index '{self.vector_index.name}': {e}")
             raise

     def _create_ivfflat_index(self, sess: Session, table_fullname: str, index_distance: str) -> None:
@@ -1248,7 +1369,7 @@ class PgVector(VectorDb):
             )
             sess.execute(create_gin_index_sql)
         except Exception as e:
-
+            log_error(f"Error creating GIN index '{gin_index_name}': {e}")
             raise

     def delete(self) -> bool:
@@ -1267,7 +1388,7 @@ class PgVector(VectorDb):
                 log_info(f"Deleted all records from table '{self.table.fullname}'.")
                 return True
             except Exception as e:
-
+                log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
                 sess.rollback()
                 return False

@@ -1283,7 +1404,7 @@ class PgVector(VectorDb):
                 log_info(f"Deleted records with id '{id}' from table '{self.table.fullname}'.")
                 return True
             except Exception as e:
-
+                log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
                 sess.rollback()
                 return False

@@ -1299,7 +1420,7 @@ class PgVector(VectorDb):
                 log_info(f"Deleted records with name '{name}' from table '{self.table.fullname}'.")
                 return True
             except Exception as e:
-
+                log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
                 sess.rollback()
                 return False

@@ -1315,7 +1436,7 @@ class PgVector(VectorDb):
                 log_info(f"Deleted records with metadata '{metadata}' from table '{self.table.fullname}'.")
                 return True
             except Exception as e:
-
+                log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
                 sess.rollback()
                 return False

@@ -1331,7 +1452,7 @@ class PgVector(VectorDb):
                 log_info(f"Deleted records with content ID '{content_id}' from table '{self.table.fullname}'.")
                 return True
             except Exception as e:
-
+                log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
                 sess.rollback()
                 return False

@@ -1347,7 +1468,7 @@ class PgVector(VectorDb):
                 log_info(f"Deleted records with content hash '{content_hash}' from table '{self.table.fullname}'.")
                 return True
             except Exception as e:
-
+                log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
                 sess.rollback()
                 return False
@@ -1383,3 +1504,6 @@ class PgVector(VectorDb):
         copied_obj.table = copied_obj.get_table()

         return copied_obj
+
+    def get_supported_search_types(self) -> List[str]:
+        return [SearchType.vector, SearchType.keyword, SearchType.hybrid]
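Finally, the new get_supported_search_types hook advertises which modes a backend can serve, so callers can probe capabilities before picking one. A small sketch, assuming SearchType is importable from agno.vectordb.search as elsewhere in the module:

from agno.vectordb.search import SearchType

if SearchType.hybrid in vector_db.get_supported_search_types():
    docs = vector_db.hybrid_search("green curry", limit=3)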