agno 2.0.1__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +6015 -2823
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +385 -6
- agno/db/dynamo/dynamo.py +388 -81
- agno/db/dynamo/schemas.py +47 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +435 -64
- agno/db/firestore/schemas.py +11 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +384 -42
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +351 -66
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +339 -48
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +510 -37
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2036 -0
- agno/db/mongo/mongo.py +653 -76
- agno/db/mongo/schemas.py +13 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/mysql.py +687 -25
- agno/db/mysql/schemas.py +61 -37
- agno/db/mysql/utils.py +60 -2
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2001 -0
- agno/db/postgres/postgres.py +676 -57
- agno/db/postgres/schemas.py +43 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +344 -38
- agno/db/redis/schemas.py +18 -0
- agno/db/redis/utils.py +60 -2
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/memory.py +13 -0
- agno/db/singlestore/schemas.py +26 -1
- agno/db/singlestore/singlestore.py +687 -53
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2371 -0
- agno/db/sqlite/schemas.py +24 -0
- agno/db/sqlite/sqlite.py +774 -85
- agno/db/sqlite/utils.py +168 -5
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1361 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +50 -22
- agno/eval/accuracy.py +50 -43
- agno/eval/performance.py +6 -3
- agno/eval/reliability.py +6 -3
- agno/eval/utils.py +33 -16
- agno/exceptions.py +68 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/discord/client.py +1 -0
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +1 -1
- agno/knowledge/chunking/semantic.py +40 -8
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/aws_bedrock.py +9 -4
- agno/knowledge/embedder/azure_openai.py +54 -0
- agno/knowledge/embedder/base.py +2 -0
- agno/knowledge/embedder/cohere.py +184 -5
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/google.py +79 -1
- agno/knowledge/embedder/huggingface.py +9 -4
- agno/knowledge/embedder/jina.py +63 -0
- agno/knowledge/embedder/mistral.py +78 -11
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +13 -0
- agno/knowledge/embedder/openai.py +37 -65
- agno/knowledge/embedder/sentence_transformer.py +8 -4
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +69 -16
- agno/knowledge/knowledge.py +594 -186
- agno/knowledge/reader/base.py +9 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/json_reader.py +6 -5
- agno/knowledge/reader/markdown_reader.py +13 -13
- agno/knowledge/reader/pdf_reader.py +43 -68
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +51 -6
- agno/knowledge/reader/s3_reader.py +3 -15
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +13 -13
- agno/knowledge/reader/web_search_reader.py +2 -43
- agno/knowledge/reader/website_reader.py +43 -25
- agno/knowledge/reranker/__init__.py +2 -8
- agno/knowledge/types.py +9 -0
- agno/knowledge/utils.py +20 -0
- agno/media.py +72 -0
- agno/memory/manager.py +336 -82
- agno/models/aimlapi/aimlapi.py +2 -2
- agno/models/anthropic/claude.py +183 -37
- agno/models/aws/bedrock.py +52 -112
- agno/models/aws/claude.py +33 -1
- agno/models/azure/ai_foundry.py +33 -15
- agno/models/azure/openai_chat.py +25 -8
- agno/models/base.py +999 -519
- agno/models/cerebras/cerebras.py +19 -13
- agno/models/cerebras/cerebras_openai.py +8 -5
- agno/models/cohere/chat.py +27 -1
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/dashscope.py +1 -0
- agno/models/deepinfra/deepinfra.py +2 -2
- agno/models/deepseek/deepseek.py +2 -2
- agno/models/fireworks/fireworks.py +2 -2
- agno/models/google/gemini.py +103 -31
- agno/models/groq/groq.py +28 -11
- agno/models/huggingface/huggingface.py +2 -1
- agno/models/internlm/internlm.py +2 -2
- agno/models/langdb/langdb.py +4 -4
- agno/models/litellm/chat.py +18 -1
- agno/models/litellm/litellm_openai.py +2 -2
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/message.py +139 -0
- agno/models/meta/llama.py +27 -10
- agno/models/meta/llama_openai.py +5 -17
- agno/models/nebius/nebius.py +6 -6
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/nvidia.py +2 -2
- agno/models/ollama/chat.py +59 -5
- agno/models/openai/chat.py +69 -29
- agno/models/openai/responses.py +103 -106
- agno/models/openrouter/openrouter.py +41 -3
- agno/models/perplexity/perplexity.py +4 -5
- agno/models/portkey/portkey.py +3 -3
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +77 -1
- agno/models/sambanova/sambanova.py +2 -2
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/together.py +2 -2
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +2 -2
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +96 -0
- agno/models/vllm/vllm.py +1 -0
- agno/models/xai/xai.py +3 -2
- agno/os/app.py +543 -178
- agno/os/auth.py +24 -14
- agno/os/config.py +1 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/agui.py +23 -7
- agno/os/interfaces/agui/router.py +27 -3
- agno/os/interfaces/agui/utils.py +242 -142
- agno/os/interfaces/base.py +6 -2
- agno/os/interfaces/slack/router.py +81 -23
- agno/os/interfaces/slack/slack.py +29 -14
- agno/os/interfaces/whatsapp/router.py +11 -4
- agno/os/interfaces/whatsapp/whatsapp.py +14 -7
- agno/os/mcp.py +111 -54
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +556 -139
- agno/os/routers/evals/evals.py +71 -34
- agno/os/routers/evals/schemas.py +31 -31
- agno/os/routers/evals/utils.py +6 -5
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/knowledge.py +185 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +158 -53
- agno/os/routers/memory/schemas.py +20 -16
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +499 -38
- agno/os/schema.py +308 -198
- agno/os/utils.py +401 -41
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +3 -1
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +2 -2
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +7 -2
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +248 -94
- agno/run/base.py +44 -5
- agno/run/team.py +238 -97
- agno/run/workflow.py +144 -33
- agno/session/agent.py +105 -89
- agno/session/summary.py +65 -25
- agno/session/team.py +176 -96
- agno/session/workflow.py +406 -40
- agno/team/team.py +3854 -1610
- agno/tools/dalle.py +2 -4
- agno/tools/decorator.py +4 -2
- agno/tools/duckduckgo.py +15 -11
- agno/tools/e2b.py +14 -7
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +350 -0
- agno/tools/firecrawl.py +4 -4
- agno/tools/function.py +250 -30
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +270 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/knowledge.py +3 -3
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +11 -17
- agno/tools/memori.py +1 -53
- agno/tools/memory.py +419 -0
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/scrapegraph.py +58 -31
- agno/tools/searxng.py +2 -2
- agno/tools/serper.py +2 -2
- agno/tools/slack.py +18 -3
- agno/tools/spider.py +2 -2
- agno/tools/tavily.py +146 -0
- agno/tools/whatsapp.py +1 -1
- agno/tools/workflow.py +278 -0
- agno/tools/yfinance.py +12 -11
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +27 -0
- agno/utils/common.py +90 -1
- agno/utils/events.py +217 -2
- agno/utils/gemini.py +180 -22
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +111 -0
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +188 -10
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +60 -0
- agno/utils/models/claude.py +40 -11
- agno/utils/print_response/agent.py +105 -21
- agno/utils/print_response/team.py +103 -38
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/reasoning.py +22 -1
- agno/utils/serialize.py +32 -0
- agno/utils/streamlit.py +16 -10
- agno/utils/string.py +41 -0
- agno/utils/team.py +98 -9
- agno/utils/tools.py +1 -1
- agno/vectordb/base.py +23 -4
- agno/vectordb/cassandra/cassandra.py +65 -9
- agno/vectordb/chroma/chromadb.py +182 -38
- agno/vectordb/clickhouse/clickhousedb.py +64 -11
- agno/vectordb/couchbase/couchbase.py +105 -10
- agno/vectordb/lancedb/lance_db.py +124 -133
- agno/vectordb/langchaindb/langchaindb.py +25 -7
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +46 -7
- agno/vectordb/milvus/milvus.py +126 -9
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +112 -7
- agno/vectordb/pgvector/pgvector.py +142 -21
- agno/vectordb/pineconedb/pineconedb.py +80 -8
- agno/vectordb/qdrant/qdrant.py +125 -39
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/singlestore/singlestore.py +111 -25
- agno/vectordb/surrealdb/surrealdb.py +31 -5
- agno/vectordb/upstashdb/upstashdb.py +76 -8
- agno/vectordb/weaviate/weaviate.py +86 -15
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +112 -18
- agno/workflow/loop.py +69 -10
- agno/workflow/parallel.py +266 -118
- agno/workflow/router.py +110 -17
- agno/workflow/step.py +638 -129
- agno/workflow/steps.py +65 -6
- agno/workflow/types.py +61 -23
- agno/workflow/workflow.py +2085 -272
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/METADATA +182 -58
- agno-2.3.0.dist-info/RECORD +577 -0
- agno/knowledge/reader/url_reader.py +0 -128
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -610
- agno/utils/models/aws_claude.py +0 -170
- agno-2.0.1.dist-info/RECORD +0 -515
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
|
@@ -1,28 +1,31 @@
|
|
|
1
|
-
from typing import List, Optional
|
|
1
|
+
from typing import List, Optional, Union
|
|
2
2
|
|
|
3
3
|
from agno.knowledge.chunking.strategy import ChunkingStrategy
|
|
4
4
|
from agno.knowledge.document.base import Document
|
|
5
5
|
from agno.models.base import Model
|
|
6
6
|
from agno.models.defaults import DEFAULT_OPENAI_MODEL_ID
|
|
7
7
|
from agno.models.message import Message
|
|
8
|
+
from agno.models.utils import get_model
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
class AgenticChunking(ChunkingStrategy):
|
|
11
12
|
"""Chunking strategy that uses an LLM to determine natural breakpoints in the text"""
|
|
12
13
|
|
|
13
|
-
def __init__(self, model: Optional[Model] = None, max_chunk_size: int = 5000):
|
|
14
|
+
def __init__(self, model: Optional[Union[Model, str]] = None, max_chunk_size: int = 5000):
|
|
15
|
+
# Convert model string to Model instance
|
|
16
|
+
model = get_model(model)
|
|
14
17
|
if model is None:
|
|
15
18
|
try:
|
|
16
19
|
from agno.models.openai import OpenAIChat
|
|
17
20
|
except Exception:
|
|
18
21
|
raise ValueError("`openai` isn't installed. Please install it with `pip install openai`")
|
|
19
22
|
model = OpenAIChat(DEFAULT_OPENAI_MODEL_ID)
|
|
20
|
-
self.
|
|
23
|
+
self.chunk_size = max_chunk_size
|
|
21
24
|
self.model = model
|
|
22
25
|
|
|
23
26
|
def chunk(self, document: Document) -> List[Document]:
|
|
24
27
|
"""Split text into chunks using LLM to determine natural breakpoints based on context"""
|
|
25
|
-
if len(document.content) <= self.
|
|
28
|
+
if len(document.content) <= self.chunk_size:
|
|
26
29
|
return [document]
|
|
27
30
|
|
|
28
31
|
chunks: List[Document] = []
|
|
@@ -31,22 +34,22 @@ class AgenticChunking(ChunkingStrategy):
|
|
|
31
34
|
chunk_number = 1
|
|
32
35
|
|
|
33
36
|
while remaining_text:
|
|
34
|
-
# Ask model to find a good breakpoint within
|
|
35
|
-
prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.
|
|
37
|
+
# Ask model to find a good breakpoint within chunk_size
|
|
38
|
+
prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.chunk_size} characters.
|
|
36
39
|
Consider semantic completeness, paragraph boundaries, and topic transitions.
|
|
37
40
|
Return only the character position number of where to break the text:
|
|
38
41
|
|
|
39
|
-
{remaining_text[: self.
|
|
42
|
+
{remaining_text[: self.chunk_size]}"""
|
|
40
43
|
|
|
41
44
|
try:
|
|
42
45
|
response = self.model.response([Message(role="user", content=prompt)])
|
|
43
46
|
if response and response.content:
|
|
44
|
-
break_point = min(int(response.content.strip()), self.
|
|
47
|
+
break_point = min(int(response.content.strip()), self.chunk_size)
|
|
45
48
|
else:
|
|
46
|
-
break_point = self.
|
|
49
|
+
break_point = self.chunk_size
|
|
47
50
|
except Exception:
|
|
48
51
|
# Fallback to max size if model fails
|
|
49
|
-
break_point = self.
|
|
52
|
+
break_point = self.chunk_size
|
|
50
53
|
|
|
51
54
|
# Extract chunk and update remaining text
|
|
52
55
|
chunk = remaining_text[:break_point].strip()
|
agno/knowledge/chunking/fixed.py
CHANGED
|
@@ -7,7 +7,7 @@ from agno.knowledge.document.base import Document
|
|
|
7
7
|
class FixedSizeChunking(ChunkingStrategy):
|
|
8
8
|
"""Chunking strategy that splits text into fixed-size chunks with optional overlap"""
|
|
9
9
|
|
|
10
|
-
def __init__(self, chunk_size: int =
|
|
10
|
+
def __init__(self, chunk_size: int = 5000, overlap: int = 0):
|
|
11
11
|
# overlap must be less than chunk size
|
|
12
12
|
if overlap >= chunk_size:
|
|
13
13
|
raise ValueError(f"Invalid parameters: overlap ({overlap}) must be less than chunk size ({chunk_size}).")
|
|
@@ -1,16 +1,22 @@
|
|
|
1
|
-
|
|
1
|
+
import inspect
|
|
2
|
+
from typing import Any, Dict, List, Optional
|
|
2
3
|
|
|
3
4
|
from agno.knowledge.chunking.strategy import ChunkingStrategy
|
|
4
5
|
from agno.knowledge.document.base import Document
|
|
5
6
|
from agno.knowledge.embedder.base import Embedder
|
|
6
|
-
from agno.
|
|
7
|
+
from agno.utils.log import log_info
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
class SemanticChunking(ChunkingStrategy):
|
|
10
11
|
"""Chunking strategy that splits text into semantic chunks using chonkie"""
|
|
11
12
|
|
|
12
13
|
def __init__(self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: float = 0.5):
|
|
13
|
-
|
|
14
|
+
if embedder is None:
|
|
15
|
+
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
16
|
+
|
|
17
|
+
embedder = OpenAIEmbedder() # type: ignore
|
|
18
|
+
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
19
|
+
self.embedder = embedder
|
|
14
20
|
self.chunk_size = chunk_size
|
|
15
21
|
self.similarity_threshold = similarity_threshold
|
|
16
22
|
self.chunker = None # Will be initialized lazily when needed
|
|
@@ -26,11 +32,37 @@ class SemanticChunking(ChunkingStrategy):
|
|
|
26
32
|
"Please install it using `pip install chonkie` to use SemanticChunking."
|
|
27
33
|
)
|
|
28
34
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
chunk_size
|
|
32
|
-
threshold
|
|
33
|
-
|
|
35
|
+
# Build arguments dynamically based on chonkie's supported signature
|
|
36
|
+
params: Dict[str, Any] = {
|
|
37
|
+
"chunk_size": self.chunk_size,
|
|
38
|
+
"threshold": self.similarity_threshold,
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
try:
|
|
42
|
+
sig = inspect.signature(SemanticChunker)
|
|
43
|
+
param_names = set(sig.parameters.keys())
|
|
44
|
+
|
|
45
|
+
# Prefer passing a callable to avoid Chonkie initializing its own client
|
|
46
|
+
if "embedding_fn" in param_names:
|
|
47
|
+
params["embedding_fn"] = self.embedder.get_embedding # type: ignore[attr-defined]
|
|
48
|
+
# If chonkie allows specifying dimensions, provide them
|
|
49
|
+
if "embedding_dimensions" in param_names and getattr(self.embedder, "dimensions", None):
|
|
50
|
+
params["embedding_dimensions"] = self.embedder.dimensions # type: ignore[attr-defined]
|
|
51
|
+
elif "embedder" in param_names:
|
|
52
|
+
# Some versions may accept an embedder object directly
|
|
53
|
+
params["embedder"] = self.embedder
|
|
54
|
+
else:
|
|
55
|
+
# Fallback to model id
|
|
56
|
+
params["embedding_model"] = getattr(self.embedder, "id", None) or "text-embedding-3-small"
|
|
57
|
+
|
|
58
|
+
self.chunker = SemanticChunker(**params)
|
|
59
|
+
except Exception:
|
|
60
|
+
# As a final fallback, use the original behavior
|
|
61
|
+
self.chunker = SemanticChunker(
|
|
62
|
+
embedding_model=getattr(self.embedder, "id", None) or "text-embedding-3-small",
|
|
63
|
+
chunk_size=self.chunk_size,
|
|
64
|
+
threshold=self.similarity_threshold,
|
|
65
|
+
)
|
|
34
66
|
|
|
35
67
|
def chunk(self, document: Document) -> List[Document]:
|
|
36
68
|
"""Split document into semantic chunks using chonkie"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import List
|
|
3
|
+
from typing import List, Optional
|
|
4
4
|
|
|
5
5
|
from agno.knowledge.document.base import Document
|
|
6
6
|
|
|
@@ -60,7 +60,13 @@ class ChunkingStrategyFactory:
|
|
|
60
60
|
"""Factory for creating chunking strategy instances."""
|
|
61
61
|
|
|
62
62
|
@classmethod
|
|
63
|
-
def create_strategy(
|
|
63
|
+
def create_strategy(
|
|
64
|
+
cls,
|
|
65
|
+
strategy_type: ChunkingStrategyType,
|
|
66
|
+
chunk_size: Optional[int] = None,
|
|
67
|
+
overlap: Optional[int] = None,
|
|
68
|
+
**kwargs,
|
|
69
|
+
) -> ChunkingStrategy:
|
|
64
70
|
"""Create an instance of the chunking strategy with the given parameters."""
|
|
65
71
|
strategy_map = {
|
|
66
72
|
ChunkingStrategyType.AGENTIC_CHUNKER: cls._create_agentic_chunking,
|
|
@@ -71,51 +77,89 @@ class ChunkingStrategyFactory:
|
|
|
71
77
|
ChunkingStrategyType.ROW_CHUNKER: cls._create_row_chunking,
|
|
72
78
|
ChunkingStrategyType.MARKDOWN_CHUNKER: cls._create_markdown_chunking,
|
|
73
79
|
}
|
|
74
|
-
return strategy_map[strategy_type](**kwargs)
|
|
80
|
+
return strategy_map[strategy_type](chunk_size=chunk_size, overlap=overlap, **kwargs)
|
|
75
81
|
|
|
76
82
|
@classmethod
|
|
77
|
-
def _create_agentic_chunking(
|
|
83
|
+
def _create_agentic_chunking(
|
|
84
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
85
|
+
) -> ChunkingStrategy:
|
|
78
86
|
from agno.knowledge.chunking.agentic import AgenticChunking
|
|
79
87
|
|
|
80
|
-
#
|
|
81
|
-
if
|
|
82
|
-
kwargs["max_chunk_size"] =
|
|
88
|
+
# AgenticChunking accepts max_chunk_size (not chunk_size) and no overlap
|
|
89
|
+
if chunk_size is not None:
|
|
90
|
+
kwargs["max_chunk_size"] = chunk_size
|
|
91
|
+
# Remove overlap since AgenticChunking doesn't support it
|
|
83
92
|
return AgenticChunking(**kwargs)
|
|
84
93
|
|
|
85
94
|
@classmethod
|
|
86
|
-
def _create_document_chunking(
|
|
95
|
+
def _create_document_chunking(
|
|
96
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
97
|
+
) -> ChunkingStrategy:
|
|
87
98
|
from agno.knowledge.chunking.document import DocumentChunking
|
|
88
99
|
|
|
100
|
+
# DocumentChunking accepts both chunk_size and overlap
|
|
101
|
+
if chunk_size is not None:
|
|
102
|
+
kwargs["chunk_size"] = chunk_size
|
|
103
|
+
if overlap is not None:
|
|
104
|
+
kwargs["overlap"] = overlap
|
|
89
105
|
return DocumentChunking(**kwargs)
|
|
90
106
|
|
|
91
107
|
@classmethod
|
|
92
|
-
def _create_recursive_chunking(
|
|
108
|
+
def _create_recursive_chunking(
|
|
109
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
110
|
+
) -> ChunkingStrategy:
|
|
93
111
|
from agno.knowledge.chunking.recursive import RecursiveChunking
|
|
94
112
|
|
|
113
|
+
# RecursiveChunking accepts both chunk_size and overlap
|
|
114
|
+
if chunk_size is not None:
|
|
115
|
+
kwargs["chunk_size"] = chunk_size
|
|
116
|
+
if overlap is not None:
|
|
117
|
+
kwargs["overlap"] = overlap
|
|
95
118
|
return RecursiveChunking(**kwargs)
|
|
96
119
|
|
|
97
120
|
@classmethod
|
|
98
|
-
def _create_semantic_chunking(
|
|
121
|
+
def _create_semantic_chunking(
|
|
122
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
123
|
+
) -> ChunkingStrategy:
|
|
99
124
|
from agno.knowledge.chunking.semantic import SemanticChunking
|
|
100
125
|
|
|
126
|
+
# SemanticChunking accepts chunk_size but not overlap
|
|
127
|
+
if chunk_size is not None:
|
|
128
|
+
kwargs["chunk_size"] = chunk_size
|
|
129
|
+
# Remove overlap since SemanticChunking doesn't support it
|
|
101
130
|
return SemanticChunking(**kwargs)
|
|
102
131
|
|
|
103
132
|
@classmethod
|
|
104
|
-
def _create_fixed_chunking(
|
|
133
|
+
def _create_fixed_chunking(
|
|
134
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
135
|
+
) -> ChunkingStrategy:
|
|
105
136
|
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
106
137
|
|
|
138
|
+
# FixedSizeChunking accepts both chunk_size and overlap
|
|
139
|
+
if chunk_size is not None:
|
|
140
|
+
kwargs["chunk_size"] = chunk_size
|
|
141
|
+
if overlap is not None:
|
|
142
|
+
kwargs["overlap"] = overlap
|
|
107
143
|
return FixedSizeChunking(**kwargs)
|
|
108
144
|
|
|
109
145
|
@classmethod
|
|
110
|
-
def _create_row_chunking(
|
|
146
|
+
def _create_row_chunking(
|
|
147
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
148
|
+
) -> ChunkingStrategy:
|
|
111
149
|
from agno.knowledge.chunking.row import RowChunking
|
|
112
150
|
|
|
113
|
-
#
|
|
114
|
-
kwargs.pop("chunk_size", None)
|
|
151
|
+
# RowChunking doesn't accept chunk_size or overlap, only skip_header and clean_rows
|
|
115
152
|
return RowChunking(**kwargs)
|
|
116
153
|
|
|
117
154
|
@classmethod
|
|
118
|
-
def _create_markdown_chunking(
|
|
155
|
+
def _create_markdown_chunking(
|
|
156
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
157
|
+
) -> ChunkingStrategy:
|
|
119
158
|
from agno.knowledge.chunking.markdown import MarkdownChunking
|
|
120
159
|
|
|
160
|
+
# MarkdownChunking accepts both chunk_size and overlap
|
|
161
|
+
if chunk_size is not None:
|
|
162
|
+
kwargs["chunk_size"] = chunk_size
|
|
163
|
+
if overlap is not None:
|
|
164
|
+
kwargs["overlap"] = overlap
|
|
121
165
|
return MarkdownChunking(**kwargs)
|
|
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
5
5
|
|
|
6
6
|
from agno.exceptions import AgnoError, ModelProviderError
|
|
7
7
|
from agno.knowledge.embedder.base import Embedder
|
|
8
|
-
from agno.utils.log import log_error,
|
|
8
|
+
from agno.utils.log import log_error, log_warning
|
|
9
9
|
|
|
10
10
|
try:
|
|
11
11
|
from boto3 import client as AwsClient
|
|
@@ -69,6 +69,11 @@ class AwsBedrockEmbedder(Embedder):
|
|
|
69
69
|
client_params: Optional[Dict[str, Any]] = None
|
|
70
70
|
client: Optional[AwsClient] = None
|
|
71
71
|
|
|
72
|
+
def __post_init__(self):
|
|
73
|
+
if self.enable_batch:
|
|
74
|
+
log_warning("AwsBedrockEmbedder does not support batch embeddings, setting enable_batch to False")
|
|
75
|
+
self.enable_batch = False
|
|
76
|
+
|
|
72
77
|
def get_client(self) -> AwsClient:
|
|
73
78
|
"""
|
|
74
79
|
Returns an AWS Bedrock client.
|
|
@@ -220,10 +225,10 @@ class AwsBedrockEmbedder(Embedder):
|
|
|
220
225
|
# Fallback to the first available embedding type
|
|
221
226
|
for embedding_type in response["embeddings"]:
|
|
222
227
|
return response["embeddings"][embedding_type][0]
|
|
223
|
-
|
|
228
|
+
log_warning("No embeddings found in response")
|
|
224
229
|
return []
|
|
225
230
|
except Exception as e:
|
|
226
|
-
|
|
231
|
+
log_warning(f"Error extracting embeddings: {e}")
|
|
227
232
|
return []
|
|
228
233
|
|
|
229
234
|
def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
|
|
@@ -286,7 +291,7 @@ class AwsBedrockEmbedder(Embedder):
|
|
|
286
291
|
# Fallback to the first available embedding type
|
|
287
292
|
for embedding_type in response_body["embeddings"]:
|
|
288
293
|
return response_body["embeddings"][embedding_type][0]
|
|
289
|
-
|
|
294
|
+
log_warning("No embeddings found in response")
|
|
290
295
|
return []
|
|
291
296
|
except ClientError as e:
|
|
292
297
|
log_error(f"Unexpected error calling Bedrock API: {str(e)}")
|
|
@@ -154,3 +154,57 @@ class AzureOpenAIEmbedder(Embedder):
|
|
|
154
154
|
embedding = response.data[0].embedding
|
|
155
155
|
usage = response.usage
|
|
156
156
|
return embedding, usage.model_dump()
|
|
157
|
+
|
|
158
|
+
async def async_get_embeddings_batch_and_usage(
|
|
159
|
+
self, texts: List[str]
|
|
160
|
+
) -> Tuple[List[List[float]], List[Optional[Dict]]]:
|
|
161
|
+
"""
|
|
162
|
+
Get embeddings and usage for multiple texts in batches.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
texts: List of text strings to embed
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
Tuple of (List of embedding vectors, List of usage dictionaries)
|
|
169
|
+
"""
|
|
170
|
+
all_embeddings = []
|
|
171
|
+
all_usage = []
|
|
172
|
+
logger.info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size}")
|
|
173
|
+
|
|
174
|
+
for i in range(0, len(texts), self.batch_size):
|
|
175
|
+
batch_texts = texts[i : i + self.batch_size]
|
|
176
|
+
|
|
177
|
+
req: Dict[str, Any] = {
|
|
178
|
+
"input": batch_texts,
|
|
179
|
+
"model": self.id,
|
|
180
|
+
"encoding_format": self.encoding_format,
|
|
181
|
+
}
|
|
182
|
+
if self.user is not None:
|
|
183
|
+
req["user"] = self.user
|
|
184
|
+
if self.id.startswith("text-embedding-3"):
|
|
185
|
+
req["dimensions"] = self.dimensions
|
|
186
|
+
if self.request_params:
|
|
187
|
+
req.update(self.request_params)
|
|
188
|
+
|
|
189
|
+
try:
|
|
190
|
+
response: CreateEmbeddingResponse = await self.aclient.embeddings.create(**req)
|
|
191
|
+
batch_embeddings = [data.embedding for data in response.data]
|
|
192
|
+
all_embeddings.extend(batch_embeddings)
|
|
193
|
+
|
|
194
|
+
# For each embedding in the batch, add the same usage information
|
|
195
|
+
usage_dict = response.usage.model_dump() if response.usage else None
|
|
196
|
+
all_usage.extend([usage_dict] * len(batch_embeddings))
|
|
197
|
+
except Exception as e:
|
|
198
|
+
logger.warning(f"Error in async batch embedding: {e}")
|
|
199
|
+
# Fallback to individual calls for this batch
|
|
200
|
+
for text in batch_texts:
|
|
201
|
+
try:
|
|
202
|
+
embedding, usage = await self.async_get_embedding_and_usage(text)
|
|
203
|
+
all_embeddings.append(embedding)
|
|
204
|
+
all_usage.append(usage)
|
|
205
|
+
except Exception as e2:
|
|
206
|
+
logger.warning(f"Error in individual async embedding fallback: {e2}")
|
|
207
|
+
all_embeddings.append([])
|
|
208
|
+
all_usage.append(None)
|
|
209
|
+
|
|
210
|
+
return all_embeddings, all_usage
|
agno/knowledge/embedder/base.py
CHANGED
|
@@ -7,6 +7,8 @@ class Embedder:
|
|
|
7
7
|
"""Base class for managing embedders"""
|
|
8
8
|
|
|
9
9
|
dimensions: Optional[int] = 1536
|
|
10
|
+
enable_batch: bool = False
|
|
11
|
+
batch_size: int = 100 # Number of texts to process in each API call
|
|
10
12
|
|
|
11
13
|
def get_embedding(self, text: str) -> List[float]:
|
|
12
14
|
raise NotImplementedError
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import time
|
|
1
2
|
from dataclasses import dataclass
|
|
2
3
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
3
4
|
|
|
4
5
|
from agno.knowledge.embedder.base import Embedder
|
|
5
|
-
from agno.utils.log import
|
|
6
|
+
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
6
7
|
|
|
7
8
|
try:
|
|
8
9
|
from cohere import AsyncClient as AsyncCohereClient
|
|
@@ -22,6 +23,7 @@ class CohereEmbedder(Embedder):
|
|
|
22
23
|
client_params: Optional[Dict[str, Any]] = None
|
|
23
24
|
cohere_client: Optional[CohereClient] = None
|
|
24
25
|
async_client: Optional[AsyncCohereClient] = None
|
|
26
|
+
exponential_backoff: bool = False # Enable exponential backoff on rate limits
|
|
25
27
|
|
|
26
28
|
@property
|
|
27
29
|
def client(self) -> CohereClient:
|
|
@@ -61,6 +63,111 @@ class CohereEmbedder(Embedder):
|
|
|
61
63
|
request_params.update(self.request_params)
|
|
62
64
|
return self.client.embed(texts=[text], **request_params)
|
|
63
65
|
|
|
66
|
+
def _get_batch_request_params(self) -> Dict[str, Any]:
|
|
67
|
+
"""Get request parameters for batch embedding calls."""
|
|
68
|
+
request_params: Dict[str, Any] = {}
|
|
69
|
+
|
|
70
|
+
if self.id:
|
|
71
|
+
request_params["model"] = self.id
|
|
72
|
+
if self.input_type:
|
|
73
|
+
request_params["input_type"] = self.input_type
|
|
74
|
+
if self.embedding_types:
|
|
75
|
+
request_params["embedding_types"] = self.embedding_types
|
|
76
|
+
if self.request_params:
|
|
77
|
+
request_params.update(self.request_params)
|
|
78
|
+
|
|
79
|
+
return request_params
|
|
80
|
+
|
|
81
|
+
def _is_rate_limit_error(self, error: Exception) -> bool:
|
|
82
|
+
"""Check if the error is a rate limiting error."""
|
|
83
|
+
if hasattr(error, "status_code") and error.status_code == 429:
|
|
84
|
+
return True
|
|
85
|
+
error_str = str(error).lower()
|
|
86
|
+
return any(
|
|
87
|
+
phrase in error_str
|
|
88
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
def _exponential_backoff_sleep(self, attempt: int, base_delay: float = 1.0) -> None:
|
|
92
|
+
"""Sleep with exponential backoff."""
|
|
93
|
+
delay = base_delay * (2**attempt) + (time.time() % 1) # Add jitter
|
|
94
|
+
log_debug(f"Rate limited, waiting {delay:.2f} seconds before retry (attempt {attempt + 1})")
|
|
95
|
+
time.sleep(delay)
|
|
96
|
+
|
|
97
|
+
async def _async_rate_limit_backoff_sleep(self, attempt: int) -> None:
    """Await a stepped delay intended to outlast a per-minute rate-limit window.

    Args:
        attempt: Zero-based retry attempt. Attempt 0 waits 15 seconds,
            attempt 1 waits 30 seconds, any other value waits 60 seconds;
            up to 3 seconds of jitter is added on top.
    """
    import asyncio

    # Steps sized for 40 req/min APIs like Cohere Trial: a quarter of the
    # minute window, half of it, then the full window for anything later.
    stepped_waits = {0: 15.0, 1: 30.0}
    delay = stepped_waits.get(attempt, 60.0)

    # Small jitter derived from the wall clock.
    delay += time.time() % 3

    log_debug(
        f"Async rate limit backoff, waiting {delay:.1f} seconds for rate limit window reset (attempt {attempt + 1})"
    )
    await asyncio.sleep(delay)
|
|
116
|
+
|
|
117
|
+
async def _async_batch_with_retry(
    self, texts: List[str], max_retries: int = 3
) -> Tuple[List[List[float]], List[Optional[Dict]]]:
    """Embed one batch asynchronously, retrying when the API rate-limits the call.

    Args:
        texts: The texts to embed in a single ``embed`` request.
        max_retries: How many retries are allowed after the first attempt.
            Retries happen only for rate-limit errors and only when
            ``self.exponential_backoff`` is enabled.

    Returns:
        A tuple ``(embeddings, usage)``. ``usage`` has one entry per returned
        embedding; every entry is the same billed-units dict of the response,
        or None when the response carries no usage information.

    Raises:
        Exception: The original client error, re-raised when it is not a
            rate-limit error, when backoff is disabled, or when all retries
            have been used.
    """

    log_debug(f"Starting async batch retry for {len(texts)} texts with max_retries={max_retries}")

    for attempt in range(max_retries + 1):
        try:
            request_params = self._get_batch_request_params()
            response: Union[
                EmbeddingsFloatsEmbedResponse, EmbeddingsByTypeEmbedResponse
            ] = await self.aclient.embed(texts=texts, **request_params)

            # Extract embeddings from response
            if isinstance(response, EmbeddingsFloatsEmbedResponse):
                batch_embeddings = response.embeddings
            elif isinstance(response, EmbeddingsByTypeEmbedResponse):
                batch_embeddings = response.embeddings.float_ if response.embeddings.float_ else []
            else:
                log_warning("No embeddings found in response")
                batch_embeddings = []

            # Extract usage information; the API bills per request, so the
            # same usage dict is reported for every embedding of the batch.
            usage = response.meta.billed_units if response.meta else None
            usage_dict = usage.model_dump() if usage else None
            all_usage = [usage_dict] * len(batch_embeddings)

            log_debug(f"Async batch embedding succeeded on attempt {attempt + 1}")
            return batch_embeddings, all_usage

        except Exception as e:
            # Anything that is not a rate limit is never retried here.
            if not self._is_rate_limit_error(e):
                log_debug(f"Async non-rate-limit error on attempt {attempt + 1}: {e}")
                raise

            if not self.exponential_backoff:
                log_warning(
                    "Rate limit detected. To enable automatic backoff retry, set exponential_backoff=True when creating the embedder."
                )
                raise

            log_info(f"Async rate limit detected on attempt {attempt + 1}")
            if attempt >= max_retries:
                log_warning(f"Async max retries ({max_retries}) reached for rate limiting")
                raise

            await self._async_rate_limit_backoff_sleep(attempt)

    # Only reachable when max_retries is negative, i.e. the loop never ran.
    log_error("Could not create embeddings. End of retry loop reached.")
    return [], []
|
|
170
|
+
|
|
64
171
|
def get_embedding(self, text: str) -> List[float]:
|
|
65
172
|
response: Union[EmbeddingsFloatsEmbedResponse, EmbeddingsByTypeEmbedResponse] = self.response(text=text)
|
|
66
173
|
try:
|
|
@@ -69,10 +176,10 @@ class CohereEmbedder(Embedder):
|
|
|
69
176
|
elif isinstance(response, EmbeddingsByTypeEmbedResponse):
|
|
70
177
|
return response.embeddings.float_[0] if response.embeddings.float_ else []
|
|
71
178
|
else:
|
|
72
|
-
|
|
179
|
+
log_warning("No embeddings found")
|
|
73
180
|
return []
|
|
74
181
|
except Exception as e:
|
|
75
|
-
|
|
182
|
+
log_warning(e)
|
|
76
183
|
return []
|
|
77
184
|
|
|
78
185
|
def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
|
|
@@ -110,10 +217,10 @@ class CohereEmbedder(Embedder):
|
|
|
110
217
|
elif isinstance(response, EmbeddingsByTypeEmbedResponse):
|
|
111
218
|
return response.embeddings.float_[0] if response.embeddings.float_ else []
|
|
112
219
|
else:
|
|
113
|
-
|
|
220
|
+
log_warning("No embeddings found")
|
|
114
221
|
return []
|
|
115
222
|
except Exception as e:
|
|
116
|
-
|
|
223
|
+
log_warning(e)
|
|
117
224
|
return []
|
|
118
225
|
|
|
119
226
|
async def async_get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
|
|
@@ -142,3 +249,75 @@ class CohereEmbedder(Embedder):
|
|
|
142
249
|
if usage:
|
|
143
250
|
return embedding, usage.model_dump()
|
|
144
251
|
return embedding, None
|
|
252
|
+
|
|
253
|
+
async def async_get_embeddings_batch_and_usage(
    self, texts: List[str]
) -> Tuple[List[List[float]], List[Optional[Dict]]]:
    """
    Get embeddings and usage for multiple texts in batches (async version).

    Texts are sent in chunks of ``self.batch_size``. When a chunk fails with a
    rate-limit error after retries, it is retried once more in halves; when it
    fails with any other error, its texts are embedded one by one.

    Args:
        texts: List of text strings to embed

    Returns:
        Tuple of (List of embedding vectors, List of usage dictionaries).
        A text that could not be embedded is represented by an empty list and
        a None usage, and short batch results are padded the same way so both
        lists stay aligned with ``texts``.

    Raises:
        Exception: The client's rate-limit error, when one occurs while
            ``self.exponential_backoff`` is disabled.
    """
    all_embeddings: List[List[float]] = []
    all_usage: List[Optional[Dict]] = []
    log_info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size} (async)")

    def _collect(chunk: List[str], embeddings: List[List[float]], usage: List[Optional[Dict]]) -> None:
        """Append the results for ``chunk``, padding with placeholders if short."""
        all_embeddings.extend(embeddings)
        all_embeddings.extend([] for _ in range(len(chunk) - len(embeddings)))
        all_usage.extend(usage)
        all_usage.extend(None for _ in range(len(chunk) - len(usage)))

    for i in range(0, len(texts), self.batch_size):
        batch_texts = texts[i : i + self.batch_size]

        try:
            # Use retry logic for batch processing
            batch_embeddings, batch_usage = await self._async_batch_with_retry(batch_texts)
        except Exception as e:
            log_warning(f"Async batch embedding failed after retries: {e}")
            rate_limited = self._is_rate_limit_error(e)

            if rate_limited and not self.exponential_backoff:
                log_warning("Rate limit hit and backoff is disabled. Failing immediately.")
                raise

            if rate_limited:
                # Individual calls would only add more requests against the
                # same limit, so shrink the batch instead.
                log_warning("Rate limit hit even after retries. Consider reducing batch_size or upgrading API key.")
                if len(batch_texts) > 1:
                    smaller_batch_size = max(1, len(batch_texts) // 2)
                    log_info(f"Retrying with smaller batch size: {smaller_batch_size}")
                    for j in range(0, len(batch_texts), smaller_batch_size):
                        small_batch = batch_texts[j : j + smaller_batch_size]
                        try:
                            small_embeddings, small_usage = await self._async_batch_with_retry(small_batch)
                        except Exception as e3:
                            log_error(f"Failed even with reduced batch size: {e3}")
                            # Fall back to empty results for this batch
                            _collect(small_batch, [], [])
                        else:
                            _collect(small_batch, small_embeddings, small_usage)
                else:
                    # Single item already failed, add empty result
                    log_debug("Single item failed, adding empty result")
                    _collect(batch_texts, [], [])
            else:
                # For non-rate-limit errors, fall back to individual calls
                log_debug("Non-rate-limit error, falling back to individual calls")
                for text in batch_texts:
                    try:
                        embedding, usage = await self.async_get_embedding_and_usage(text)
                    except Exception as e2:
                        log_warning(f"Error in individual async embedding fallback: {e2}")
                        embedding, usage = [], None
                    all_embeddings.append(embedding)
                    all_usage.append(usage)
        else:
            _collect(batch_texts, batch_embeddings, batch_usage)

    return all_embeddings, all_usage
|
|
@@ -23,7 +23,7 @@ class FastEmbedEmbedder(Embedder):
|
|
|
23
23
|
"""Using BAAI/bge-small-en-v1.5 model, more models available: https://qdrant.github.io/fastembed/examples/Supported_Models/"""
|
|
24
24
|
|
|
25
25
|
id: str = "BAAI/bge-small-en-v1.5"
|
|
26
|
-
dimensions: int = 384
|
|
26
|
+
dimensions: Optional[int] = 384
|
|
27
27
|
|
|
28
28
|
def get_embedding(self, text: str) -> List[float]:
|
|
29
29
|
model = TextEmbedding(model_name=self.id)
|