PyPI - agno - Versions diffs - 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl - Mend

agno 2.0.0rc2-py3-none-any.whl → 2.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (331)

agno/agent/agent.py +6009 -2874
agno/api/api.py +2 -0
agno/api/os.py +1 -1
agno/culture/__init__.py +3 -0
agno/culture/manager.py +956 -0
agno/db/async_postgres/__init__.py +3 -0
agno/db/base.py +385 -6
agno/db/dynamo/dynamo.py +388 -81
agno/db/dynamo/schemas.py +47 -10
agno/db/dynamo/utils.py +63 -4
agno/db/firestore/firestore.py +435 -64
agno/db/firestore/schemas.py +11 -0
agno/db/firestore/utils.py +102 -4
agno/db/gcs_json/gcs_json_db.py +384 -42
agno/db/gcs_json/utils.py +60 -26
agno/db/in_memory/in_memory_db.py +351 -66
agno/db/in_memory/utils.py +60 -2
agno/db/json/json_db.py +339 -48
agno/db/json/utils.py +60 -26
agno/db/migrations/manager.py +199 -0
agno/db/migrations/v1_to_v2.py +510 -37
agno/db/migrations/versions/__init__.py +0 -0
agno/db/migrations/versions/v2_3_0.py +938 -0
agno/db/mongo/__init__.py +15 -1
agno/db/mongo/async_mongo.py +2036 -0
agno/db/mongo/mongo.py +653 -76
agno/db/mongo/schemas.py +13 -0
agno/db/mongo/utils.py +80 -8
agno/db/mysql/mysql.py +687 -25
agno/db/mysql/schemas.py +61 -37
agno/db/mysql/utils.py +60 -2
agno/db/postgres/__init__.py +2 -1
agno/db/postgres/async_postgres.py +2001 -0
agno/db/postgres/postgres.py +676 -57
agno/db/postgres/schemas.py +43 -18
agno/db/postgres/utils.py +164 -2
agno/db/redis/redis.py +344 -38
agno/db/redis/schemas.py +18 -0
agno/db/redis/utils.py +60 -2
agno/db/schemas/__init__.py +2 -1
agno/db/schemas/culture.py +120 -0
agno/db/schemas/memory.py +13 -0
agno/db/singlestore/schemas.py +26 -1
agno/db/singlestore/singlestore.py +687 -53
agno/db/singlestore/utils.py +60 -2
agno/db/sqlite/__init__.py +2 -1
agno/db/sqlite/async_sqlite.py +2371 -0
agno/db/sqlite/schemas.py +24 -0
agno/db/sqlite/sqlite.py +774 -85
agno/db/sqlite/utils.py +168 -5
agno/db/surrealdb/__init__.py +3 -0
agno/db/surrealdb/metrics.py +292 -0
agno/db/surrealdb/models.py +309 -0
agno/db/surrealdb/queries.py +71 -0
agno/db/surrealdb/surrealdb.py +1361 -0
agno/db/surrealdb/utils.py +147 -0
agno/db/utils.py +50 -22
agno/eval/accuracy.py +50 -43
agno/eval/performance.py +6 -3
agno/eval/reliability.py +6 -3
agno/eval/utils.py +33 -16
agno/exceptions.py +68 -1
agno/filters.py +354 -0
agno/guardrails/__init__.py +6 -0
agno/guardrails/base.py +19 -0
agno/guardrails/openai.py +144 -0
agno/guardrails/pii.py +94 -0
agno/guardrails/prompt_injection.py +52 -0
agno/integrations/discord/client.py +1 -0
agno/knowledge/chunking/agentic.py +13 -10
agno/knowledge/chunking/fixed.py +1 -1
agno/knowledge/chunking/semantic.py +40 -8
agno/knowledge/chunking/strategy.py +59 -15
agno/knowledge/embedder/aws_bedrock.py +9 -4
agno/knowledge/embedder/azure_openai.py +54 -0
agno/knowledge/embedder/base.py +2 -0
agno/knowledge/embedder/cohere.py +184 -5
agno/knowledge/embedder/fastembed.py +1 -1
agno/knowledge/embedder/google.py +79 -1
agno/knowledge/embedder/huggingface.py +9 -4
agno/knowledge/embedder/jina.py +63 -0
agno/knowledge/embedder/mistral.py +78 -11
agno/knowledge/embedder/nebius.py +1 -1
agno/knowledge/embedder/ollama.py +13 -0
agno/knowledge/embedder/openai.py +37 -65
agno/knowledge/embedder/sentence_transformer.py +8 -4
agno/knowledge/embedder/vllm.py +262 -0
agno/knowledge/embedder/voyageai.py +69 -16
agno/knowledge/knowledge.py +595 -187
agno/knowledge/reader/base.py +9 -2
agno/knowledge/reader/csv_reader.py +8 -10
agno/knowledge/reader/docx_reader.py +5 -6
agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
agno/knowledge/reader/json_reader.py +6 -5
agno/knowledge/reader/markdown_reader.py +13 -13
agno/knowledge/reader/pdf_reader.py +43 -68
agno/knowledge/reader/pptx_reader.py +101 -0
agno/knowledge/reader/reader_factory.py +51 -6
agno/knowledge/reader/s3_reader.py +3 -15
agno/knowledge/reader/tavily_reader.py +194 -0
agno/knowledge/reader/text_reader.py +13 -13
agno/knowledge/reader/web_search_reader.py +2 -43
agno/knowledge/reader/website_reader.py +43 -25
agno/knowledge/reranker/__init__.py +3 -0
agno/knowledge/types.py +9 -0
agno/knowledge/utils.py +20 -0
agno/media.py +339 -266
agno/memory/manager.py +336 -82
agno/models/aimlapi/aimlapi.py +2 -2
agno/models/anthropic/claude.py +183 -37
agno/models/aws/bedrock.py +52 -112
agno/models/aws/claude.py +33 -1
agno/models/azure/ai_foundry.py +33 -15
agno/models/azure/openai_chat.py +25 -8
agno/models/base.py +1011 -566
agno/models/cerebras/cerebras.py +19 -13
agno/models/cerebras/cerebras_openai.py +8 -5
agno/models/cohere/chat.py +27 -1
agno/models/cometapi/__init__.py +5 -0
agno/models/cometapi/cometapi.py +57 -0
agno/models/dashscope/dashscope.py +1 -0
agno/models/deepinfra/deepinfra.py +2 -2
agno/models/deepseek/deepseek.py +2 -2
agno/models/fireworks/fireworks.py +2 -2
agno/models/google/gemini.py +110 -37
agno/models/groq/groq.py +28 -11
agno/models/huggingface/huggingface.py +2 -1
agno/models/internlm/internlm.py +2 -2
agno/models/langdb/langdb.py +4 -4
agno/models/litellm/chat.py +18 -1
agno/models/litellm/litellm_openai.py +2 -2
agno/models/llama_cpp/__init__.py +5 -0
agno/models/llama_cpp/llama_cpp.py +22 -0
agno/models/message.py +143 -4
agno/models/meta/llama.py +27 -10
agno/models/meta/llama_openai.py +5 -17
agno/models/nebius/nebius.py +6 -6
agno/models/nexus/__init__.py +3 -0
agno/models/nexus/nexus.py +22 -0
agno/models/nvidia/nvidia.py +2 -2
agno/models/ollama/chat.py +60 -6
agno/models/openai/chat.py +102 -43
agno/models/openai/responses.py +103 -106
agno/models/openrouter/openrouter.py +41 -3
agno/models/perplexity/perplexity.py +4 -5
agno/models/portkey/portkey.py +3 -3
agno/models/requesty/__init__.py +5 -0
agno/models/requesty/requesty.py +52 -0
agno/models/response.py +81 -5
agno/models/sambanova/sambanova.py +2 -2
agno/models/siliconflow/__init__.py +5 -0
agno/models/siliconflow/siliconflow.py +25 -0
agno/models/together/together.py +2 -2
agno/models/utils.py +254 -8
agno/models/vercel/v0.py +2 -2
agno/models/vertexai/__init__.py +0 -0
agno/models/vertexai/claude.py +96 -0
agno/models/vllm/vllm.py +1 -0
agno/models/xai/xai.py +3 -2
agno/os/app.py +543 -175
agno/os/auth.py +24 -14
agno/os/config.py +1 -0
agno/os/interfaces/__init__.py +1 -0
agno/os/interfaces/a2a/__init__.py +3 -0
agno/os/interfaces/a2a/a2a.py +42 -0
agno/os/interfaces/a2a/router.py +250 -0
agno/os/interfaces/a2a/utils.py +924 -0
agno/os/interfaces/agui/agui.py +23 -7
agno/os/interfaces/agui/router.py +27 -3
agno/os/interfaces/agui/utils.py +242 -142
agno/os/interfaces/base.py +6 -2
agno/os/interfaces/slack/router.py +81 -23
agno/os/interfaces/slack/slack.py +29 -14
agno/os/interfaces/whatsapp/router.py +11 -4
agno/os/interfaces/whatsapp/whatsapp.py +14 -7
agno/os/mcp.py +111 -54
agno/os/middleware/__init__.py +7 -0
agno/os/middleware/jwt.py +233 -0
agno/os/router.py +556 -139
agno/os/routers/evals/evals.py +71 -34
agno/os/routers/evals/schemas.py +31 -31
agno/os/routers/evals/utils.py +6 -5
agno/os/routers/health.py +31 -0
agno/os/routers/home.py +52 -0
agno/os/routers/knowledge/knowledge.py +185 -38
agno/os/routers/knowledge/schemas.py +82 -22
agno/os/routers/memory/memory.py +158 -53
agno/os/routers/memory/schemas.py +20 -16
agno/os/routers/metrics/metrics.py +20 -8
agno/os/routers/metrics/schemas.py +16 -16
agno/os/routers/session/session.py +499 -38
agno/os/schema.py +308 -198
agno/os/utils.py +401 -41
agno/reasoning/anthropic.py +80 -0
agno/reasoning/azure_ai_foundry.py +2 -2
agno/reasoning/deepseek.py +2 -2
agno/reasoning/default.py +3 -1
agno/reasoning/gemini.py +73 -0
agno/reasoning/groq.py +2 -2
agno/reasoning/ollama.py +2 -2
agno/reasoning/openai.py +7 -2
agno/reasoning/vertexai.py +76 -0
agno/run/__init__.py +6 -0
agno/run/agent.py +266 -112
agno/run/base.py +53 -24
agno/run/team.py +252 -111
agno/run/workflow.py +156 -45
agno/session/agent.py +105 -89
agno/session/summary.py +65 -25
agno/session/team.py +176 -96
agno/session/workflow.py +406 -40
agno/team/team.py +3854 -1692
agno/tools/brightdata.py +3 -3
agno/tools/cartesia.py +3 -5
agno/tools/dalle.py +9 -8
agno/tools/decorator.py +4 -2
agno/tools/desi_vocal.py +2 -2
agno/tools/duckduckgo.py +15 -11
agno/tools/e2b.py +20 -13
agno/tools/eleven_labs.py +26 -28
agno/tools/exa.py +21 -16
agno/tools/fal.py +4 -4
agno/tools/file.py +153 -23
agno/tools/file_generation.py +350 -0
agno/tools/firecrawl.py +4 -4
agno/tools/function.py +257 -37
agno/tools/giphy.py +2 -2
agno/tools/gmail.py +238 -14
agno/tools/google_drive.py +270 -0
agno/tools/googlecalendar.py +36 -8
agno/tools/googlesheets.py +20 -5
agno/tools/jira.py +20 -0
agno/tools/knowledge.py +3 -3
agno/tools/lumalab.py +3 -3
agno/tools/mcp/__init__.py +10 -0
agno/tools/mcp/mcp.py +331 -0
agno/tools/mcp/multi_mcp.py +347 -0
agno/tools/mcp/params.py +24 -0
agno/tools/mcp_toolbox.py +284 -0
agno/tools/mem0.py +11 -17
agno/tools/memori.py +1 -53
agno/tools/memory.py +419 -0
agno/tools/models/azure_openai.py +2 -2
agno/tools/models/gemini.py +3 -3
agno/tools/models/groq.py +3 -5
agno/tools/models/nebius.py +7 -7
agno/tools/models_labs.py +25 -15
agno/tools/notion.py +204 -0
agno/tools/openai.py +4 -9
agno/tools/opencv.py +3 -3
agno/tools/parallel.py +314 -0
agno/tools/replicate.py +7 -7
agno/tools/scrapegraph.py +58 -31
agno/tools/searxng.py +2 -2
agno/tools/serper.py +2 -2
agno/tools/slack.py +18 -3
agno/tools/spider.py +2 -2
agno/tools/tavily.py +146 -0
agno/tools/whatsapp.py +1 -1
agno/tools/workflow.py +278 -0
agno/tools/yfinance.py +12 -11
agno/utils/agent.py +820 -0
agno/utils/audio.py +27 -0
agno/utils/common.py +90 -1
agno/utils/events.py +222 -7
agno/utils/gemini.py +181 -23
agno/utils/hooks.py +57 -0
agno/utils/http.py +111 -0
agno/utils/knowledge.py +12 -5
agno/utils/log.py +1 -0
agno/utils/mcp.py +95 -5
agno/utils/media.py +188 -10
agno/utils/merge_dict.py +22 -1
agno/utils/message.py +60 -0
agno/utils/models/claude.py +40 -11
agno/utils/models/cohere.py +1 -1
agno/utils/models/watsonx.py +1 -1
agno/utils/openai.py +1 -1
agno/utils/print_response/agent.py +105 -21
agno/utils/print_response/team.py +103 -38
agno/utils/print_response/workflow.py +251 -34
agno/utils/reasoning.py +22 -1
agno/utils/serialize.py +32 -0
agno/utils/streamlit.py +16 -10
agno/utils/string.py +41 -0
agno/utils/team.py +98 -9
agno/utils/tools.py +1 -1
agno/vectordb/base.py +23 -4
agno/vectordb/cassandra/cassandra.py +65 -9
agno/vectordb/chroma/chromadb.py +182 -38
agno/vectordb/clickhouse/clickhousedb.py +64 -11
agno/vectordb/couchbase/couchbase.py +105 -10
agno/vectordb/lancedb/lance_db.py +183 -135
agno/vectordb/langchaindb/langchaindb.py +25 -7
agno/vectordb/lightrag/lightrag.py +17 -3
agno/vectordb/llamaindex/__init__.py +3 -0
agno/vectordb/llamaindex/llamaindexdb.py +46 -7
agno/vectordb/milvus/milvus.py +126 -9
agno/vectordb/mongodb/__init__.py +7 -1
agno/vectordb/mongodb/mongodb.py +112 -7
agno/vectordb/pgvector/pgvector.py +142 -21
agno/vectordb/pineconedb/pineconedb.py +80 -8
agno/vectordb/qdrant/qdrant.py +125 -39
agno/vectordb/redis/__init__.py +9 -0
agno/vectordb/redis/redisdb.py +694 -0
agno/vectordb/singlestore/singlestore.py +111 -25
agno/vectordb/surrealdb/surrealdb.py +31 -5
agno/vectordb/upstashdb/upstashdb.py +76 -8
agno/vectordb/weaviate/weaviate.py +86 -15
agno/workflow/__init__.py +2 -0
agno/workflow/agent.py +299 -0
agno/workflow/condition.py +112 -18
agno/workflow/loop.py +69 -10
agno/workflow/parallel.py +266 -118
agno/workflow/router.py +110 -17
agno/workflow/step.py +645 -136
agno/workflow/steps.py +65 -6
agno/workflow/types.py +71 -33
agno/workflow/workflow.py +2113 -300
agno-2.3.0.dist-info/METADATA +618 -0
agno-2.3.0.dist-info/RECORD +577 -0
agno-2.3.0.dist-info/licenses/LICENSE +201 -0
agno/knowledge/reader/url_reader.py +0 -128
agno/tools/googlesearch.py +0 -98
agno/tools/mcp.py +0 -610
agno/utils/models/aws_claude.py +0 -170
agno-2.0.0rc2.dist-info/METADATA +0 -355
agno-2.0.0rc2.dist-info/RECORD +0 -515
agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
{agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0

agno/db/migrations/v1_to_v2.py CHANGED Viewed

@@ -1,75 +1,532 @@
 """Migration utility to migrate your Agno tables from v1 to v2"""
-from typing import Any, Dict, List, Optional, Union
+import gc
+import json
+from typing import Any, Dict, List, Optional, Union, cast
 from sqlalchemy import text
-from agno.db.mysql.mysql import MySQLDb
-from agno.db.postgres.postgres import PostgresDb
+from agno.db.base import BaseDb
 from agno.db.schemas.memory import UserMemory
-from agno.db.sqlite.sqlite import SqliteDb
 from agno.session import AgentSession, TeamSession, WorkflowSession
-from agno.utils.log import log_error
+from agno.utils.log import log_error, log_info, log_warning
+def convert_v1_metrics_to_v2(metrics_dict: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert v1 metrics dictionary to v2 format by mapping old field names to new ones."""
+    if not isinstance(metrics_dict, dict):
+        return metrics_dict
+    # Create a copy to avoid modifying the original
+    v2_metrics = metrics_dict.copy()
+    # Map v1 field names to v2 field names
+    field_mappings = {
+        "time": "duration",
+        "audio_tokens": "audio_total_tokens",
+        "input_audio_tokens": "audio_input_tokens",
+        "output_audio_tokens": "audio_output_tokens",
+        "cached_tokens": "cache_read_tokens",
+    }
+    # Fields to remove (deprecated in v2)
+    deprecated_fields = ["prompt_tokens", "completion_tokens", "prompt_tokens_details", "completion_tokens_details"]
+    # Apply field mappings
+    for old_field, new_field in field_mappings.items():
+        if old_field in v2_metrics:
+            v2_metrics[new_field] = v2_metrics.pop(old_field)
+    # Remove deprecated fields
+    for field in deprecated_fields:
+        v2_metrics.pop(field, None)
+    return v2_metrics
+def convert_any_metrics_in_data(data: Any) -> Any:
+    """Recursively find and convert any metrics dictionaries and handle v1 to v2 field conversion."""
+    if isinstance(data, dict):
+        # First apply v1 to v2 field conversion (handles extra_data extraction, thinking/reasoning_content consolidation, etc.)
+        data = convert_v1_fields_to_v2(data)
+        # Check if this looks like a metrics dictionary
+        if _is_metrics_dict(data):
+            return convert_v1_metrics_to_v2(data)
+        # Otherwise, recursively process all values
+        converted_dict = {}
+        for key, value in data.items():
+            # Special handling for 'metrics' keys - always convert their values
+            if key == "metrics" and isinstance(value, dict):
+                converted_dict[key] = convert_v1_metrics_to_v2(value)
+            else:
+                converted_dict[key] = convert_any_metrics_in_data(value)
+        return converted_dict
+    elif isinstance(data, list):
+        return [convert_any_metrics_in_data(item) for item in data]
+    else:
+        # Not a dict or list, return as-is
+        return data
+def _is_metrics_dict(data: Dict[str, Any]) -> bool:
+    """Check if a dictionary looks like a metrics dictionary based on common field names."""
+    if not isinstance(data, dict):
+        return False
+    # Common metrics field names (both v1 and v2)
+    metrics_indicators = {
+        "input_tokens",
+        "output_tokens",
+        "total_tokens",
+        "time",
+        "duration",
+        "audio_tokens",
+        "audio_total_tokens",
+        "audio_input_tokens",
+        "audio_output_tokens",
+        "cached_tokens",
+        "cache_read_tokens",
+        "cache_write_tokens",
+        "reasoning_tokens",
+        "prompt_tokens",
+        "completion_tokens",
+        "time_to_first_token",
+        "provider_metrics",
+        "additional_metrics",
+    }
+    # Deprecated v1 fields that are strong indicators this is a metrics dict
+    deprecated_v1_indicators = {"time", "audio_tokens", "cached_tokens", "prompt_tokens", "completion_tokens"}
+    # If we find any deprecated v1 field, it's definitely a metrics dict that needs conversion
+    if any(field in data for field in deprecated_v1_indicators):
+        return True
+    # Otherwise, if the dict has at least 2 metrics-related fields, consider it a metrics dict
+    matching_fields = sum(1 for field in data.keys() if field in metrics_indicators)
+    return matching_fields >= 2
+def convert_session_data_comprehensively(session_data: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
+    """Comprehensively convert session data from v1 to v2 format, including metrics conversion and field mapping."""
+    if not session_data:
+        return session_data
+    # Use the recursive converter to handle all v1 to v2 conversions (metrics, field mapping, extra_data extraction, etc.)
+    return convert_any_metrics_in_data(session_data)
+def safe_get_runs_from_memory(memory_data: Any) -> Any:
+    """Safely extract runs data from memory field, handling various data types."""
+    if memory_data is None:
+        return None
+    runs: Any = []
+    # If memory_data is a string, try to parse it as JSON
+    if isinstance(memory_data, str):
+        try:
+            memory_dict = json.loads(memory_data)
+            if isinstance(memory_dict, dict):
+                runs = memory_dict.get("runs")
+        except (json.JSONDecodeError, AttributeError):
+            # If JSON parsing fails, memory_data might just be a string value
+            return None
+    # If memory_data is already a dict, access runs directly
+    elif isinstance(memory_data, dict):
+        runs = memory_data.get("runs")
+    for run in runs or []:
+        # Adjust fields mapping for Agent sessions
+        if run.get("agent_id") is not None:
+            if run.get("team_id") is not None:
+                run.pop("team_id")
+            if run.get("team_session_id") is not None:
+                run["session_id"] = run.pop("team_session_id")
+                if run.get("event"):
+                    run["events"] = [run.pop("event")]
+        # Adjust fields mapping for Team sessions
+        if run.get("team_id") is not None:
+            if run.get("agent_id") is not None:
+                run.pop("agent_id")
+            if member_responses := run.get("member_responses"):
+                for response in member_responses:
+                    if response.get("agent_id") is not None and response.get("team_id") is not None:
+                        response.pop("team_id")
+                    if response.get("agent_id") is not None and response.get("team_session_id") is not None:
+                        response["session_id"] = response.pop("team_session_id")
+                run["member_responses"] = member_responses
+    return runs
+def convert_v1_media_to_v2(media_data: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert v1 media objects to v2 format."""
+    if not isinstance(media_data, dict):
+        return media_data
+    # Create a copy to avoid modifying the original
+    v2_media = media_data.copy()
+    # Add id if missing (required in v2)
+    if "id" not in v2_media or v2_media["id"] is None:
+        from uuid import uuid4
+        v2_media["id"] = str(uuid4())
+    # Handle VideoArtifact → Video conversion
+    if "eta" in v2_media or "length" in v2_media:
+        # Convert length to duration if it's numeric
+        length = v2_media.pop("length", None)
+        if length and isinstance(length, (int, float)):
+            v2_media["duration"] = length
+        elif length and isinstance(length, str):
+            try:
+                v2_media["duration"] = float(length)
+            except ValueError:
+                pass  # Keep as is if not convertible
+    # Handle AudioArtifact → Audio conversion
+    if "base64_audio" in v2_media:
+        # Map base64_audio to content
+        base64_audio = v2_media.pop("base64_audio", None)
+        if base64_audio:
+            v2_media["content"] = base64_audio
+    # Handle AudioResponse content conversion (base64 string to bytes if needed)
+    if "transcript" in v2_media and "content" in v2_media:
+        content = v2_media.get("content")
+        if content and isinstance(content, str):
+            # Try to decode base64 content to bytes for v2
+            try:
+                import base64
+                v2_media["content"] = base64.b64decode(content)
+            except Exception:
+                # If not valid base64, keep as string
+                pass
+    # Ensure format and mime_type are set appropriately
+    if "format" in v2_media and "mime_type" not in v2_media:
+        format_val = v2_media["format"]
+        if format_val:
+            # Set mime_type based on format for common types
+            mime_type_map = {
+                "mp4": "video/mp4",
+                "mov": "video/quicktime",
+                "avi": "video/x-msvideo",
+                "webm": "video/webm",
+                "mp3": "audio/mpeg",
+                "wav": "audio/wav",
+                "ogg": "audio/ogg",
+                "png": "image/png",
+                "jpg": "image/jpeg",
+                "jpeg": "image/jpeg",
+                "gif": "image/gif",
+                "webp": "image/webp",
+            }
+            if format_val.lower() in mime_type_map:
+                v2_media["mime_type"] = mime_type_map[format_val.lower()]
+    return v2_media
+def convert_v1_fields_to_v2(data: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert v1 fields to v2 format with proper field mapping and extraction."""
+    if not isinstance(data, dict):
+        return data
+    # Create a copy to avoid modifying the original
+    v2_data = data.copy()
+    # Fields that should be completely ignored/removed in v2
+    deprecated_fields = {
+        "team_session_id",  # RunOutput v1 field, removed in v2
+        "formatted_tool_calls",  # RunOutput v1 field, removed in v2
+        "event",  # Remove event field
+        "events",  # Remove events field
+        # Add other deprecated fields here as needed
+    }
+    # Extract and map fields from extra_data before removing it
+    extra_data = v2_data.get("extra_data")
+    if extra_data and isinstance(extra_data, dict):
+        # Map extra_data fields to their v2 locations
+        if "add_messages" in extra_data:
+            v2_data["additional_input"] = extra_data["add_messages"]
+        if "references" in extra_data:
+            v2_data["references"] = extra_data["references"]
+        if "reasoning_steps" in extra_data:
+            v2_data["reasoning_steps"] = extra_data["reasoning_steps"]
+        if "reasoning_content" in extra_data:
+            # reasoning_content from extra_data also goes to reasoning_content
+            v2_data["reasoning_content"] = extra_data["reasoning_content"]
+        if "reasoning_messages" in extra_data:
+            v2_data["reasoning_messages"] = extra_data["reasoning_messages"]
+    # Handle thinking and reasoning_content consolidation
+    # Both thinking and reasoning_content from v1 should become reasoning_content in v2
+    thinking = v2_data.get("thinking")
+    reasoning_content = v2_data.get("reasoning_content")
+    # Consolidate thinking and reasoning_content into reasoning_content
+    if thinking and reasoning_content:
+        # Both exist, combine them (thinking first, then reasoning_content)
+        v2_data["reasoning_content"] = f"{thinking}\n{reasoning_content}"
+    elif thinking and not reasoning_content:
+        # Only thinking exists, move it to reasoning_content
+        v2_data["reasoning_content"] = thinking
+    # If only reasoning_content exists, keep it as is
+    # Remove thinking field since it's now consolidated into reasoning_content
+    if "thinking" in v2_data:
+        del v2_data["thinking"]
+    # Handle media object conversions
+    media_fields = ["images", "videos", "audio", "response_audio"]
+    for field in media_fields:
+        if field in v2_data and v2_data[field]:
+            if isinstance(v2_data[field], list):
+                # Handle list of media objects
+                v2_data[field] = [
+                    convert_v1_media_to_v2(item) if isinstance(item, dict) else item for item in v2_data[field]
+                ]
+            elif isinstance(v2_data[field], dict):
+                # Handle single media object
+                v2_data[field] = convert_v1_media_to_v2(v2_data[field])
+    # Remove extra_data after extraction
+    if "extra_data" in v2_data:
+        del v2_data["extra_data"]
+    # Remove other deprecated fields
+    for field in deprecated_fields:
+        v2_data.pop(field, None)
+    return v2_data
 def migrate(
-    db: Union[PostgresDb, MySQLDb, SqliteDb],
+    db: BaseDb,
     v1_db_schema: str,
     agent_sessions_table_name: Optional[str] = None,
     team_sessions_table_name: Optional[str] = None,
     workflow_sessions_table_name: Optional[str] = None,
     memories_table_name: Optional[str] = None,
+    batch_size: int = 5000,
 ):
-    """Given a PostgresDb and table names, parse and migrate the tables' content to the corresponding v2 tables.
+    """Given a database connection and table/collection names, parse and migrate the content to corresponding v2 tables/collections.
     Args:
-        db: The database to migrate
-        v1_db_schema: The schema of the v1 tables
-        agent_sessions_table_name: The name of the agent sessions table. If not provided, the agent sessions table will not be migrated.
-        team_sessions_table_name: The name of the team sessions table. If not provided, the team sessions table will not be migrated.
-        workflow_sessions_table_name: The name of the workflow sessions table. If not provided, the workflow sessions table will not be migrated.
-        workflow_v2_sessions_table_name: The name of the workflow v2 sessions table. If not provided, the workflow v2 sessions table will not be migrated.
-        memories_table_name: The name of the memories table. If not provided, the memories table will not be migrated.
+        db: The database to migrate (PostgresDb, MySQLDb, SqliteDb, or MongoDb)
+        v1_db_schema: The schema of the v1 tables (leave empty for SQLite and MongoDB)
+        agent_sessions_table_name: The name of the agent sessions table/collection. If not provided, agent sessions will not be migrated.
+        team_sessions_table_name: The name of the team sessions table/collection. If not provided, team sessions will not be migrated.
+        workflow_sessions_table_name: The name of the workflow sessions table/collection. If not provided, workflow sessions will not be migrated.
+        memories_table_name: The name of the memories table/collection. If not provided, memories will not be migrated.
+        batch_size: Number of records to process in each batch (default: 5000)
     """
     if agent_sessions_table_name:
-        db.migrate_table_from_v1_to_v2(
+        migrate_table_in_batches(
+            db=db,
             v1_db_schema=v1_db_schema,
             v1_table_name=agent_sessions_table_name,
             v1_table_type="agent_sessions",
+            batch_size=batch_size,
         )
     if team_sessions_table_name:
-        db.migrate_table_from_v1_to_v2(
+        migrate_table_in_batches(
+            db=db,
             v1_db_schema=v1_db_schema,
             v1_table_name=team_sessions_table_name,
             v1_table_type="team_sessions",
+            batch_size=batch_size,
         )
     if workflow_sessions_table_name:
-        db.migrate_table_from_v1_to_v2(
+        migrate_table_in_batches(
+            db=db,
             v1_db_schema=v1_db_schema,
             v1_table_name=workflow_sessions_table_name,
             v1_table_type="workflow_sessions",
+            batch_size=batch_size,
         )
     if memories_table_name:
-        db.migrate_table_from_v1_to_v2(
+        migrate_table_in_batches(
+            db=db,
             v1_db_schema=v1_db_schema,
             v1_table_name=memories_table_name,
             v1_table_type="memories",
+            batch_size=batch_size,
         )
-def get_all_table_content(db, db_schema: str, table_name: str) -> list[dict[str, Any]]:
-    """Get all content from the given table"""
+def migrate_table_in_batches(
+    db: BaseDb,
+    v1_db_schema: str,
+    v1_table_name: str,
+    v1_table_type: str,
+    batch_size: int = 5000,
+):
+    log_info(f"Starting migration of table {v1_table_name} (type: {v1_table_type}) with batch size {batch_size}")
+    total_migrated = 0
+    batch_count = 0
+    for batch_content in get_table_content_in_batches(db, v1_db_schema, v1_table_name, batch_size):
+        batch_count += 1
+        batch_size_actual = len(batch_content)
+        log_info(f"Processing batch {batch_count} with {batch_size_actual} records from table {v1_table_name}")
+        # Parse the content into the new format
+        memories: List[UserMemory] = []
+        sessions: Union[List[AgentSession], List[TeamSession], List[WorkflowSession]] = []
+        if v1_table_type == "agent_sessions":
+            sessions = parse_agent_sessions(batch_content)
+        elif v1_table_type == "team_sessions":
+            sessions = parse_team_sessions(batch_content)
+        elif v1_table_type == "workflow_sessions":
+            sessions = parse_workflow_sessions(batch_content)
+        elif v1_table_type == "memories":
+            memories = parse_memories(batch_content)
+        else:
+            raise ValueError(f"Invalid table type: {v1_table_type}")
+        # Insert the batch into the new table
+        if v1_table_type in ["agent_sessions", "team_sessions", "workflow_sessions"]:
+            if sessions:
+                # Clear any existing scoped session state for SQL databases to prevent transaction conflicts
+                if hasattr(db, "Session"):
+                    db.Session.remove()  # type: ignore
+                db.upsert_sessions(sessions, preserve_updated_at=True)  # type: ignore
+                total_migrated += len(sessions)
+                log_info(f"Bulk upserted {len(sessions)} sessions in batch {batch_count}")
+        elif v1_table_type == "memories":
+            if memories:
+                # Clear any existing scoped session state for SQL databases to prevent transaction conflicts
+                if hasattr(db, "Session"):
+                    db.Session.remove()  # type: ignore
+                db.upsert_memories(memories, preserve_updated_at=True)
+                total_migrated += len(memories)
+                log_info(f"Bulk upserted {len(memories)} memories in batch {batch_count}")
+        log_info(f"Completed batch {batch_count}: migrated {batch_size_actual} records")
+        # Explicit cleanup to free memory before next batch
+        del batch_content
+        if v1_table_type in ["agent_sessions", "team_sessions", "workflow_sessions"]:
+            del sessions
+        elif v1_table_type == "memories":
+            del memories
+        # Force garbage collection to return memory to OS
+        # This is necessary because Python's memory allocator retains memory after large operations
+        # See: https://github.com/sqlalchemy/sqlalchemy/issues/4616
+        gc.collect()
+    log_info(f"✅ Migration completed for table {v1_table_name}: {total_migrated} total records migrated")
+def get_table_content_in_batches(db: BaseDb, db_schema: str, table_name: str, batch_size: int = 5000):
+    """Get table content in batches to avoid memory issues with large tables"""
     try:
-        with db.Session() as sess:
-            result = sess.execute(text(f"SELECT * FROM {db_schema}.{table_name}"))
-            return [row._asdict() for row in result]
+        if type(db).__name__ == "MongoDb":
+            from agno.db.mongo.mongo import MongoDb
+            db = cast(MongoDb, db)
+            # MongoDB implementation with cursor and batching
+            collection = db.database[table_name]
+            cursor = collection.find({}).batch_size(batch_size)
+            batch = []
+            for doc in cursor:
+                # Convert ObjectId to string for compatibility
+                if "_id" in doc:
+                    doc["_id"] = str(doc["_id"])
+                batch.append(doc)
+                if len(batch) >= batch_size:
+                    yield batch
+                    batch = []
+            # Yield remaining items
+            if batch:
+                yield batch
+        else:
+            # SQL database implementations (PostgresDb, MySQLDb, SqliteDb)
+            if type(db).__name__ == "PostgresDb":
+                from agno.db.postgres.postgres import PostgresDb
+                db = cast(PostgresDb, db)
+            elif type(db).__name__ == "MySQLDb":
+                from agno.db.mysql.mysql import MySQLDb
+                db = cast(MySQLDb, db)
+            elif type(db).__name__ == "SqliteDb":
+                from agno.db.sqlite.sqlite import SqliteDb
+                db = cast(SqliteDb, db)
+            else:
+                raise ValueError(f"Invalid database type: {type(db).__name__}")
+            offset = 0
+            while True:
+                # Create a new session for each batch to avoid transaction conflicts
+                with db.Session() as sess:
+                    # Handle empty schema by omitting the schema prefix (needed for SQLite)
+                    if db_schema and db_schema.strip():
+                        sql_query = f"SELECT * FROM {db_schema}.{table_name} LIMIT {batch_size} OFFSET {offset}"
+                    else:
+                        sql_query = f"SELECT * FROM {table_name} LIMIT {batch_size} OFFSET {offset}"
+                    result = sess.execute(text(sql_query))
+                    batch = [row._asdict() for row in result]
+                    if not batch:
+                        break
+                    yield batch
+                    offset += batch_size
+                    # If batch is smaller than batch_size, we've reached the end
+                    if len(batch) < batch_size:
+                        break
     except Exception as e:
-        log_error(f"Error getting all content from table {table_name}: {e}")
-        return []
+        log_error(f"Error getting batched content from table/collection {table_name}: {e}")
+        return
+def get_all_table_content(db, db_schema: str, table_name: str) -> list[dict[str, Any]]:
+    """Get all content from the given table/collection (legacy method kept for backward compatibility)
+    WARNING: This method loads all data into memory and should not be used for large tables.
+    Use get_table_content_in_batches() for large datasets.
+    """
+    log_warning(
+        f"Loading entire table {table_name} into memory. Consider using get_table_content_in_batches() for large tables, or if you experience any complication."
+    )
+    all_content = []
+    for batch in get_table_content_in_batches(db, db_schema, table_name):
+        all_content.extend(batch)
+    return all_content
 def parse_agent_sessions(v1_content: List[Dict[str, Any]]) -> List[AgentSession]:
@@ -82,13 +539,19 @@ def parse_agent_sessions(v1_content: List[Dict[str, Any]]) -> List[AgentSession]
             "agent_data": item.get("agent_data"),
             "session_id": item.get("session_id"),
             "user_id": item.get("user_id"),
-            "session_data": item.get("session_data"),
-            "metadata": item.get("extra_data"),
-            "runs": item.get("memory", {}).get("runs"),
+            "session_data": convert_session_data_comprehensively(item.get("session_data")),
+            "metadata": convert_any_metrics_in_data(item.get("extra_data")),
+            "runs": convert_any_metrics_in_data(safe_get_runs_from_memory(item.get("memory"))),
             "created_at": item.get("created_at"),
             "updated_at": item.get("updated_at"),
         }
-        agent_session = AgentSession.from_dict(session)
+        try:
+            agent_session = AgentSession.from_dict(session)
+        except Exception as e:
+            log_error(f"Error parsing agent session: {e}. This is the complete session that failed: {session}")
+            continue
         if agent_session is not None:
             sessions_v2.append(agent_session)
@@ -105,13 +568,18 @@ def parse_team_sessions(v1_content: List[Dict[str, Any]]) -> List[TeamSession]:
             "team_data": item.get("team_data"),
             "session_id": item.get("session_id"),
             "user_id": item.get("user_id"),
-            "session_data": item.get("session_data"),
-            "metadata": item.get("extra_data"),
-            "runs": item.get("memory", {}).get("runs"),
+            "session_data": convert_session_data_comprehensively(item.get("session_data")),
+            "metadata": convert_any_metrics_in_data(item.get("extra_data")),
+            "runs": convert_any_metrics_in_data(safe_get_runs_from_memory(item.get("memory"))),
             "created_at": item.get("created_at"),
             "updated_at": item.get("updated_at"),
         }
-        team_session = TeamSession.from_dict(session)
+        try:
+            team_session = TeamSession.from_dict(session)
+        except Exception as e:
+            log_error(f"Error parsing team session: {e}. This is the complete session that failed: {session}")
+            continue
         if team_session is not None:
             sessions_v2.append(team_session)
@@ -128,15 +596,20 @@ def parse_workflow_sessions(v1_content: List[Dict[str, Any]]) -> List[WorkflowSe
             "workflow_data": item.get("workflow_data"),
             "session_id": item.get("session_id"),
             "user_id": item.get("user_id"),
-            "session_data": item.get("session_data"),
-            "metadata": item.get("extra_data"),
+            "session_data": convert_session_data_comprehensively(item.get("session_data")),
+            "metadata": convert_any_metrics_in_data(item.get("extra_data")),
             "created_at": item.get("created_at"),
             "updated_at": item.get("updated_at"),
             # Workflow v2 specific fields
             "workflow_name": item.get("workflow_name"),
-            "runs": item.get("runs"),
+            "runs": convert_any_metrics_in_data(item.get("runs")),
         }
-        workflow_session = WorkflowSession.from_dict(session)
+        try:
+            workflow_session = WorkflowSession.from_dict(session)
+        except Exception as e:
+            log_error(f"Error parsing workflow session: {e}. This is the complete session that failed: {session}")
+            continue
         if workflow_session is not None:
             sessions_v2.append(workflow_session)

agno/db/migrations/versions/__init__.py ADDED Viewed

File without changes

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl