agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331)
  1. agno/agent/agent.py +6009 -2874
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/culture/__init__.py +3 -0
  5. agno/culture/manager.py +956 -0
  6. agno/db/async_postgres/__init__.py +3 -0
  7. agno/db/base.py +385 -6
  8. agno/db/dynamo/dynamo.py +388 -81
  9. agno/db/dynamo/schemas.py +47 -10
  10. agno/db/dynamo/utils.py +63 -4
  11. agno/db/firestore/firestore.py +435 -64
  12. agno/db/firestore/schemas.py +11 -0
  13. agno/db/firestore/utils.py +102 -4
  14. agno/db/gcs_json/gcs_json_db.py +384 -42
  15. agno/db/gcs_json/utils.py +60 -26
  16. agno/db/in_memory/in_memory_db.py +351 -66
  17. agno/db/in_memory/utils.py +60 -2
  18. agno/db/json/json_db.py +339 -48
  19. agno/db/json/utils.py +60 -26
  20. agno/db/migrations/manager.py +199 -0
  21. agno/db/migrations/v1_to_v2.py +510 -37
  22. agno/db/migrations/versions/__init__.py +0 -0
  23. agno/db/migrations/versions/v2_3_0.py +938 -0
  24. agno/db/mongo/__init__.py +15 -1
  25. agno/db/mongo/async_mongo.py +2036 -0
  26. agno/db/mongo/mongo.py +653 -76
  27. agno/db/mongo/schemas.py +13 -0
  28. agno/db/mongo/utils.py +80 -8
  29. agno/db/mysql/mysql.py +687 -25
  30. agno/db/mysql/schemas.py +61 -37
  31. agno/db/mysql/utils.py +60 -2
  32. agno/db/postgres/__init__.py +2 -1
  33. agno/db/postgres/async_postgres.py +2001 -0
  34. agno/db/postgres/postgres.py +676 -57
  35. agno/db/postgres/schemas.py +43 -18
  36. agno/db/postgres/utils.py +164 -2
  37. agno/db/redis/redis.py +344 -38
  38. agno/db/redis/schemas.py +18 -0
  39. agno/db/redis/utils.py +60 -2
  40. agno/db/schemas/__init__.py +2 -1
  41. agno/db/schemas/culture.py +120 -0
  42. agno/db/schemas/memory.py +13 -0
  43. agno/db/singlestore/schemas.py +26 -1
  44. agno/db/singlestore/singlestore.py +687 -53
  45. agno/db/singlestore/utils.py +60 -2
  46. agno/db/sqlite/__init__.py +2 -1
  47. agno/db/sqlite/async_sqlite.py +2371 -0
  48. agno/db/sqlite/schemas.py +24 -0
  49. agno/db/sqlite/sqlite.py +774 -85
  50. agno/db/sqlite/utils.py +168 -5
  51. agno/db/surrealdb/__init__.py +3 -0
  52. agno/db/surrealdb/metrics.py +292 -0
  53. agno/db/surrealdb/models.py +309 -0
  54. agno/db/surrealdb/queries.py +71 -0
  55. agno/db/surrealdb/surrealdb.py +1361 -0
  56. agno/db/surrealdb/utils.py +147 -0
  57. agno/db/utils.py +50 -22
  58. agno/eval/accuracy.py +50 -43
  59. agno/eval/performance.py +6 -3
  60. agno/eval/reliability.py +6 -3
  61. agno/eval/utils.py +33 -16
  62. agno/exceptions.py +68 -1
  63. agno/filters.py +354 -0
  64. agno/guardrails/__init__.py +6 -0
  65. agno/guardrails/base.py +19 -0
  66. agno/guardrails/openai.py +144 -0
  67. agno/guardrails/pii.py +94 -0
  68. agno/guardrails/prompt_injection.py +52 -0
  69. agno/integrations/discord/client.py +1 -0
  70. agno/knowledge/chunking/agentic.py +13 -10
  71. agno/knowledge/chunking/fixed.py +1 -1
  72. agno/knowledge/chunking/semantic.py +40 -8
  73. agno/knowledge/chunking/strategy.py +59 -15
  74. agno/knowledge/embedder/aws_bedrock.py +9 -4
  75. agno/knowledge/embedder/azure_openai.py +54 -0
  76. agno/knowledge/embedder/base.py +2 -0
  77. agno/knowledge/embedder/cohere.py +184 -5
  78. agno/knowledge/embedder/fastembed.py +1 -1
  79. agno/knowledge/embedder/google.py +79 -1
  80. agno/knowledge/embedder/huggingface.py +9 -4
  81. agno/knowledge/embedder/jina.py +63 -0
  82. agno/knowledge/embedder/mistral.py +78 -11
  83. agno/knowledge/embedder/nebius.py +1 -1
  84. agno/knowledge/embedder/ollama.py +13 -0
  85. agno/knowledge/embedder/openai.py +37 -65
  86. agno/knowledge/embedder/sentence_transformer.py +8 -4
  87. agno/knowledge/embedder/vllm.py +262 -0
  88. agno/knowledge/embedder/voyageai.py +69 -16
  89. agno/knowledge/knowledge.py +595 -187
  90. agno/knowledge/reader/base.py +9 -2
  91. agno/knowledge/reader/csv_reader.py +8 -10
  92. agno/knowledge/reader/docx_reader.py +5 -6
  93. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  94. agno/knowledge/reader/json_reader.py +6 -5
  95. agno/knowledge/reader/markdown_reader.py +13 -13
  96. agno/knowledge/reader/pdf_reader.py +43 -68
  97. agno/knowledge/reader/pptx_reader.py +101 -0
  98. agno/knowledge/reader/reader_factory.py +51 -6
  99. agno/knowledge/reader/s3_reader.py +3 -15
  100. agno/knowledge/reader/tavily_reader.py +194 -0
  101. agno/knowledge/reader/text_reader.py +13 -13
  102. agno/knowledge/reader/web_search_reader.py +2 -43
  103. agno/knowledge/reader/website_reader.py +43 -25
  104. agno/knowledge/reranker/__init__.py +3 -0
  105. agno/knowledge/types.py +9 -0
  106. agno/knowledge/utils.py +20 -0
  107. agno/media.py +339 -266
  108. agno/memory/manager.py +336 -82
  109. agno/models/aimlapi/aimlapi.py +2 -2
  110. agno/models/anthropic/claude.py +183 -37
  111. agno/models/aws/bedrock.py +52 -112
  112. agno/models/aws/claude.py +33 -1
  113. agno/models/azure/ai_foundry.py +33 -15
  114. agno/models/azure/openai_chat.py +25 -8
  115. agno/models/base.py +1011 -566
  116. agno/models/cerebras/cerebras.py +19 -13
  117. agno/models/cerebras/cerebras_openai.py +8 -5
  118. agno/models/cohere/chat.py +27 -1
  119. agno/models/cometapi/__init__.py +5 -0
  120. agno/models/cometapi/cometapi.py +57 -0
  121. agno/models/dashscope/dashscope.py +1 -0
  122. agno/models/deepinfra/deepinfra.py +2 -2
  123. agno/models/deepseek/deepseek.py +2 -2
  124. agno/models/fireworks/fireworks.py +2 -2
  125. agno/models/google/gemini.py +110 -37
  126. agno/models/groq/groq.py +28 -11
  127. agno/models/huggingface/huggingface.py +2 -1
  128. agno/models/internlm/internlm.py +2 -2
  129. agno/models/langdb/langdb.py +4 -4
  130. agno/models/litellm/chat.py +18 -1
  131. agno/models/litellm/litellm_openai.py +2 -2
  132. agno/models/llama_cpp/__init__.py +5 -0
  133. agno/models/llama_cpp/llama_cpp.py +22 -0
  134. agno/models/message.py +143 -4
  135. agno/models/meta/llama.py +27 -10
  136. agno/models/meta/llama_openai.py +5 -17
  137. agno/models/nebius/nebius.py +6 -6
  138. agno/models/nexus/__init__.py +3 -0
  139. agno/models/nexus/nexus.py +22 -0
  140. agno/models/nvidia/nvidia.py +2 -2
  141. agno/models/ollama/chat.py +60 -6
  142. agno/models/openai/chat.py +102 -43
  143. agno/models/openai/responses.py +103 -106
  144. agno/models/openrouter/openrouter.py +41 -3
  145. agno/models/perplexity/perplexity.py +4 -5
  146. agno/models/portkey/portkey.py +3 -3
  147. agno/models/requesty/__init__.py +5 -0
  148. agno/models/requesty/requesty.py +52 -0
  149. agno/models/response.py +81 -5
  150. agno/models/sambanova/sambanova.py +2 -2
  151. agno/models/siliconflow/__init__.py +5 -0
  152. agno/models/siliconflow/siliconflow.py +25 -0
  153. agno/models/together/together.py +2 -2
  154. agno/models/utils.py +254 -8
  155. agno/models/vercel/v0.py +2 -2
  156. agno/models/vertexai/__init__.py +0 -0
  157. agno/models/vertexai/claude.py +96 -0
  158. agno/models/vllm/vllm.py +1 -0
  159. agno/models/xai/xai.py +3 -2
  160. agno/os/app.py +543 -175
  161. agno/os/auth.py +24 -14
  162. agno/os/config.py +1 -0
  163. agno/os/interfaces/__init__.py +1 -0
  164. agno/os/interfaces/a2a/__init__.py +3 -0
  165. agno/os/interfaces/a2a/a2a.py +42 -0
  166. agno/os/interfaces/a2a/router.py +250 -0
  167. agno/os/interfaces/a2a/utils.py +924 -0
  168. agno/os/interfaces/agui/agui.py +23 -7
  169. agno/os/interfaces/agui/router.py +27 -3
  170. agno/os/interfaces/agui/utils.py +242 -142
  171. agno/os/interfaces/base.py +6 -2
  172. agno/os/interfaces/slack/router.py +81 -23
  173. agno/os/interfaces/slack/slack.py +29 -14
  174. agno/os/interfaces/whatsapp/router.py +11 -4
  175. agno/os/interfaces/whatsapp/whatsapp.py +14 -7
  176. agno/os/mcp.py +111 -54
  177. agno/os/middleware/__init__.py +7 -0
  178. agno/os/middleware/jwt.py +233 -0
  179. agno/os/router.py +556 -139
  180. agno/os/routers/evals/evals.py +71 -34
  181. agno/os/routers/evals/schemas.py +31 -31
  182. agno/os/routers/evals/utils.py +6 -5
  183. agno/os/routers/health.py +31 -0
  184. agno/os/routers/home.py +52 -0
  185. agno/os/routers/knowledge/knowledge.py +185 -38
  186. agno/os/routers/knowledge/schemas.py +82 -22
  187. agno/os/routers/memory/memory.py +158 -53
  188. agno/os/routers/memory/schemas.py +20 -16
  189. agno/os/routers/metrics/metrics.py +20 -8
  190. agno/os/routers/metrics/schemas.py +16 -16
  191. agno/os/routers/session/session.py +499 -38
  192. agno/os/schema.py +308 -198
  193. agno/os/utils.py +401 -41
  194. agno/reasoning/anthropic.py +80 -0
  195. agno/reasoning/azure_ai_foundry.py +2 -2
  196. agno/reasoning/deepseek.py +2 -2
  197. agno/reasoning/default.py +3 -1
  198. agno/reasoning/gemini.py +73 -0
  199. agno/reasoning/groq.py +2 -2
  200. agno/reasoning/ollama.py +2 -2
  201. agno/reasoning/openai.py +7 -2
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +266 -112
  205. agno/run/base.py +53 -24
  206. agno/run/team.py +252 -111
  207. agno/run/workflow.py +156 -45
  208. agno/session/agent.py +105 -89
  209. agno/session/summary.py +65 -25
  210. agno/session/team.py +176 -96
  211. agno/session/workflow.py +406 -40
  212. agno/team/team.py +3854 -1692
  213. agno/tools/brightdata.py +3 -3
  214. agno/tools/cartesia.py +3 -5
  215. agno/tools/dalle.py +9 -8
  216. agno/tools/decorator.py +4 -2
  217. agno/tools/desi_vocal.py +2 -2
  218. agno/tools/duckduckgo.py +15 -11
  219. agno/tools/e2b.py +20 -13
  220. agno/tools/eleven_labs.py +26 -28
  221. agno/tools/exa.py +21 -16
  222. agno/tools/fal.py +4 -4
  223. agno/tools/file.py +153 -23
  224. agno/tools/file_generation.py +350 -0
  225. agno/tools/firecrawl.py +4 -4
  226. agno/tools/function.py +257 -37
  227. agno/tools/giphy.py +2 -2
  228. agno/tools/gmail.py +238 -14
  229. agno/tools/google_drive.py +270 -0
  230. agno/tools/googlecalendar.py +36 -8
  231. agno/tools/googlesheets.py +20 -5
  232. agno/tools/jira.py +20 -0
  233. agno/tools/knowledge.py +3 -3
  234. agno/tools/lumalab.py +3 -3
  235. agno/tools/mcp/__init__.py +10 -0
  236. agno/tools/mcp/mcp.py +331 -0
  237. agno/tools/mcp/multi_mcp.py +347 -0
  238. agno/tools/mcp/params.py +24 -0
  239. agno/tools/mcp_toolbox.py +284 -0
  240. agno/tools/mem0.py +11 -17
  241. agno/tools/memori.py +1 -53
  242. agno/tools/memory.py +419 -0
  243. agno/tools/models/azure_openai.py +2 -2
  244. agno/tools/models/gemini.py +3 -3
  245. agno/tools/models/groq.py +3 -5
  246. agno/tools/models/nebius.py +7 -7
  247. agno/tools/models_labs.py +25 -15
  248. agno/tools/notion.py +204 -0
  249. agno/tools/openai.py +4 -9
  250. agno/tools/opencv.py +3 -3
  251. agno/tools/parallel.py +314 -0
  252. agno/tools/replicate.py +7 -7
  253. agno/tools/scrapegraph.py +58 -31
  254. agno/tools/searxng.py +2 -2
  255. agno/tools/serper.py +2 -2
  256. agno/tools/slack.py +18 -3
  257. agno/tools/spider.py +2 -2
  258. agno/tools/tavily.py +146 -0
  259. agno/tools/whatsapp.py +1 -1
  260. agno/tools/workflow.py +278 -0
  261. agno/tools/yfinance.py +12 -11
  262. agno/utils/agent.py +820 -0
  263. agno/utils/audio.py +27 -0
  264. agno/utils/common.py +90 -1
  265. agno/utils/events.py +222 -7
  266. agno/utils/gemini.py +181 -23
  267. agno/utils/hooks.py +57 -0
  268. agno/utils/http.py +111 -0
  269. agno/utils/knowledge.py +12 -5
  270. agno/utils/log.py +1 -0
  271. agno/utils/mcp.py +95 -5
  272. agno/utils/media.py +188 -10
  273. agno/utils/merge_dict.py +22 -1
  274. agno/utils/message.py +60 -0
  275. agno/utils/models/claude.py +40 -11
  276. agno/utils/models/cohere.py +1 -1
  277. agno/utils/models/watsonx.py +1 -1
  278. agno/utils/openai.py +1 -1
  279. agno/utils/print_response/agent.py +105 -21
  280. agno/utils/print_response/team.py +103 -38
  281. agno/utils/print_response/workflow.py +251 -34
  282. agno/utils/reasoning.py +22 -1
  283. agno/utils/serialize.py +32 -0
  284. agno/utils/streamlit.py +16 -10
  285. agno/utils/string.py +41 -0
  286. agno/utils/team.py +98 -9
  287. agno/utils/tools.py +1 -1
  288. agno/vectordb/base.py +23 -4
  289. agno/vectordb/cassandra/cassandra.py +65 -9
  290. agno/vectordb/chroma/chromadb.py +182 -38
  291. agno/vectordb/clickhouse/clickhousedb.py +64 -11
  292. agno/vectordb/couchbase/couchbase.py +105 -10
  293. agno/vectordb/lancedb/lance_db.py +183 -135
  294. agno/vectordb/langchaindb/langchaindb.py +25 -7
  295. agno/vectordb/lightrag/lightrag.py +17 -3
  296. agno/vectordb/llamaindex/__init__.py +3 -0
  297. agno/vectordb/llamaindex/llamaindexdb.py +46 -7
  298. agno/vectordb/milvus/milvus.py +126 -9
  299. agno/vectordb/mongodb/__init__.py +7 -1
  300. agno/vectordb/mongodb/mongodb.py +112 -7
  301. agno/vectordb/pgvector/pgvector.py +142 -21
  302. agno/vectordb/pineconedb/pineconedb.py +80 -8
  303. agno/vectordb/qdrant/qdrant.py +125 -39
  304. agno/vectordb/redis/__init__.py +9 -0
  305. agno/vectordb/redis/redisdb.py +694 -0
  306. agno/vectordb/singlestore/singlestore.py +111 -25
  307. agno/vectordb/surrealdb/surrealdb.py +31 -5
  308. agno/vectordb/upstashdb/upstashdb.py +76 -8
  309. agno/vectordb/weaviate/weaviate.py +86 -15
  310. agno/workflow/__init__.py +2 -0
  311. agno/workflow/agent.py +299 -0
  312. agno/workflow/condition.py +112 -18
  313. agno/workflow/loop.py +69 -10
  314. agno/workflow/parallel.py +266 -118
  315. agno/workflow/router.py +110 -17
  316. agno/workflow/step.py +645 -136
  317. agno/workflow/steps.py +65 -6
  318. agno/workflow/types.py +71 -33
  319. agno/workflow/workflow.py +2113 -300
  320. agno-2.3.0.dist-info/METADATA +618 -0
  321. agno-2.3.0.dist-info/RECORD +577 -0
  322. agno-2.3.0.dist-info/licenses/LICENSE +201 -0
  323. agno/knowledge/reader/url_reader.py +0 -128
  324. agno/tools/googlesearch.py +0 -98
  325. agno/tools/mcp.py +0 -610
  326. agno/utils/models/aws_claude.py +0 -170
  327. agno-2.0.0rc2.dist-info/METADATA +0 -355
  328. agno-2.0.0rc2.dist-info/RECORD +0 -515
  329. agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
  330. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
  331. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
@@ -1,75 +1,532 @@
1
1
  """Migration utility to migrate your Agno tables from v1 to v2"""
2
2
 
3
- from typing import Any, Dict, List, Optional, Union
3
+ import gc
4
+ import json
5
+ from typing import Any, Dict, List, Optional, Union, cast
4
6
 
5
7
  from sqlalchemy import text
6
8
 
7
- from agno.db.mysql.mysql import MySQLDb
8
- from agno.db.postgres.postgres import PostgresDb
9
+ from agno.db.base import BaseDb
9
10
  from agno.db.schemas.memory import UserMemory
10
- from agno.db.sqlite.sqlite import SqliteDb
11
11
  from agno.session import AgentSession, TeamSession, WorkflowSession
12
- from agno.utils.log import log_error
12
+ from agno.utils.log import log_error, log_info, log_warning
13
+
14
+
15
def convert_v1_metrics_to_v2(metrics_dict: Dict[str, Any]) -> Dict[str, Any]:
    """Return a v2-shaped copy of a v1 metrics dictionary.

    Legacy keys are renamed to their v2 names and keys deprecated in v2 are
    dropped. The input dictionary is not mutated.

    Args:
        metrics_dict: The v1 metrics mapping. Any non-dict value is returned
            unchanged.

    Returns:
        A shallow copy of ``metrics_dict`` with the v2 key names.
    """
    if not isinstance(metrics_dict, dict):
        return metrics_dict

    # (v1 key, v2 key) pairs, applied in this order.
    renames = (
        ("time", "duration"),
        ("audio_tokens", "audio_total_tokens"),
        ("input_audio_tokens", "audio_input_tokens"),
        ("output_audio_tokens", "audio_output_tokens"),
        ("cached_tokens", "cache_read_tokens"),
    )

    # Keys that are removed outright (deprecated in v2).
    dropped_keys = (
        "prompt_tokens",
        "completion_tokens",
        "prompt_tokens_details",
        "completion_tokens_details",
    )

    # Work on a shallow copy so the caller's dictionary stays untouched.
    converted = metrics_dict.copy()

    for legacy_key, current_key in renames:
        if legacy_key in converted:
            converted[current_key] = converted.pop(legacy_key)

    for stale_key in dropped_keys:
        converted.pop(stale_key, None)

    return converted
45
+
46
+
47
def convert_any_metrics_in_data(data: Any) -> Any:
    """Walk nested dicts/lists and apply the v1 -> v2 field and metrics conversions.

    Lists are converted element by element. Every dict is first passed through
    ``convert_v1_fields_to_v2``; if the result looks like a metrics dict it is
    converted as a whole, otherwise each value is processed recursively. A dict
    stored under a ``"metrics"`` key is always converted as metrics. Any other
    value is returned unchanged.
    """
    if isinstance(data, list):
        return [convert_any_metrics_in_data(element) for element in data]

    if not isinstance(data, dict):
        # Scalars and other types need no conversion.
        return data

    # Field-level conversion comes first (extra_data extraction,
    # thinking/reasoning_content consolidation, ...).
    normalized = convert_v1_fields_to_v2(data)

    if _is_metrics_dict(normalized):
        return convert_v1_metrics_to_v2(normalized)

    return {
        key: (
            # A dict under a 'metrics' key is always treated as metrics.
            convert_v1_metrics_to_v2(value)
            if key == "metrics" and isinstance(value, dict)
            else convert_any_metrics_in_data(value)
        )
        for key, value in normalized.items()
    }
73
+
74
+
75
+ def _is_metrics_dict(data: Dict[str, Any]) -> bool:
76
+ """Check if a dictionary looks like a metrics dictionary based on common field names."""
77
+ if not isinstance(data, dict):
78
+ return False
79
+
80
+ # Common metrics field names (both v1 and v2)
81
+ metrics_indicators = {
82
+ "input_tokens",
83
+ "output_tokens",
84
+ "total_tokens",
85
+ "time",
86
+ "duration",
87
+ "audio_tokens",
88
+ "audio_total_tokens",
89
+ "audio_input_tokens",
90
+ "audio_output_tokens",
91
+ "cached_tokens",
92
+ "cache_read_tokens",
93
+ "cache_write_tokens",
94
+ "reasoning_tokens",
95
+ "prompt_tokens",
96
+ "completion_tokens",
97
+ "time_to_first_token",
98
+ "provider_metrics",
99
+ "additional_metrics",
100
+ }
101
+
102
+ # Deprecated v1 fields that are strong indicators this is a metrics dict
103
+ deprecated_v1_indicators = {"time", "audio_tokens", "cached_tokens", "prompt_tokens", "completion_tokens"}
104
+
105
+ # If we find any deprecated v1 field, it's definitely a metrics dict that needs conversion
106
+ if any(field in data for field in deprecated_v1_indicators):
107
+ return True
108
+
109
+ # Otherwise, if the dict has at least 2 metrics-related fields, consider it a metrics dict
110
+ matching_fields = sum(1 for field in data.keys() if field in metrics_indicators)
111
+ return matching_fields >= 2
112
+
113
+
114
def convert_session_data_comprehensively(session_data: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Convert a session-data payload from v1 to v2 format.

    Empty or missing payloads are returned unchanged; everything else is
    delegated to the recursive converter, which handles metrics conversion
    and field mapping.
    """
    if session_data:
        return convert_any_metrics_in_data(session_data)
    return session_data
121
+
122
+
123
def safe_get_runs_from_memory(memory_data: Any) -> Any:
    """Extract the ``runs`` entry from a v1 memory field and normalise each run in place.

    Args:
        memory_data: The v1 memory field. Handled forms are a dict, a JSON
            string, or None.

    Returns:
        * None when ``memory_data`` is None or a string that is not valid JSON.
        * The value stored under ``"runs"`` (possibly None) when the payload,
          or the decoded JSON, is a dict.
        * An empty list for any other payload.

    Run dicts are adjusted in place: agent runs lose ``team_id`` and get
    ``team_session_id`` renamed to ``session_id`` and ``event`` wrapped into
    ``events``; team runs lose ``agent_id`` and have their member responses
    adjusted the same way. Entries that are not dicts are left untouched
    instead of raising.
    """
    if memory_data is None:
        return None

    runs: Any = []

    if isinstance(memory_data, str):
        # The memory field may be stored as a JSON-encoded string.
        try:
            memory_dict = json.loads(memory_data)
        except (json.JSONDecodeError, AttributeError):
            # Not JSON: memory_data is just a plain string value.
            return None
        if isinstance(memory_dict, dict):
            runs = memory_dict.get("runs")
    elif isinstance(memory_data, dict):
        runs = memory_data.get("runs")

    # Only a sequence of runs can be normalised; anything else is returned as found.
    if not isinstance(runs, (list, tuple)):
        return runs

    for run in runs:
        if not isinstance(run, dict):
            # Malformed entry: skip rather than crash the whole migration.
            continue

        # Adjust fields mapping for Agent sessions
        if run.get("agent_id") is not None:
            if run.get("team_id") is not None:
                run.pop("team_id")
            if run.get("team_session_id") is not None:
                run["session_id"] = run.pop("team_session_id")
            if run.get("event"):
                run["events"] = [run.pop("event")]

        # Adjust fields mapping for Team sessions. An agent run that carried a
        # team_id had it removed above, so it does not enter this branch.
        if run.get("team_id") is not None:
            if run.get("agent_id") is not None:
                run.pop("agent_id")
            member_responses = run.get("member_responses")
            if isinstance(member_responses, (list, tuple)):
                for response in member_responses:
                    if not isinstance(response, dict) or response.get("agent_id") is None:
                        continue
                    if response.get("team_id") is not None:
                        response.pop("team_id")
                    if response.get("team_session_id") is not None:
                        response["session_id"] = response.pop("team_session_id")

    return runs
167
+
168
+
169
def convert_v1_media_to_v2(media_data: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a v1 media object (image/video/audio dict) to the v2 layout.

    Args:
        media_data: The v1 media dictionary. A non-dict value is returned
            unchanged.

    Returns:
        A shallow copy of ``media_data`` in which:

        * ``id`` is set to a fresh UUID string when missing or None;
        * ``length`` is removed and, when it is a non-zero number or a numeric
          string, stored as ``duration`` (only if ``eta`` or ``length`` is present);
        * ``base64_audio`` is removed and, when non-empty, stored as ``content``;
        * string ``content`` accompanied by ``transcript`` is base64-decoded
          to bytes when decoding succeeds;
        * ``mime_type`` is derived from a recognised string ``format`` when
          no ``mime_type`` key exists.
    """
    if not isinstance(media_data, dict):
        return media_data

    # Work on a shallow copy so the caller's dictionary stays untouched.
    v2_media = media_data.copy()

    # Add id if missing (required in v2)
    if v2_media.get("id") is None:
        from uuid import uuid4

        v2_media["id"] = str(uuid4())

    # Handle VideoArtifact -> Video conversion
    if "eta" in v2_media or "length" in v2_media:
        length = v2_media.pop("length", None)
        if length and isinstance(length, (int, float)):
            v2_media["duration"] = length
        elif length and isinstance(length, str):
            try:
                v2_media["duration"] = float(length)
            except ValueError:
                # Not numeric: no duration is set. The value was already
                # popped above, so the unparseable length is dropped.
                pass

    # Handle AudioArtifact -> Audio conversion
    if "base64_audio" in v2_media:
        base64_audio = v2_media.pop("base64_audio", None)
        if base64_audio:
            v2_media["content"] = base64_audio

    # Handle AudioResponse content conversion (base64 string to bytes if needed)
    if "transcript" in v2_media and "content" in v2_media:
        content = v2_media.get("content")
        if content and isinstance(content, str):
            import base64

            try:
                # Lenient decoding (validate=False): characters outside the
                # base64 alphabet are discarded before decoding.
                v2_media["content"] = base64.b64decode(content)
            except ValueError:
                # binascii.Error is a ValueError subclass; non-ASCII text also
                # raises ValueError. In both cases keep the string as it was.
                pass

    # Derive mime_type from format for common types
    if "format" in v2_media and "mime_type" not in v2_media:
        format_val = v2_media["format"]
        # Guard against non-string formats, which have no .lower().
        if format_val and isinstance(format_val, str):
            mime_type_map = {
                "mp4": "video/mp4",
                "mov": "video/quicktime",
                "avi": "video/x-msvideo",
                "webm": "video/webm",
                "mp3": "audio/mpeg",
                "wav": "audio/wav",
                "ogg": "audio/ogg",
                "png": "image/png",
                "jpg": "image/jpeg",
                "jpeg": "image/jpeg",
                "gif": "image/gif",
                "webp": "image/webp",
            }
            mime_type = mime_type_map.get(format_val.lower())
            if mime_type is not None:
                v2_media["mime_type"] = mime_type

    return v2_media
238
+
239
+
240
def convert_v1_fields_to_v2(data: Dict[str, Any]) -> Dict[str, Any]:
    """Map the v1 fields of a single dictionary onto their v2 equivalents.

    Works on a shallow copy of ``data`` (non-dict values are returned as-is):

    * selected ``extra_data`` entries are lifted to top-level keys and
      ``extra_data`` itself is removed;
    * ``thinking`` is merged into ``reasoning_content`` and removed;
    * media entries under ``images``/``videos``/``audio``/``response_audio``
      are passed through ``convert_v1_media_to_v2``;
    * fields deprecated in v2 are dropped.
    """
    if not isinstance(data, dict):
        return data

    converted = data.copy()

    # Lift selected extra_data entries to their v2 top-level locations.
    extra = converted.get("extra_data")
    if extra and isinstance(extra, dict):
        lifted_keys = (
            ("add_messages", "additional_input"),
            ("references", "references"),
            ("reasoning_steps", "reasoning_steps"),
            ("reasoning_content", "reasoning_content"),
            ("reasoning_messages", "reasoning_messages"),
        )
        for source_key, target_key in lifted_keys:
            if source_key in extra:
                converted[target_key] = extra[source_key]

    # Both v1 'thinking' and 'reasoning_content' end up in v2 'reasoning_content'
    # (thinking first when both are present).
    thinking = converted.get("thinking")
    if thinking:
        existing_reasoning = converted.get("reasoning_content")
        if existing_reasoning:
            converted["reasoning_content"] = f"{thinking}\n{existing_reasoning}"
        else:
            converted["reasoning_content"] = thinking
    converted.pop("thinking", None)

    # Convert media objects, whether stored as a list or as a single dict.
    for media_key in ("images", "videos", "audio", "response_audio"):
        media_value = converted.get(media_key)
        if not media_value:
            continue
        if isinstance(media_value, list):
            converted[media_key] = [
                convert_v1_media_to_v2(entry) if isinstance(entry, dict) else entry for entry in media_value
            ]
        elif isinstance(media_value, dict):
            converted[media_key] = convert_v1_media_to_v2(media_value)

    # extra_data has been mined above; the remaining names are v1-only fields
    # (team_session_id and formatted_tool_calls come from the v1 RunOutput).
    for removed_key in ("extra_data", "team_session_id", "formatted_tool_calls", "event", "events"):
        converted.pop(removed_key, None)

    return converted
13
313
 
14
314
 
15
315
def migrate(
    db: BaseDb,
    v1_db_schema: str,
    agent_sessions_table_name: Optional[str] = None,
    team_sessions_table_name: Optional[str] = None,
    workflow_sessions_table_name: Optional[str] = None,
    memories_table_name: Optional[str] = None,
    batch_size: int = 5000,
):
    """Migrate the given v1 tables/collections into the corresponding v2 tables/collections.

    Each table whose name is provided is migrated in batches, in this order:
    agent sessions, team sessions, workflow sessions, memories.

    Args:
        db: The database to migrate (PostgresDb, MySQLDb, SqliteDb, or MongoDb)
        v1_db_schema: The schema of the v1 tables (leave empty for SQLite and MongoDB)
        agent_sessions_table_name: The name of the agent sessions table/collection. If not provided, agent sessions will not be migrated.
        team_sessions_table_name: The name of the team sessions table/collection. If not provided, team sessions will not be migrated.
        workflow_sessions_table_name: The name of the workflow sessions table/collection. If not provided, workflow sessions will not be migrated.
        memories_table_name: The name of the memories table/collection. If not provided, memories will not be migrated.
        batch_size: Number of records to process in each batch (default: 5000)
    """
    # (v1 table name, v1 table type) pairs in migration order.
    migration_plan = (
        (agent_sessions_table_name, "agent_sessions"),
        (team_sessions_table_name, "team_sessions"),
        (workflow_sessions_table_name, "workflow_sessions"),
        (memories_table_name, "memories"),
    )

    for table_name, table_type in migration_plan:
        if not table_name:
            # No name supplied for this table type: nothing to migrate.
            continue
        migrate_table_in_batches(
            db=db,
            v1_db_schema=v1_db_schema,
            v1_table_name=table_name,
            v1_table_type=table_type,
            batch_size=batch_size,
        )
61
370
 
62
371
 
63
- def get_all_table_content(db, db_schema: str, table_name: str) -> list[dict[str, Any]]:
64
- """Get all content from the given table"""
372
def migrate_table_in_batches(
    db: BaseDb,
    v1_db_schema: str,
    v1_table_name: str,
    v1_table_type: str,
    batch_size: int = 5000,
):
    """Migrate one v1 table/collection to v2, reading and upserting it batch by batch.

    Args:
        db: The database holding the v1 data and receiving the v2 records.
        v1_db_schema: Schema of the v1 table, passed through to the batch reader.
        v1_table_name: Name of the v1 table/collection to read.
        v1_table_type: One of "agent_sessions", "team_sessions",
            "workflow_sessions" or "memories".
        batch_size: Number of records requested per batch.

    Raises:
        ValueError: If ``v1_table_type`` is not one of the supported types.
            This is checked before any data is read, so an invalid type is
            reported even when the source table is empty.
    """
    session_parsers = {
        "agent_sessions": parse_agent_sessions,
        "team_sessions": parse_team_sessions,
        "workflow_sessions": parse_workflow_sessions,
    }
    is_memories = v1_table_type == "memories"

    # Fail fast: reject an unknown table type before touching the database.
    if not is_memories and v1_table_type not in session_parsers:
        raise ValueError(f"Invalid table type: {v1_table_type}")

    log_info(f"Starting migration of table {v1_table_name} (type: {v1_table_type}) with batch size {batch_size}")

    total_migrated = 0
    batch_count = 0

    for batch_content in get_table_content_in_batches(db, v1_db_schema, v1_table_name, batch_size):
        batch_count += 1
        batch_size_actual = len(batch_content)
        log_info(f"Processing batch {batch_count} with {batch_size_actual} records from table {v1_table_name}")

        # Parse the content into the new format
        if is_memories:
            records: Any = parse_memories(batch_content)
        else:
            records = session_parsers[v1_table_type](batch_content)

        # Insert the batch into the new table
        if records:
            # Clear any existing scoped session state for SQL databases to prevent transaction conflicts
            if hasattr(db, "Session"):
                db.Session.remove()  # type: ignore

            if is_memories:
                db.upsert_memories(records, preserve_updated_at=True)
                total_migrated += len(records)
                log_info(f"Bulk upserted {len(records)} memories in batch {batch_count}")
            else:
                db.upsert_sessions(records, preserve_updated_at=True)  # type: ignore
                total_migrated += len(records)
                log_info(f"Bulk upserted {len(records)} sessions in batch {batch_count}")

        log_info(f"Completed batch {batch_count}: migrated {batch_size_actual} records")

        # Explicit cleanup to free memory before next batch
        del batch_content, records

        # Force a garbage collection pass between batches; the original author
        # references https://github.com/sqlalchemy/sqlalchemy/issues/4616 for
        # memory being retained after large operations.
        gc.collect()

    log_info(f"✅ Migration completed for table {v1_table_name}: {total_migrated} total records migrated")
440
+
441
+
442
def get_table_content_in_batches(db: BaseDb, db_schema: str, table_name: str, batch_size: int = 5000):
    """Yield the content of a table/collection as lists of dicts, to avoid memory issues with large tables.

    Args:
        db: A MongoDb, PostgresDb, MySQLDb or SqliteDb instance (matched by class name).
        db_schema: Schema prefix for SQL tables; an empty/blank value omits the prefix
            (needed for SQLite). Not used for MongoDB.
        table_name: Table (SQL) or collection (MongoDB) to read.
        batch_size: Maximum number of rows per yielded batch; must be positive.

    Yields:
        Lists of at most ``batch_size`` row dicts.

    Any exception raised while reading (including an unsupported database type or a
    non-positive ``batch_size``) is logged with log_error and ends the iteration; it is
    not propagated to the caller.
    """
    try:
        # A non-positive size would silently yield nothing, yield one-document batches,
        # or never terminate depending on the backend, so reject it explicitly.
        if batch_size <= 0:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        # Backends are matched by class name and imported inside each branch
        # (presumably so optional drivers are only imported when actually used).
        if type(db).__name__ == "MongoDb":
            from agno.db.mongo.mongo import MongoDb

            db = cast(MongoDb, db)

            # MongoDB implementation with cursor and batching
            collection = db.database[table_name]
            cursor = collection.find({}).batch_size(batch_size)

            batch = []
            for doc in cursor:
                # Convert ObjectId to string for compatibility
                if "_id" in doc:
                    doc["_id"] = str(doc["_id"])
                batch.append(doc)

                if len(batch) >= batch_size:
                    yield batch
                    batch = []

            # Yield remaining items
            if batch:
                yield batch
        else:
            # SQL database implementations (PostgresDb, MySQLDb, SqliteDb)
            if type(db).__name__ == "PostgresDb":
                from agno.db.postgres.postgres import PostgresDb

                db = cast(PostgresDb, db)

            elif type(db).__name__ == "MySQLDb":
                from agno.db.mysql.mysql import MySQLDb

                db = cast(MySQLDb, db)

            elif type(db).__name__ == "SqliteDb":
                from agno.db.sqlite.sqlite import SqliteDb

                db = cast(SqliteDb, db)

            else:
                raise ValueError(f"Invalid database type: {type(db).__name__}")

            # Handle empty schema by omitting the schema prefix (needed for SQLite).
            # Identifiers cannot be bound as parameters, so db_schema/table_name are
            # interpolated as-is and must come from trusted configuration.
            if db_schema and db_schema.strip():
                table_ref = f"{db_schema}.{table_name}"
            else:
                table_ref = table_name

            # NOTE(review): LIMIT/OFFSET is used without ORDER BY, so SQL does not guarantee
            # a stable row order between pages; rows could be skipped or repeated. No ordering
            # column is known here - confirm the v1 primary key and add an ORDER BY on it.
            offset = 0
            while True:
                sql_query = f"SELECT * FROM {table_ref} LIMIT {batch_size} OFFSET {offset}"

                # Create a new session for each batch to avoid transaction conflicts, and
                # close it before yielding so no read session stays open while the
                # consumer writes the batch.
                with db.Session() as sess:
                    result = sess.execute(text(sql_query))
                    batch = [row._asdict() for row in result]

                if not batch:
                    break

                fetched = len(batch)
                yield batch
                # Drop our reference so the consumed batch can be freed before the next fetch
                del batch

                # If batch is smaller than batch_size, we've reached the end
                if fetched < batch_size:
                    break
                offset += batch_size

    except Exception as e:
        log_error(f"Error getting batched content from table/collection {table_name}: {e}")
        return
514
+
515
+
516
def get_all_table_content(db, db_schema: str, table_name: str) -> list[dict[str, Any]]:
    """Return every row of the given table/collection as a single list.

    Legacy helper kept for backward compatibility. It drains
    get_table_content_in_batches() (with its default batch size) into one list, so the
    whole table ends up in memory; prefer iterating the batches directly for large tables.
    """
    log_warning(
        f"Loading entire table {table_name} into memory. Consider using get_table_content_in_batches() for large tables, or if you experience any complication."
    )

    # Flatten the batches into one list, preserving the order in which rows were read
    return [row for rows in get_table_content_in_batches(db, db_schema, table_name) for row in rows]
73
530
 
74
531
 
75
532
  def parse_agent_sessions(v1_content: List[Dict[str, Any]]) -> List[AgentSession]:
@@ -82,13 +539,19 @@ def parse_agent_sessions(v1_content: List[Dict[str, Any]]) -> List[AgentSession]
82
539
  "agent_data": item.get("agent_data"),
83
540
  "session_id": item.get("session_id"),
84
541
  "user_id": item.get("user_id"),
85
- "session_data": item.get("session_data"),
86
- "metadata": item.get("extra_data"),
87
- "runs": item.get("memory", {}).get("runs"),
542
+ "session_data": convert_session_data_comprehensively(item.get("session_data")),
543
+ "metadata": convert_any_metrics_in_data(item.get("extra_data")),
544
+ "runs": convert_any_metrics_in_data(safe_get_runs_from_memory(item.get("memory"))),
88
545
  "created_at": item.get("created_at"),
89
546
  "updated_at": item.get("updated_at"),
90
547
  }
91
- agent_session = AgentSession.from_dict(session)
548
+
549
+ try:
550
+ agent_session = AgentSession.from_dict(session)
551
+ except Exception as e:
552
+ log_error(f"Error parsing agent session: {e}. This is the complete session that failed: {session}")
553
+ continue
554
+
92
555
  if agent_session is not None:
93
556
  sessions_v2.append(agent_session)
94
557
 
@@ -105,13 +568,18 @@ def parse_team_sessions(v1_content: List[Dict[str, Any]]) -> List[TeamSession]:
105
568
  "team_data": item.get("team_data"),
106
569
  "session_id": item.get("session_id"),
107
570
  "user_id": item.get("user_id"),
108
- "session_data": item.get("session_data"),
109
- "metadata": item.get("extra_data"),
110
- "runs": item.get("memory", {}).get("runs"),
571
+ "session_data": convert_session_data_comprehensively(item.get("session_data")),
572
+ "metadata": convert_any_metrics_in_data(item.get("extra_data")),
573
+ "runs": convert_any_metrics_in_data(safe_get_runs_from_memory(item.get("memory"))),
111
574
  "created_at": item.get("created_at"),
112
575
  "updated_at": item.get("updated_at"),
113
576
  }
114
- team_session = TeamSession.from_dict(session)
577
+ try:
578
+ team_session = TeamSession.from_dict(session)
579
+ except Exception as e:
580
+ log_error(f"Error parsing team session: {e}. This is the complete session that failed: {session}")
581
+ continue
582
+
115
583
  if team_session is not None:
116
584
  sessions_v2.append(team_session)
117
585
 
@@ -128,15 +596,20 @@ def parse_workflow_sessions(v1_content: List[Dict[str, Any]]) -> List[WorkflowSe
128
596
  "workflow_data": item.get("workflow_data"),
129
597
  "session_id": item.get("session_id"),
130
598
  "user_id": item.get("user_id"),
131
- "session_data": item.get("session_data"),
132
- "metadata": item.get("extra_data"),
599
+ "session_data": convert_session_data_comprehensively(item.get("session_data")),
600
+ "metadata": convert_any_metrics_in_data(item.get("extra_data")),
133
601
  "created_at": item.get("created_at"),
134
602
  "updated_at": item.get("updated_at"),
135
603
  # Workflow v2 specific fields
136
604
  "workflow_name": item.get("workflow_name"),
137
- "runs": item.get("runs"),
605
+ "runs": convert_any_metrics_in_data(item.get("runs")),
138
606
  }
139
- workflow_session = WorkflowSession.from_dict(session)
607
+ try:
608
+ workflow_session = WorkflowSession.from_dict(session)
609
+ except Exception as e:
610
+ log_error(f"Error parsing workflow session: {e}. This is the complete session that failed: {session}")
611
+ continue
612
+
140
613
  if workflow_session is not None:
141
614
  sessions_v2.append(workflow_session)
142
615
 
File without changes