agno 2.0.7__py3-none-any.whl → 2.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +83 -51
- agno/db/base.py +14 -0
- agno/db/dynamo/dynamo.py +107 -27
- agno/db/firestore/firestore.py +109 -33
- agno/db/gcs_json/gcs_json_db.py +100 -20
- agno/db/in_memory/in_memory_db.py +95 -20
- agno/db/json/json_db.py +101 -21
- agno/db/migrations/v1_to_v2.py +322 -47
- agno/db/mongo/mongo.py +251 -26
- agno/db/mysql/mysql.py +307 -6
- agno/db/postgres/postgres.py +279 -33
- agno/db/redis/redis.py +99 -22
- agno/db/singlestore/singlestore.py +319 -38
- agno/db/sqlite/sqlite.py +339 -23
- agno/knowledge/embedder/sentence_transformer.py +3 -3
- agno/knowledge/knowledge.py +152 -31
- agno/knowledge/types.py +8 -0
- agno/models/anthropic/claude.py +0 -20
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/google/gemini.py +4 -8
- agno/models/huggingface/huggingface.py +2 -1
- agno/models/ollama/chat.py +52 -3
- agno/models/openai/chat.py +9 -7
- agno/models/openai/responses.py +21 -17
- agno/os/interfaces/agui/agui.py +2 -2
- agno/os/interfaces/agui/utils.py +81 -18
- agno/os/interfaces/base.py +2 -0
- agno/os/interfaces/slack/router.py +50 -10
- agno/os/interfaces/slack/slack.py +6 -4
- agno/os/interfaces/whatsapp/router.py +7 -4
- agno/os/interfaces/whatsapp/whatsapp.py +2 -2
- agno/os/router.py +18 -0
- agno/os/utils.py +10 -2
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +3 -1
- agno/reasoning/groq.py +2 -2
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +2 -2
- agno/run/base.py +15 -2
- agno/session/agent.py +8 -5
- agno/session/team.py +14 -10
- agno/team/team.py +218 -111
- agno/tools/function.py +43 -4
- agno/tools/mcp.py +60 -37
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/scrapegraph.py +58 -31
- agno/tools/whatsapp.py +1 -1
- agno/utils/gemini.py +147 -19
- agno/utils/models/claude.py +9 -0
- agno/utils/print_response/agent.py +18 -2
- agno/utils/print_response/team.py +22 -6
- agno/utils/reasoning.py +22 -1
- agno/utils/string.py +9 -0
- agno/vectordb/base.py +2 -2
- agno/vectordb/langchaindb/langchaindb.py +5 -7
- agno/vectordb/llamaindex/llamaindexdb.py +25 -6
- agno/workflow/workflow.py +30 -15
- {agno-2.0.7.dist-info → agno-2.0.9.dist-info}/METADATA +4 -1
- {agno-2.0.7.dist-info → agno-2.0.9.dist-info}/RECORD +64 -61
- {agno-2.0.7.dist-info → agno-2.0.9.dist-info}/WHEEL +0 -0
- {agno-2.0.7.dist-info → agno-2.0.9.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.7.dist-info → agno-2.0.9.dist-info}/top_level.txt +0 -0
agno/db/migrations/v1_to_v2.py
CHANGED
@@ -11,7 +11,7 @@ from agno.db.postgres.postgres import PostgresDb
 from agno.db.schemas.memory import UserMemory
 from agno.db.sqlite.sqlite import SqliteDb
 from agno.session import AgentSession, TeamSession, WorkflowSession
-from agno.utils.log import log_error
+from agno.utils.log import log_error, log_info, log_warning


 def convert_v1_metrics_to_v2(metrics_dict: Dict[str, Any]) -> Dict[str, Any]:
@@ -47,10 +47,10 @@ def convert_v1_metrics_to_v2(metrics_dict: Dict[str, Any]) -> Dict[str, Any]:


 def convert_any_metrics_in_data(data: Any) -> Any:
-    """Recursively find and convert any metrics dictionaries
+    """Recursively find and convert any metrics dictionaries and handle v1 to v2 field conversion."""
     if isinstance(data, dict):
-        # First
-        data =
+        # First apply v1 to v2 field conversion (handles extra_data extraction, thinking/reasoning_content consolidation, etc.)
+        data = convert_v1_fields_to_v2(data)

         # Check if this looks like a metrics dictionary
         if _is_metrics_dict(data):
@@ -114,11 +114,11 @@ def _is_metrics_dict(data: Dict[str, Any]) -> bool:


 def convert_session_data_comprehensively(session_data: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
-    """Comprehensively convert
+    """Comprehensively convert session data from v1 to v2 format, including metrics conversion and field mapping."""
     if not session_data:
         return session_data

-    # Use the recursive converter to
+    # Use the recursive converter to handle all v1 to v2 conversions (metrics, field mapping, extra_data extraction, etc.)
     return convert_any_metrics_in_data(session_data)


@@ -127,39 +127,191 @@ def safe_get_runs_from_memory(memory_data: Any) -> Any:
     if memory_data is None:
         return None

+    runs: Any = []
+
     # If memory_data is a string, try to parse it as JSON
     if isinstance(memory_data, str):
         try:
             memory_dict = json.loads(memory_data)
             if isinstance(memory_dict, dict):
-
+                runs = memory_dict.get("runs")
         except (json.JSONDecodeError, AttributeError):
             # If JSON parsing fails, memory_data might just be a string value
             return None

     # If memory_data is already a dict, access runs directly
     elif isinstance(memory_data, dict):
-
-
-
-
-
+        runs = memory_data.get("runs")
+
+    for run in runs or []:
+        # Adjust fields mapping for Agent sessions
+        if run.get("agent_id") is not None:
+            if run.get("team_id") is not None:
+                run.pop("team_id")
+            if run.get("team_session_id") is not None:
+                run["session_id"] = run.pop("team_session_id")
+            if run.get("event"):
+                run["events"] = [run.pop("event")]
+
+        # Adjust fields mapping for Team sessions
+        if run.get("team_id") is not None:
+            if run.get("agent_id") is not None:
+                run.pop("agent_id")
+            if member_responses := run.get("member_responses"):
+                for response in member_responses:
+                    if response.get("agent_id") is not None and response.get("team_id") is not None:
+                        response.pop("team_id")
+                    if response.get("agent_id") is not None and response.get("team_session_id") is not None:
+                        response["session_id"] = response.pop("team_session_id")
+                run["member_responses"] = member_responses
+
+    return runs
+
+
+def convert_v1_media_to_v2(media_data: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert v1 media objects to v2 format."""
+    if not isinstance(media_data, dict):
+        return media_data

-
-
+    # Create a copy to avoid modifying the original
+    v2_media = media_data.copy()
+
+    # Add id if missing (required in v2)
+    if "id" not in v2_media or v2_media["id"] is None:
+        from uuid import uuid4
+
+        v2_media["id"] = str(uuid4())
+
+    # Handle VideoArtifact → Video conversion
+    if "eta" in v2_media or "length" in v2_media:
+        # Convert length to duration if it's numeric
+        length = v2_media.pop("length", None)
+        if length and isinstance(length, (int, float)):
+            v2_media["duration"] = length
+        elif length and isinstance(length, str):
+            try:
+                v2_media["duration"] = float(length)
+            except ValueError:
+                pass  # Keep as is if not convertible
+
+    # Handle AudioArtifact → Audio conversion
+    if "base64_audio" in v2_media:
+        # Map base64_audio to content
+        base64_audio = v2_media.pop("base64_audio", None)
+        if base64_audio:
+            v2_media["content"] = base64_audio
+
+    # Handle AudioResponse content conversion (base64 string to bytes if needed)
+    if "transcript" in v2_media and "content" in v2_media:
+        content = v2_media.get("content")
+        if content and isinstance(content, str):
+            # Try to decode base64 content to bytes for v2
+            try:
+                import base64
+
+                v2_media["content"] = base64.b64decode(content)
+            except Exception:
+                # If not valid base64, keep as string
+                pass
+
+    # Ensure format and mime_type are set appropriately
+    if "format" in v2_media and "mime_type" not in v2_media:
+        format_val = v2_media["format"]
+        if format_val:
+            # Set mime_type based on format for common types
+            mime_type_map = {
+                "mp4": "video/mp4",
+                "mov": "video/quicktime",
+                "avi": "video/x-msvideo",
+                "webm": "video/webm",
+                "mp3": "audio/mpeg",
+                "wav": "audio/wav",
+                "ogg": "audio/ogg",
+                "png": "image/png",
+                "jpg": "image/jpeg",
+                "jpeg": "image/jpeg",
+                "gif": "image/gif",
+                "webp": "image/webp",
+            }
+            if format_val.lower() in mime_type_map:
+                v2_media["mime_type"] = mime_type_map[format_val.lower()]
+
+    return v2_media
+
+
+def convert_v1_fields_to_v2(data: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert v1 fields to v2 format with proper field mapping and extraction."""
     if not isinstance(data, dict):
         return data

-    #
+    # Create a copy to avoid modifying the original
+    v2_data = data.copy()
+
+    # Fields that should be completely ignored/removed in v2
     deprecated_fields = {
         "team_session_id",  # RunOutput v1 field, removed in v2
         "formatted_tool_calls",  # RunOutput v1 field, removed in v2
+        "event",  # Remove event field
+        "events",  # Remove events field
         # Add other deprecated fields here as needed
     }

-    #
-
-
+    # Extract and map fields from extra_data before removing it
+    extra_data = v2_data.get("extra_data")
+    if extra_data and isinstance(extra_data, dict):
+        # Map extra_data fields to their v2 locations
+        if "add_messages" in extra_data:
+            v2_data["additional_input"] = extra_data["add_messages"]
+        if "references" in extra_data:
+            v2_data["references"] = extra_data["references"]
+        if "reasoning_steps" in extra_data:
+            v2_data["reasoning_steps"] = extra_data["reasoning_steps"]
+        if "reasoning_content" in extra_data:
+            # reasoning_content from extra_data also goes to reasoning_content
+            v2_data["reasoning_content"] = extra_data["reasoning_content"]
+        if "reasoning_messages" in extra_data:
+            v2_data["reasoning_messages"] = extra_data["reasoning_messages"]
+
+    # Handle thinking and reasoning_content consolidation
+    # Both thinking and reasoning_content from v1 should become reasoning_content in v2
+    thinking = v2_data.get("thinking")
+    reasoning_content = v2_data.get("reasoning_content")
+
+    # Consolidate thinking and reasoning_content into reasoning_content
+    if thinking and reasoning_content:
+        # Both exist, combine them (thinking first, then reasoning_content)
+        v2_data["reasoning_content"] = f"{thinking}\n{reasoning_content}"
+    elif thinking and not reasoning_content:
+        # Only thinking exists, move it to reasoning_content
+        v2_data["reasoning_content"] = thinking
+    # If only reasoning_content exists, keep it as is
+
+    # Remove thinking field since it's now consolidated into reasoning_content
+    if "thinking" in v2_data:
+        del v2_data["thinking"]
+
+    # Handle media object conversions
+    media_fields = ["images", "videos", "audio", "response_audio"]
+    for field in media_fields:
+        if field in v2_data and v2_data[field]:
+            if isinstance(v2_data[field], list):
+                # Handle list of media objects
+                v2_data[field] = [
+                    convert_v1_media_to_v2(item) if isinstance(item, dict) else item for item in v2_data[field]
+                ]
+            elif isinstance(v2_data[field], dict):
+                # Handle single media object
+                v2_data[field] = convert_v1_media_to_v2(v2_data[field])
+
+    # Remove extra_data after extraction
+    if "extra_data" in v2_data:
+        del v2_data["extra_data"]
+
+    # Remove other deprecated fields
+    for field in deprecated_fields:
+        v2_data.pop(field, None)
+
+    return v2_data


 def migrate(
@@ -169,6 +321,7 @@ def migrate(
     team_sessions_table_name: Optional[str] = None,
     workflow_sessions_table_name: Optional[str] = None,
     memories_table_name: Optional[str] = None,
+    batch_size: int = 5000,
 ):
     """Given a database connection and table/collection names, parse and migrate the content to corresponding v2 tables/collections.

@@ -179,65 +332,171 @@
         team_sessions_table_name: The name of the team sessions table/collection. If not provided, team sessions will not be migrated.
         workflow_sessions_table_name: The name of the workflow sessions table/collection. If not provided, workflow sessions will not be migrated.
         memories_table_name: The name of the memories table/collection. If not provided, memories will not be migrated.
+        batch_size: Number of records to process in each batch (default: 5000)
     """
     if agent_sessions_table_name:
-
+        migrate_table_in_batches(
+            db=db,
             v1_db_schema=v1_db_schema,
             v1_table_name=agent_sessions_table_name,
             v1_table_type="agent_sessions",
+            batch_size=batch_size,
         )

     if team_sessions_table_name:
-
+        migrate_table_in_batches(
+            db=db,
             v1_db_schema=v1_db_schema,
             v1_table_name=team_sessions_table_name,
             v1_table_type="team_sessions",
+            batch_size=batch_size,
         )

     if workflow_sessions_table_name:
-
+        migrate_table_in_batches(
+            db=db,
             v1_db_schema=v1_db_schema,
             v1_table_name=workflow_sessions_table_name,
             v1_table_type="workflow_sessions",
+            batch_size=batch_size,
         )

     if memories_table_name:
-
+        migrate_table_in_batches(
+            db=db,
             v1_db_schema=v1_db_schema,
             v1_table_name=memories_table_name,
             v1_table_type="memories",
+            batch_size=batch_size,
         )


-def
-
+def migrate_table_in_batches(
+    db: Union[PostgresDb, MySQLDb, SqliteDb, MongoDb],
+    v1_db_schema: str,
+    v1_table_name: str,
+    v1_table_type: str,
+    batch_size: int = 5000,
+):
+    log_info(f"Starting migration of table {v1_table_name} (type: {v1_table_type}) with batch size {batch_size}")
+
+    total_migrated = 0
+    batch_count = 0
+
+    for batch_content in get_table_content_in_batches(db, v1_db_schema, v1_table_name, batch_size):
+        batch_count += 1
+        batch_size_actual = len(batch_content)
+        log_info(f"Processing batch {batch_count} with {batch_size_actual} records from table {v1_table_name}")
+
+        # Parse the content into the new format
+        memories: List[UserMemory] = []
+        sessions: Union[List[AgentSession], List[TeamSession], List[WorkflowSession]] = []
+
+        if v1_table_type == "agent_sessions":
+            sessions = parse_agent_sessions(batch_content)
+        elif v1_table_type == "team_sessions":
+            sessions = parse_team_sessions(batch_content)
+        elif v1_table_type == "workflow_sessions":
+            sessions = parse_workflow_sessions(batch_content)
+        elif v1_table_type == "memories":
+            memories = parse_memories(batch_content)
+        else:
+            raise ValueError(f"Invalid table type: {v1_table_type}")
+
+        # Insert the batch into the new table
+        if v1_table_type in ["agent_sessions", "team_sessions", "workflow_sessions"]:
+            if sessions:
+                # Clear any existing scoped session state for SQL databases to prevent transaction conflicts
+                if hasattr(db, "Session"):
+                    db.Session.remove()  # type: ignore
+
+                db.upsert_sessions(sessions)  # type: ignore
+                total_migrated += len(sessions)
+                log_info(f"Bulk upserted {len(sessions)} sessions in batch {batch_count}")
+
+        elif v1_table_type == "memories":
+            if memories:
+                # Clear any existing scoped session state for SQL databases to prevent transaction conflicts
+                if hasattr(db, "Session"):
+                    db.Session.remove()  # type: ignore
+
+                db.upsert_memories(memories)
+                total_migrated += len(memories)
+                log_info(f"Bulk upserted {len(memories)} memories in batch {batch_count}")
+
+        log_info(f"Completed batch {batch_count}: migrated {batch_size_actual} records")
+
+    log_info(f"✅ Migration completed for table {v1_table_name}: {total_migrated} total records migrated")
+
+
+def get_table_content_in_batches(
+    db: Union[PostgresDb, MySQLDb, SqliteDb, MongoDb], db_schema: str, table_name: str, batch_size: int = 5000
+):
+    """Get table content in batches to avoid memory issues with large tables"""
     try:
-
-
-        # MongoDB implementation
+        if isinstance(db, MongoDb):
+            # MongoDB implementation with cursor and batching
             collection = db.database[table_name]
-
-
-
-            for doc in
+            cursor = collection.find({}).batch_size(batch_size)
+
+            batch = []
+            for doc in cursor:
+                # Convert ObjectId to string for compatibility
                 if "_id" in doc:
                     doc["_id"] = str(doc["_id"])
-
-
-
-
-
-        if db_schema and db_schema.strip():
-            sql_query = f"SELECT * FROM {db_schema}.{table_name}"
-        else:
-            sql_query = f"SELECT * FROM {table_name}"
+                batch.append(doc)
+
+                if len(batch) >= batch_size:
+                    yield batch
+                    batch = []

-
-
+            # Yield remaining items
+            if batch:
+                yield batch
+        else:
+            # SQL database implementations (PostgresDb, MySQLDb, SqliteDb)
+            offset = 0
+            while True:
+                # Create a new session for each batch to avoid transaction conflicts
+                with db.Session() as sess:
+                    # Handle empty schema by omitting the schema prefix (needed for SQLite)
+                    if db_schema and db_schema.strip():
+                        sql_query = f"SELECT * FROM {db_schema}.{table_name} LIMIT {batch_size} OFFSET {offset}"
+                    else:
+                        sql_query = f"SELECT * FROM {table_name} LIMIT {batch_size} OFFSET {offset}"
+
+                    result = sess.execute(text(sql_query))
+                    batch = [row._asdict() for row in result]
+
+                if not batch:
+                    break
+
+                yield batch
+                offset += batch_size
+
+                # If batch is smaller than batch_size, we've reached the end
+                if len(batch) < batch_size:
+                    break

     except Exception as e:
-        log_error(f"Error getting
-        return
+        log_error(f"Error getting batched content from table/collection {table_name}: {e}")
+        return
+
+
+def get_all_table_content(db, db_schema: str, table_name: str) -> list[dict[str, Any]]:
+    """Get all content from the given table/collection (legacy method kept for backward compatibility)
+
+    WARNING: This method loads all data into memory and should not be used for large tables.
+    Use get_table_content_in_batches() for large datasets.
+    """
+    log_warning(
+        f"Loading entire table {table_name} into memory. Consider using get_table_content_in_batches() for large tables, or if you experience any complication."
+    )
+
+    all_content = []
+    for batch in get_table_content_in_batches(db, db_schema, table_name):
+        all_content.extend(batch)
+    return all_content


 def parse_agent_sessions(v1_content: List[Dict[str, Any]]) -> List[AgentSession]:
@@ -256,7 +515,13 @@ def parse_agent_sessions(v1_content: List[Dict[str, Any]]) -> List[AgentSession]
             "created_at": item.get("created_at"),
             "updated_at": item.get("updated_at"),
         }
-
+
+        try:
+            agent_session = AgentSession.from_dict(session)
+        except Exception as e:
+            log_error(f"Error parsing agent session: {e}. This is the complete session that failed: {session}")
+            continue
+
         if agent_session is not None:
             sessions_v2.append(agent_session)

@@ -279,7 +544,12 @@ def parse_team_sessions(v1_content: List[Dict[str, Any]]) -> List[TeamSession]:
             "created_at": item.get("created_at"),
             "updated_at": item.get("updated_at"),
         }
-
+        try:
+            team_session = TeamSession.from_dict(session)
+        except Exception as e:
+            log_error(f"Error parsing team session: {e}. This is the complete session that failed: {session}")
+            continue
+
         if team_session is not None:
             sessions_v2.append(team_session)

@@ -304,7 +574,12 @@ def parse_workflow_sessions(v1_content: List[Dict[str, Any]]) -> List[WorkflowSe
             "workflow_name": item.get("workflow_name"),
             "runs": convert_any_metrics_in_data(item.get("runs")),
         }
-
+        try:
+            workflow_session = WorkflowSession.from_dict(session)
+        except Exception as e:
+            log_error(f"Error parsing workflow session: {e}. This is the complete session that failed: {session}")
+            continue
+
         if workflow_session is not None:
             sessions_v2.append(workflow_session)

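Usage note: the new batch_size argument flows from migrate() into migrate_table_in_batches() and get_table_content_in_batches(), so large v1 tables are read and upserted in chunks instead of being loaded into memory at once. A minimal invocation sketch, assuming the import path mirrors the file path above and a PostgresDb constructed from a connection URL; the connection string, schema, and table names are placeholders, not values taken from this diff:

from agno.db.migrations.v1_to_v2 import migrate
from agno.db.postgres.postgres import PostgresDb

# Hypothetical v1 database; swap in your own connection details.
db = PostgresDb(db_url="postgresql+psycopg://user:pass@localhost:5432/agno")

# Read each v1 table in batches of 1000 rows (default is 5000) and
# bulk-upsert the converted records into the corresponding v2 tables.
migrate(
    db=db,
    v1_db_schema="ai",
    agent_sessions_table_name="agent_sessions",
    memories_table_name="user_memories",
    batch_size=1000,
)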