jaf-py 2.5.10__py3-none-any.whl → 2.5.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jaf/__init__.py +154 -57
- jaf/a2a/__init__.py +42 -21
- jaf/a2a/agent.py +79 -126
- jaf/a2a/agent_card.py +87 -78
- jaf/a2a/client.py +30 -66
- jaf/a2a/examples/client_example.py +12 -12
- jaf/a2a/examples/integration_example.py +38 -47
- jaf/a2a/examples/server_example.py +56 -53
- jaf/a2a/memory/__init__.py +0 -4
- jaf/a2a/memory/cleanup.py +28 -21
- jaf/a2a/memory/factory.py +155 -133
- jaf/a2a/memory/providers/composite.py +21 -26
- jaf/a2a/memory/providers/in_memory.py +89 -83
- jaf/a2a/memory/providers/postgres.py +117 -115
- jaf/a2a/memory/providers/redis.py +128 -121
- jaf/a2a/memory/serialization.py +77 -87
- jaf/a2a/memory/tests/run_comprehensive_tests.py +112 -83
- jaf/a2a/memory/tests/test_cleanup.py +211 -94
- jaf/a2a/memory/tests/test_serialization.py +73 -68
- jaf/a2a/memory/tests/test_stress_concurrency.py +186 -133
- jaf/a2a/memory/tests/test_task_lifecycle.py +138 -120
- jaf/a2a/memory/types.py +91 -53
- jaf/a2a/protocol.py +95 -125
- jaf/a2a/server.py +90 -118
- jaf/a2a/standalone_client.py +30 -43
- jaf/a2a/tests/__init__.py +16 -33
- jaf/a2a/tests/run_tests.py +17 -53
- jaf/a2a/tests/test_agent.py +40 -140
- jaf/a2a/tests/test_client.py +54 -117
- jaf/a2a/tests/test_integration.py +28 -82
- jaf/a2a/tests/test_protocol.py +54 -139
- jaf/a2a/tests/test_types.py +50 -136
- jaf/a2a/types.py +58 -34
- jaf/cli.py +21 -41
- jaf/core/__init__.py +7 -1
- jaf/core/agent_tool.py +93 -72
- jaf/core/analytics.py +257 -207
- jaf/core/checkpoint.py +223 -0
- jaf/core/composition.py +249 -235
- jaf/core/engine.py +817 -519
- jaf/core/errors.py +55 -42
- jaf/core/guardrails.py +276 -202
- jaf/core/handoff.py +47 -31
- jaf/core/parallel_agents.py +69 -75
- jaf/core/performance.py +75 -73
- jaf/core/proxy.py +43 -44
- jaf/core/proxy_helpers.py +24 -27
- jaf/core/regeneration.py +220 -129
- jaf/core/state.py +68 -66
- jaf/core/streaming.py +115 -108
- jaf/core/tool_results.py +111 -101
- jaf/core/tools.py +114 -116
- jaf/core/tracing.py +310 -210
- jaf/core/types.py +403 -151
- jaf/core/workflows.py +209 -168
- jaf/exceptions.py +46 -38
- jaf/memory/__init__.py +1 -6
- jaf/memory/approval_storage.py +54 -77
- jaf/memory/factory.py +4 -4
- jaf/memory/providers/in_memory.py +216 -180
- jaf/memory/providers/postgres.py +216 -146
- jaf/memory/providers/redis.py +173 -116
- jaf/memory/types.py +70 -51
- jaf/memory/utils.py +36 -34
- jaf/plugins/__init__.py +12 -12
- jaf/plugins/base.py +105 -96
- jaf/policies/__init__.py +0 -1
- jaf/policies/handoff.py +37 -46
- jaf/policies/validation.py +76 -52
- jaf/providers/__init__.py +6 -3
- jaf/providers/mcp.py +97 -51
- jaf/providers/model.py +475 -283
- jaf/server/__init__.py +1 -1
- jaf/server/main.py +7 -11
- jaf/server/server.py +514 -359
- jaf/server/types.py +208 -52
- jaf/utils/__init__.py +17 -18
- jaf/utils/attachments.py +111 -116
- jaf/utils/document_processor.py +175 -174
- jaf/visualization/__init__.py +1 -1
- jaf/visualization/example.py +111 -110
- jaf/visualization/functional_core.py +46 -71
- jaf/visualization/graphviz.py +154 -189
- jaf/visualization/imperative_shell.py +7 -16
- jaf/visualization/types.py +8 -4
- {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/METADATA +2 -2
- jaf_py-2.5.12.dist-info/RECORD +97 -0
- jaf_py-2.5.10.dist-info/RECORD +0 -96
- {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/WHEEL +0 -0
- {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/entry_points.txt +0 -0
- {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/licenses/LICENSE +0 -0
- {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/top_level.txt +0 -0
--- a/jaf/a2a/memory/tests/test_stress_concurrency.py
+++ b/jaf/a2a/memory/tests/test_stress_concurrency.py
@@ -3,7 +3,7 @@ A2A Memory Stress & Concurrency Tests - Phase 3: Advanced Scenarios
 
 Comprehensive stress testing for A2A task memory system including:
 - High concurrency scenarios
-- Large-scale data operations
+- Large-scale data operations
 - Performance under load
 - Race condition detection
 - Resource exhaustion handling
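The module docstring above lists what this test suite covers. As a point of reference, the sketch below (not taken from the package) shows the basic pattern the concurrency tests in the following hunks rely on: fire a large batch of store operations at once with asyncio.gather and count how many succeed. FakeProvider and Result are hypothetical stand-ins for the package's A2A task provider API.

```python
# Minimal sketch of the gather-based stress pattern exercised by these tests.
# FakeProvider and Result are illustrative stand-ins, not the jaf A2A API.
import asyncio
import time
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class Result:
    data: Optional[Any] = None  # None is treated as "store succeeded", as in the tests
    error: Optional[str] = None


class FakeProvider:
    def __init__(self) -> None:
        self._tasks: dict[str, dict] = {}

    async def store_task(self, task: dict) -> Result:
        await asyncio.sleep(0)  # yield to the event loop, as a real async provider would
        self._tasks[task["id"]] = task
        return Result()


async def main() -> None:
    provider = FakeProvider()
    tasks = [{"id": f"stress_task_{i:05d}", "contextId": f"ctx_{i % 10}"} for i in range(500)]

    start = time.perf_counter()
    results = await asyncio.gather(*(provider.store_task(t) for t in tasks))
    elapsed = time.perf_counter() - start

    success_count = sum(1 for r in results if r.data is None)
    assert success_count == len(tasks), (
        f"Expected all {len(tasks)} stores to succeed, got {success_count}"
    )
    print(f"Concurrent writes: {len(tasks)} tasks in {elapsed:.2f}s")


asyncio.run(main())
```

The real suite applies the same pattern to the package's in-memory provider, created with elevated limits (max_tasks=50000) by the stress_provider fixture shown further below.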
@@ -46,14 +46,14 @@ class StressTestBase:
         count: int,
         context_id: str = "stress_ctx",
         base_id: str = "stress_task",
-        distribute_contexts: bool = True
+        distribute_contexts: bool = True,
     ) -> List[A2ATask]:
         """Create multiple tasks for bulk operations"""
         tasks = []
         for i in range(count):
             # Distribute across contexts by default for stress testing, but allow single context
             actual_context_id = f"{context_id}_{i % 10}" if distribute_contexts else context_id
-
+
             task = A2ATask(
                 id=f"{base_id}_{i:05d}",
                 contextId=actual_context_id,
@@ -65,24 +65,21 @@ class StressTestBase:
                         parts=[A2ATextPart(kind="text", text=f"Bulk task number {i}")],
                         messageId=f"bulk_msg_{i}",
                         contextId=actual_context_id,
-                        kind="message"
+                        kind="message",
                     ),
-                    timestamp=datetime.now(timezone.utc).isoformat()
+                    timestamp=datetime.now(timezone.utc).isoformat(),
                 ),
                 metadata={
                     "created_at": datetime.now(timezone.utc).isoformat(),
                     "batch_id": "stress_test",
-                    "sequence": i
-                }
+                    "sequence": i,
+                },
             )
             tasks.append(task)
         return tasks
 
     def create_large_task(
-        self,
-        task_id: str,
-        context_id: str,
-        size_multiplier: int = 100
+        self, task_id: str, context_id: str, size_multiplier: int = 100
     ) -> A2ATask:
         """Create a task with large data payload"""
         # Create large text content
@@ -92,7 +89,7 @@ class StressTestBase:
         large_data = {
             "data": list(range(size_multiplier * 10)),
             "metadata": {f"key_{i}": f"value_{i}" * 50 for i in range(size_multiplier)},
-            "content": large_text
+            "content": large_text,
         }
 
         return A2ATask(
@@ -105,13 +102,13 @@ class StressTestBase:
                     role="agent",
                     parts=[
                         A2ATextPart(kind="text", text=large_text),
-                        A2ADataPart(kind="data", data=large_data)
+                        A2ADataPart(kind="data", data=large_data),
                     ],
                     messageId=f"large_msg_{task_id}",
                     contextId=context_id,
-                    kind="message"
+                    kind="message",
                 ),
-                timestamp=datetime.now(timezone.utc).isoformat()
+                timestamp=datetime.now(timezone.utc).isoformat(),
             ),
             history=[
                 A2AMessage(
@@ -119,7 +116,7 @@ class StressTestBase:
                     parts=[A2ATextPart(kind="text", text="Process this large dataset")],
                     messageId=f"init_{task_id}",
                     contextId=context_id,
-                    kind="message"
+                    kind="message",
                 )
             ],
             artifacts=[
@@ -129,10 +126,10 @@ class StressTestBase:
                     description="An artifact containing large amounts of data",
                     parts=[
                         A2ATextPart(kind="text", text=large_text),
-                        A2ADataPart(kind="data", data=large_data)
-                    ]
+                        A2ADataPart(kind="data", data=large_data),
+                    ],
                 )
-            ]
+            ],
         )
 
     async def measure_operation_time(self, operation_func, *args, **kwargs) -> Tuple[Any, float]:
@@ -148,7 +145,7 @@ async def stress_provider() -> A2ATaskProvider:
     """Create provider for stress testing with higher limits"""
     config = A2AInMemoryTaskConfig(
         max_tasks=50000,  # Higher limits for stress testing
-        max_tasks_per_context=10000
+        max_tasks_per_context=10000,
     )
     provider = create_a2a_in_memory_task_provider(config)
     yield provider
@@ -179,14 +176,18 @@ class TestConcurrencyTorture(StressTestBase):
 
         # Verify all operations succeeded
         success_count = sum(1 for result in results if result.data is None)
-        assert success_count == concurrent_tasks,
+        assert success_count == concurrent_tasks, (
+            f"Expected all {concurrent_tasks} stores to succeed, got {success_count}"
+        )
 
         # Performance check - should complete within reasonable time
         total_time = end_time - start_time
         avg_time_per_op = total_time / concurrent_tasks
         assert total_time < 30.0, f"Concurrent writes took too long: {total_time:.2f}s"
 
-        print(
+        print(
+            f"Concurrent writes: {concurrent_tasks} tasks in {total_time:.2f}s ({avg_time_per_op * 1000:.2f}ms avg)"
+        )
 
         # Verify data integrity
         for task in tasks[:10]:  # Sample check
@@ -228,8 +229,8 @@ class TestConcurrencyTorture(StressTestBase):
                     parts=[A2ATextPart(kind="text", text=f"Updated task {i}")],
                     messageId=f"update_{i}",
                     contextId=f"rw_ctx_{i % 10}",
-                    kind="message"
-                )
+                    kind="message",
+                ),
             )
         )
 
@@ -240,9 +241,9 @@ class TestConcurrencyTorture(StressTestBase):
         end_time = time.perf_counter()
 
         # Analyze results
-        read_results = results[:len(read_operations)]
-        write_results = results[len(read_operations):len(read_operations) + len(write_operations)]
-        update_results = results[len(read_operations) + len(write_operations):]
+        read_results = results[: len(read_operations)]
+        write_results = results[len(read_operations) : len(read_operations) + len(write_operations)]
+        update_results = results[len(read_operations) + len(write_operations) :]
 
         # Verify read operations
         successful_reads = sum(1 for result in read_results if result.data is not None)
@@ -270,9 +271,8 @@ class TestConcurrencyTorture(StressTestBase):
             contextId=context_id,
             kind="task",
             status=A2ATaskStatus(
-                state=TaskState.SUBMITTED,
-
-            )
+                state=TaskState.SUBMITTED, timestamp=datetime.now(timezone.utc).isoformat()
+            ),
         )
         await stress_provider.store_task(initial_task)
 
@@ -289,9 +289,9 @@ class TestConcurrencyTorture(StressTestBase):
                     parts=[A2ATextPart(kind="text", text=f"Concurrent update {i}")],
                     messageId=f"concurrent_{i}",
                     contextId=context_id,
-                    kind="message"
+                    kind="message",
                 ),
-                timestamp=datetime.now(timezone.utc).isoformat()
+                timestamp=datetime.now(timezone.utc).isoformat(),
             )
             update_operations.append(operation)
 
@@ -300,7 +300,9 @@ class TestConcurrencyTorture(StressTestBase):
 
         # All updates should succeed
         successful_updates = sum(1 for result in results if result.data is None)
-        assert successful_updates == update_count,
+        assert successful_updates == update_count, (
+            f"Expected {update_count} successful updates, got {successful_updates}"
+        )
 
         # Verify final state consistency
         final_result = await stress_provider.get_task(task_id)
@@ -334,12 +336,16 @@ class TestConcurrencyTorture(StressTestBase):
                         state=TaskState.SUBMITTED,
                         message=A2AMessage(
                             role="user",
-                            parts=[
+                            parts=[
+                                A2ATextPart(
+                                    kind="text", text=f"Task {task_idx} in context {ctx_idx}"
+                                )
+                            ],
                             messageId=f"iso_msg_{ctx_idx}_{task_idx}",
                             contextId=context_id,
-                            kind="message"
-                        )
-                    )
+                            kind="message",
+                        ),
+                    ),
                 )
                 expected_context_tasks[context_id].append(task.id)
                 all_store_operations.append(stress_provider.store_task(task))
@@ -350,7 +356,9 @@ class TestConcurrencyTorture(StressTestBase):
         # Verify all stores succeeded
         successful_stores = sum(1 for result in results if result.data is None)
         expected_total = contexts_count * tasks_per_context
-        assert successful_stores == expected_total,
+        assert successful_stores == expected_total, (
+            f"Expected {expected_total} stores, got {successful_stores}"
+        )
 
         # Verify context isolation
         for context_id, expected_task_ids in expected_context_tasks.items():
@@ -360,8 +368,12 @@ class TestConcurrencyTorture(StressTestBase):
             actual_task_ids = {task.id for task in context_result.data}
             expected_task_ids_set = set(expected_task_ids)
 
-            assert actual_task_ids == expected_task_ids_set,
-
+            assert actual_task_ids == expected_task_ids_set, (
+                f"Context {context_id} has incorrect tasks"
+            )
+            assert len(context_result.data) == tasks_per_context, (
+                f"Context {context_id} should have {tasks_per_context} tasks"
+            )
 
 
 class TestLargeScaleOperations(StressTestBase):
@@ -381,23 +393,23 @@ class TestLargeScaleOperations(StressTestBase):
             # Store tasks in batches to avoid overwhelming the system
             batch_size = 50
             for i in range(0, len(tasks), batch_size):
-                batch = tasks[i:i + batch_size]
+                batch = tasks[i : i + batch_size]
                 batch_operations = [stress_provider.store_task(task) for task in batch]
                 batch_results = await asyncio.gather(*batch_operations)
 
                 # Verify batch success
                 batch_success = sum(1 for result in batch_results if result.data is None)
-                assert batch_success == len(batch),
+                assert batch_success == len(batch), (
+                    f"Batch {i // batch_size} failed: {batch_success}/{len(batch)} succeeded"
+                )
 
             end_time = time.perf_counter()
             total_time = end_time - start_time
             ops_per_second = count / total_time
 
-            performance_results.append(
-                "count": count,
-
-                "ops_per_second": ops_per_second
-            })
+            performance_results.append(
+                {"count": count, "time": total_time, "ops_per_second": ops_per_second}
+            )
 
             print(f"Bulk storage: {count} tasks in {total_time:.2f}s ({ops_per_second:.2f} ops/s)")
 
@@ -411,7 +423,9 @@ class TestLargeScaleOperations(StressTestBase):
         degradation_ratio = smallest_ops / largest_ops
 
         # Performance shouldn't degrade by more than 10x
-        assert degradation_ratio < 10,
+        assert degradation_ratio < 10, (
+            f"Performance degraded {degradation_ratio:.2f}x from {smallest_ops:.2f} to {largest_ops:.2f} ops/s"
+        )
 
     async def test_large_data_payload_handling(self, stress_provider):
         """Test handling of tasks with large data payloads"""
@@ -428,13 +442,17 @@ class TestLargeScaleOperations(StressTestBase):
             store_result, store_time = await self.measure_operation_time(
                 stress_provider.store_task, large_task
             )
-            assert store_result.data is None,
+            assert store_result.data is None, (
+                f"Large task storage should succeed for size {multiplier}"
+            )
 
             # Measure retrieval time
             get_result, get_time = await self.measure_operation_time(
                 stress_provider.get_task, task_id
             )
-            assert get_result.data is not None,
+            assert get_result.data is not None, (
+                f"Large task retrieval should succeed for size {multiplier}"
+            )
 
             # Verify data integrity
             retrieved_task = get_result.data
@@ -443,7 +461,9 @@ class TestLargeScaleOperations(StressTestBase):
             assert len(retrieved_task.artifacts or []) == 1, "Artifacts should be preserved"
 
             # Performance checks
-            assert store_time < 5000,
+            assert store_time < 5000, (
+                f"Store time too slow for size {multiplier}: {store_time:.2f}ms"
+            )
             assert get_time < 5000, f"Get time too slow for size {multiplier}: {get_time:.2f}ms"
 
             print(f"Large payload {multiplier}x: store={store_time:.2f}ms, get={get_time:.2f}ms")
@@ -455,32 +475,30 @@ class TestLargeScaleOperations(StressTestBase):
         context_id = "pagination_perf_ctx"
 
         # Create large dataset - use single context for pagination test
-        tasks = self.create_bulk_tasks(
+        tasks = self.create_bulk_tasks(
+            total_tasks, context_id, "page_task", distribute_contexts=False
+        )
 
         # Store in batches and verify success
         batch_size = 50
         stored_count = 0
         for i in range(0, len(tasks), batch_size):
-            batch = tasks[i:i + batch_size]
+            batch = tasks[i : i + batch_size]
            batch_operations = [stress_provider.store_task(task) for task in batch]
             batch_results = await asyncio.gather(*batch_operations)
-
+
             # Verify batch success
             for result in batch_results:
-                if hasattr(result,
+                if hasattr(result, "data") and result.data is None:
                     stored_count += 1
-
+
         assert stored_count == total_tasks, f"Only stored {stored_count}/{total_tasks} tasks"
 
         # Test pagination performance at different offsets
         test_offsets = [0, 250, 500, 750, 900]  # Beginning, middle, end
 
         for offset in test_offsets:
-            query = A2ATaskQuery(
-                context_id=context_id,
-                limit=page_size,
-                offset=offset
-            )
+            query = A2ATaskQuery(context_id=context_id, limit=page_size, offset=offset)
 
             # Measure query time
             result, query_time = await self.measure_operation_time(
@@ -492,7 +510,9 @@ class TestLargeScaleOperations(StressTestBase):
             # Verify correct page size (except possibly last page)
             page_tasks = result.data
             expected_size = min(page_size, total_tasks - offset)
-            assert len(page_tasks) == expected_size,
+            assert len(page_tasks) == expected_size, (
+                f"Page at offset {offset} should have {expected_size} tasks"
+            )
 
             # Performance check - deep pagination shouldn't be too slow
             assert query_time < 1000, f"Query at offset {offset} too slow: {query_time:.2f}ms"
@@ -517,19 +537,19 @@ class TestLargeScaleOperations(StressTestBase):
                 kind="task",
                 status=A2ATaskStatus(
                     state=states[state_idx],
-                    timestamp=(datetime.now(timezone.utc) - timedelta(hours=i % 24)).isoformat()
+                    timestamp=(datetime.now(timezone.utc) - timedelta(hours=i % 24)).isoformat(),
                 ),
                 metadata={
                     "priority": "high" if i % 3 == 0 else "normal",
-                    "category": f"cat_{i % 5}"
-                }
+                    "category": f"cat_{i % 5}",
+                },
             )
             all_tasks.append(task)
 
         # Store all tasks
         batch_size = 100
         for i in range(0, len(all_tasks), batch_size):
-            batch = all_tasks[i:i + batch_size]
+            batch = all_tasks[i : i + batch_size]
             batch_operations = [stress_provider.store_task(task) for task in batch]
             await asyncio.gather(*batch_operations)
 
@@ -537,22 +557,16 @@ class TestLargeScaleOperations(StressTestBase):
         complex_queries = [
             # Query by state
             A2ATaskQuery(state=TaskState.WORKING, limit=100),
-
             # Query by context
             A2ATaskQuery(context_id="complex_ctx_005", limit=100),
-
             # Query by time range
-            A2ATaskQuery(
-                since=datetime.now(timezone.utc) - timedelta(hours=12),
-                limit=100
-            ),
-
+            A2ATaskQuery(since=datetime.now(timezone.utc) - timedelta(hours=12), limit=100),
             # Query with multiple filters
             A2ATaskQuery(
                 state=TaskState.COMPLETED,
                 since=datetime.now(timezone.utc) - timedelta(hours=6),
-                limit=50
-            )
+                limit=50,
+            ),
         ]
 
         for i, query in enumerate(complex_queries):
@@ -581,7 +595,7 @@ class TestResourceExhaustion(StressTestBase):
             large_task = self.create_large_task(
                 f"memory_task_{i:03d}",
                 f"memory_ctx_{i % 10}",
-                size_multiplier=200  # Large payloads
+                size_multiplier=200,  # Large payloads
             )
             large_tasks.append(large_task)
 
@@ -614,7 +628,7 @@ class TestResourceExhaustion(StressTestBase):
         # Create provider with low limits for testing
         limited_config = A2AInMemoryTaskConfig(
             max_tasks=100,  # Low limit for testing
-            max_tasks_per_context=20
+            max_tasks_per_context=20,
         )
         limited_provider = create_a2a_in_memory_task_provider(limited_config)
 
@@ -627,16 +641,22 @@ class TestResourceExhaustion(StressTestBase):
 
         for task in tasks:
             result = await limited_provider.store_task(task)
-            if hasattr(result,
+            if hasattr(result, "data") and result.data is None:
                 stored_count += 1
-            elif hasattr(result,
+            elif hasattr(result, "error"):
                 rejected_count += 1
 
                 # Should get appropriate error message about storage limits
                 error_msg = str(result.error.message).lower()
                 # Check for storage-related error messages
-                assert (
-
+                assert (
+                    "limit" in error_msg
+                    or "full" in error_msg
+                    or "maximum" in error_msg
+                    or "storage" in error_msg
+                    or "exceeded" in error_msg
+                    or "failed to store" in error_msg
+                )
             else:
                 rejected_count += 1
 
@@ -653,7 +673,7 @@ class TestResourceExhaustion(StressTestBase):
 
         for task in context_tasks:
             result = await limited_provider.store_task(task)
-            if hasattr(result,
+            if hasattr(result, "data") and result.data is None:
                 ctx_stored += 1
             else:
                 ctx_rejected += 1
@@ -677,7 +697,7 @@ class TestResourceExhaustion(StressTestBase):
                     id=f"exhaust_task_{i}",
                     contextId=f"exhaust_ctx_{i % 10}",
                     kind="task",
-                    status=A2ATaskStatus(state=TaskState.SUBMITTED)
+                    status=A2ATaskStatus(state=TaskState.SUBMITTED),
                 )
                 operations.append(stress_provider.store_task(task))
             elif i % 3 == 1:
@@ -706,7 +726,9 @@ class TestResourceExhaustion(StressTestBase):
         print(f"Connection exhaustion test: {success_count} succeeded, {error_count} failed")
 
         # Should handle at least some operations successfully
-        assert success_count > concurrent_ops * 0.1,
+        assert success_count > concurrent_ops * 0.1, (
+            "Should handle at least 10% of operations successfully"
+        )
 
         # Provider should recover and be healthy
         await asyncio.sleep(1)  # Allow recovery time
@@ -735,7 +757,7 @@ class TestMemoryLeakDetection(StressTestBase):
                 id=f"leak_task_{cycle}",
                 contextId=context_id,
                 kind="task",
-                status=A2ATaskStatus(state=TaskState.SUBMITTED)
+                status=A2ATaskStatus(state=TaskState.SUBMITTED),
             )
 
             # Store task
@@ -750,23 +772,25 @@ class TestMemoryLeakDetection(StressTestBase):
                     parts=[A2ATextPart(kind="text", text="Working on it")],
                     messageId=f"work_{cycle}",
                     contextId=context_id,
-                    kind="message"
-                )
+                    kind="message",
+                ),
             )
 
             # Complete task
-            completed_task = task.model_copy(
-
-
-
-
-
-
-
-
+            completed_task = task.model_copy(
+                update={
+                    "status": A2ATaskStatus(
+                        state=TaskState.COMPLETED,
+                        message=A2AMessage(
+                            role="agent",
+                            parts=[A2ATextPart(kind="text", text="Completed")],
+                            messageId=f"done_{cycle}",
+                            contextId=context_id,
+                            kind="message",
+                        ),
                     )
-
-
+                }
+            )
             await stress_provider.update_task(completed_task)
 
             # Delete task
@@ -784,12 +808,16 @@ class TestMemoryLeakDetection(StressTestBase):
         object_growth = final_objects - initial_objects
         growth_per_cycle = object_growth / cycles if cycles > 0 else 0
 
-        print(
+        print(
+            f"Memory leak test: {object_growth} objects growth over {cycles} cycles ({growth_per_cycle:.2f} per cycle)"
+        )
 
         # Should not have significant memory growth
         # Allow some growth but flag if it's excessive
         max_allowed_growth_per_cycle = 100  # objects per cycle
-        assert growth_per_cycle < max_allowed_growth_per_cycle,
+        assert growth_per_cycle < max_allowed_growth_per_cycle, (
+            f"Potential memory leak: {growth_per_cycle:.2f} objects per cycle"
+        )
 
     async def test_provider_cleanup_on_close(self, stress_provider):
         """Test that provider properly cleans up resources on close"""
@@ -805,7 +833,7 @@ class TestMemoryLeakDetection(StressTestBase):
         # (This is implementation-specific and might need adjustment)
         try:
             # Access provider internals if possible
-            if hasattr(stress_provider,
+            if hasattr(stress_provider, "_state"):
                 weak_refs.append(weakref.ref(stress_provider._state))
         except Exception:
             pass  # Provider might not expose internals
@@ -829,12 +857,14 @@ class TestMemoryLeakDetection(StressTestBase):
         """Test cleanup efficiency with large datasets"""
         # Create large dataset - use single context for cleanup test
         large_dataset_size = 1000
-        tasks = self.create_bulk_tasks(
+        tasks = self.create_bulk_tasks(
+            large_dataset_size, "large_cleanup_ctx", "large_cleanup_task", distribute_contexts=False
+        )
 
         # Store all tasks
         batch_size = 50
         for i in range(0, len(tasks), batch_size):
-            batch = tasks[i:i + batch_size]
+            batch = tasks[i : i + batch_size]
             batch_operations = [stress_provider.store_task(task) for task in batch]
             await asyncio.gather(*batch_operations)
 
@@ -848,7 +878,9 @@ class TestMemoryLeakDetection(StressTestBase):
         cleanup_time = end_time - start_time
 
         # Verify cleanup was successful
-        assert delete_result.data == large_dataset_size,
+        assert delete_result.data == large_dataset_size, (
+            f"Should delete all {large_dataset_size} tasks"
+        )
 
         # Verify context is empty
         remaining_result = await stress_provider.get_tasks_by_context("large_cleanup_ctx")
@@ -859,7 +891,9 @@ class TestMemoryLeakDetection(StressTestBase):
         cleanup_rate = large_dataset_size / cleanup_time
         assert cleanup_rate > 100, f"Cleanup too slow: {cleanup_rate:.2f} tasks/second"
 
-        print(
+        print(
+            f"Large dataset cleanup: {large_dataset_size} tasks in {cleanup_time:.2f}s ({cleanup_rate:.2f} tasks/s)"
+        )
 
 
 class TestPerformanceRegression(StressTestBase):
@@ -869,11 +903,11 @@ class TestPerformanceRegression(StressTestBase):
         """Benchmark key operations to detect performance regressions"""
         # Define performance benchmarks (in milliseconds)
         benchmarks = {
-            "store_task": 100,
-            "get_task": 50,
-            "update_task": 100,
-            "find_tasks": 200,
-            "delete_task": 50
+            "store_task": 100,  # Single task store should be < 100ms
+            "get_task": 50,  # Single task get should be < 50ms
+            "update_task": 100,  # Single task update should be < 100ms
+            "find_tasks": 200,  # Query should be < 200ms
+            "delete_task": 50,  # Single task delete should be < 50ms
         }
 
         # Setup test data
@@ -884,11 +918,15 @@ class TestPerformanceRegression(StressTestBase):
         # Test store performance
         new_task = self.create_bulk_tasks(1, "perf_new_ctx", "perf_new_task")[0]
         _, store_time = await self.measure_operation_time(stress_provider.store_task, new_task)
-        assert store_time < benchmarks["store_task"],
+        assert store_time < benchmarks["store_task"], (
+            f"Store performance regression: {store_time:.2f}ms > {benchmarks['store_task']}ms"
+        )
 
         # Test get performance
         _, get_time = await self.measure_operation_time(stress_provider.get_task, "perf_task_00050")
-        assert get_time < benchmarks["get_task"],
+        assert get_time < benchmarks["get_task"], (
+            f"Get performance regression: {get_time:.2f}ms > {benchmarks['get_task']}ms"
+        )
 
         # Test update performance
         update_message = A2AMessage(
@@ -896,24 +934,29 @@ class TestPerformanceRegression(StressTestBase):
             parts=[A2ATextPart(kind="text", text="Performance test update")],
             messageId="perf_update",
            contextId="perf_ctx_5",
-            kind="message"
+            kind="message",
         )
         _, update_time = await self.measure_operation_time(
-            stress_provider.update_task_status,
-
-
-
+            stress_provider.update_task_status, "perf_task_00025", TaskState.WORKING, update_message
+        )
+        assert update_time < benchmarks["update_task"], (
+            f"Update performance regression: {update_time:.2f}ms > {benchmarks['update_task']}ms"
         )
-        assert update_time < benchmarks["update_task"], f"Update performance regression: {update_time:.2f}ms > {benchmarks['update_task']}ms"
 
         # Test query performance
         query = A2ATaskQuery(context_id="perf_ctx_1", limit=10)
         _, query_time = await self.measure_operation_time(stress_provider.find_tasks, query)
-        assert query_time < benchmarks["find_tasks"],
+        assert query_time < benchmarks["find_tasks"], (
+            f"Query performance regression: {query_time:.2f}ms > {benchmarks['find_tasks']}ms"
+        )
 
         # Test delete performance
-        _, delete_time = await self.measure_operation_time(
-
+        _, delete_time = await self.measure_operation_time(
+            stress_provider.delete_task, "perf_task_00075"
+        )
+        assert delete_time < benchmarks["delete_task"], (
+            f"Delete performance regression: {delete_time:.2f}ms > {benchmarks['delete_task']}ms"
+        )
 
         print("Performance benchmark results:")
         print(f"  Store: {store_time:.2f}ms (limit: {benchmarks['store_task']}ms)")
@@ -946,16 +989,20 @@ class TestPerformanceRegression(StressTestBase):
             query_time = time.perf_counter() - start_time
 
             # Calculate metrics
-            storage_ops_per_sec = size / storage_time if storage_time > 0 else float(
-            query_ops_per_sec = size / query_time if query_time > 0 else float(
-
-            scaling_results.append(
-
-
-
-
+            storage_ops_per_sec = size / storage_time if storage_time > 0 else float("inf")
+            query_ops_per_sec = size / query_time if query_time > 0 else float("inf")
+
+            scaling_results.append(
+                {
+                    "size": size,
+                    "storage_ops_per_sec": storage_ops_per_sec,
+                    "query_ops_per_sec": query_ops_per_sec,
+                }
+            )
 
-            print(
+            print(
+                f"Scalability {size} tasks: storage={storage_ops_per_sec:.2f} ops/s, query={query_ops_per_sec:.2f} ops/s"
+            )
 
         # Analyze scaling characteristics
         if len(scaling_results) >= 2:
@@ -963,10 +1010,16 @@ class TestPerformanceRegression(StressTestBase):
             last_result = scaling_results[-1]
 
             # Performance shouldn't degrade too severely with scale
-            storage_degradation =
+            storage_degradation = (
+                first_result["storage_ops_per_sec"] / last_result["storage_ops_per_sec"]
+            )
             query_degradation = first_result["query_ops_per_sec"] / last_result["query_ops_per_sec"]
 
             max_allowed_degradation = 5.0  # 5x degradation is acceptable
 
-            assert storage_degradation < max_allowed_degradation,
-
+            assert storage_degradation < max_allowed_degradation, (
+                f"Storage performance degraded {storage_degradation:.2f}x"
+            )
+            assert query_degradation < max_allowed_degradation, (
+                f"Query performance degraded {query_degradation:.2f}x"
+            )
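Taken together, the hunks above are almost entirely mechanical restyling rather than behavior changes: trailing commas added, slices spaced as tasks[i : i + batch_size], long print calls wrapped, and assertion messages moved into parentheses. This is consistent with running an auto-formatter such as Ruff or Black over the test suite, although the diff itself does not name the tool. A small before/after illustration of the assertion pattern (values invented so both forms pass):

```python
stored_count = total_tasks = 1000

# Old style: condition and message on one long line.
assert stored_count == total_tasks, f"Only stored {stored_count}/{total_tasks} tasks"

# New style: the message sits in parentheses so the formatter can wrap it.
assert stored_count == total_tasks, (
    f"Only stored {stored_count}/{total_tasks} tasks"
)
```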