jaf-py 2.5.10__py3-none-any.whl → 2.5.12__py3-none-any.whl

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. jaf/__init__.py +154 -57
  2. jaf/a2a/__init__.py +42 -21
  3. jaf/a2a/agent.py +79 -126
  4. jaf/a2a/agent_card.py +87 -78
  5. jaf/a2a/client.py +30 -66
  6. jaf/a2a/examples/client_example.py +12 -12
  7. jaf/a2a/examples/integration_example.py +38 -47
  8. jaf/a2a/examples/server_example.py +56 -53
  9. jaf/a2a/memory/__init__.py +0 -4
  10. jaf/a2a/memory/cleanup.py +28 -21
  11. jaf/a2a/memory/factory.py +155 -133
  12. jaf/a2a/memory/providers/composite.py +21 -26
  13. jaf/a2a/memory/providers/in_memory.py +89 -83
  14. jaf/a2a/memory/providers/postgres.py +117 -115
  15. jaf/a2a/memory/providers/redis.py +128 -121
  16. jaf/a2a/memory/serialization.py +77 -87
  17. jaf/a2a/memory/tests/run_comprehensive_tests.py +112 -83
  18. jaf/a2a/memory/tests/test_cleanup.py +211 -94
  19. jaf/a2a/memory/tests/test_serialization.py +73 -68
  20. jaf/a2a/memory/tests/test_stress_concurrency.py +186 -133
  21. jaf/a2a/memory/tests/test_task_lifecycle.py +138 -120
  22. jaf/a2a/memory/types.py +91 -53
  23. jaf/a2a/protocol.py +95 -125
  24. jaf/a2a/server.py +90 -118
  25. jaf/a2a/standalone_client.py +30 -43
  26. jaf/a2a/tests/__init__.py +16 -33
  27. jaf/a2a/tests/run_tests.py +17 -53
  28. jaf/a2a/tests/test_agent.py +40 -140
  29. jaf/a2a/tests/test_client.py +54 -117
  30. jaf/a2a/tests/test_integration.py +28 -82
  31. jaf/a2a/tests/test_protocol.py +54 -139
  32. jaf/a2a/tests/test_types.py +50 -136
  33. jaf/a2a/types.py +58 -34
  34. jaf/cli.py +21 -41
  35. jaf/core/__init__.py +7 -1
  36. jaf/core/agent_tool.py +93 -72
  37. jaf/core/analytics.py +257 -207
  38. jaf/core/checkpoint.py +223 -0
  39. jaf/core/composition.py +249 -235
  40. jaf/core/engine.py +817 -519
  41. jaf/core/errors.py +55 -42
  42. jaf/core/guardrails.py +276 -202
  43. jaf/core/handoff.py +47 -31
  44. jaf/core/parallel_agents.py +69 -75
  45. jaf/core/performance.py +75 -73
  46. jaf/core/proxy.py +43 -44
  47. jaf/core/proxy_helpers.py +24 -27
  48. jaf/core/regeneration.py +220 -129
  49. jaf/core/state.py +68 -66
  50. jaf/core/streaming.py +115 -108
  51. jaf/core/tool_results.py +111 -101
  52. jaf/core/tools.py +114 -116
  53. jaf/core/tracing.py +310 -210
  54. jaf/core/types.py +403 -151
  55. jaf/core/workflows.py +209 -168
  56. jaf/exceptions.py +46 -38
  57. jaf/memory/__init__.py +1 -6
  58. jaf/memory/approval_storage.py +54 -77
  59. jaf/memory/factory.py +4 -4
  60. jaf/memory/providers/in_memory.py +216 -180
  61. jaf/memory/providers/postgres.py +216 -146
  62. jaf/memory/providers/redis.py +173 -116
  63. jaf/memory/types.py +70 -51
  64. jaf/memory/utils.py +36 -34
  65. jaf/plugins/__init__.py +12 -12
  66. jaf/plugins/base.py +105 -96
  67. jaf/policies/__init__.py +0 -1
  68. jaf/policies/handoff.py +37 -46
  69. jaf/policies/validation.py +76 -52
  70. jaf/providers/__init__.py +6 -3
  71. jaf/providers/mcp.py +97 -51
  72. jaf/providers/model.py +475 -283
  73. jaf/server/__init__.py +1 -1
  74. jaf/server/main.py +7 -11
  75. jaf/server/server.py +514 -359
  76. jaf/server/types.py +208 -52
  77. jaf/utils/__init__.py +17 -18
  78. jaf/utils/attachments.py +111 -116
  79. jaf/utils/document_processor.py +175 -174
  80. jaf/visualization/__init__.py +1 -1
  81. jaf/visualization/example.py +111 -110
  82. jaf/visualization/functional_core.py +46 -71
  83. jaf/visualization/graphviz.py +154 -189
  84. jaf/visualization/imperative_shell.py +7 -16
  85. jaf/visualization/types.py +8 -4
  86. {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/METADATA +2 -2
  87. jaf_py-2.5.12.dist-info/RECORD +97 -0
  88. jaf_py-2.5.10.dist-info/RECORD +0 -96
  89. {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/WHEEL +0 -0
  90. {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/entry_points.txt +0 -0
  91. {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/licenses/LICENSE +0 -0
  92. {jaf_py-2.5.10.dist-info → jaf_py-2.5.12.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ A2A Memory Stress & Concurrency Tests - Phase 3: Advanced Scenarios
 
  Comprehensive stress testing for A2A task memory system including:
  - High concurrency scenarios
- - Large-scale data operations
+ - Large-scale data operations
  - Performance under load
  - Race condition detection
  - Resource exhaustion handling
@@ -46,14 +46,14 @@ class StressTestBase:
  count: int,
  context_id: str = "stress_ctx",
  base_id: str = "stress_task",
- distribute_contexts: bool = True
+ distribute_contexts: bool = True,
  ) -> List[A2ATask]:
  """Create multiple tasks for bulk operations"""
  tasks = []
  for i in range(count):
  # Distribute across contexts by default for stress testing, but allow single context
  actual_context_id = f"{context_id}_{i % 10}" if distribute_contexts else context_id
-
+
  task = A2ATask(
  id=f"{base_id}_{i:05d}",
  contextId=actual_context_id,
@@ -65,24 +65,21 @@ class StressTestBase:
  parts=[A2ATextPart(kind="text", text=f"Bulk task number {i}")],
  messageId=f"bulk_msg_{i}",
  contextId=actual_context_id,
- kind="message"
+ kind="message",
  ),
- timestamp=datetime.now(timezone.utc).isoformat()
+ timestamp=datetime.now(timezone.utc).isoformat(),
  ),
  metadata={
  "created_at": datetime.now(timezone.utc).isoformat(),
  "batch_id": "stress_test",
- "sequence": i
- }
+ "sequence": i,
+ },
  )
  tasks.append(task)
  return tasks
 
  def create_large_task(
- self,
- task_id: str,
- context_id: str,
- size_multiplier: int = 100
+ self, task_id: str, context_id: str, size_multiplier: int = 100
  ) -> A2ATask:
  """Create a task with large data payload"""
  # Create large text content
@@ -92,7 +89,7 @@ class StressTestBase:
  large_data = {
  "data": list(range(size_multiplier * 10)),
  "metadata": {f"key_{i}": f"value_{i}" * 50 for i in range(size_multiplier)},
- "content": large_text
+ "content": large_text,
  }
 
  return A2ATask(
@@ -105,13 +102,13 @@ class StressTestBase:
  role="agent",
  parts=[
  A2ATextPart(kind="text", text=large_text),
- A2ADataPart(kind="data", data=large_data)
+ A2ADataPart(kind="data", data=large_data),
  ],
  messageId=f"large_msg_{task_id}",
  contextId=context_id,
- kind="message"
+ kind="message",
  ),
- timestamp=datetime.now(timezone.utc).isoformat()
+ timestamp=datetime.now(timezone.utc).isoformat(),
  ),
  history=[
  A2AMessage(
@@ -119,7 +116,7 @@ class StressTestBase:
  parts=[A2ATextPart(kind="text", text="Process this large dataset")],
  messageId=f"init_{task_id}",
  contextId=context_id,
- kind="message"
+ kind="message",
  )
  ],
  artifacts=[
@@ -129,10 +126,10 @@ class StressTestBase:
  description="An artifact containing large amounts of data",
  parts=[
  A2ATextPart(kind="text", text=large_text),
- A2ADataPart(kind="data", data=large_data)
- ]
+ A2ADataPart(kind="data", data=large_data),
+ ],
  )
- ]
+ ],
  )
 
  async def measure_operation_time(self, operation_func, *args, **kwargs) -> Tuple[Any, float]:
@@ -148,7 +145,7 @@ async def stress_provider() -> A2ATaskProvider:
  """Create provider for stress testing with higher limits"""
  config = A2AInMemoryTaskConfig(
  max_tasks=50000,  # Higher limits for stress testing
- max_tasks_per_context=10000
+ max_tasks_per_context=10000,
  )
  provider = create_a2a_in_memory_task_provider(config)
  yield provider
@@ -179,14 +176,18 @@ class TestConcurrencyTorture(StressTestBase):
 
  # Verify all operations succeeded
  success_count = sum(1 for result in results if result.data is None)
- assert success_count == concurrent_tasks, f"Expected all {concurrent_tasks} stores to succeed, got {success_count}"
+ assert success_count == concurrent_tasks, (
+ f"Expected all {concurrent_tasks} stores to succeed, got {success_count}"
+ )
 
  # Performance check - should complete within reasonable time
  total_time = end_time - start_time
  avg_time_per_op = total_time / concurrent_tasks
  assert total_time < 30.0, f"Concurrent writes took too long: {total_time:.2f}s"
 
- print(f"Concurrent writes: {concurrent_tasks} tasks in {total_time:.2f}s ({avg_time_per_op*1000:.2f}ms avg)")
+ print(
+ f"Concurrent writes: {concurrent_tasks} tasks in {total_time:.2f}s ({avg_time_per_op * 1000:.2f}ms avg)"
+ )
 
  # Verify data integrity
  for task in tasks[:10]:  # Sample check
@@ -228,8 +229,8 @@ class TestConcurrencyTorture(StressTestBase):
  parts=[A2ATextPart(kind="text", text=f"Updated task {i}")],
  messageId=f"update_{i}",
  contextId=f"rw_ctx_{i % 10}",
- kind="message"
- )
+ kind="message",
+ ),
  )
  )
 
@@ -240,9 +241,9 @@ class TestConcurrencyTorture(StressTestBase):
  end_time = time.perf_counter()
 
  # Analyze results
- read_results = results[:len(read_operations)]
- write_results = results[len(read_operations):len(read_operations) + len(write_operations)]
- update_results = results[len(read_operations) + len(write_operations):]
+ read_results = results[: len(read_operations)]
+ write_results = results[len(read_operations) : len(read_operations) + len(write_operations)]
+ update_results = results[len(read_operations) + len(write_operations) :]
 
  # Verify read operations
  successful_reads = sum(1 for result in read_results if result.data is not None)
@@ -270,9 +271,8 @@ class TestConcurrencyTorture(StressTestBase):
  contextId=context_id,
  kind="task",
  status=A2ATaskStatus(
- state=TaskState.SUBMITTED,
- timestamp=datetime.now(timezone.utc).isoformat()
- )
+ state=TaskState.SUBMITTED, timestamp=datetime.now(timezone.utc).isoformat()
+ ),
  )
  await stress_provider.store_task(initial_task)
 
@@ -289,9 +289,9 @@ class TestConcurrencyTorture(StressTestBase):
  parts=[A2ATextPart(kind="text", text=f"Concurrent update {i}")],
  messageId=f"concurrent_{i}",
  contextId=context_id,
- kind="message"
+ kind="message",
  ),
- timestamp=datetime.now(timezone.utc).isoformat()
+ timestamp=datetime.now(timezone.utc).isoformat(),
  )
  update_operations.append(operation)
 
@@ -300,7 +300,9 @@ class TestConcurrencyTorture(StressTestBase):
 
  # All updates should succeed
  successful_updates = sum(1 for result in results if result.data is None)
- assert successful_updates == update_count, f"Expected {update_count} successful updates, got {successful_updates}"
+ assert successful_updates == update_count, (
+ f"Expected {update_count} successful updates, got {successful_updates}"
+ )
 
  # Verify final state consistency
  final_result = await stress_provider.get_task(task_id)
@@ -334,12 +336,16 @@ class TestConcurrencyTorture(StressTestBase):
  state=TaskState.SUBMITTED,
  message=A2AMessage(
  role="user",
- parts=[A2ATextPart(kind="text", text=f"Task {task_idx} in context {ctx_idx}")],
+ parts=[
+ A2ATextPart(
+ kind="text", text=f"Task {task_idx} in context {ctx_idx}"
+ )
+ ],
  messageId=f"iso_msg_{ctx_idx}_{task_idx}",
  contextId=context_id,
- kind="message"
- )
- )
+ kind="message",
+ ),
+ ),
  )
  expected_context_tasks[context_id].append(task.id)
  all_store_operations.append(stress_provider.store_task(task))
@@ -350,7 +356,9 @@ class TestConcurrencyTorture(StressTestBase):
  # Verify all stores succeeded
  successful_stores = sum(1 for result in results if result.data is None)
  expected_total = contexts_count * tasks_per_context
- assert successful_stores == expected_total, f"Expected {expected_total} stores, got {successful_stores}"
+ assert successful_stores == expected_total, (
+ f"Expected {expected_total} stores, got {successful_stores}"
+ )
 
  # Verify context isolation
  for context_id, expected_task_ids in expected_context_tasks.items():
@@ -360,8 +368,12 @@ class TestConcurrencyTorture(StressTestBase):
  actual_task_ids = {task.id for task in context_result.data}
  expected_task_ids_set = set(expected_task_ids)
 
- assert actual_task_ids == expected_task_ids_set, f"Context {context_id} has incorrect tasks"
- assert len(context_result.data) == tasks_per_context, f"Context {context_id} should have {tasks_per_context} tasks"
+ assert actual_task_ids == expected_task_ids_set, (
+ f"Context {context_id} has incorrect tasks"
+ )
+ assert len(context_result.data) == tasks_per_context, (
+ f"Context {context_id} should have {tasks_per_context} tasks"
+ )
 
 
  class TestLargeScaleOperations(StressTestBase):
@@ -381,23 +393,23 @@ class TestLargeScaleOperations(StressTestBase):
  # Store tasks in batches to avoid overwhelming the system
  batch_size = 50
  for i in range(0, len(tasks), batch_size):
- batch = tasks[i:i + batch_size]
+ batch = tasks[i : i + batch_size]
  batch_operations = [stress_provider.store_task(task) for task in batch]
  batch_results = await asyncio.gather(*batch_operations)
 
  # Verify batch success
  batch_success = sum(1 for result in batch_results if result.data is None)
- assert batch_success == len(batch), f"Batch {i//batch_size} failed: {batch_success}/{len(batch)} succeeded"
+ assert batch_success == len(batch), (
+ f"Batch {i // batch_size} failed: {batch_success}/{len(batch)} succeeded"
+ )
 
  end_time = time.perf_counter()
  total_time = end_time - start_time
  ops_per_second = count / total_time
 
- performance_results.append({
- "count": count,
- "time": total_time,
- "ops_per_second": ops_per_second
- })
+ performance_results.append(
+ {"count": count, "time": total_time, "ops_per_second": ops_per_second}
+ )
 
  print(f"Bulk storage: {count} tasks in {total_time:.2f}s ({ops_per_second:.2f} ops/s)")
 
@@ -411,7 +423,9 @@ class TestLargeScaleOperations(StressTestBase):
  degradation_ratio = smallest_ops / largest_ops
 
  # Performance shouldn't degrade by more than 10x
- assert degradation_ratio < 10, f"Performance degraded {degradation_ratio:.2f}x from {smallest_ops:.2f} to {largest_ops:.2f} ops/s"
+ assert degradation_ratio < 10, (
+ f"Performance degraded {degradation_ratio:.2f}x from {smallest_ops:.2f} to {largest_ops:.2f} ops/s"
+ )
 
  async def test_large_data_payload_handling(self, stress_provider):
  """Test handling of tasks with large data payloads"""
@@ -428,13 +442,17 @@ class TestLargeScaleOperations(StressTestBase):
  store_result, store_time = await self.measure_operation_time(
  stress_provider.store_task, large_task
  )
- assert store_result.data is None, f"Large task storage should succeed for size {multiplier}"
+ assert store_result.data is None, (
+ f"Large task storage should succeed for size {multiplier}"
+ )
 
  # Measure retrieval time
  get_result, get_time = await self.measure_operation_time(
  stress_provider.get_task, task_id
  )
- assert get_result.data is not None, f"Large task retrieval should succeed for size {multiplier}"
+ assert get_result.data is not None, (
+ f"Large task retrieval should succeed for size {multiplier}"
+ )
 
  # Verify data integrity
  retrieved_task = get_result.data
@@ -443,7 +461,9 @@ class TestLargeScaleOperations(StressTestBase):
  assert len(retrieved_task.artifacts or []) == 1, "Artifacts should be preserved"
 
  # Performance checks
- assert store_time < 5000, f"Store time too slow for size {multiplier}: {store_time:.2f}ms"
+ assert store_time < 5000, (
+ f"Store time too slow for size {multiplier}: {store_time:.2f}ms"
+ )
  assert get_time < 5000, f"Get time too slow for size {multiplier}: {get_time:.2f}ms"
 
  print(f"Large payload {multiplier}x: store={store_time:.2f}ms, get={get_time:.2f}ms")
@@ -455,32 +475,30 @@ class TestLargeScaleOperations(StressTestBase):
  context_id = "pagination_perf_ctx"
 
  # Create large dataset - use single context for pagination test
- tasks = self.create_bulk_tasks(total_tasks, context_id, "page_task", distribute_contexts=False)
+ tasks = self.create_bulk_tasks(
+ total_tasks, context_id, "page_task", distribute_contexts=False
+ )
 
  # Store in batches and verify success
  batch_size = 50
  stored_count = 0
  for i in range(0, len(tasks), batch_size):
- batch = tasks[i:i + batch_size]
+ batch = tasks[i : i + batch_size]
  batch_operations = [stress_provider.store_task(task) for task in batch]
  batch_results = await asyncio.gather(*batch_operations)
-
+
  # Verify batch success
  for result in batch_results:
- if hasattr(result, 'data') and result.data is None:
+ if hasattr(result, "data") and result.data is None:
  stored_count += 1
-
+
  assert stored_count == total_tasks, f"Only stored {stored_count}/{total_tasks} tasks"
 
  # Test pagination performance at different offsets
  test_offsets = [0, 250, 500, 750, 900]  # Beginning, middle, end
 
  for offset in test_offsets:
- query = A2ATaskQuery(
- context_id=context_id,
- limit=page_size,
- offset=offset
- )
+ query = A2ATaskQuery(context_id=context_id, limit=page_size, offset=offset)
 
  # Measure query time
  result, query_time = await self.measure_operation_time(
@@ -492,7 +510,9 @@ class TestLargeScaleOperations(StressTestBase):
  # Verify correct page size (except possibly last page)
  page_tasks = result.data
  expected_size = min(page_size, total_tasks - offset)
- assert len(page_tasks) == expected_size, f"Page at offset {offset} should have {expected_size} tasks"
+ assert len(page_tasks) == expected_size, (
+ f"Page at offset {offset} should have {expected_size} tasks"
+ )
 
  # Performance check - deep pagination shouldn't be too slow
  assert query_time < 1000, f"Query at offset {offset} too slow: {query_time:.2f}ms"
@@ -517,19 +537,19 @@ class TestLargeScaleOperations(StressTestBase):
  kind="task",
  status=A2ATaskStatus(
  state=states[state_idx],
- timestamp=(datetime.now(timezone.utc) - timedelta(hours=i % 24)).isoformat()
+ timestamp=(datetime.now(timezone.utc) - timedelta(hours=i % 24)).isoformat(),
  ),
  metadata={
  "priority": "high" if i % 3 == 0 else "normal",
- "category": f"cat_{i % 5}"
- }
+ "category": f"cat_{i % 5}",
+ },
  )
  all_tasks.append(task)
 
  # Store all tasks
  batch_size = 100
  for i in range(0, len(all_tasks), batch_size):
- batch = all_tasks[i:i + batch_size]
+ batch = all_tasks[i : i + batch_size]
  batch_operations = [stress_provider.store_task(task) for task in batch]
  await asyncio.gather(*batch_operations)
 
@@ -537,22 +557,16 @@ class TestLargeScaleOperations(StressTestBase):
  complex_queries = [
  # Query by state
  A2ATaskQuery(state=TaskState.WORKING, limit=100),
-
  # Query by context
  A2ATaskQuery(context_id="complex_ctx_005", limit=100),
-
  # Query by time range
- A2ATaskQuery(
- since=datetime.now(timezone.utc) - timedelta(hours=12),
- limit=100
- ),
-
+ A2ATaskQuery(since=datetime.now(timezone.utc) - timedelta(hours=12), limit=100),
  # Query with multiple filters
  A2ATaskQuery(
  state=TaskState.COMPLETED,
  since=datetime.now(timezone.utc) - timedelta(hours=6),
- limit=50
- )
+ limit=50,
+ ),
  ]
 
  for i, query in enumerate(complex_queries):
@@ -581,7 +595,7 @@ class TestResourceExhaustion(StressTestBase):
  large_task = self.create_large_task(
  f"memory_task_{i:03d}",
  f"memory_ctx_{i % 10}",
- size_multiplier=200  # Large payloads
+ size_multiplier=200,  # Large payloads
  )
  large_tasks.append(large_task)
 
@@ -614,7 +628,7 @@ class TestResourceExhaustion(StressTestBase):
  # Create provider with low limits for testing
  limited_config = A2AInMemoryTaskConfig(
  max_tasks=100,  # Low limit for testing
- max_tasks_per_context=20
+ max_tasks_per_context=20,
  )
  limited_provider = create_a2a_in_memory_task_provider(limited_config)
 
@@ -627,16 +641,22 @@ class TestResourceExhaustion(StressTestBase):
 
  for task in tasks:
  result = await limited_provider.store_task(task)
- if hasattr(result, 'data') and result.data is None:
+ if hasattr(result, "data") and result.data is None:
  stored_count += 1
- elif hasattr(result, 'error'):
+ elif hasattr(result, "error"):
  rejected_count += 1
 
  # Should get appropriate error message about storage limits
  error_msg = str(result.error.message).lower()
  # Check for storage-related error messages
- assert ("limit" in error_msg or "full" in error_msg or "maximum" in error_msg or
- "storage" in error_msg or "exceeded" in error_msg or "failed to store" in error_msg)
+ assert (
+ "limit" in error_msg
+ or "full" in error_msg
+ or "maximum" in error_msg
+ or "storage" in error_msg
+ or "exceeded" in error_msg
+ or "failed to store" in error_msg
+ )
  else:
  rejected_count += 1
 
@@ -653,7 +673,7 @@ class TestResourceExhaustion(StressTestBase):
 
  for task in context_tasks:
  result = await limited_provider.store_task(task)
- if hasattr(result, 'data') and result.data is None:
+ if hasattr(result, "data") and result.data is None:
  ctx_stored += 1
  else:
  ctx_rejected += 1
@@ -677,7 +697,7 @@ class TestResourceExhaustion(StressTestBase):
  id=f"exhaust_task_{i}",
  contextId=f"exhaust_ctx_{i % 10}",
  kind="task",
- status=A2ATaskStatus(state=TaskState.SUBMITTED)
+ status=A2ATaskStatus(state=TaskState.SUBMITTED),
  )
  operations.append(stress_provider.store_task(task))
  elif i % 3 == 1:
@@ -706,7 +726,9 @@ class TestResourceExhaustion(StressTestBase):
  print(f"Connection exhaustion test: {success_count} succeeded, {error_count} failed")
 
  # Should handle at least some operations successfully
- assert success_count > concurrent_ops * 0.1, "Should handle at least 10% of operations successfully"
+ assert success_count > concurrent_ops * 0.1, (
+ "Should handle at least 10% of operations successfully"
+ )
 
  # Provider should recover and be healthy
  await asyncio.sleep(1)  # Allow recovery time
@@ -735,7 +757,7 @@ class TestMemoryLeakDetection(StressTestBase):
  id=f"leak_task_{cycle}",
  contextId=context_id,
  kind="task",
- status=A2ATaskStatus(state=TaskState.SUBMITTED)
+ status=A2ATaskStatus(state=TaskState.SUBMITTED),
  )
 
  # Store task
@@ -750,23 +772,25 @@ class TestMemoryLeakDetection(StressTestBase):
  parts=[A2ATextPart(kind="text", text="Working on it")],
  messageId=f"work_{cycle}",
  contextId=context_id,
- kind="message"
- )
+ kind="message",
+ ),
  )
 
  # Complete task
- completed_task = task.model_copy(update={
- "status": A2ATaskStatus(
- state=TaskState.COMPLETED,
- message=A2AMessage(
- role="agent",
- parts=[A2ATextPart(kind="text", text="Completed")],
- messageId=f"done_{cycle}",
- contextId=context_id,
- kind="message"
+ completed_task = task.model_copy(
+ update={
+ "status": A2ATaskStatus(
+ state=TaskState.COMPLETED,
+ message=A2AMessage(
+ role="agent",
+ parts=[A2ATextPart(kind="text", text="Completed")],
+ messageId=f"done_{cycle}",
+ contextId=context_id,
+ kind="message",
+ ),
  )
- )
- })
+ }
+ )
  await stress_provider.update_task(completed_task)
 
  # Delete task
@@ -784,12 +808,16 @@ class TestMemoryLeakDetection(StressTestBase):
  object_growth = final_objects - initial_objects
  growth_per_cycle = object_growth / cycles if cycles > 0 else 0
 
- print(f"Memory leak test: {object_growth} objects growth over {cycles} cycles ({growth_per_cycle:.2f} per cycle)")
+ print(
+ f"Memory leak test: {object_growth} objects growth over {cycles} cycles ({growth_per_cycle:.2f} per cycle)"
+ )
 
  # Should not have significant memory growth
  # Allow some growth but flag if it's excessive
  max_allowed_growth_per_cycle = 100  # objects per cycle
- assert growth_per_cycle < max_allowed_growth_per_cycle, f"Potential memory leak: {growth_per_cycle:.2f} objects per cycle"
+ assert growth_per_cycle < max_allowed_growth_per_cycle, (
+ f"Potential memory leak: {growth_per_cycle:.2f} objects per cycle"
+ )
 
  async def test_provider_cleanup_on_close(self, stress_provider):
  """Test that provider properly cleans up resources on close"""
@@ -805,7 +833,7 @@ class TestMemoryLeakDetection(StressTestBase):
  # (This is implementation-specific and might need adjustment)
  try:
  # Access provider internals if possible
- if hasattr(stress_provider, '_state'):
+ if hasattr(stress_provider, "_state"):
  weak_refs.append(weakref.ref(stress_provider._state))
  except Exception:
  pass  # Provider might not expose internals
@@ -829,12 +857,14 @@ class TestMemoryLeakDetection(StressTestBase):
  """Test cleanup efficiency with large datasets"""
  # Create large dataset - use single context for cleanup test
  large_dataset_size = 1000
- tasks = self.create_bulk_tasks(large_dataset_size, "large_cleanup_ctx", "large_cleanup_task", distribute_contexts=False)
+ tasks = self.create_bulk_tasks(
+ large_dataset_size, "large_cleanup_ctx", "large_cleanup_task", distribute_contexts=False
+ )
 
  # Store all tasks
  batch_size = 50
  for i in range(0, len(tasks), batch_size):
- batch = tasks[i:i + batch_size]
+ batch = tasks[i : i + batch_size]
  batch_operations = [stress_provider.store_task(task) for task in batch]
  await asyncio.gather(*batch_operations)
 
@@ -848,7 +878,9 @@ class TestMemoryLeakDetection(StressTestBase):
  cleanup_time = end_time - start_time
 
  # Verify cleanup was successful
- assert delete_result.data == large_dataset_size, f"Should delete all {large_dataset_size} tasks"
+ assert delete_result.data == large_dataset_size, (
+ f"Should delete all {large_dataset_size} tasks"
+ )
 
  # Verify context is empty
  remaining_result = await stress_provider.get_tasks_by_context("large_cleanup_ctx")
@@ -859,7 +891,9 @@ class TestMemoryLeakDetection(StressTestBase):
  cleanup_rate = large_dataset_size / cleanup_time
  assert cleanup_rate > 100, f"Cleanup too slow: {cleanup_rate:.2f} tasks/second"
 
- print(f"Large dataset cleanup: {large_dataset_size} tasks in {cleanup_time:.2f}s ({cleanup_rate:.2f} tasks/s)")
+ print(
+ f"Large dataset cleanup: {large_dataset_size} tasks in {cleanup_time:.2f}s ({cleanup_rate:.2f} tasks/s)"
+ )
 
 
  class TestPerformanceRegression(StressTestBase):
@@ -869,11 +903,11 @@ class TestPerformanceRegression(StressTestBase):
  """Benchmark key operations to detect performance regressions"""
  # Define performance benchmarks (in milliseconds)
  benchmarks = {
- "store_task": 100,   # Single task store should be < 100ms
- "get_task": 50,      # Single task get should be < 50ms
- "update_task": 100,  # Single task update should be < 100ms
- "find_tasks": 200,   # Query should be < 200ms
- "delete_task": 50    # Single task delete should be < 50ms
+ "store_task": 100,  # Single task store should be < 100ms
+ "get_task": 50,  # Single task get should be < 50ms
+ "update_task": 100,  # Single task update should be < 100ms
+ "find_tasks": 200,  # Query should be < 200ms
+ "delete_task": 50,  # Single task delete should be < 50ms
  }
 
  # Setup test data
@@ -884,11 +918,15 @@ class TestPerformanceRegression(StressTestBase):
  # Test store performance
  new_task = self.create_bulk_tasks(1, "perf_new_ctx", "perf_new_task")[0]
  _, store_time = await self.measure_operation_time(stress_provider.store_task, new_task)
- assert store_time < benchmarks["store_task"], f"Store performance regression: {store_time:.2f}ms > {benchmarks['store_task']}ms"
+ assert store_time < benchmarks["store_task"], (
+ f"Store performance regression: {store_time:.2f}ms > {benchmarks['store_task']}ms"
+ )
 
  # Test get performance
  _, get_time = await self.measure_operation_time(stress_provider.get_task, "perf_task_00050")
- assert get_time < benchmarks["get_task"], f"Get performance regression: {get_time:.2f}ms > {benchmarks['get_task']}ms"
+ assert get_time < benchmarks["get_task"], (
+ f"Get performance regression: {get_time:.2f}ms > {benchmarks['get_task']}ms"
+ )
 
  # Test update performance
  update_message = A2AMessage(
@@ -896,24 +934,29 @@ class TestPerformanceRegression(StressTestBase):
  parts=[A2ATextPart(kind="text", text="Performance test update")],
  messageId="perf_update",
  contextId="perf_ctx_5",
- kind="message"
+ kind="message",
  )
  _, update_time = await self.measure_operation_time(
- stress_provider.update_task_status,
- "perf_task_00025",
- TaskState.WORKING,
- update_message
+ stress_provider.update_task_status, "perf_task_00025", TaskState.WORKING, update_message
+ )
+ assert update_time < benchmarks["update_task"], (
+ f"Update performance regression: {update_time:.2f}ms > {benchmarks['update_task']}ms"
  )
- assert update_time < benchmarks["update_task"], f"Update performance regression: {update_time:.2f}ms > {benchmarks['update_task']}ms"
 
  # Test query performance
  query = A2ATaskQuery(context_id="perf_ctx_1", limit=10)
  _, query_time = await self.measure_operation_time(stress_provider.find_tasks, query)
- assert query_time < benchmarks["find_tasks"], f"Query performance regression: {query_time:.2f}ms > {benchmarks['find_tasks']}ms"
+ assert query_time < benchmarks["find_tasks"], (
+ f"Query performance regression: {query_time:.2f}ms > {benchmarks['find_tasks']}ms"
+ )
 
  # Test delete performance
- _, delete_time = await self.measure_operation_time(stress_provider.delete_task, "perf_task_00075")
- assert delete_time < benchmarks["delete_task"], f"Delete performance regression: {delete_time:.2f}ms > {benchmarks['delete_task']}ms"
+ _, delete_time = await self.measure_operation_time(
+ stress_provider.delete_task, "perf_task_00075"
+ )
+ assert delete_time < benchmarks["delete_task"], (
+ f"Delete performance regression: {delete_time:.2f}ms > {benchmarks['delete_task']}ms"
+ )
 
  print("Performance benchmark results:")
  print(f" Store: {store_time:.2f}ms (limit: {benchmarks['store_task']}ms)")
@@ -946,16 +989,20 @@ class TestPerformanceRegression(StressTestBase):
  query_time = time.perf_counter() - start_time
 
  # Calculate metrics
- storage_ops_per_sec = size / storage_time if storage_time > 0 else float('inf')
- query_ops_per_sec = size / query_time if query_time > 0 else float('inf')
-
- scaling_results.append({
- "size": size,
- "storage_ops_per_sec": storage_ops_per_sec,
- "query_ops_per_sec": query_ops_per_sec
- })
+ storage_ops_per_sec = size / storage_time if storage_time > 0 else float("inf")
+ query_ops_per_sec = size / query_time if query_time > 0 else float("inf")
+
+ scaling_results.append(
+ {
+ "size": size,
+ "storage_ops_per_sec": storage_ops_per_sec,
+ "query_ops_per_sec": query_ops_per_sec,
+ }
+ )
 
- print(f"Scalability {size} tasks: storage={storage_ops_per_sec:.2f} ops/s, query={query_ops_per_sec:.2f} ops/s")
+ print(
+ f"Scalability {size} tasks: storage={storage_ops_per_sec:.2f} ops/s, query={query_ops_per_sec:.2f} ops/s"
+ )
 
  # Analyze scaling characteristics
  if len(scaling_results) >= 2:
@@ -963,10 +1010,16 @@ class TestPerformanceRegression(StressTestBase):
  last_result = scaling_results[-1]
 
  # Performance shouldn't degrade too severely with scale
- storage_degradation = first_result["storage_ops_per_sec"] / last_result["storage_ops_per_sec"]
+ storage_degradation = (
+ first_result["storage_ops_per_sec"] / last_result["storage_ops_per_sec"]
+ )
  query_degradation = first_result["query_ops_per_sec"] / last_result["query_ops_per_sec"]
 
  max_allowed_degradation = 5.0  # 5x degradation is acceptable
 
- assert storage_degradation < max_allowed_degradation, f"Storage performance degraded {storage_degradation:.2f}x"
- assert query_degradation < max_allowed_degradation, f"Query performance degraded {query_degradation:.2f}x"
+ assert storage_degradation < max_allowed_degradation, (
+ f"Storage performance degraded {storage_degradation:.2f}x"
+ )
+ assert query_degradation < max_allowed_degradation, (
+ f"Query performance degraded {query_degradation:.2f}x"
+ )