attune-ai 2.1.5-py3-none-any.whl → 2.2.1-py3-none-any.whl
This diff shows the content changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- attune/cli/__init__.py +3 -59
- attune/cli/commands/batch.py +4 -12
- attune/cli/commands/cache.py +8 -16
- attune/cli/commands/provider.py +17 -0
- attune/cli/commands/routing.py +3 -1
- attune/cli/commands/setup.py +122 -0
- attune/cli/commands/tier.py +1 -3
- attune/cli/commands/workflow.py +31 -0
- attune/cli/parsers/cache.py +1 -0
- attune/cli/parsers/help.py +1 -3
- attune/cli/parsers/provider.py +7 -0
- attune/cli/parsers/routing.py +1 -3
- attune/cli/parsers/setup.py +7 -0
- attune/cli/parsers/status.py +1 -3
- attune/cli/parsers/tier.py +1 -3
- attune/cli_minimal.py +9 -3
- attune/cli_router.py +9 -7
- attune/cli_unified.py +3 -0
- attune/dashboard/app.py +3 -1
- attune/dashboard/simple_server.py +3 -1
- attune/dashboard/standalone_server.py +7 -3
- attune/mcp/server.py +54 -102
- attune/memory/long_term.py +0 -2
- attune/memory/short_term/__init__.py +84 -0
- attune/memory/short_term/base.py +465 -0
- attune/memory/short_term/batch.py +219 -0
- attune/memory/short_term/caching.py +227 -0
- attune/memory/short_term/conflicts.py +265 -0
- attune/memory/short_term/cross_session.py +122 -0
- attune/memory/short_term/facade.py +653 -0
- attune/memory/short_term/pagination.py +207 -0
- attune/memory/short_term/patterns.py +271 -0
- attune/memory/short_term/pubsub.py +286 -0
- attune/memory/short_term/queues.py +244 -0
- attune/memory/short_term/security.py +300 -0
- attune/memory/short_term/sessions.py +250 -0
- attune/memory/short_term/streams.py +242 -0
- attune/memory/short_term/timelines.py +234 -0
- attune/memory/short_term/transactions.py +184 -0
- attune/memory/short_term/working.py +252 -0
- attune/meta_workflows/cli_commands/__init__.py +3 -0
- attune/meta_workflows/cli_commands/agent_commands.py +0 -4
- attune/meta_workflows/cli_commands/analytics_commands.py +0 -6
- attune/meta_workflows/cli_commands/config_commands.py +0 -5
- attune/meta_workflows/cli_commands/memory_commands.py +0 -5
- attune/meta_workflows/cli_commands/template_commands.py +0 -5
- attune/meta_workflows/cli_commands/workflow_commands.py +0 -6
- attune/meta_workflows/plan_generator.py +2 -4
- attune/models/adaptive_routing.py +4 -8
- attune/models/auth_cli.py +3 -9
- attune/models/auth_strategy.py +2 -4
- attune/models/telemetry/analytics.py +0 -2
- attune/models/telemetry/backend.py +0 -3
- attune/models/telemetry/storage.py +0 -2
- attune/monitoring/alerts.py +6 -10
- attune/orchestration/_strategies/__init__.py +156 -0
- attune/orchestration/_strategies/base.py +227 -0
- attune/orchestration/_strategies/conditional_strategies.py +365 -0
- attune/orchestration/_strategies/conditions.py +369 -0
- attune/orchestration/_strategies/core_strategies.py +479 -0
- attune/orchestration/_strategies/data_classes.py +64 -0
- attune/orchestration/_strategies/nesting.py +233 -0
- attune/orchestration/execution_strategies.py +58 -1567
- attune/orchestration/meta_orchestrator.py +1 -3
- attune/project_index/scanner.py +1 -3
- attune/project_index/scanner_parallel.py +7 -5
- attune/socratic/storage.py +2 -4
- attune/socratic_router.py +1 -3
- attune/telemetry/agent_coordination.py +9 -3
- attune/telemetry/agent_tracking.py +16 -3
- attune/telemetry/approval_gates.py +22 -5
- attune/telemetry/cli.py +1 -3
- attune/telemetry/commands/dashboard_commands.py +24 -8
- attune/telemetry/event_streaming.py +8 -2
- attune/telemetry/feedback_loop.py +10 -2
- attune/tools.py +2 -1
- attune/workflow_commands.py +1 -3
- attune/workflow_patterns/structural.py +4 -8
- attune/workflows/__init__.py +54 -10
- attune/workflows/autonomous_test_gen.py +158 -102
- attune/workflows/base.py +48 -672
- attune/workflows/batch_processing.py +1 -3
- attune/workflows/compat.py +156 -0
- attune/workflows/cost_mixin.py +141 -0
- attune/workflows/data_classes.py +92 -0
- attune/workflows/document_gen/workflow.py +11 -14
- attune/workflows/history.py +16 -9
- attune/workflows/llm_base.py +1 -3
- attune/workflows/migration.py +432 -0
- attune/workflows/output.py +2 -7
- attune/workflows/parsing_mixin.py +427 -0
- attune/workflows/perf_audit.py +3 -1
- attune/workflows/progress.py +9 -11
- attune/workflows/release_prep.py +5 -1
- attune/workflows/routing.py +0 -2
- attune/workflows/secure_release.py +4 -1
- attune/workflows/security_audit.py +20 -14
- attune/workflows/security_audit_phase3.py +28 -22
- attune/workflows/seo_optimization.py +27 -27
- attune/workflows/test_gen/test_templates.py +1 -4
- attune/workflows/test_gen/workflow.py +0 -2
- attune/workflows/test_gen_behavioral.py +6 -19
- attune/workflows/test_gen_parallel.py +8 -6
- {attune_ai-2.1.5.dist-info → attune_ai-2.2.1.dist-info}/METADATA +4 -3
- {attune_ai-2.1.5.dist-info → attune_ai-2.2.1.dist-info}/RECORD +121 -96
- {attune_ai-2.1.5.dist-info → attune_ai-2.2.1.dist-info}/entry_points.txt +0 -2
- attune_healthcare/monitors/monitoring/__init__.py +9 -9
- attune_llm/agent_factory/__init__.py +6 -6
- attune_llm/agent_factory/adapters/haystack_adapter.py +1 -4
- attune_llm/commands/__init__.py +10 -10
- attune_llm/commands/models.py +3 -3
- attune_llm/config/__init__.py +8 -8
- attune_llm/learning/__init__.py +3 -3
- attune_llm/learning/extractor.py +5 -3
- attune_llm/learning/storage.py +5 -3
- attune_llm/security/__init__.py +17 -17
- attune_llm/utils/tokens.py +3 -1
- attune/cli_legacy.py +0 -3978
- attune/memory/short_term.py +0 -2192
- attune/workflows/manage_docs.py +0 -87
- attune/workflows/test5.py +0 -125
- {attune_ai-2.1.5.dist-info → attune_ai-2.2.1.dist-info}/WHEEL +0 -0
- {attune_ai-2.1.5.dist-info → attune_ai-2.2.1.dist-info}/licenses/LICENSE +0 -0
- {attune_ai-2.1.5.dist-info → attune_ai-2.2.1.dist-info}/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +0 -0
- {attune_ai-2.1.5.dist-info → attune_ai-2.2.1.dist-info}/top_level.txt +0 -0
attune/workflows/autonomous_test_gen.py +158 -102

@@ -43,6 +43,7 @@ logger = logging.getLogger(__name__)
 @dataclass
 class ValidationResult:
     """Result of pytest validation."""
+
     passed: bool
     failures: str
     error_count: int
@@ -52,6 +53,7 @@ class ValidationResult:
 @dataclass
 class CoverageResult:
     """Result of coverage analysis."""
+
     coverage: float
     missing_lines: list[int]
     total_statements: int
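These two dataclasses carry the results that drive the refinement and coverage loops later in the diff. A minimal usage sketch; the `output` and `covered_statements` fields are taken from the constructor calls visible in later hunks:

```python
from dataclasses import dataclass


@dataclass
class CoverageResult:
    """Mirrors the record above; fields as they appear in this diff."""

    coverage: float  # covered fraction, e.g. 0.83
    missing_lines: list[int]  # line numbers never executed
    total_statements: int
    covered_statements: int


# An 83%-covered module with two uncovered lines:
result = CoverageResult(
    coverage=0.83, missing_lines=[10, 42], total_statements=100, covered_statements=83
)
assert result.coverage >= 0.80  # the generator's default target_coverage
```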
@@ -69,7 +71,7 @@ class AutonomousTestGenerator:
         enable_refinement: bool = True,
         max_refinement_iterations: int = 3,
         enable_coverage_guided: bool = False,
-        target_coverage: float = 0.80
+        target_coverage: float = 0.80,
     ):
         """Initialize generator.

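These keyword flags gate the generator's three phases (basic generation, multi-turn refinement, coverage-guided iteration). A hedged construction sketch; the two leading positional arguments are inferred from the `run_batch_generation` hunk near the end of this diff, not confirmed by it:

```python
# Hedged sketch: positional arguments are inferred; keyword flags are as shown above.
from attune.workflows.autonomous_test_gen import AutonomousTestGenerator

generator = AutonomousTestGenerator(
    1,  # batch_num (inferred)
    ["attune/tools.py"],  # modules to cover (illustrative)
    enable_refinement=True,  # Phase 2: multi-turn refinement
    max_refinement_iterations=3,
    enable_coverage_guided=False,  # Phase 3: coverage-guided loop
    target_coverage=0.80,
)
results = generator.generate_all()
```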
@@ -107,7 +109,9 @@ class AutonomousTestGenerator:
         self.output_dir = Path(f"tests/behavioral/generated/batch{batch_num}")
         self.output_dir.mkdir(parents=True, exist_ok=True)

-        logger.info(f"Generator initialized: refinement={enable_refinement}, coverage_guided={enable_coverage_guided}")
+        logger.info(
+            f"Generator initialized: refinement={enable_refinement}, coverage_guided={enable_coverage_guided}"
+        )

     def generate_all(self) -> dict[str, Any]:
         """Generate tests for all modules with progress tracking.
@@ -122,7 +126,7 @@
                 "batch": self.batch_num,
                 "total_modules": len(self.modules),
                 "workflow": "autonomous_test_generation",
-            }
+            },
         )

         try:
@@ -143,7 +147,7 @@
             self.coordinator.beat(
                 status="running",
                 progress=progress,
-                current_task=f"Generating tests for {module_name}"
+                current_task=f"Generating tests for {module_name}",
             )

             try:
@@ -162,8 +166,8 @@
                         "agent_id": self.agent_id,
                         "module": module_name,
                         "test_file": str(test_file),
-                        "batch": self.batch_num
-                    }
+                        "batch": self.batch_num,
+                    },
                 )

                 # Record quality feedback
@@ -173,7 +177,11 @@
                     stage_name="generation",
                     tier="capable",
                     quality_score=1.0,  # Success
-                    metadata={"module": module_name, "status": "success", "batch": self.batch_num}
+                    metadata={
+                        "module": module_name,
+                        "status": "success",
+                        "batch": self.batch_num,
+                    },
                 )
             else:
                 results["failed"] += 1
@@ -186,7 +194,11 @@
                     stage_name="validation",
                     tier="capable",
                     quality_score=0.0,  # Failure
-                    metadata={"module": module_name, "status": "validation_failed", "batch": self.batch_num}
+                    metadata={
+                        "module": module_name,
+                        "status": "validation_failed",
+                        "batch": self.batch_num,
+                    },
                 )

             except Exception as e:
@@ -201,8 +213,8 @@
                         "agent_id": self.agent_id,
                         "module": module_name,
                         "error": str(e),
-                        "batch": self.batch_num
-                    }
+                        "batch": self.batch_num,
+                    },
                 )

         # Count total tests
@@ -212,18 +224,14 @@
             self.coordinator.beat(
                 status="completed",
                 progress=1.0,
-                current_task=f"Completed: {results['completed']}/{results['total_modules']} modules"
+                current_task=f"Completed: {results['completed']}/{results['total_modules']} modules",
             )

             return results

         except Exception as e:
             # Error tracking
-            self.coordinator.beat(
-                status="failed",
-                progress=0.0,
-                current_task=f"Failed: {str(e)}"
-            )
+            self.coordinator.beat(status="failed", progress=0.0, current_task=f"Failed: {str(e)}")
             raise

         finally:
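The hunks above trace the heartbeat protocol used for progress reporting: periodic `beat()` calls while running, then a terminal beat for success or failure. A skeleton of that lifecycle, assuming only the `beat()` keywords shown in the diff:

```python
# Skeleton of the heartbeat lifecycle in generate_all(); `coordinator` stands
# in for self.coordinator, and process_one for the per-module generation step.
def run_with_heartbeat(coordinator, modules, process_one):
    try:
        for i, module_name in enumerate(modules):
            coordinator.beat(
                status="running",
                progress=i / max(len(modules), 1),
                current_task=f"Generating tests for {module_name}",
            )
            process_one(module_name)
        coordinator.beat(status="completed", progress=1.0, current_task="Done")
    except Exception as e:
        # Terminal beat on failure, mirroring the hunk above, then re-raise
        coordinator.beat(status="failed", progress=0.0, current_task=f"Failed: {e}")
        raise
```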
@@ -269,10 +277,14 @@

         if self.enable_refinement:
             logger.info(f"🔄 Using Phase 2: Multi-turn refinement for {module_name}")
-            test_content = self._generate_with_refinement(module_name, module_path, source_file, source_code, test_file)
+            test_content = self._generate_with_refinement(
+                module_name, module_path, source_file, source_code, test_file
+            )
         else:
             logger.info(f"📝 Using Phase 1: Basic generation for {module_name}")
-            test_content = self._generate_with_llm(module_name, module_path, source_file, source_code)
+            test_content = self._generate_with_llm(
+                module_name, module_path, source_file, source_code
+            )

         if not test_content:
             logger.warning(f"LLM generation failed for {module_name}")
@@ -290,7 +302,9 @@
                 test_content = improved_content
                 logger.info(f"✅ Coverage-guided improvement complete for {module_name}")
             else:
-                logger.warning(f"⚠️ Coverage-guided improvement failed, using previous version for {module_name}")
+                logger.warning(
+                    f"⚠️ Coverage-guided improvement failed, using previous version for {module_name}"
+                )

         # Write final test file
         test_file.write_text(test_content)
@@ -322,7 +336,7 @@
             r"TelemetryCollector",
             r"from\s+anthropic\s+import",
             r"messages\.create",
-            r"client\.messages"
+            r"client\.messages",
         ]

         return any(re.search(pattern, source_code) for pattern in indicators)
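The workflow check is a simple any-match over regex indicators; applied to a toy snippet:

```python
# The detection idiom from _is_workflow_module, run on a toy source string.
import re

indicators = [
    r"TelemetryCollector",
    r"from\s+anthropic\s+import",
    r"messages\.create",
    r"client\.messages",
]
source_code = "from anthropic import Anthropic\nresponse = client.messages.create(...)"
assert any(re.search(pattern, source_code) for pattern in indicators)
```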
@@ -405,7 +419,9 @@ class TestMyWorkflow:
     ```
     """

-    def _get_workflow_specific_prompt(self, module_name: str, module_path: str, source_code: str) -> str:
+    def _get_workflow_specific_prompt(
+        self, module_name: str, module_path: str, source_code: str
+    ) -> str:
         """Get workflow-specific test generation prompt with comprehensive mocking guidance."""
         return f"""Generate comprehensive tests for this WORKFLOW module.

@@ -453,7 +469,9 @@ Generate a complete test file with:

 Return ONLY the complete Python test file, no explanations."""

-    def _generate_with_llm(self, module_name: str, module_path: str, source_file: Path, source_code: str) -> str | None:
+    def _generate_with_llm(
+        self, module_name: str, module_path: str, source_file: Path, source_code: str
+    ) -> str | None:
         """Generate comprehensive tests using LLM with Anthropic best practices.

         ENHANCEMENTS (Phase 1):
@@ -487,11 +505,15 @@ Return ONLY the complete Python test file, no explanations."""

         # Detect if this is a workflow module
         is_workflow = self._is_workflow_module(source_code, module_path)
-        logger.info(f"Module {module_name}: workflow={is_workflow}, size={len(source_code)} bytes (FULL)")
+        logger.info(
+            f"Module {module_name}: workflow={is_workflow}, size={len(source_code)} bytes (FULL)"
+        )

         # Build appropriate prompt based on module type
         if is_workflow:
-            generation_prompt = self._get_workflow_specific_prompt(module_name, module_path, source_code)
+            generation_prompt = self._get_workflow_specific_prompt(
+                module_name, module_path, source_code
+            )
         else:
             generation_prompt = f"""Generate comprehensive behavioral tests for this Python module.

@@ -537,31 +559,30 @@ Return ONLY the complete Python test file content, no explanations."""
                     {
                         "type": "text",
                         "text": "You are an expert Python test engineer. Here are examples of excellent tests:",
-                        "cache_control": {"type": "ephemeral"}
+                        "cache_control": {"type": "ephemeral"},
                     },
                     {
                         "type": "text",
                         "text": self._get_example_tests(),
-                        "cache_control": {"type": "ephemeral"}
+                        "cache_control": {"type": "ephemeral"},
                     },
-                    {
-                        "type": "text",
-                        "text": generation_prompt
-                    }
-                ]
+                    {"type": "text", "text": generation_prompt},
+                ],
             }
         ]

         try:
             # Call Anthropic API with extended thinking and caching
-            logger.info(f"Calling LLM with extended thinking for {module_name} (workflow={is_workflow})")
+            logger.info(
+                f"Calling LLM with extended thinking for {module_name} (workflow={is_workflow})"
+            )
             client = anthropic.Anthropic(api_key=api_key)
             response = client.messages.create(
                 model="claude-sonnet-4-5",  # capable tier
                 max_tokens=40000,  # Very generous total budget for comprehensive tests
                 thinking={
                     "type": "enabled",
-                    "budget_tokens": 20000  # Generous thinking budget for thorough planning
+                    "budget_tokens": 20000,  # Generous thinking budget for thorough planning
                 },
                 messages=messages,
                 timeout=900.0,  # 15 minutes timeout for extended thinking + generation
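For context on the API shape this hunk reformats: the stable blocks (system-style instructions and example tests) are marked with an ephemeral `cache_control` entry so repeated calls across modules can hit the prompt cache, and extended thinking is enabled with its own token budget. A condensed sketch, with placeholder prompt text standing in for the real prompts:

```python
# Condensed sketch of the generation request above; prompt text is a placeholder.
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
response = client.messages.create(
    model="claude-sonnet-4-5",
    max_tokens=40000,  # must exceed the thinking budget below
    thinking={"type": "enabled", "budget_tokens": 20000},
    messages=[
        {
            "role": "user",
            "content": [
                # Stable blocks are marked ephemeral so later calls reuse the cache
                {
                    "type": "text",
                    "text": "You are an expert Python test engineer. ...",
                    "cache_control": {"type": "ephemeral"},
                },
                {"type": "text", "text": "<example tests>", "cache_control": {"type": "ephemeral"}},
                # The per-module prompt varies, so it is left uncached
                {"type": "text", "text": "<generation prompt>"},
            ],
        }
    ],
    timeout=900.0,
)
# With thinking enabled the reply interleaves thinking and text blocks;
# the generator reads the text blocks for the test file content.
```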
@@ -590,7 +611,7 @@ Return ONLY the complete Python test file content, no explanations."""

             # Clean up response (remove markdown fences if present)
             if test_content.startswith("```python"):
-                test_content = test_content[len("```python"):].strip()
+                test_content = test_content[len("```python") :].strip()
             if test_content.endswith("```"):
                 test_content = test_content[:-3].strip()

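The only change here is the space Black inserts before the slice colon. The fence-stripping idiom itself, as a standalone helper:

````python
# The fence-stripping idiom from this hunk as a standalone helper.
def strip_markdown_fences(text: str) -> str:
    """Remove a leading ```python fence and a trailing ``` fence, if present."""
    if text.startswith("```python"):
        text = text[len("```python") :].strip()
    if text.endswith("```"):
        text = text[:-3].strip()
    return text


assert strip_markdown_fences("```python\nx = 1\n```") == "x = 1"
````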
@@ -602,10 +623,13 @@
             # Quick syntax pre-check before returning
             try:
                 import ast
+
                 ast.parse(test_content)
                 logger.info(f"✓ Quick syntax check passed for {module_name}")
             except SyntaxError as e:
-                logger.error(f"❌ LLM generated invalid syntax for {module_name}: {e.msg} at line {e.lineno}")
+                logger.error(
+                    f"❌ LLM generated invalid syntax for {module_name}: {e.msg} at line {e.lineno}"
+                )
                 return None

             logger.info(f"Test content cleaned, final size: {len(test_content)} bytes")
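The pre-check above rejects syntactically invalid LLM output before any pytest run is attempted. The same gate in isolation:

```python
# Cheap syntax gate: ast.parse catches invalid output without running pytest.
import ast


def passes_syntax_check(test_content: str) -> bool:
    try:
        ast.parse(test_content)
        return True
    except SyntaxError as e:
        print(f"invalid syntax: {e.msg} at line {e.lineno}")
        return False


assert passes_syntax_check("def test_ok():\n    assert True\n")
assert not passes_syntax_check("def test_broken(:\n")
```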
@@ -657,10 +681,7 @@
             logger.info(f"Pytest validation: passed={passed}, errors={error_count}")

             return ValidationResult(
-                passed=passed,
-                failures=failures,
-                error_count=error_count,
-                output=output
+                passed=passed, failures=failures, error_count=error_count, output=output
             )

         except subprocess.TimeoutExpired:
@@ -669,21 +690,16 @@
                 passed=False,
                 failures="Validation timeout after 60 seconds",
                 error_count=1,
-                output="Timeout"
+                output="Timeout",
             )
         except Exception as e:
             logger.error(f"Pytest validation exception: {e}")
             return ValidationResult(
-                passed=False,
-                failures=f"Validation exception: {e}",
-                error_count=1,
-                output=str(e)
+                passed=False, failures=f"Validation exception: {e}", error_count=1, output=str(e)
             )

     def _call_llm_with_history(
-        self,
-        conversation_history: list[dict[str, Any]],
-        api_key: str
+        self, conversation_history: list[dict[str, Any]], api_key: str
     ) -> str | None:
         """Call LLM with conversation history for refinement.

@@ -703,7 +719,7 @@
                 max_tokens=40000,  # Very generous total budget for iterative refinement
                 thinking={
                     "type": "enabled",
-                    "budget_tokens": 20000  # Generous thinking budget for thorough analysis
+                    "budget_tokens": 20000,  # Generous thinking budget for thorough analysis
                 },
                 messages=conversation_history,
                 timeout=900.0,  # 15 minutes timeout for refinement iterations
@@ -726,7 +742,7 @@

             # Clean up response
             if test_content.startswith("```python"):
-                test_content = test_content[len("```python"):].strip()
+                test_content = test_content[len("```python") :].strip()
             if test_content.endswith("```"):
                 test_content = test_content[:-3].strip()

@@ -742,7 +758,7 @@
         module_path: str,
         source_file: Path,
         source_code: str,
-        test_file: Path
+        test_file: Path,
     ) -> str | None:
         """Generate tests with iterative refinement (Phase 2).

@@ -769,7 +785,9 @@
             logger.error("ANTHROPIC_API_KEY not set")
             return None

-        logger.info(f"🔄 Phase 2: Multi-turn refinement enabled for {module_name} (max {self.max_refinement_iterations} iterations)")
+        logger.info(
+            f"🔄 Phase 2: Multi-turn refinement enabled for {module_name} (max {self.max_refinement_iterations} iterations)"
+        )

         # Step 1: Generate initial tests
         test_content = self._generate_with_llm(module_name, module_path, source_file, source_code)
@@ -782,7 +800,9 @@

         # Initial prompt (for history tracking)
         if is_workflow:
-            initial_prompt = self._get_workflow_specific_prompt(module_name, module_path, source_code)
+            initial_prompt = self._get_workflow_specific_prompt(
+                module_name, module_path, source_code
+            )
         else:
             initial_prompt = f"""Generate comprehensive behavioral tests for {module_name}.

@@ -795,20 +815,27 @@ SOURCE CODE:
             {
                 "role": "user",
                 "content": [
-                    {
-
-
-
+                    {
+                        "type": "text",
+                        "text": "You are an expert Python test engineer. Examples:",
+                        "cache_control": {"type": "ephemeral"},
+                    },
+                    {
+                        "type": "text",
+                        "text": self._get_example_tests(),
+                        "cache_control": {"type": "ephemeral"},
+                    },
+                    {"type": "text", "text": initial_prompt},
+                ],
             },
-            {
-                "role": "assistant",
-                "content": test_content
-            }
+            {"role": "assistant", "content": test_content},
         ]

         # Step 2: Iterative refinement loop
         for iteration in range(self.max_refinement_iterations):
-            logger.info(f"📝 Refinement iteration {iteration + 1}/{self.max_refinement_iterations} for {module_name}")
+            logger.info(
+                f"📝 Refinement iteration {iteration + 1}/{self.max_refinement_iterations} for {module_name}"
+            )

             # Write current version to temp file
             temp_test_file = test_file.parent / f"_temp_{test_file.name}"
@@ -823,7 +850,9 @@ SOURCE CODE:
                 return test_content

             # Tests failed - ask Claude to fix
-            logger.warning(f"⚠️ Tests failed on iteration {iteration + 1}: {validation_result.error_count} errors")
+            logger.warning(
+                f"⚠️ Tests failed on iteration {iteration + 1}: {validation_result.error_count} errors"
+            )

             refinement_prompt = f"""The tests you generated have failures. Please fix these specific issues:

@@ -840,10 +869,7 @@ Requirements:
 Return ONLY the complete Python test file, no explanations."""

             # Add to conversation history
-            conversation_history.append({
-                "role": "user",
-                "content": refinement_prompt
-            })
+            conversation_history.append({"role": "user", "content": refinement_prompt})

             # Call LLM for refinement
             refined_content = self._call_llm_with_history(conversation_history, api_key)
@@ -855,15 +881,14 @@ Return ONLY the complete Python test file, no explanations."""

             # Update content and history
             test_content = refined_content
-            conversation_history.append({
-                "role": "assistant",
-                "content": test_content
-            })
+            conversation_history.append({"role": "assistant", "content": test_content})

             logger.info(f"🔄 Refinement iteration {iteration + 1} complete, retrying validation...")

         # Max iterations reached
-        logger.warning(f"⚠️ Max refinement iterations reached for {module_name} - returning best attempt")
+        logger.warning(
+            f"⚠️ Max refinement iterations reached for {module_name} - returning best attempt"
+        )
         return test_content

     def _run_coverage_analysis(self, test_file: Path, source_file: Path) -> CoverageResult:
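Taken together, these hunks reformat the Phase 2 loop: validate, feed failures back as a user turn, record the model's reply as an assistant turn, and retry. A skeleton under those assumptions, with `validate` and `call_llm` standing in for the class's `_run_pytest_validation` and `_call_llm_with_history`:

```python
# Skeleton of the multi-turn refinement loop; stand-in callables, not the real API.
def refine(test_content: str, validate, call_llm, max_iterations: int = 3) -> str:
    history = [
        {"role": "user", "content": "<initial generation prompt>"},
        {"role": "assistant", "content": test_content},
    ]
    for _ in range(max_iterations):
        result = validate(test_content)
        if result.passed:
            return test_content
        # Failures go back to the model as the next user turn
        history.append({"role": "user", "content": f"Fix these failures:\n{result.failures}"})
        refined = call_llm(history)
        if not refined:
            break  # keep the last usable attempt
        test_content = refined
        history.append({"role": "assistant", "content": test_content})
    return test_content  # best attempt after max iterations
```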
@@ -880,17 +905,19 @@
             # Run pytest with coverage (result intentionally unused - we read coverage from file)
             subprocess.run(
                 [
-                    sys.executable, "-m", "pytest",
+                    sys.executable,
+                    "-m",
+                    "pytest",
                     str(test_file),
                     f"--cov={source_file.parent}",
                     "--cov-report=term-missing",
                     "--cov-report=json",
-                    "-v"
+                    "-v",
                 ],
                 capture_output=True,
                 text=True,
                 timeout=120,
-                cwd=Path.cwd()
+                cwd=Path.cwd(),
             )

             # Parse coverage from JSON report
@@ -898,10 +925,7 @@
             if not coverage_json_path.exists():
                 logger.warning("Coverage JSON not generated")
                 return CoverageResult(
-                    coverage=0.0,
-                    missing_lines=[],
-                    total_statements=0,
-                    covered_statements=0
+                    coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0
                 )

             with open(coverage_json_path) as f:
@@ -918,10 +942,7 @@
             if not file_coverage:
                 logger.warning(f"No coverage data found for {source_file}")
                 return CoverageResult(
-                    coverage=0.0,
-                    missing_lines=[],
-                    total_statements=0,
-                    covered_statements=0
+                    coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0
                 )

             # Extract metrics
@@ -930,21 +951,27 @@
             coverage_pct = file_coverage["summary"]["percent_covered"] / 100.0
             missing_lines = file_coverage["missing_lines"]

-            logger.info(f"Coverage: {coverage_pct:.1%} ({covered_statements}/{total_statements} statements)")
+            logger.info(
+                f"Coverage: {coverage_pct:.1%} ({covered_statements}/{total_statements} statements)"
+            )

             return CoverageResult(
                 coverage=coverage_pct,
                 missing_lines=missing_lines,
                 total_statements=total_statements,
-                covered_statements=covered_statements
+                covered_statements=covered_statements,
             )

         except subprocess.TimeoutExpired:
             logger.error("Coverage analysis timeout")
-            return CoverageResult(coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0)
+            return CoverageResult(
+                coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0
+            )
         except Exception as e:
             logger.error(f"Coverage analysis error: {e}", exc_info=True)
-            return CoverageResult(coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0)
+            return CoverageResult(
+                coverage=0.0, missing_lines=[], total_statements=0, covered_statements=0
+            )

     def _extract_uncovered_lines(self, source_file: Path, missing_lines: list[int]) -> str:
         """Extract source code for uncovered lines.
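The coverage numbers come from pytest-cov's JSON report, which these hunks read back. A sketch of that read-back path, assuming the standard `coverage.json` layout (a `files` map keyed by source path, each entry carrying a `summary` and a `missing_lines` list, exactly the keys used above):

```python
# Reading per-file coverage back from a pytest-cov JSON report.
import json
from pathlib import Path


def read_file_coverage(coverage_json: Path, source_name: str) -> tuple[float, list[int]]:
    data = json.loads(coverage_json.read_text())
    for path, file_coverage in data["files"].items():
        if path.endswith(source_name):
            coverage = file_coverage["summary"]["percent_covered"] / 100.0
            return coverage, file_coverage["missing_lines"]
    return 0.0, []  # no data recorded for this file
```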
@@ -1003,7 +1030,7 @@
         source_file: Path,
         source_code: str,
         test_file: Path,
-        initial_test_content: str
+        initial_test_content: str,
     ) -> str | None:
         """Generate tests iteratively until coverage target met (Phase 3).

@@ -1032,13 +1059,17 @@
             logger.error("ANTHROPIC_API_KEY not set")
             return None

-        logger.info(f"📊 Phase 3: Coverage-guided generation enabled (target: {self.target_coverage:.0%})")
+        logger.info(
+            f"📊 Phase 3: Coverage-guided generation enabled (target: {self.target_coverage:.0%})"
+        )

         test_content = initial_test_content
         max_coverage_iterations = 5

         for iteration in range(max_coverage_iterations):
-            logger.info(f"📈 Coverage iteration {iteration + 1}/{max_coverage_iterations} for {module_name}")
+            logger.info(
+                f"📈 Coverage iteration {iteration + 1}/{max_coverage_iterations} for {module_name}"
+            )

             # Write current tests
             test_file.write_text(test_content)
@@ -1046,7 +1077,9 @@
             # Run coverage analysis
             coverage_result = self._run_coverage_analysis(test_file, source_file)

-            logger.info(f"Current coverage: {coverage_result.coverage:.1%}, target: {self.target_coverage:.0%}")
+            logger.info(
+                f"Current coverage: {coverage_result.coverage:.1%}, target: {self.target_coverage:.0%}"
+            )

             # Check if target reached
             if coverage_result.coverage >= self.target_coverage:
@@ -1059,7 +1092,9 @@
                 break

             # Identify uncovered code
-            uncovered_code = self._extract_uncovered_lines(source_file, coverage_result.missing_lines)
+            uncovered_code = self._extract_uncovered_lines(
+                source_file, coverage_result.missing_lines
+            )

             # Ask Claude to add tests for uncovered lines
             refinement_prompt = f"""Current coverage: {coverage_result.coverage:.1%}
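`_extract_uncovered_lines` itself is not shown in this diff; one plausible shape, given the signature visible above and the `missing_lines` list from `CoverageResult`:

```python
# Hypothetical sketch only; the packaged implementation is not part of this diff.
from pathlib import Path


def extract_uncovered_lines(source_file: Path, missing_lines: list[int]) -> str:
    lines = source_file.read_text().splitlines()
    return "\n".join(
        f"{n}: {lines[n - 1]}" for n in missing_lines if 0 < n <= len(lines)
    )
```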
@@ -1083,23 +1118,39 @@ Return ONLY the complete Python test file with additions, no explanations."""
             {
                 "role": "user",
                 "content": [
-                    {
-
-
+                    {
+                        "type": "text",
+                        "text": "You are an expert Python test engineer. Examples:",
+                        "cache_control": {"type": "ephemeral"},
+                    },
+                    {
+                        "type": "text",
+                        "text": self._get_example_tests(),
+                        "cache_control": {"type": "ephemeral"},
+                    },
+                    {
+                        "type": "text",
+                        "text": f"Source code:\n```python\n{source_code}\n```",
+                        "cache_control": {"type": "ephemeral"},
+                    },
                     {"type": "text", "text": f"Current tests:\n```python\n{test_content}\n```"},
-                    {"type": "text", "text": refinement_prompt}
-                ]
+                    {"type": "text", "text": refinement_prompt},
+                ],
             }
         ]

         # Call LLM for coverage improvement
         try:
             import anthropic
+
             client = anthropic.Anthropic(api_key=api_key)
             response = client.messages.create(
                 model="claude-sonnet-4-5",
                 max_tokens=40000,  # Very generous total budget for coverage improvement
-                thinking={"type": "enabled", "budget_tokens": 20000},  # Thorough thinking for coverage gaps
+                thinking={
+                    "type": "enabled",
+                    "budget_tokens": 20000,
+                },  # Thorough thinking for coverage gaps
                 messages=messages,
                 timeout=900.0,  # 15 minutes timeout for coverage-guided iterations
             )
@@ -1116,7 +1167,7 @@ Return ONLY the complete Python test file with additions, no explanations."""

             # Clean up
             if refined_content.startswith("```python"):
-                refined_content = refined_content[len("```python"):].strip()
+                refined_content = refined_content[len("```python") :].strip()
             if refined_content.endswith("```"):
                 refined_content = refined_content[:-3].strip()

@@ -1128,7 +1179,9 @@ Return ONLY the complete Python test file with additions, no explanations."""
                 break

         # Return best attempt
-        logger.info(f"Coverage-guided generation complete: final coverage ~{coverage_result.coverage:.1%}")
+        logger.info(
+            f"Coverage-guided generation complete: final coverage ~{coverage_result.coverage:.1%}"
+        )
         return test_content

     def _validate_test_file(self, test_file: Path) -> bool:
@@ -1143,6 +1196,7 @@ Return ONLY the complete Python test file with additions, no explanations."""
         # Step 1: Check for syntax errors with ast.parse (fast)
         try:
             import ast
+
             content = test_file.read_text()
             ast.parse(content)
             logger.info(f"✓ Syntax check passed for {test_file.name}")
@@ -1203,7 +1257,7 @@ def run_batch_generation(
     batch_num: int,
     modules_json: str,
     enable_refinement: bool = True,
-    enable_coverage_guided: bool = False
+    enable_coverage_guided: bool = False,
 ) -> None:
     """Run test generation for a batch.

@@ -1223,7 +1277,7 @@ def run_batch_generation(
         batch_num,
         modules,
         enable_refinement=enable_refinement,
-        enable_coverage_guided=enable_coverage_guided
+        enable_coverage_guided=enable_coverage_guided,
     )

     # Generate tests
@@ -1252,7 +1306,9 @@ if __name__ == "__main__":
     import sys

     if len(sys.argv) < 3:
-        print("Usage: python -m attune.workflows.autonomous_test_gen <batch_num> <modules_json> [--no-refinement] [--coverage-guided]")
+        print(
+            "Usage: python -m attune.workflows.autonomous_test_gen <batch_num> <modules_json> [--no-refinement] [--coverage-guided]"
+        )
         print("\nOptions:")
         print("  --no-refinement    Disable Phase 2 multi-turn refinement")
         print("  --coverage-guided  Enable Phase 3 coverage-guided generation (slower)")
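For reference, a batch-1 run with coverage-guided generation enabled would look like `python -m attune.workflows.autonomous_test_gen 1 '["attune/tools.py"]' --coverage-guided`, following the usage string above (the module list is illustrative).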