empathy-framework 5.0.1__py3-none-any.whl → 5.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/METADATA +311 -150
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/RECORD +60 -33
- empathy_framework-5.1.0.dist-info/licenses/LICENSE +201 -0
- empathy_framework-5.1.0.dist-info/licenses/LICENSE_CHANGE_ANNOUNCEMENT.md +101 -0
- empathy_llm_toolkit/providers.py +175 -35
- empathy_llm_toolkit/utils/tokens.py +150 -30
- empathy_os/__init__.py +1 -1
- empathy_os/cli/commands/batch.py +256 -0
- empathy_os/cli/commands/cache.py +248 -0
- empathy_os/cli/commands/inspect.py +1 -2
- empathy_os/cli/commands/metrics.py +1 -1
- empathy_os/cli/commands/routing.py +285 -0
- empathy_os/cli/commands/workflow.py +2 -1
- empathy_os/cli/parsers/__init__.py +6 -0
- empathy_os/cli/parsers/batch.py +118 -0
- empathy_os/cli/parsers/cache 2.py +65 -0
- empathy_os/cli/parsers/cache.py +65 -0
- empathy_os/cli/parsers/routing.py +110 -0
- empathy_os/cli_minimal.py +3 -3
- empathy_os/cli_router 2.py +416 -0
- empathy_os/dashboard/__init__.py +1 -2
- empathy_os/dashboard/app 2.py +512 -0
- empathy_os/dashboard/app.py +1 -1
- empathy_os/dashboard/simple_server 2.py +403 -0
- empathy_os/dashboard/standalone_server 2.py +536 -0
- empathy_os/dashboard/standalone_server.py +22 -11
- empathy_os/memory/types 2.py +441 -0
- empathy_os/metrics/collector.py +31 -0
- empathy_os/models/__init__.py +19 -0
- empathy_os/models/adaptive_routing 2.py +437 -0
- empathy_os/models/auth_cli.py +444 -0
- empathy_os/models/auth_strategy.py +450 -0
- empathy_os/models/token_estimator.py +21 -13
- empathy_os/project_index/scanner_parallel 2.py +291 -0
- empathy_os/telemetry/agent_coordination 2.py +478 -0
- empathy_os/telemetry/agent_coordination.py +14 -16
- empathy_os/telemetry/agent_tracking 2.py +350 -0
- empathy_os/telemetry/agent_tracking.py +18 -20
- empathy_os/telemetry/approval_gates 2.py +563 -0
- empathy_os/telemetry/approval_gates.py +27 -39
- empathy_os/telemetry/event_streaming 2.py +405 -0
- empathy_os/telemetry/event_streaming.py +22 -22
- empathy_os/telemetry/feedback_loop 2.py +557 -0
- empathy_os/telemetry/feedback_loop.py +14 -17
- empathy_os/workflows/__init__.py +8 -0
- empathy_os/workflows/autonomous_test_gen.py +569 -0
- empathy_os/workflows/batch_processing.py +56 -10
- empathy_os/workflows/bug_predict.py +45 -0
- empathy_os/workflows/code_review.py +92 -22
- empathy_os/workflows/document_gen.py +594 -62
- empathy_os/workflows/llm_base.py +363 -0
- empathy_os/workflows/perf_audit.py +69 -0
- empathy_os/workflows/release_prep.py +54 -0
- empathy_os/workflows/security_audit.py +154 -79
- empathy_os/workflows/test_gen.py +60 -0
- empathy_os/workflows/test_gen_behavioral.py +477 -0
- empathy_os/workflows/test_gen_parallel.py +341 -0
- empathy_framework-5.0.1.dist-info/licenses/LICENSE +0 -139
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/WHEEL +0 -0
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/entry_points.txt +0 -0
- {empathy_framework-5.0.1.dist-info → empathy_framework-5.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
"""Autonomous Test Generation with Dashboard Integration.
|
|
2
|
+
|
|
3
|
+
Generates behavioral tests with real-time monitoring via Agent Coordination Dashboard.
|
|
4
|
+
|
|
5
|
+
Copyright 2026 Smart-AI-Memory
|
|
6
|
+
Licensed under Apache 2.0
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from empathy_os.memory.short_term import RedisShortTermMemory
|
|
17
|
+
from empathy_os.telemetry.agent_tracking import HeartbeatCoordinator
|
|
18
|
+
from empathy_os.telemetry.event_streaming import EventStreamer
|
|
19
|
+
from empathy_os.telemetry.feedback_loop import FeedbackLoop
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AutonomousTestGenerator:
|
|
25
|
+
"""Generate tests autonomously with dashboard monitoring."""
|
|
26
|
+
|
|
27
|
+
def __init__(self, agent_id: str, batch_num: int, modules: list[dict[str, Any]]):
|
|
28
|
+
"""Initialize generator.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
agent_id: Unique agent identifier
|
|
32
|
+
batch_num: Batch number (1-18)
|
|
33
|
+
modules: List of modules to generate tests for
|
|
34
|
+
"""
|
|
35
|
+
self.agent_id = agent_id
|
|
36
|
+
self.batch_num = batch_num
|
|
37
|
+
self.modules = modules
|
|
38
|
+
|
|
39
|
+
# Initialize memory backend for dashboard integration
|
|
40
|
+
try:
|
|
41
|
+
self.memory = RedisShortTermMemory()
|
|
42
|
+
self.coordinator = HeartbeatCoordinator(memory=self.memory, enable_streaming=True)
|
|
43
|
+
self.event_streamer = EventStreamer(memory=self.memory)
|
|
44
|
+
self.feedback_loop = FeedbackLoop(memory=self.memory)
|
|
45
|
+
except Exception as e:
|
|
46
|
+
logger.warning(f"Failed to initialize memory backend: {e}")
|
|
47
|
+
self.coordinator = HeartbeatCoordinator()
|
|
48
|
+
self.event_streamer = None
|
|
49
|
+
self.feedback_loop = None
|
|
50
|
+
|
|
51
|
+
self.output_dir = Path(f"tests/behavioral/generated/batch{batch_num}")
|
|
52
|
+
self.output_dir.mkdir(parents=True, exist_ok=True)
|
|
53
|
+
|
|
54
|
+
def generate_all(self) -> dict[str, Any]:
|
|
55
|
+
"""Generate tests for all modules with progress tracking.
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
Summary of generation results
|
|
59
|
+
"""
|
|
60
|
+
# Start tracking
|
|
61
|
+
self.coordinator.start_heartbeat(
|
|
62
|
+
agent_id=self.agent_id,
|
|
63
|
+
metadata={
|
|
64
|
+
"batch": self.batch_num,
|
|
65
|
+
"total_modules": len(self.modules),
|
|
66
|
+
"workflow": "autonomous_test_generation",
|
|
67
|
+
}
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
try:
|
|
71
|
+
results = {
|
|
72
|
+
"batch": self.batch_num,
|
|
73
|
+
"total_modules": len(self.modules),
|
|
74
|
+
"completed": 0,
|
|
75
|
+
"failed": 0,
|
|
76
|
+
"tests_generated": 0,
|
|
77
|
+
"files_created": [],
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
for i, module in enumerate(self.modules):
|
|
81
|
+
progress = (i + 1) / len(self.modules)
|
|
82
|
+
module_name = module["file"].replace("src/empathy_os/", "")
|
|
83
|
+
|
|
84
|
+
# Update dashboard
|
|
85
|
+
self.coordinator.beat(
|
|
86
|
+
status="running",
|
|
87
|
+
progress=progress,
|
|
88
|
+
current_task=f"Generating tests for {module_name}"
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
try:
|
|
92
|
+
# Generate tests for this module
|
|
93
|
+
test_file = self._generate_module_tests(module)
|
|
94
|
+
if test_file:
|
|
95
|
+
results["completed"] += 1
|
|
96
|
+
results["files_created"].append(str(test_file))
|
|
97
|
+
logger.info(f"✅ Generated tests for {module_name}")
|
|
98
|
+
|
|
99
|
+
# Send event to dashboard
|
|
100
|
+
if self.event_streamer:
|
|
101
|
+
self.event_streamer.publish_event(
|
|
102
|
+
event_type="test_file_created",
|
|
103
|
+
data={
|
|
104
|
+
"agent_id": self.agent_id,
|
|
105
|
+
"module": module_name,
|
|
106
|
+
"test_file": str(test_file),
|
|
107
|
+
"batch": self.batch_num
|
|
108
|
+
}
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
# Record quality feedback
|
|
112
|
+
if self.feedback_loop:
|
|
113
|
+
self.feedback_loop.record_feedback(
|
|
114
|
+
workflow_name="test-generation",
|
|
115
|
+
stage_name="generation",
|
|
116
|
+
tier="capable",
|
|
117
|
+
quality_score=1.0, # Success
|
|
118
|
+
metadata={"module": module_name, "status": "success", "batch": self.batch_num}
|
|
119
|
+
)
|
|
120
|
+
else:
|
|
121
|
+
results["failed"] += 1
|
|
122
|
+
logger.warning(f"⚠️ Skipped {module_name} (validation failed)")
|
|
123
|
+
|
|
124
|
+
# Record failure feedback
|
|
125
|
+
if self.feedback_loop:
|
|
126
|
+
self.feedback_loop.record_feedback(
|
|
127
|
+
workflow_name="test-generation",
|
|
128
|
+
stage_name="validation",
|
|
129
|
+
tier="capable",
|
|
130
|
+
quality_score=0.0, # Failure
|
|
131
|
+
metadata={"module": module_name, "status": "validation_failed", "batch": self.batch_num}
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
except Exception as e:
|
|
135
|
+
results["failed"] += 1
|
|
136
|
+
logger.error(f"❌ Error generating tests for {module_name}: {e}")
|
|
137
|
+
|
|
138
|
+
# Send error event
|
|
139
|
+
if self.event_streamer:
|
|
140
|
+
self.event_streamer.publish_event(
|
|
141
|
+
event_type="test_generation_error",
|
|
142
|
+
data={
|
|
143
|
+
"agent_id": self.agent_id,
|
|
144
|
+
"module": module_name,
|
|
145
|
+
"error": str(e),
|
|
146
|
+
"batch": self.batch_num
|
|
147
|
+
}
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
# Count total tests
|
|
151
|
+
results["tests_generated"] = self._count_tests()
|
|
152
|
+
|
|
153
|
+
# Final update
|
|
154
|
+
self.coordinator.beat(
|
|
155
|
+
status="completed",
|
|
156
|
+
progress=1.0,
|
|
157
|
+
current_task=f"Completed: {results['completed']}/{results['total_modules']} modules"
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
return results
|
|
161
|
+
|
|
162
|
+
except Exception as e:
|
|
163
|
+
# Error tracking
|
|
164
|
+
self.coordinator.beat(
|
|
165
|
+
status="failed",
|
|
166
|
+
progress=0.0,
|
|
167
|
+
current_task=f"Failed: {str(e)}"
|
|
168
|
+
)
|
|
169
|
+
raise
|
|
170
|
+
|
|
171
|
+
finally:
|
|
172
|
+
# Stop heartbeat
|
|
173
|
+
self.coordinator.stop_heartbeat(
|
|
174
|
+
final_status="completed" if results["completed"] > 0 else "failed"
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
def _generate_module_tests(self, module: dict[str, Any]) -> Path | None:
|
|
178
|
+
"""Generate tests for a single module using LLM agent.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
module: Module info dict with 'file', 'total', 'missing', etc.
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
Path to generated test file, or None if skipped
|
|
185
|
+
"""
|
|
186
|
+
source_file = Path(module["file"])
|
|
187
|
+
module_name = source_file.stem
|
|
188
|
+
|
|
189
|
+
# Skip if module doesn't exist
|
|
190
|
+
if not source_file.exists():
|
|
191
|
+
logger.warning(f"Source file not found: {source_file}")
|
|
192
|
+
return None
|
|
193
|
+
|
|
194
|
+
# Read source to understand what needs testing
|
|
195
|
+
try:
|
|
196
|
+
source_code = source_file.read_text()
|
|
197
|
+
except Exception as e:
|
|
198
|
+
logger.error(f"Cannot read {source_file}: {e}")
|
|
199
|
+
return None
|
|
200
|
+
|
|
201
|
+
# Generate test file path
|
|
202
|
+
test_file = self.output_dir / f"test_{module_name}_behavioral.py"
|
|
203
|
+
|
|
204
|
+
# Extract module path for imports
|
|
205
|
+
module_path = str(source_file).replace("src/", "").replace(".py", "").replace("/", ".")
|
|
206
|
+
|
|
207
|
+
# Generate tests using LLM agent (inline - no Task tool)
|
|
208
|
+
test_content = self._generate_with_llm(module_name, module_path, source_file, source_code)
|
|
209
|
+
|
|
210
|
+
if not test_content:
|
|
211
|
+
logger.warning(f"LLM generation failed for {module_name}")
|
|
212
|
+
return None
|
|
213
|
+
|
|
214
|
+
logger.info(f"LLM generated {len(test_content)} bytes for {module_name}")
|
|
215
|
+
|
|
216
|
+
# Write test file
|
|
217
|
+
test_file.write_text(test_content)
|
|
218
|
+
logger.info(f"Wrote test file: {test_file}")
|
|
219
|
+
|
|
220
|
+
# Validate it can be imported
|
|
221
|
+
if not self._validate_test_file(test_file):
|
|
222
|
+
test_file.unlink()
|
|
223
|
+
return None
|
|
224
|
+
|
|
225
|
+
return test_file
|
|
226
|
+
|
|
227
|
+
def _generate_with_llm(self, module_name: str, module_path: str, source_file: Path, source_code: str) -> str | None:
|
|
228
|
+
"""Generate comprehensive tests using LLM.
|
|
229
|
+
|
|
230
|
+
Args:
|
|
231
|
+
module_name: Name of module being tested
|
|
232
|
+
module_path: Python import path (e.g., empathy_os.config)
|
|
233
|
+
source_file: Path to source file
|
|
234
|
+
source_code: Source code content
|
|
235
|
+
|
|
236
|
+
Returns:
|
|
237
|
+
Test file content with comprehensive tests, or None if generation failed
|
|
238
|
+
"""
|
|
239
|
+
import os
|
|
240
|
+
|
|
241
|
+
try:
|
|
242
|
+
import anthropic
|
|
243
|
+
except ImportError:
|
|
244
|
+
logger.error("anthropic package not installed")
|
|
245
|
+
return None
|
|
246
|
+
|
|
247
|
+
# Get API key
|
|
248
|
+
api_key = os.getenv("ANTHROPIC_API_KEY")
|
|
249
|
+
if not api_key:
|
|
250
|
+
logger.error("ANTHROPIC_API_KEY not set")
|
|
251
|
+
return None
|
|
252
|
+
|
|
253
|
+
# Craft comprehensive test generation prompt
|
|
254
|
+
prompt = f"""Generate comprehensive behavioral tests for this Python module.
|
|
255
|
+
|
|
256
|
+
SOURCE FILE: {source_file}
|
|
257
|
+
MODULE PATH: {module_path}
|
|
258
|
+
|
|
259
|
+
SOURCE CODE:
|
|
260
|
+
```python
|
|
261
|
+
{source_code[:3000]}{"..." if len(source_code) > 3000 else ""}
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
Generate a complete test file that:
|
|
265
|
+
1. Uses Given/When/Then behavioral test structure
|
|
266
|
+
2. Tests all public functions and classes
|
|
267
|
+
3. Includes edge cases and error handling
|
|
268
|
+
4. Uses proper mocking for external dependencies
|
|
269
|
+
5. Targets 80%+ code coverage for this module
|
|
270
|
+
6. Follows pytest conventions
|
|
271
|
+
|
|
272
|
+
Requirements:
|
|
273
|
+
- Import from {module_path} (not from src/)
|
|
274
|
+
- Use pytest fixtures where appropriate
|
|
275
|
+
- Mock external dependencies (APIs, databases, file I/O)
|
|
276
|
+
- Test both success and failure paths
|
|
277
|
+
- Include docstrings for all tests
|
|
278
|
+
- Use descriptive test names
|
|
279
|
+
- Start with copyright header:
|
|
280
|
+
\"\"\"Behavioral tests for {module_name}.
|
|
281
|
+
|
|
282
|
+
Generated by enhanced autonomous test generation system.
|
|
283
|
+
|
|
284
|
+
Copyright 2026 Smart-AI-Memory
|
|
285
|
+
Licensed under Apache 2.0
|
|
286
|
+
\"\"\"
|
|
287
|
+
|
|
288
|
+
Return ONLY the complete Python test file content, no explanations."""
|
|
289
|
+
|
|
290
|
+
try:
|
|
291
|
+
# Call Anthropic API with capable model
|
|
292
|
+
logger.info(f"Calling LLM for {module_name} (source: {len(source_code)} bytes)")
|
|
293
|
+
client = anthropic.Anthropic(api_key=api_key)
|
|
294
|
+
response = client.messages.create(
|
|
295
|
+
model="claude-sonnet-4-5", # capable tier
|
|
296
|
+
max_tokens=4000,
|
|
297
|
+
messages=[{"role": "user", "content": prompt}],
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
if not response.content:
|
|
301
|
+
logger.warning(f"Empty LLM response for {module_name}")
|
|
302
|
+
return None
|
|
303
|
+
|
|
304
|
+
test_content = response.content[0].text.strip()
|
|
305
|
+
logger.info(f"LLM returned {len(test_content)} bytes for {module_name}")
|
|
306
|
+
|
|
307
|
+
if len(test_content) < 100:
|
|
308
|
+
logger.warning(f"LLM response too short for {module_name}: {test_content[:200]}")
|
|
309
|
+
return None
|
|
310
|
+
|
|
311
|
+
# Clean up response (remove markdown fences if present)
|
|
312
|
+
if test_content.startswith("```python"):
|
|
313
|
+
test_content = test_content[len("```python"):].strip()
|
|
314
|
+
if test_content.endswith("```"):
|
|
315
|
+
test_content = test_content[:-3].strip()
|
|
316
|
+
|
|
317
|
+
logger.info(f"Test content cleaned, final size: {len(test_content)} bytes")
|
|
318
|
+
return test_content
|
|
319
|
+
|
|
320
|
+
except Exception as e:
|
|
321
|
+
logger.error(f"LLM generation error for {module_name}: {e}", exc_info=True)
|
|
322
|
+
return None
|
|
323
|
+
|
|
324
|
+
def _create_test_template_DEPRECATED(self, module_name: str, source_file: Path, source_code: str) -> str:
|
|
325
|
+
"""Create comprehensive behavioral test template.
|
|
326
|
+
|
|
327
|
+
Args:
|
|
328
|
+
module_name: Name of module being tested
|
|
329
|
+
source_file: Path to source file
|
|
330
|
+
source_code: Source code content
|
|
331
|
+
|
|
332
|
+
Returns:
|
|
333
|
+
Test file content with comprehensive tests
|
|
334
|
+
"""
|
|
335
|
+
import ast
|
|
336
|
+
|
|
337
|
+
# Extract module path for imports
|
|
338
|
+
module_path = str(source_file).replace("src/", "").replace(".py", "").replace("/", ".")
|
|
339
|
+
|
|
340
|
+
# Parse source to find functions and classes
|
|
341
|
+
try:
|
|
342
|
+
tree = ast.parse(source_code)
|
|
343
|
+
functions = [node.name for node in ast.walk(tree) if isinstance(node, ast.FunctionDef) and not node.name.startswith('_')]
|
|
344
|
+
classes = [node.name for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]
|
|
345
|
+
except:
|
|
346
|
+
functions = []
|
|
347
|
+
classes = []
|
|
348
|
+
|
|
349
|
+
# Generate test classes for each class found
|
|
350
|
+
test_classes = []
|
|
351
|
+
for cls_name in classes[:5]: # Limit to 5 classes
|
|
352
|
+
test_classes.append(f'''
|
|
353
|
+
class Test{cls_name}:
|
|
354
|
+
"""Behavioral tests for {cls_name} class."""
|
|
355
|
+
|
|
356
|
+
def test_{cls_name.lower()}_instantiation(self):
|
|
357
|
+
"""Test {cls_name} can be instantiated."""
|
|
358
|
+
# Given: Class is available
|
|
359
|
+
# When: Creating instance
|
|
360
|
+
try:
|
|
361
|
+
from {module_path} import {cls_name}
|
|
362
|
+
# Then: Instance created successfully
|
|
363
|
+
assert {cls_name} is not None
|
|
364
|
+
except ImportError:
|
|
365
|
+
pytest.skip("Class not available")
|
|
366
|
+
|
|
367
|
+
def test_{cls_name.lower()}_has_expected_methods(self):
|
|
368
|
+
"""Test {cls_name} has expected interface."""
|
|
369
|
+
# Given: Class is available
|
|
370
|
+
try:
|
|
371
|
+
from {module_path} import {cls_name}
|
|
372
|
+
# When: Checking methods
|
|
373
|
+
# Then: Common methods should exist
|
|
374
|
+
assert hasattr({cls_name}, '__init__')
|
|
375
|
+
except ImportError:
|
|
376
|
+
pytest.skip("Class not available")
|
|
377
|
+
''')
|
|
378
|
+
|
|
379
|
+
# Generate tests for functions
|
|
380
|
+
function_tests = []
|
|
381
|
+
for func_name in functions[:10]: # Limit to 10 functions
|
|
382
|
+
function_tests.append(f'''
|
|
383
|
+
def test_{func_name}_callable(self):
|
|
384
|
+
"""Test {func_name} function is callable."""
|
|
385
|
+
# Given: Function is available
|
|
386
|
+
try:
|
|
387
|
+
from {module_path} import {func_name}
|
|
388
|
+
# When: Checking if callable
|
|
389
|
+
# Then: Function should be callable
|
|
390
|
+
assert callable({func_name})
|
|
391
|
+
except ImportError:
|
|
392
|
+
pytest.skip("Function not available")
|
|
393
|
+
|
|
394
|
+
def test_{func_name}_with_valid_input(self):
|
|
395
|
+
"""Test {func_name} with valid input."""
|
|
396
|
+
# Given: Function is available
|
|
397
|
+
try:
|
|
398
|
+
from {module_path} import {func_name}
|
|
399
|
+
# When: Called with mocked dependencies
|
|
400
|
+
with patch.object({module_path}, '{func_name}', return_value=Mock()) as mock_func:
|
|
401
|
+
result = mock_func()
|
|
402
|
+
# Then: Should return successfully
|
|
403
|
+
assert result is not None
|
|
404
|
+
except (ImportError, AttributeError):
|
|
405
|
+
pytest.skip("Function not available or cannot be mocked")
|
|
406
|
+
''')
|
|
407
|
+
|
|
408
|
+
# Combine all test content
|
|
409
|
+
test_content = f'''"""Behavioral tests for {module_name}.
|
|
410
|
+
|
|
411
|
+
Generated by enhanced autonomous test generation system.
|
|
412
|
+
|
|
413
|
+
Copyright 2026 Smart-AI-Memory
|
|
414
|
+
Licensed under Apache 2.0
|
|
415
|
+
"""
|
|
416
|
+
|
|
417
|
+
import pytest
|
|
418
|
+
from unittest.mock import Mock, patch, MagicMock, AsyncMock
|
|
419
|
+
from pathlib import Path
|
|
420
|
+
|
|
421
|
+
# Import module under test
|
|
422
|
+
try:
|
|
423
|
+
import {module_path}
|
|
424
|
+
except ImportError as e:
|
|
425
|
+
pytest.skip(f"Cannot import {module_path}: {{e}}", allow_module_level=True)
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
class TestModule{module_name.title().replace("_", "")}:
|
|
429
|
+
"""Behavioral tests for {module_name} module."""
|
|
430
|
+
|
|
431
|
+
def test_module_imports_successfully(self):
|
|
432
|
+
"""Test that module can be imported."""
|
|
433
|
+
# Given: Module exists
|
|
434
|
+
# When: Importing module
|
|
435
|
+
# Then: No import errors
|
|
436
|
+
assert {module_path} is not None
|
|
437
|
+
|
|
438
|
+
def test_module_has_expected_attributes(self):
|
|
439
|
+
"""Test module has expected top-level attributes."""
|
|
440
|
+
# Given: Module is imported
|
|
441
|
+
# When: Checking for __doc__
|
|
442
|
+
# Then: Documentation should exist
|
|
443
|
+
assert hasattr({module_path}, '__doc__')
|
|
444
|
+
{"".join(function_tests)}
|
|
445
|
+
|
|
446
|
+
{"".join(test_classes)}
|
|
447
|
+
|
|
448
|
+
class TestEdgeCases:
|
|
449
|
+
"""Edge case and error handling tests."""
|
|
450
|
+
|
|
451
|
+
def test_import_does_not_raise_exceptions(self):
|
|
452
|
+
"""Test that importing module doesn't raise exceptions."""
|
|
453
|
+
# Given: Module path is valid
|
|
454
|
+
# When: Importing
|
|
455
|
+
# Then: Should not raise
|
|
456
|
+
try:
|
|
457
|
+
import {module_path}
|
|
458
|
+
assert True
|
|
459
|
+
except Exception as e:
|
|
460
|
+
pytest.fail(f"Import raised unexpected exception: {{e}}")
|
|
461
|
+
|
|
462
|
+
def test_module_constants_are_defined(self):
|
|
463
|
+
"""Test that common constants are properly defined."""
|
|
464
|
+
# Given: Module is imported
|
|
465
|
+
# When: Checking for logger or similar
|
|
466
|
+
# Then: Should have standard attributes
|
|
467
|
+
try:
|
|
468
|
+
import {module_path}
|
|
469
|
+
# Check for common patterns
|
|
470
|
+
assert True # Module loaded
|
|
471
|
+
except ImportError:
|
|
472
|
+
pytest.skip("Module not available")
|
|
473
|
+
'''
|
|
474
|
+
|
|
475
|
+
return test_content
|
|
476
|
+
|
|
477
|
+
def _validate_test_file(self, test_file: Path) -> bool:
|
|
478
|
+
"""Validate test file can be imported.
|
|
479
|
+
|
|
480
|
+
Args:
|
|
481
|
+
test_file: Path to test file
|
|
482
|
+
|
|
483
|
+
Returns:
|
|
484
|
+
True if valid, False otherwise
|
|
485
|
+
"""
|
|
486
|
+
try:
|
|
487
|
+
result = subprocess.run(
|
|
488
|
+
[sys.executable, "-m", "pytest", "--collect-only", str(test_file)],
|
|
489
|
+
capture_output=True,
|
|
490
|
+
text=True,
|
|
491
|
+
timeout=10,
|
|
492
|
+
)
|
|
493
|
+
|
|
494
|
+
if result.returncode != 0:
|
|
495
|
+
logger.warning(f"Validation failed for {test_file.name}: {result.stderr[:500]}")
|
|
496
|
+
# Don't fail validation on collection errors - test might still be valuable
|
|
497
|
+
# Just log the error and keep the file
|
|
498
|
+
return True # Changed from False - be permissive
|
|
499
|
+
|
|
500
|
+
return True
|
|
501
|
+
except Exception as e:
|
|
502
|
+
logger.error(f"Validation exception for {test_file}: {e}")
|
|
503
|
+
return False
|
|
504
|
+
|
|
505
|
+
def _count_tests(self) -> int:
|
|
506
|
+
"""Count total tests in generated files.
|
|
507
|
+
|
|
508
|
+
Returns:
|
|
509
|
+
Number of tests
|
|
510
|
+
"""
|
|
511
|
+
try:
|
|
512
|
+
result = subprocess.run(
|
|
513
|
+
[sys.executable, "-m", "pytest", "--collect-only", "-q", str(self.output_dir)],
|
|
514
|
+
capture_output=True,
|
|
515
|
+
text=True,
|
|
516
|
+
timeout=30,
|
|
517
|
+
)
|
|
518
|
+
# Parse output like "123 tests collected"
|
|
519
|
+
for line in result.stdout.split("\n"):
|
|
520
|
+
if "tests collected" in line:
|
|
521
|
+
return int(line.split()[0])
|
|
522
|
+
return 0
|
|
523
|
+
except Exception:
|
|
524
|
+
return 0
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def run_batch_generation(batch_num: int, modules_json: str) -> None:
|
|
528
|
+
"""Run test generation for a batch.
|
|
529
|
+
|
|
530
|
+
Args:
|
|
531
|
+
batch_num: Batch number
|
|
532
|
+
modules_json: JSON string of modules to process
|
|
533
|
+
"""
|
|
534
|
+
# Parse modules
|
|
535
|
+
modules = json.loads(modules_json)
|
|
536
|
+
|
|
537
|
+
# Create agent
|
|
538
|
+
agent_id = f"test-gen-batch{batch_num}"
|
|
539
|
+
generator = AutonomousTestGenerator(agent_id, batch_num, modules)
|
|
540
|
+
|
|
541
|
+
# Generate tests
|
|
542
|
+
print(f"Starting autonomous test generation for batch {batch_num}")
|
|
543
|
+
print(f"Modules to process: {len(modules)}")
|
|
544
|
+
print(f"Agent ID: {agent_id}")
|
|
545
|
+
print("Monitor at: http://localhost:8000\n")
|
|
546
|
+
|
|
547
|
+
results = generator.generate_all()
|
|
548
|
+
|
|
549
|
+
# Report results
|
|
550
|
+
print(f"\n{'='*60}")
|
|
551
|
+
print(f"Batch {batch_num} Complete!")
|
|
552
|
+
print(f"{'='*60}")
|
|
553
|
+
print(f"Modules processed: {results['completed']}/{results['total_modules']}")
|
|
554
|
+
print(f"Tests generated: {results['tests_generated']}")
|
|
555
|
+
print(f"Files created: {len(results['files_created'])}")
|
|
556
|
+
print(f"Failed: {results['failed']}")
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
if __name__ == "__main__":
|
|
560
|
+
import sys
|
|
561
|
+
|
|
562
|
+
if len(sys.argv) != 3:
|
|
563
|
+
print("Usage: python -m empathy_os.workflows.autonomous_test_gen <batch_num> <modules_json>")
|
|
564
|
+
sys.exit(1)
|
|
565
|
+
|
|
566
|
+
batch_num = int(sys.argv[1])
|
|
567
|
+
modules_json = sys.argv[2]
|
|
568
|
+
|
|
569
|
+
run_batch_generation(batch_num, modules_json)
|
|
@@ -109,19 +109,22 @@ class BatchProcessingWorkflow:
|
|
|
109
109
|
if not requests:
|
|
110
110
|
raise ValueError("requests cannot be empty")
|
|
111
111
|
|
|
112
|
-
# Convert to Anthropic
|
|
112
|
+
# Convert to Anthropic Message Batches format
|
|
113
113
|
api_requests = []
|
|
114
114
|
for req in requests:
|
|
115
115
|
model = get_model("anthropic", req.model_tier)
|
|
116
116
|
if model is None:
|
|
117
117
|
raise ValueError(f"Unknown model tier: {req.model_tier}")
|
|
118
118
|
|
|
119
|
+
# Use correct format with params wrapper
|
|
119
120
|
api_requests.append(
|
|
120
121
|
{
|
|
121
122
|
"custom_id": req.task_id,
|
|
122
|
-
"
|
|
123
|
-
|
|
124
|
-
|
|
123
|
+
"params": {
|
|
124
|
+
"model": model.id,
|
|
125
|
+
"messages": self._format_messages(req),
|
|
126
|
+
"max_tokens": 4096,
|
|
127
|
+
},
|
|
125
128
|
}
|
|
126
129
|
)
|
|
127
130
|
|
|
@@ -153,17 +156,58 @@ class BatchProcessingWorkflow:
|
|
|
153
156
|
for req in requests
|
|
154
157
|
]
|
|
155
158
|
|
|
156
|
-
# Parse results
|
|
159
|
+
# Parse results - new Message Batches API format
|
|
157
160
|
results = []
|
|
158
161
|
for raw in raw_results:
|
|
159
162
|
task_id = raw.get("custom_id", "unknown")
|
|
163
|
+
result = raw.get("result", {})
|
|
164
|
+
result_type = result.get("type", "unknown")
|
|
165
|
+
|
|
166
|
+
if result_type == "succeeded":
|
|
167
|
+
# Extract message content from succeeded result
|
|
168
|
+
message = result.get("message", {})
|
|
169
|
+
content_blocks = message.get("content", [])
|
|
170
|
+
|
|
171
|
+
# Convert content blocks to simple output format
|
|
172
|
+
output_text = ""
|
|
173
|
+
for block in content_blocks:
|
|
174
|
+
if isinstance(block, dict) and block.get("type") == "text":
|
|
175
|
+
output_text += block.get("text", "")
|
|
176
|
+
|
|
177
|
+
output = {
|
|
178
|
+
"content": output_text,
|
|
179
|
+
"usage": message.get("usage", {}),
|
|
180
|
+
"model": message.get("model"),
|
|
181
|
+
"stop_reason": message.get("stop_reason"),
|
|
182
|
+
}
|
|
183
|
+
results.append(BatchResult(task_id=task_id, success=True, output=output))
|
|
184
|
+
|
|
185
|
+
elif result_type == "errored":
|
|
186
|
+
# Extract error from errored result
|
|
187
|
+
error = result.get("error", {})
|
|
188
|
+
error_msg = error.get("message", "Unknown error")
|
|
189
|
+
error_type = error.get("type", "unknown_error")
|
|
190
|
+
results.append(
|
|
191
|
+
BatchResult(task_id=task_id, success=False, error=f"{error_type}: {error_msg}")
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
elif result_type == "expired":
|
|
195
|
+
results.append(
|
|
196
|
+
BatchResult(task_id=task_id, success=False, error="Request expired")
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
elif result_type == "canceled":
|
|
200
|
+
results.append(
|
|
201
|
+
BatchResult(task_id=task_id, success=False, error="Request canceled")
|
|
202
|
+
)
|
|
160
203
|
|
|
161
|
-
if "error" in raw:
|
|
162
|
-
error_msg = raw["error"].get("message", "Unknown error")
|
|
163
|
-
results.append(BatchResult(task_id=task_id, success=False, error=error_msg))
|
|
164
204
|
else:
|
|
165
205
|
results.append(
|
|
166
|
-
BatchResult(
|
|
206
|
+
BatchResult(
|
|
207
|
+
task_id=task_id,
|
|
208
|
+
success=False,
|
|
209
|
+
error=f"Unknown result type: {result_type}",
|
|
210
|
+
)
|
|
167
211
|
)
|
|
168
212
|
|
|
169
213
|
# Log summary
|
|
@@ -201,7 +245,9 @@ class BatchProcessingWorkflow:
|
|
|
201
245
|
logger.warning(
|
|
202
246
|
f"Missing required field {e} for task {request.task_type}, using raw input"
|
|
203
247
|
)
|
|
204
|
-
|
|
248
|
+
# Use default template instead of the specific one
|
|
249
|
+
default_template = "Process the following:\n\n{input}"
|
|
250
|
+
content = default_template.format(input=json.dumps(request.input_data))
|
|
205
251
|
|
|
206
252
|
return [{"role": "user", "content": content}]
|
|
207
253
|
|