empathy-framework 3.9.2-py3-none-any.whl → 3.10.1-py3-none-any.whl
This diff shows the changes between publicly available package versions as released to a supported registry. It is provided for informational purposes only.
- {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/METADATA +37 -5
- {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/RECORD +32 -32
- empathy_llm_toolkit/agent_factory/crews/health_check.py +36 -29
- empathy_os/__init__.py +1 -1
- empathy_os/cache/hybrid.py +69 -9
- empathy_os/cli.py +183 -18
- empathy_os/cli_unified.py +113 -4
- empathy_os/config.py +7 -4
- empathy_os/hot_reload/integration.py +2 -1
- empathy_os/hot_reload/watcher.py +8 -4
- empathy_os/hot_reload/websocket.py +2 -1
- empathy_os/models/telemetry.py +900 -2
- empathy_os/test_generator/generator.py +1 -1
- empathy_os/tier_recommender.py +3 -3
- empathy_os/workflows/base.py +5 -2
- empathy_os/workflows/health_check.py +37 -0
- empathy_os/workflows/new_sample_workflow1.py +3 -3
- empathy_os/workflows/tier_tracking.py +1 -1
- empathy_software_plugin/wizards/advanced_debugging_wizard.py +9 -6
- empathy_software_plugin/wizards/debugging/bug_risk_analyzer.py +1 -1
- empathy_software_plugin/wizards/debugging/config_loaders.py +6 -2
- empathy_software_plugin/wizards/debugging/language_patterns.py +4 -2
- empathy_software_plugin/wizards/debugging/linter_parsers.py +1 -1
- empathy_software_plugin/wizards/performance/profiler_parsers.py +7 -7
- empathy_software_plugin/wizards/security/vulnerability_scanner.py +1 -1
- empathy_software_plugin/wizards/security_analysis_wizard.py +2 -2
- empathy_software_plugin/wizards/testing/quality_analyzer.py +3 -9
- empathy_software_plugin/wizards/testing/test_suggester.py +1 -1
- {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/WHEEL +0 -0
- {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/entry_points.txt +0 -0
- {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/licenses/LICENSE +0 -0
- {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/top_level.txt +0 -0
empathy_os/tier_recommender.py
CHANGED
@@ -79,7 +79,7 @@ class TierRecommender:
 
     def _load_patterns(self) -> list[dict]:
         """Load all enhanced patterns with tier_progression data."""
-        patterns = []
+        patterns: list[dict] = []
 
         if not self.patterns_dir.exists():
             return patterns
@@ -359,8 +359,8 @@ class TierRecommender:
             return {"total_patterns": 0, "message": "No patterns loaded"}
 
         # Calculate tier distribution
-        tier_dist = defaultdict(int)
-        bug_type_dist = defaultdict(int)
+        tier_dist: dict[str, int] = defaultdict(int)
+        bug_type_dist: dict[str, int] = defaultdict(int)
         total_savings = 0.0
 
         for pattern in self.patterns:
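Nothing changes behaviorally in this hunk: mypy cannot infer key/value types for a bare `defaultdict(int)` assignment and asks for an annotation, which is what the diff supplies. A minimal standalone sketch of the pattern (the tier name is a placeholder, not from the package):

    from collections import defaultdict

    # Without the annotation, mypy reports "Need type annotation"; annotating
    # as dict[str, int] types both keys and values while keeping defaultdict's
    # auto-initializing behavior at runtime.
    tier_dist: dict[str, int] = defaultdict(int)
    tier_dist["cheap"] += 1  # str key, int value; missing keys start at 0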
empathy_os/workflows/base.py
CHANGED
@@ -26,6 +26,9 @@ from enum import Enum
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
+if TYPE_CHECKING:
+    from .tier_tracking import WorkflowTierTracker
+
 # Load .env file for API keys if python-dotenv is available
 try:
     from dotenv import load_dotenv
@@ -430,7 +433,7 @@ class BaseWorkflow(ABC):
 
         # Tier tracking support
         self._enable_tier_tracking = enable_tier_tracking
-        self._tier_tracker = None
+        self._tier_tracker: WorkflowTierTracker | None = None
 
         # Telemetry tracking (singleton instance)
         self._telemetry_tracker: UsageTracker | None = None
@@ -569,7 +572,7 @@ class BaseWorkflow(ABC):
             logger.debug(f"Cache hit for {self.name}:{stage}")
             # Determine cache type
             if hasattr(self._cache, "cache_type"):
-                ct = self._cache.cache_type
+                ct = self._cache.cache_type
                 # Ensure it's a string (not a Mock object)
                 cache_type = str(ct) if ct and isinstance(ct, str) else "hash"
             else:
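The `if TYPE_CHECKING:` block is the standard way to import a name needed only for annotations: the import is seen by the type checker but never executed at runtime, which avoids both the runtime dependency and a potential import cycle between base.py and tier_tracking.py. A condensed sketch of the pattern (runnable standalone, since the guarded block never executes):

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Only evaluated by the type checker; no runtime import occurs.
        from .tier_tracking import WorkflowTierTracker

    class BaseWorkflow:
        def __init__(self) -> None:
            # Annotations inside function bodies are never evaluated at
            # runtime, so the guarded import is enough for mypy to check this.
            self._tier_tracker: WorkflowTierTracker | None = None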
empathy_os/workflows/health_check.py
CHANGED
@@ -87,6 +87,7 @@ class HealthCheckWorkflow(BaseWorkflow):
         check_tests: bool = True,
         check_deps: bool = True,
         xml_prompts: bool = True,
+        health_score_threshold: int = 95,
         **kwargs: Any,
     ):
         """Initialize health check workflow.
@@ -98,6 +99,8 @@ class HealthCheckWorkflow(BaseWorkflow):
             check_tests: Run test checks
             check_deps: Run dependency checks
             xml_prompts: Use XML-enhanced prompts
+            health_score_threshold: Minimum health score required (0-100, default: 95)
+                100 = perfect health, 95 = very strict (default), 80 = moderate
             **kwargs: Additional arguments passed to BaseWorkflow
 
         """
@@ -108,6 +111,7 @@ class HealthCheckWorkflow(BaseWorkflow):
         self.check_tests = check_tests
         self.check_deps = check_deps
         self.xml_prompts = xml_prompts
+        self.health_score_threshold = health_score_threshold
         self._crew: Any = None
         self._crew_available = False
 
@@ -153,6 +157,39 @@ class HealthCheckWorkflow(BaseWorkflow):
             return await self._fix(input_data, tier)
         raise ValueError(f"Unknown stage: {stage_name}")
 
+    def validate_output(self, stage_output: dict) -> tuple[bool, str | None]:
+        """Validate health check output quality.
+
+        For health-check workflow, we validate that:
+        1. Diagnosis data is present
+        2. Health score meets the configured threshold (default: 95 = very strict quality)
+        3. No critical execution errors occurred
+
+        Args:
+            stage_output: Output from diagnose or fix stage
+
+        Returns:
+            Tuple of (is_valid, failure_reason)
+
+        """
+        # First run parent validation (checks for empty output, errors)
+        is_valid, reason = super().validate_output(stage_output)
+        if not is_valid:
+            return False, reason
+
+        # Check diagnosis data exists
+        diagnosis = stage_output.get("diagnosis", {})
+        if not diagnosis:
+            return False, "diagnosis_missing"
+
+        # Check health score meets configured threshold
+        health_score = diagnosis.get("health_score", 0)
+        if health_score < self.health_score_threshold:
+            return False, "health_score_low"
+
+        # All validation passed
+        return True, None
+
     async def _diagnose(self, input_data: dict, tier: ModelTier) -> tuple[dict, int, int]:
         """Run health diagnosis using HealthCheckCrew.
 
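The new `validate_output` hook is what enforces `health_score_threshold`. A standalone mirror of its core logic for illustration (the real method first defers to `BaseWorkflow.validate_output`, which is not reproduced here):

    def validate_health_output(stage_output: dict, threshold: int = 95) -> tuple[bool, str | None]:
        # Mirrors the diagnosis checks from the diff: data present, score >= threshold.
        diagnosis = stage_output.get("diagnosis", {})
        if not diagnosis:
            return False, "diagnosis_missing"
        if diagnosis.get("health_score", 0) < threshold:
            return False, "health_score_low"
        return True, None

    print(validate_health_output({"diagnosis": {"health_score": 90}}))      # (False, 'health_score_low')
    print(validate_health_output({"diagnosis": {"health_score": 97}}))      # (True, None)
    print(validate_health_output({"diagnosis": {"health_score": 90}}, 80))  # (True, None)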
empathy_os/workflows/new_sample_workflow1.py
CHANGED
@@ -80,7 +80,7 @@ class NewSampleWorkflow1Workflow(BaseWorkflow):
 
         """
         # TODO: Implement analyze logic
-        prompt = f"
+        prompt = f"analyze stage: {input_data}"
 
         if self.executor:
             result = await self.executor.execute(
@@ -107,7 +107,7 @@ class NewSampleWorkflow1Workflow(BaseWorkflow):
 
         """
         # TODO: Implement process logic
-        prompt = f"
+        prompt = f"process stage: {input_data}"
 
         if self.executor:
             result = await self.executor.execute(
@@ -134,7 +134,7 @@ class NewSampleWorkflow1Workflow(BaseWorkflow):
 
         """
         # TODO: Implement report logic
-        prompt = f"
+        prompt = f"report stage: {input_data}"
 
         if self.executor:
             result = await self.executor.execute(
empathy_os/workflows/tier_tracking.py
CHANGED
@@ -369,7 +369,7 @@ class WorkflowTierTracker:
         )
 
         # Calculate actual cost from stages
-        actual_cost = sum(stage.cost for stage in workflow_result.stages)
+        actual_cost: float = sum(stage.cost for stage in workflow_result.stages)
 
         # Rough estimate: PREMIUM tier is ~15x more expensive than CHEAP
         return actual_cost * 5  # Conservative multiplier
empathy_software_plugin/wizards/advanced_debugging_wizard.py
CHANGED
@@ -47,8 +47,8 @@ class AdvancedDebuggingWizard(BaseWizard):
         super().__init__()
         self.bug_analyzer = BugRiskAnalyzer()
         self.pattern_library = get_pattern_library()
-        self._name = "Advanced Debugging Wizard"
-        self._level = 4
+        self._name: str = "Advanced Debugging Wizard"
+        self._level: int = 4
 
     @property
     def name(self) -> str:
@@ -126,7 +126,8 @@ class AdvancedDebuggingWizard(BaseWizard):
         # Phase 4: Group by fixability
         fixability_by_linter = {}
         for linter_name, result in linter_results.items():
-
+            issues_for_fixability: list[LintIssue] = result["issues"]  # type: ignore[assignment]
+            fixability = group_issues_by_fixability(linter_name, issues_for_fixability)
             fixability_by_linter[linter_name] = {
                 "auto_fixable": len(fixability["auto_fixable"]),
                 "manual": len(fixability["manual"]),
@@ -138,7 +139,8 @@ class AdvancedDebuggingWizard(BaseWizard):
         logger.info("Applying auto-fixes...")
 
         for linter_name, result in linter_results.items():
-
+            issues_for_fixing: list[LintIssue] = result["issues"]  # type: ignore[assignment]
+            fixes = apply_fixes(linter_name, issues_for_fixing, dry_run=False, auto_only=True)
 
             successful = [f for f in fixes if f.success]
             failed = [f for f in fixes if not f.success]
@@ -155,7 +157,8 @@ class AdvancedDebuggingWizard(BaseWizard):
         logger.info("Verifying fixes...")
 
         for linter_name, result in linter_results.items():
-
+            issues_for_verification: list[LintIssue] = result["issues"]  # type: ignore[assignment]
+            verification = verify_fixes(linter_name, project_path, issues_for_verification)
 
             verification_results[linter_name] = verification.to_dict()
 
@@ -201,7 +204,7 @@ class AdvancedDebuggingWizard(BaseWizard):
         insights = []
 
         # Group issues by language
-        by_language = {}
+        by_language: dict[str, list[LintIssue]] = {}
         for issue in issues:
             lang = issue.linter
             if lang not in by_language:
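Each loop body now pulls `result["issues"]` into an annotated local before passing it on; the `type: ignore[assignment]` is needed because the `linter_results` values are too loosely typed for mypy to narrow. An equivalent alternative is `typing.cast`, sketched below with a stand-in `LintIssue` class (the real one lives in the debugging package):

    from typing import Any, cast

    class LintIssue:  # stand-in for the package's LintIssue
        ...

    result: dict[str, Any] = {"issues": [LintIssue()], "exit_code": 0}

    # cast() asserts the type to mypy without an ignore comment; like the
    # annotated-local approach, it performs no checking at runtime.
    issues = cast(list[LintIssue], result["issues"])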
empathy_software_plugin/wizards/debugging/config_loaders.py
CHANGED
@@ -246,15 +246,19 @@ class PylintConfigLoader(BaseConfigLoader):
         """Load from pyproject.toml"""
         try:
             import tomli
+
+            toml_loader = tomli
         except ImportError:
             # Fallback for Python 3.11+
             try:
-                import tomllib
+                import tomllib
+
+                toml_loader = tomllib
             except ImportError as e:
                 raise ImportError("tomli or tomllib required for pyproject.toml") from e
 
         with open(path, "rb") as f:
-            data =
+            data = toml_loader.load(f)
 
         pylint_config = data.get("tool", {}).get("pylint", {})
 
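This hunk completes a previously truncated `data =` line by aliasing whichever TOML backend imported successfully to `toml_loader`, so the read code below is backend-agnostic. The shape of the pattern as a runnable sketch (assumes a pyproject.toml in the working directory):

    try:
        import tomli

        toml_loader = tomli  # third-party backend, Python < 3.11
    except ImportError:
        import tomllib  # stdlib since Python 3.11

        toml_loader = tomllib

    # Both modules expose the same load() API for binary file objects.
    with open("pyproject.toml", "rb") as f:
        data = toml_loader.load(f)

    print(data.get("tool", {}).get("pylint", {}))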
empathy_software_plugin/wizards/debugging/language_patterns.py
CHANGED
@@ -278,6 +278,7 @@ class CrossLanguagePatternLibrary:
             UniversalPattern if found, None otherwise
 
         """
+        pattern: UniversalPattern
         for pattern in self.patterns.values():
             if linter in pattern.language_manifestations:
                 if pattern.language_manifestations[linter] == rule:
@@ -295,11 +296,12 @@ class CrossLanguagePatternLibrary:
             List of fix steps, or None if not found
 
         """
-        pattern = self.patterns.get(pattern_name)
+        pattern: UniversalPattern | None = self.patterns.get(pattern_name)
         if not pattern:
             return None
 
-
+        result: list[str] | None = pattern.language_specific_fixes.get(language)
+        return result
 
     def suggest_cross_language_insight(
         self,
empathy_software_plugin/wizards/performance/profiler_parsers.py
CHANGED
@@ -167,20 +167,20 @@ class ChromeDevToolsParser(BaseProfilerParser):
                 function_times[name]["call_count"] += 1
 
         # Convert to FunctionProfile
-        total_time = sum(
+        total_time = sum(stats["total_time"] for stats in function_times.values())
 
-        for func_name,
+        for func_name, stats in function_times.items():
             profiles.append(
                 FunctionProfile(
                     function_name=func_name,
                     file_path="",  # Chrome doesn't always provide
                     line_number=0,
-                    total_time=
-                    self_time=
-                    call_count=
-                    cumulative_time=
+                    total_time=stats["total_time"],
+                    self_time=stats["total_time"],
+                    call_count=stats["call_count"],
+                    cumulative_time=stats["total_time"],
                     percent_total=(
-                        (
+                        (stats["total_time"] / total_time * 100) if total_time > 0 else 0
                     ),
                     profiler=self.profiler_name,
                 ),
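The repaired hunk aggregates per-function timings and derives each function's share of total time, guarding the division when the profile is empty. The same arithmetic on toy data:

    # Toy per-function totals in the same shape the parser builds.
    function_times = {
        "render": {"total_time": 30.0, "call_count": 3},
        "parse": {"total_time": 10.0, "call_count": 1},
    }

    total_time = sum(stats["total_time"] for stats in function_times.values())

    for func_name, stats in function_times.items():
        # Guard against an empty/zero profile, as the diff does.
        percent = (stats["total_time"] / total_time * 100) if total_time > 0 else 0
        print(f"{func_name}: {stats['call_count']} calls, {percent:.0f}% of total")
    # render: 3 calls, 75% of total
    # parse: 1 calls, 25% of total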
empathy_software_plugin/wizards/security/vulnerability_scanner.py
CHANGED
@@ -225,7 +225,7 @@ class VulnerabilityScanner:
         lines: list[str],
     ) -> list[Vulnerability]:
         """Scan for hardcoded secrets"""
-        vulnerabilities = []
+        vulnerabilities: list[Vulnerability] = []
 
         # Skip certain file types
         if file_path.suffix in [".md", ".txt", ".json", ".xml"]:
empathy_software_plugin/wizards/security_analysis_wizard.py
CHANGED
@@ -168,10 +168,10 @@ class SecurityAnalysisWizard(BaseWizard):
 
     def _group_by_category(self, vulnerabilities: list[dict[str, Any]]) -> dict[str, int]:
         """Group vulnerabilities by OWASP category"""
-        by_category = {}
+        by_category: dict[str, int] = {}
 
         for vuln in vulnerabilities:
-            category = vuln.get("category", "unknown")
+            category: str = str(vuln.get("category", "unknown"))
             by_category[category] = by_category.get(category, 0) + 1
 
         return by_category
empathy_software_plugin/wizards/testing/quality_analyzer.py
CHANGED
@@ -8,7 +8,7 @@ Licensed under Fair Source License 0.9
 """
 
 import re
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from enum import Enum
 from pathlib import Path
 from typing import Any
@@ -37,14 +37,8 @@ class TestFunction:
     assertions_count: int
     execution_time: float | None = None
     is_async: bool = False
-    uses_fixtures: list[str]
-    issues: list[TestQualityIssue]
-
-    def __post_init__(self):
-        if self.uses_fixtures is None:
-            self.uses_fixtures = []
-        if self.issues is None:
-            self.issues = []
+    uses_fixtures: list[str] = field(default_factory=list)
+    issues: list[TestQualityIssue] = field(default_factory=list)
 
     @property
     def quality_score(self) -> float:
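The dataclass change fixes a definition-time problem: in the removed code, `uses_fixtures` and `issues` had no defaults yet followed defaulted fields (`execution_time`, `is_async`), which dataclasses reject with "non-default argument follows default argument", and the `__post_init__` None-checks only worked around it. `field(default_factory=list)` keeps the fields optional and gives every instance its own fresh list. A reduced sketch:

    from dataclasses import dataclass, field

    @dataclass
    class TestFunctionSketch:  # reduced stand-in for the real TestFunction
        name: str
        is_async: bool = False
        # default_factory runs per instance, so lists are never shared.
        uses_fixtures: list[str] = field(default_factory=list)

    a = TestFunctionSketch("test_a")
    b = TestFunctionSketch("test_b")
    a.uses_fixtures.append("tmp_path")
    print(b.uses_fixtures)  # [] (instances do not share the list)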
empathy_software_plugin/wizards/testing/test_suggester.py
CHANGED
@@ -494,7 +494,7 @@ def {test_name}({params_str}):
     summary.append("=" * 60)
 
     # Group by priority
-    by_priority = {
+    by_priority: dict[TestPriority, list[TestSuggestion]] = {
         TestPriority.CRITICAL: [],
         TestPriority.HIGH: [],
         TestPriority.MEDIUM: [],
File without changes: {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/WHEEL
File without changes: {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/entry_points.txt
File without changes: {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/licenses/LICENSE
File without changes: {empathy_framework-3.9.2.dist-info → empathy_framework-3.10.1.dist-info}/top_level.txt