empathy-framework 4.6.3-py3-none-any.whl → 4.6.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. {empathy_framework-4.6.3.dist-info → empathy_framework-4.6.5.dist-info}/METADATA +53 -11
  2. {empathy_framework-4.6.3.dist-info → empathy_framework-4.6.5.dist-info}/RECORD +32 -57
  3. empathy_llm_toolkit/agent_factory/crews/health_check.py +7 -4
  4. empathy_llm_toolkit/agent_factory/decorators.py +3 -2
  5. empathy_llm_toolkit/agent_factory/memory_integration.py +6 -2
  6. empathy_llm_toolkit/contextual_patterns.py +5 -2
  7. empathy_llm_toolkit/git_pattern_extractor.py +8 -4
  8. empathy_llm_toolkit/providers.py +4 -3
  9. empathy_os/__init__.py +1 -1
  10. empathy_os/cli/__init__.py +306 -0
  11. empathy_os/cli/__main__.py +26 -0
  12. empathy_os/cli/commands/__init__.py +8 -0
  13. empathy_os/cli/commands/inspection.py +48 -0
  14. empathy_os/cli/commands/memory.py +56 -0
  15. empathy_os/cli/commands/provider.py +86 -0
  16. empathy_os/cli/commands/utilities.py +94 -0
  17. empathy_os/cli/core.py +32 -0
  18. empathy_os/cli.py +18 -6
  19. empathy_os/cli_unified.py +19 -3
  20. empathy_os/memory/short_term.py +12 -2
  21. empathy_os/project_index/scanner.py +151 -49
  22. empathy_os/socratic/visual_editor.py +9 -4
  23. empathy_os/workflows/bug_predict.py +70 -1
  24. empathy_os/workflows/pr_review.py +6 -0
  25. empathy_os/workflows/security_audit.py +13 -0
  26. empathy_os/workflows/tier_tracking.py +50 -2
  27. wizards/discharge_summary_wizard.py +4 -2
  28. wizards/incident_report_wizard.py +4 -2
  29. empathy_os/meta_workflows/agent_creator 2.py +0 -254
  30. empathy_os/meta_workflows/builtin_templates 2.py +0 -567
  31. empathy_os/meta_workflows/cli_meta_workflows 2.py +0 -1551
  32. empathy_os/meta_workflows/form_engine 2.py +0 -304
  33. empathy_os/meta_workflows/intent_detector 2.py +0 -298
  34. empathy_os/meta_workflows/pattern_learner 2.py +0 -754
  35. empathy_os/meta_workflows/session_context 2.py +0 -398
  36. empathy_os/meta_workflows/template_registry 2.py +0 -229
  37. empathy_os/meta_workflows/workflow 2.py +0 -980
  38. empathy_os/orchestration/pattern_learner 2.py +0 -699
  39. empathy_os/orchestration/real_tools 2.py +0 -938
  40. empathy_os/socratic/__init__ 2.py +0 -273
  41. empathy_os/socratic/ab_testing 2.py +0 -969
  42. empathy_os/socratic/blueprint 2.py +0 -532
  43. empathy_os/socratic/cli 2.py +0 -689
  44. empathy_os/socratic/collaboration 2.py +0 -1112
  45. empathy_os/socratic/domain_templates 2.py +0 -916
  46. empathy_os/socratic/embeddings 2.py +0 -734
  47. empathy_os/socratic/engine 2.py +0 -729
  48. empathy_os/socratic/explainer 2.py +0 -663
  49. empathy_os/socratic/feedback 2.py +0 -767
  50. empathy_os/socratic/forms 2.py +0 -624
  51. empathy_os/socratic/generator 2.py +0 -716
  52. empathy_os/socratic/llm_analyzer 2.py +0 -635
  53. empathy_os/socratic/mcp_server 2.py +0 -751
  54. empathy_os/socratic/session 2.py +0 -306
  55. empathy_os/socratic/storage 2.py +0 -635
  56. empathy_os/socratic/success 2.py +0 -719
  57. empathy_os/socratic/visual_editor 2.py +0 -812
  58. empathy_os/socratic/web_ui 2.py +0 -925
  59. empathy_os/workflows/batch_processing 2.py +0 -310
  60. empathy_os/workflows/release_prep_crew 2.py +0 -968
  61. empathy_os/workflows/test_coverage_boost_crew 2.py +0 -848
  62. {empathy_framework-4.6.3.dist-info → empathy_framework-4.6.5.dist-info}/WHEEL +0 -0
  63. {empathy_framework-4.6.3.dist-info → empathy_framework-4.6.5.dist-info}/entry_points.txt +0 -0
  64. {empathy_framework-4.6.3.dist-info → empathy_framework-4.6.5.dist-info}/licenses/LICENSE +0 -0
  65. {empathy_framework-4.6.3.dist-info → empathy_framework-4.6.5.dist-info}/top_level.txt +0 -0
empathy_os/socratic/success 2.py
@@ -1,719 +0,0 @@
- """Success Criteria and Measurement System
-
- Define and measure success for generated workflows.
-
- Success criteria allow users to:
- 1. Define what "done" looks like for their workflow
- 2. Track progress toward goals
- 3. Measure effectiveness over time
- 4. Iterate and improve workflows
-
- Copyright 2026 Smart-AI-Memory
- Licensed under Fair Source License 0.9
- """
-
- from __future__ import annotations
-
- from collections.abc import Callable
- from dataclasses import dataclass, field
- from datetime import datetime
- from enum import Enum
- from typing import Any
-
-
- class MetricType(Enum):
-     """Types of success metrics."""
-
-     # Numeric metrics
-     COUNT = "count" # Integer count (e.g., issues found)
-     PERCENTAGE = "percentage" # 0-100 percentage
-     RATIO = "ratio" # 0-1 ratio
-     DURATION = "duration" # Time in seconds
-
-     # Boolean metrics
-     BOOLEAN = "boolean" # True/False
-
-     # Comparison metrics
-     IMPROVEMENT = "improvement" # Before/after comparison
-     THRESHOLD = "threshold" # Above/below threshold
-
-     # Quality metrics
-     SCORE = "score" # 0-10 quality score
-     RATING = "rating" # Categorical (good, moderate, poor)
-
-
- class MetricDirection(Enum):
-     """Which direction indicates success."""
-
-     HIGHER_IS_BETTER = "higher" # More issues found = better
-     LOWER_IS_BETTER = "lower" # Less time = better
-     TARGET_VALUE = "target" # Specific value is best
-     RANGE = "range" # Within a range is success
-
-
- @dataclass
- class SuccessMetric:
-     """A single success metric definition.
-
-     Example:
-         >>> metric = SuccessMetric(
-         ...     id="security_issues_found",
-         ...     name="Security Issues Detected",
-         ...     description="Number of security vulnerabilities identified",
-         ...     metric_type=MetricType.COUNT,
-         ...     direction=MetricDirection.HIGHER_IS_BETTER,
-         ...     target_value=None, # No specific target
-         ...     minimum_value=0,
-         ...     unit="issues"
-         ... )
-     """
-
-     # Unique metric identifier
-     id: str
-
-     # Display name
-     name: str
-
-     # Description of what this measures
-     description: str
-
-     # Type of metric
-     metric_type: MetricType
-
-     # Which direction indicates success
-     direction: MetricDirection = MetricDirection.HIGHER_IS_BETTER
-
-     # Target value (for TARGET_VALUE direction)
-     target_value: float | None = None
-
-     # Minimum acceptable value
-     minimum_value: float | None = None
-
-     # Maximum acceptable value
-     maximum_value: float | None = None
-
-     # Unit of measurement
-     unit: str = ""
-
-     # Weight for composite scoring (0-1)
-     weight: float = 1.0
-
-     # Whether this is a primary success indicator
-     is_primary: bool = False
-
-     # How to extract this metric from workflow output
-     extraction_path: str = "" # JSONPath-like expression
-
-     # Custom extraction function
-     extractor: Callable[[dict], float | bool] | None = None
-
-     def evaluate(
-         self,
-         value: float | bool,
-         baseline: float | bool | None = None,
-     ) -> tuple[bool, float, str]:
-         """Evaluate if a value meets this metric's success criteria.
-
-         Args:
-             value: The measured value
-             baseline: Optional baseline for comparison
-
-         Returns:
-             Tuple of (met_criteria, score 0-1, explanation)
-         """
-         # Boolean metrics
-         if self.metric_type == MetricType.BOOLEAN:
-             if isinstance(value, bool):
-                 met = value
-                 score = 1.0 if value else 0.0
-                 explanation = "Criteria met" if met else "Criteria not met"
-                 return met, score, explanation
-
-         # Ensure numeric value for other types
-         if not isinstance(value, (int, float)):
-             return False, 0.0, f"Expected numeric value, got {type(value)}"
-
-         # Calculate score based on direction
-         if self.direction == MetricDirection.HIGHER_IS_BETTER:
-             if self.minimum_value is not None:
-                 met = value >= self.minimum_value
-                 # Score is ratio of value to minimum (capped at 1.0)
-                 score = min(value / self.minimum_value, 1.0) if self.minimum_value > 0 else 1.0
-             else:
-                 met = True # No minimum, always met
-                 score = 1.0
-
-         elif self.direction == MetricDirection.LOWER_IS_BETTER:
-             if self.maximum_value is not None:
-                 met = value <= self.maximum_value
-                 # Score is inverse ratio (lower is better)
-                 score = max(1.0 - (value / self.maximum_value), 0.0) if self.maximum_value > 0 else 1.0
-             else:
-                 met = True
-                 score = 1.0
-
-         elif self.direction == MetricDirection.TARGET_VALUE:
-             if self.target_value is not None:
-                 deviation = abs(value - self.target_value)
-                 # Allow 10% tolerance by default
-                 tolerance = self.target_value * 0.1 if self.target_value > 0 else 1.0
-                 met = deviation <= tolerance
-                 score = max(1.0 - (deviation / max(tolerance, 0.001)), 0.0)
-             else:
-                 met = True
-                 score = 1.0
-
-         elif self.direction == MetricDirection.RANGE:
-             min_val = self.minimum_value or float("-inf")
-             max_val = self.maximum_value or float("inf")
-             met = min_val <= value <= max_val
-             if met:
-                 # Score based on position in range (center = best)
-                 range_size = max_val - min_val
-                 if range_size > 0 and range_size != float("inf"):
-                     center = (min_val + max_val) / 2
-                     distance_from_center = abs(value - center)
-                     score = 1.0 - (distance_from_center / (range_size / 2))
-                 else:
-                     score = 1.0
-             else:
-                 score = 0.0
-         else:
-             met = True
-             score = 1.0
-
-         # Generate explanation
-         explanation = self._generate_explanation(value, met, score, baseline)
-
-         return met, score, explanation
-
-     def _generate_explanation(
-         self,
-         value: float | bool,
-         met: bool,
-         score: float,
-         baseline: float | bool | None,
-     ) -> str:
-         """Generate human-readable explanation of the evaluation."""
-         parts = []
-
-         # Value statement
-         if self.unit:
-             parts.append(f"Measured: {value} {self.unit}")
-         else:
-             parts.append(f"Measured: {value}")
-
-         # Comparison to baseline
-         if baseline is not None and isinstance(value, (int, float)) and isinstance(baseline, (int, float)):
-             diff = value - baseline
-             pct_change = (diff / baseline * 100) if baseline != 0 else 0
-             direction = "↑" if diff > 0 else "↓" if diff < 0 else "→"
-             parts.append(f"vs baseline: {direction} {abs(pct_change):.1f}%")
-
-         # Target comparison
-         if self.direction == MetricDirection.TARGET_VALUE and self.target_value is not None:
-             parts.append(f"Target: {self.target_value} {self.unit}".strip())
-
-         # Result
-         result = "✓ Met" if met else "✗ Not met"
-         parts.append(f"{result} (score: {score:.1%})")
-
-         return " | ".join(parts)
-
-     def to_dict(self) -> dict[str, Any]:
-         """Serialize to dictionary."""
-         return {
-             "id": self.id,
-             "name": self.name,
-             "description": self.description,
-             "metric_type": self.metric_type.value,
-             "direction": self.direction.value,
-             "target_value": self.target_value,
-             "minimum_value": self.minimum_value,
-             "maximum_value": self.maximum_value,
-             "unit": self.unit,
-             "weight": self.weight,
-             "is_primary": self.is_primary,
-             "extraction_path": self.extraction_path,
-         }
-
-
- @dataclass
- class MetricResult:
-     """Result of evaluating a single metric."""
-
-     metric_id: str
-     value: float | bool
-     met_criteria: bool
-     score: float
-     explanation: str
-     baseline: float | bool | None = None
-     timestamp: str = ""
-
-
- @dataclass
- class SuccessCriteria:
-     """Complete success criteria for a workflow.
-
-     Example:
-         >>> criteria = SuccessCriteria(
-         ...     id="code_review_success",
-         ...     name="Code Review Success Criteria",
-         ...     description="Measures effectiveness of automated code review",
-         ...     metrics=[
-         ...         SuccessMetric(
-         ...             id="issues_found",
-         ...             name="Issues Found",
-         ...             metric_type=MetricType.COUNT,
-         ...             is_primary=True
-         ...         ),
-         ...         SuccessMetric(
-         ...             id="review_time",
-         ...             name="Review Time",
-         ...             metric_type=MetricType.DURATION,
-         ...             direction=MetricDirection.LOWER_IS_BETTER,
-         ...             maximum_value=60, # seconds
-         ...         ),
-         ...     ],
-         ...     success_threshold=0.7 # 70% overall score = success
-         ... )
-     """
-
-     # Unique identifier
-     id: str = ""
-
-     # Display name
-     name: str = ""
-
-     # Description
-     description: str = ""
-
-     # List of metrics
-     metrics: list[SuccessMetric] = field(default_factory=list)
-
-     # Threshold for overall success (0-1)
-     success_threshold: float = 0.7
-
-     # Whether ALL metrics must be met (vs weighted average)
-     require_all: bool = False
-
-     # Minimum primary metrics that must pass
-     min_primary_metrics: int = 1
-
-     # Custom success evaluator
-     custom_evaluator: Callable[[dict[str, MetricResult]], bool] | None = None
-
-     def add_metric(self, metric: SuccessMetric) -> None:
-         """Add a metric to the criteria."""
-         self.metrics.append(metric)
-
-     def get_primary_metrics(self) -> list[SuccessMetric]:
-         """Get all primary success indicators."""
-         return [m for m in self.metrics if m.is_primary]
-
-     def evaluate(
-         self,
-         workflow_output: dict[str, Any],
-         baselines: dict[str, float | bool] | None = None,
-     ) -> SuccessEvaluation:
-         """Evaluate workflow output against success criteria.
-
-         Args:
-             workflow_output: The workflow's output to evaluate
-             baselines: Optional baseline values for comparison
-
-         Returns:
-             SuccessEvaluation with detailed results
-         """
-         baselines = baselines or {}
-         results: list[MetricResult] = []
-         timestamp = datetime.now().isoformat()
-
-         # Evaluate each metric
-         for metric in self.metrics:
-             # Extract value from output
-             value = self._extract_metric_value(metric, workflow_output)
-
-             if value is None:
-                 # Metric not found in output
-                 results.append(MetricResult(
-                     metric_id=metric.id,
-                     value=0,
-                     met_criteria=False,
-                     score=0.0,
-                     explanation=f"Metric '{metric.name}' not found in output",
-                     timestamp=timestamp,
-                 ))
-                 continue
-
-             # Get baseline if available
-             baseline = baselines.get(metric.id)
-
-             # Evaluate
-             met, score, explanation = metric.evaluate(value, baseline)
-
-             results.append(MetricResult(
-                 metric_id=metric.id,
-                 value=value,
-                 met_criteria=met,
-                 score=score,
-                 explanation=explanation,
-                 baseline=baseline,
-                 timestamp=timestamp,
-             ))
-
-         # Calculate overall success
-         return self._calculate_overall_success(results)
-
-     def _extract_metric_value(
-         self,
-         metric: SuccessMetric,
-         output: dict[str, Any],
-     ) -> float | bool | None:
-         """Extract metric value from workflow output."""
-         # Use custom extractor if provided
-         if metric.extractor:
-             try:
-                 return metric.extractor(output)
-             except (KeyError, TypeError, ValueError):
-                 return None
-
-         # Use extraction path
-         if metric.extraction_path:
-             try:
-                 value = output
-                 for key in metric.extraction_path.split("."):
-                     if isinstance(value, dict):
-                         value = value[key]
-                     elif isinstance(value, list) and key.isdigit():
-                         value = value[int(key)]
-                     else:
-                         return None
-                 return value
-             except (KeyError, IndexError, TypeError):
-                 return None
-
-         # Try direct key match
-         if metric.id in output:
-             return output[metric.id]
-
-         # Try nested in 'metrics' key
-         if "metrics" in output and isinstance(output["metrics"], dict):
-             if metric.id in output["metrics"]:
-                 return output["metrics"][metric.id]
-
-         return None
-
-     def _calculate_overall_success(
-         self,
-         results: list[MetricResult],
-     ) -> SuccessEvaluation:
-         """Calculate overall success from metric results."""
-         if not results:
-             return SuccessEvaluation(
-                 overall_success=False,
-                 overall_score=0.0,
-                 metric_results=results,
-                 summary="No metrics to evaluate",
-             )
-
-         # Check primary metrics
-         primary_results = [
-             r for r in results
-             if any(m.id == r.metric_id and m.is_primary for m in self.metrics)
-         ]
-         primary_passed = sum(1 for r in primary_results if r.met_criteria)
-
-         # Check if minimum primary metrics are met
-         primary_check = primary_passed >= self.min_primary_metrics
-
-         # Check if all required
-         if self.require_all:
-             all_met = all(r.met_criteria for r in results)
-             overall_success = all_met and primary_check
-             overall_score = 1.0 if overall_success else sum(r.score for r in results) / len(results)
-         else:
-             # Weighted average score
-             total_weight = sum(
-                 m.weight for m in self.metrics
-                 if any(r.metric_id == m.id for r in results)
-             )
-
-             if total_weight > 0:
-                 weighted_score = sum(
-                     r.score * next((m.weight for m in self.metrics if m.id == r.metric_id), 1.0)
-                     for r in results
-                 ) / total_weight
-             else:
-                 weighted_score = sum(r.score for r in results) / len(results)
-
-             overall_score = weighted_score
-             overall_success = overall_score >= self.success_threshold and primary_check
-
-         # Custom evaluator override
-         if self.custom_evaluator:
-             results_dict = {r.metric_id: r for r in results}
-             overall_success = self.custom_evaluator(results_dict)
-
-         # Generate summary
-         summary = self._generate_summary(results, overall_success, overall_score)
-
-         return SuccessEvaluation(
-             overall_success=overall_success,
-             overall_score=overall_score,
-             metric_results=results,
-             summary=summary,
-             primary_metrics_passed=primary_passed,
-             total_primary_metrics=len(primary_results),
-         )
-
-     def _generate_summary(
-         self,
-         results: list[MetricResult],
-         success: bool,
-         score: float,
-     ) -> str:
-         """Generate human-readable summary."""
-         status = "✓ SUCCESS" if success else "✗ NOT MET"
-         met_count = sum(1 for r in results if r.met_criteria)
-
-         lines = [
-             f"{status} - Overall score: {score:.1%}",
-             f"Metrics: {met_count}/{len(results)} met criteria",
-             "",
-             "Details:",
-         ]
-
-         for result in results:
-             metric = next((m for m in self.metrics if m.id == result.metric_id), None)
-             name = metric.name if metric else result.metric_id
-             indicator = "✓" if result.met_criteria else "✗"
-             lines.append(f" {indicator} {name}: {result.explanation}")
-
-         return "\n".join(lines)
-
-     def to_dict(self) -> dict[str, Any]:
-         """Serialize to dictionary."""
-         return {
-             "id": self.id,
-             "name": self.name,
-             "description": self.description,
-             "metrics": [m.to_dict() for m in self.metrics],
-             "success_threshold": self.success_threshold,
-             "require_all": self.require_all,
-             "min_primary_metrics": self.min_primary_metrics,
-         }
-
-
- @dataclass
- class SuccessEvaluation:
-     """Result of evaluating success criteria."""
-
-     # Whether overall success criteria were met
-     overall_success: bool
-
-     # Overall score (0-1)
-     overall_score: float
-
-     # Individual metric results
-     metric_results: list[MetricResult]
-
-     # Human-readable summary
-     summary: str
-
-     # Primary metrics that passed
-     primary_metrics_passed: int = 0
-
-     # Total primary metrics
-     total_primary_metrics: int = 0
-
-     # Timestamp of evaluation
-     evaluated_at: str = field(default_factory=lambda: datetime.now().isoformat())
-
-     def to_dict(self) -> dict[str, Any]:
-         """Serialize to dictionary."""
-         return {
-             "overall_success": self.overall_success,
-             "overall_score": self.overall_score,
-             "metric_results": [
-                 {
-                     "metric_id": r.metric_id,
-                     "value": r.value,
-                     "met_criteria": r.met_criteria,
-                     "score": r.score,
-                     "explanation": r.explanation,
-                     "baseline": r.baseline,
-                 }
-                 for r in self.metric_results
-             ],
-             "summary": self.summary,
-             "primary_metrics_passed": self.primary_metrics_passed,
-             "total_primary_metrics": self.total_primary_metrics,
-             "evaluated_at": self.evaluated_at,
-         }
-
-
- # =============================================================================
- # PREDEFINED SUCCESS CRITERIA TEMPLATES
- # =============================================================================
-
-
- def code_review_criteria() -> SuccessCriteria:
-     """Create standard success criteria for code review workflows."""
-     return SuccessCriteria(
-         id="code_review_success",
-         name="Code Review Success",
-         description="Standard metrics for code review effectiveness",
-         metrics=[
-             SuccessMetric(
-                 id="issues_found",
-                 name="Issues Found",
-                 description="Number of issues identified",
-                 metric_type=MetricType.COUNT,
-                 direction=MetricDirection.HIGHER_IS_BETTER,
-                 is_primary=True,
-                 weight=1.0,
-                 extraction_path="findings_count",
-             ),
-             SuccessMetric(
-                 id="severity_coverage",
-                 name="Severity Coverage",
-                 description="Percentage of severity levels covered",
-                 metric_type=MetricType.PERCENTAGE,
-                 direction=MetricDirection.HIGHER_IS_BETTER,
-                 minimum_value=50,
-                 weight=0.8,
-                 extraction_path="severity_coverage",
-             ),
-             SuccessMetric(
-                 id="review_time",
-                 name="Review Duration",
-                 description="Time to complete review",
-                 metric_type=MetricType.DURATION,
-                 direction=MetricDirection.LOWER_IS_BETTER,
-                 maximum_value=120, # 2 minutes
-                 unit="seconds",
-                 weight=0.6,
-                 extraction_path="duration_seconds",
-             ),
-             SuccessMetric(
-                 id="actionable_recommendations",
-                 name="Actionable Recommendations",
-                 description="Whether recommendations are actionable",
-                 metric_type=MetricType.BOOLEAN,
-                 is_primary=True,
-                 weight=1.0,
-                 extraction_path="has_recommendations",
-             ),
-         ],
-         success_threshold=0.7,
-         min_primary_metrics=1,
-     )
-
-
- def security_audit_criteria() -> SuccessCriteria:
-     """Create success criteria for security audit workflows."""
-     return SuccessCriteria(
-         id="security_audit_success",
-         name="Security Audit Success",
-         description="Metrics for security audit effectiveness",
-         metrics=[
-             SuccessMetric(
-                 id="vulnerabilities_found",
-                 name="Vulnerabilities Found",
-                 description="Security vulnerabilities identified",
-                 metric_type=MetricType.COUNT,
-                 direction=MetricDirection.HIGHER_IS_BETTER,
-                 is_primary=True,
-                 weight=1.0,
-                 extraction_path="vulnerabilities.count",
-             ),
-             SuccessMetric(
-                 id="critical_issues",
-                 name="Critical Issues",
-                 description="High/critical severity issues found",
-                 metric_type=MetricType.COUNT,
-                 direction=MetricDirection.HIGHER_IS_BETTER,
-                 is_primary=True,
-                 weight=1.2, # Extra weight for critical issues
-                 extraction_path="vulnerabilities.critical_count",
-             ),
-             SuccessMetric(
-                 id="owasp_coverage",
-                 name="OWASP Coverage",
-                 description="OWASP Top 10 categories checked",
-                 metric_type=MetricType.PERCENTAGE,
-                 direction=MetricDirection.HIGHER_IS_BETTER,
-                 minimum_value=80,
-                 weight=0.9,
-                 extraction_path="owasp_coverage_percent",
-             ),
-             SuccessMetric(
-                 id="false_positive_rate",
-                 name="False Positive Rate",
-                 description="Estimated false positive rate",
-                 metric_type=MetricType.PERCENTAGE,
-                 direction=MetricDirection.LOWER_IS_BETTER,
-                 maximum_value=20,
-                 weight=0.7,
-                 extraction_path="estimated_fp_rate",
-             ),
-         ],
-         success_threshold=0.75,
-         min_primary_metrics=1,
-     )
-
-
- def test_generation_criteria() -> SuccessCriteria:
-     """Create success criteria for test generation workflows."""
-     return SuccessCriteria(
-         id="test_generation_success",
-         name="Test Generation Success",
-         description="Metrics for test generation effectiveness",
-         metrics=[
-             SuccessMetric(
-                 id="tests_generated",
-                 name="Tests Generated",
-                 description="Number of test cases generated",
-                 metric_type=MetricType.COUNT,
-                 direction=MetricDirection.HIGHER_IS_BETTER,
-                 minimum_value=1,
-                 is_primary=True,
-                 weight=1.0,
-                 extraction_path="tests.count",
-             ),
-             SuccessMetric(
-                 id="coverage_increase",
-                 name="Coverage Increase",
-                 description="Increase in code coverage",
-                 metric_type=MetricType.IMPROVEMENT,
-                 direction=MetricDirection.HIGHER_IS_BETTER,
-                 minimum_value=5, # At least 5% increase
-                 unit="%",
-                 weight=1.0,
-                 extraction_path="coverage.increase_percent",
-             ),
-             SuccessMetric(
-                 id="tests_passing",
-                 name="Tests Passing",
-                 description="Percentage of generated tests that pass",
-                 metric_type=MetricType.PERCENTAGE,
-                 direction=MetricDirection.HIGHER_IS_BETTER,
-                 minimum_value=80,
-                 is_primary=True,
-                 weight=1.2,
-                 extraction_path="tests.pass_rate",
-             ),
-             SuccessMetric(
-                 id="edge_cases_covered",
-                 name="Edge Cases Covered",
-                 description="Number of edge cases with tests",
-                 metric_type=MetricType.COUNT,
-                 direction=MetricDirection.HIGHER_IS_BETTER,
-                 weight=0.8,
-                 extraction_path="edge_cases.count",
-             ),
-         ],
-         success_threshold=0.7,
-         min_primary_metrics=2,
-     )