@yasserkhanorg/e2e-agents 1.8.5 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. package/README.md +95 -8
  2. package/dist/adapters/cypress.d.ts +10 -0
  3. package/dist/adapters/cypress.d.ts.map +1 -0
  4. package/dist/adapters/cypress.js +86 -0
  5. package/dist/adapters/framework_adapter.d.ts +41 -0
  6. package/dist/adapters/framework_adapter.d.ts.map +1 -0
  7. package/dist/adapters/framework_adapter.js +152 -0
  8. package/dist/adapters/playwright.d.ts +10 -0
  9. package/dist/adapters/playwright.d.ts.map +1 -0
  10. package/dist/adapters/playwright.js +86 -0
  11. package/dist/adapters/pytest.d.ts +10 -0
  12. package/dist/adapters/pytest.d.ts.map +1 -0
  13. package/dist/adapters/pytest.js +96 -0
  14. package/dist/adapters/supertest.d.ts +12 -0
  15. package/dist/adapters/supertest.d.ts.map +1 -0
  16. package/dist/adapters/supertest.js +85 -0
  17. package/dist/agent/config.d.ts +1 -1
  18. package/dist/agent/config.d.ts.map +1 -1
  19. package/dist/agent/git.d.ts +1 -0
  20. package/dist/agent/git.d.ts.map +1 -1
  21. package/dist/agent/git.js +3 -0
  22. package/dist/agentic/fix_loop.d.ts.map +1 -1
  23. package/dist/agentic/fix_loop.js +5 -4
  24. package/dist/agentic/runner.d.ts +2 -0
  25. package/dist/agentic/runner.d.ts.map +1 -1
  26. package/dist/agentic/runner.js +15 -12
  27. package/dist/agents/cross-impact.d.ts.map +1 -1
  28. package/dist/agents/cross-impact.js +6 -1
  29. package/dist/agents/executor.d.ts.map +1 -1
  30. package/dist/agents/executor.js +6 -1
  31. package/dist/agents/strategist.d.ts.map +1 -1
  32. package/dist/agents/strategist.js +6 -1
  33. package/dist/agents/test-designer.d.ts.map +1 -1
  34. package/dist/agents/test-designer.js +6 -1
  35. package/dist/anthropic_provider.d.ts.map +1 -1
  36. package/dist/anthropic_provider.js +1 -0
  37. package/dist/base_provider.d.ts +56 -0
  38. package/dist/base_provider.d.ts.map +1 -1
  39. package/dist/base_provider.js +123 -1
  40. package/dist/budget_ledger.d.ts +28 -0
  41. package/dist/budget_ledger.d.ts.map +1 -0
  42. package/dist/budget_ledger.js +62 -0
  43. package/dist/cache/cached_provider.d.ts +45 -0
  44. package/dist/cache/cached_provider.d.ts.map +1 -0
  45. package/dist/cache/cached_provider.js +88 -0
  46. package/dist/cache/response_cache.d.ts +79 -0
  47. package/dist/cache/response_cache.d.ts.map +1 -0
  48. package/dist/cache/response_cache.js +177 -0
  49. package/dist/cli/commands/bootstrap.d.ts +3 -0
  50. package/dist/cli/commands/bootstrap.d.ts.map +1 -0
  51. package/dist/cli/commands/bootstrap.js +109 -0
  52. package/dist/cli/commands/cost_report.d.ts +3 -0
  53. package/dist/cli/commands/cost_report.d.ts.map +1 -0
  54. package/dist/cli/commands/cost_report.js +115 -0
  55. package/dist/cli/commands/crew.d.ts.map +1 -1
  56. package/dist/cli/commands/crew.js +118 -1
  57. package/dist/cli/commands/gate.d.ts +3 -0
  58. package/dist/cli/commands/gate.d.ts.map +1 -0
  59. package/dist/cli/commands/gate.js +86 -0
  60. package/dist/cli/commands/init.d.ts.map +1 -1
  61. package/dist/cli/commands/init.js +7 -62
  62. package/dist/cli/commands/train.d.ts.map +1 -1
  63. package/dist/cli/commands/train.js +16 -21
  64. package/dist/cli/defaults.d.ts +35 -0
  65. package/dist/cli/defaults.d.ts.map +1 -0
  66. package/dist/cli/defaults.js +125 -0
  67. package/dist/cli/errors.d.ts +27 -0
  68. package/dist/cli/errors.d.ts.map +1 -0
  69. package/dist/cli/errors.js +57 -0
  70. package/dist/cli/parse_args.d.ts.map +1 -1
  71. package/dist/cli/parse_args.js +24 -2
  72. package/dist/cli/types.d.ts +7 -1
  73. package/dist/cli/types.d.ts.map +1 -1
  74. package/dist/cli.js +47 -2
  75. package/dist/crew/context.d.ts +15 -0
  76. package/dist/crew/context.d.ts.map +1 -1
  77. package/dist/crew/orchestrator.d.ts +14 -0
  78. package/dist/crew/orchestrator.d.ts.map +1 -1
  79. package/dist/crew/orchestrator.js +162 -4
  80. package/dist/crew/protocol.d.ts +13 -0
  81. package/dist/crew/protocol.d.ts.map +1 -1
  82. package/dist/crew/provider.d.ts +15 -1
  83. package/dist/crew/provider.d.ts.map +1 -1
  84. package/dist/crew/provider.js +24 -4
  85. package/dist/custom_provider.d.ts.map +1 -1
  86. package/dist/custom_provider.js +1 -0
  87. package/dist/engine/diff_loader.d.ts.map +1 -1
  88. package/dist/engine/diff_loader.js +3 -14
  89. package/dist/engine/impact_engine.d.ts.map +1 -1
  90. package/dist/engine/impact_engine.js +9 -23
  91. package/dist/esm/adapters/cypress.js +49 -0
  92. package/dist/esm/adapters/framework_adapter.js +114 -0
  93. package/dist/esm/adapters/playwright.js +49 -0
  94. package/dist/esm/adapters/pytest.js +59 -0
  95. package/dist/esm/adapters/supertest.js +48 -0
  96. package/dist/esm/agent/git.js +3 -1
  97. package/dist/esm/agentic/fix_loop.js +5 -4
  98. package/dist/esm/agentic/runner.js +15 -12
  99. package/dist/esm/agents/cross-impact.js +6 -1
  100. package/dist/esm/agents/executor.js +6 -1
  101. package/dist/esm/agents/strategist.js +6 -1
  102. package/dist/esm/agents/test-designer.js +6 -1
  103. package/dist/esm/anthropic_provider.js +1 -0
  104. package/dist/esm/base_provider.js +121 -0
  105. package/dist/esm/budget_ledger.js +58 -0
  106. package/dist/esm/cache/cached_provider.js +82 -0
  107. package/dist/esm/cache/response_cache.js +140 -0
  108. package/dist/esm/cli/commands/bootstrap.js +106 -0
  109. package/dist/esm/cli/commands/cost_report.js +112 -0
  110. package/dist/esm/cli/commands/crew.js +118 -1
  111. package/dist/esm/cli/commands/gate.js +83 -0
  112. package/dist/esm/cli/commands/init.js +3 -58
  113. package/dist/esm/cli/commands/train.js +16 -21
  114. package/dist/esm/cli/defaults.js +118 -0
  115. package/dist/esm/cli/errors.js +52 -0
  116. package/dist/esm/cli/parse_args.js +24 -2
  117. package/dist/esm/cli.js +47 -2
  118. package/dist/esm/crew/orchestrator.js +162 -4
  119. package/dist/esm/crew/provider.js +24 -4
  120. package/dist/esm/custom_provider.js +1 -0
  121. package/dist/esm/engine/diff_loader.js +1 -12
  122. package/dist/esm/engine/impact_engine.js +9 -23
  123. package/dist/esm/index.js +21 -0
  124. package/dist/esm/knowledge/api_surface.js +265 -34
  125. package/dist/esm/knowledge/cluster_utils.js +60 -0
  126. package/dist/esm/knowledge/failure_history.js +121 -0
  127. package/dist/esm/knowledge/kg_bridge.js +381 -0
  128. package/dist/esm/knowledge/kg_types.js +3 -0
  129. package/dist/esm/knowledge/route_families.js +119 -0
  130. package/dist/esm/mcp-server.js +2 -4
  131. package/dist/esm/metrics/prometheus.js +149 -0
  132. package/dist/esm/model_router.js +59 -0
  133. package/dist/esm/ollama_provider.js +1 -0
  134. package/dist/esm/openai_provider.js +1 -0
  135. package/dist/esm/pipeline/orchestrator.js +6 -12
  136. package/dist/esm/pipeline/stage0_preprocess.js +12 -19
  137. package/dist/esm/pipeline/stage1_impact.js +19 -3
  138. package/dist/esm/pipeline/stage2_coverage.js +29 -7
  139. package/dist/esm/pipeline/stage3_generation.js +21 -1
  140. package/dist/esm/progress.js +112 -0
  141. package/dist/esm/prompts/coverage.js +17 -24
  142. package/dist/esm/prompts/cross-impact.js +3 -21
  143. package/dist/esm/prompts/generation.js +201 -45
  144. package/dist/esm/prompts/generation_profile.js +147 -0
  145. package/dist/esm/prompts/heal.js +33 -15
  146. package/dist/esm/prompts/impact.js +3 -22
  147. package/dist/esm/prompts/json_extract.js +36 -0
  148. package/dist/esm/prompts/strategist.js +2 -20
  149. package/dist/esm/prompts/test-designer.js +6 -21
  150. package/dist/esm/provider_factory.js +6 -4
  151. package/dist/esm/reporters/junit.js +86 -0
  152. package/dist/esm/reporters/reporter.js +3 -0
  153. package/dist/esm/reporters/sarif.js +131 -0
  154. package/dist/esm/resilience/circuit_breaker.js +78 -0
  155. package/dist/esm/resilience/retry.js +56 -0
  156. package/dist/esm/sanitize.js +66 -0
  157. package/dist/esm/training/kg_scanner.js +115 -0
  158. package/dist/esm/training/scanner.js +27 -34
  159. package/dist/esm/validation/guardrails.js +5 -0
  160. package/dist/esm/version.js +33 -0
  161. package/dist/index.d.ts +21 -1
  162. package/dist/index.d.ts.map +1 -1
  163. package/dist/index.js +45 -1
  164. package/dist/knowledge/api_surface.d.ts +12 -0
  165. package/dist/knowledge/api_surface.d.ts.map +1 -1
  166. package/dist/knowledge/api_surface.js +268 -34
  167. package/dist/knowledge/cluster_utils.d.ts +28 -0
  168. package/dist/knowledge/cluster_utils.d.ts.map +1 -0
  169. package/dist/knowledge/cluster_utils.js +67 -0
  170. package/dist/knowledge/failure_history.d.ts +39 -0
  171. package/dist/knowledge/failure_history.d.ts.map +1 -0
  172. package/dist/knowledge/failure_history.js +128 -0
  173. package/dist/knowledge/kg_bridge.d.ts +31 -0
  174. package/dist/knowledge/kg_bridge.d.ts.map +1 -0
  175. package/dist/knowledge/kg_bridge.js +388 -0
  176. package/dist/knowledge/kg_types.d.ts +75 -0
  177. package/dist/knowledge/kg_types.d.ts.map +1 -0
  178. package/dist/knowledge/kg_types.js +4 -0
  179. package/dist/knowledge/route_families.d.ts +29 -0
  180. package/dist/knowledge/route_families.d.ts.map +1 -1
  181. package/dist/knowledge/route_families.js +122 -0
  182. package/dist/mcp-server.d.ts.map +1 -1
  183. package/dist/mcp-server.js +2 -4
  184. package/dist/metrics/prometheus.d.ts +37 -0
  185. package/dist/metrics/prometheus.d.ts.map +1 -0
  186. package/dist/metrics/prometheus.js +153 -0
  187. package/dist/model_router.d.ts +28 -0
  188. package/dist/model_router.d.ts.map +1 -0
  189. package/dist/model_router.js +63 -0
  190. package/dist/ollama_provider.d.ts.map +1 -1
  191. package/dist/ollama_provider.js +1 -0
  192. package/dist/openai_provider.d.ts.map +1 -1
  193. package/dist/openai_provider.js +1 -0
  194. package/dist/pipeline/orchestrator.d.ts +2 -0
  195. package/dist/pipeline/orchestrator.d.ts.map +1 -1
  196. package/dist/pipeline/orchestrator.js +6 -12
  197. package/dist/pipeline/stage0_preprocess.d.ts.map +1 -1
  198. package/dist/pipeline/stage0_preprocess.js +11 -18
  199. package/dist/pipeline/stage1_impact.d.ts +1 -1
  200. package/dist/pipeline/stage1_impact.d.ts.map +1 -1
  201. package/dist/pipeline/stage1_impact.js +18 -2
  202. package/dist/pipeline/stage2_coverage.d.ts +2 -0
  203. package/dist/pipeline/stage2_coverage.d.ts.map +1 -1
  204. package/dist/pipeline/stage2_coverage.js +29 -7
  205. package/dist/pipeline/stage3_generation.d.ts +2 -0
  206. package/dist/pipeline/stage3_generation.d.ts.map +1 -1
  207. package/dist/pipeline/stage3_generation.js +21 -1
  208. package/dist/pipeline/stage4_heal.d.ts +2 -0
  209. package/dist/pipeline/stage4_heal.d.ts.map +1 -1
  210. package/dist/progress.d.ts +22 -0
  211. package/dist/progress.d.ts.map +1 -0
  212. package/dist/progress.js +116 -0
  213. package/dist/prompts/coverage.d.ts +2 -0
  214. package/dist/prompts/coverage.d.ts.map +1 -1
  215. package/dist/prompts/coverage.js +17 -24
  216. package/dist/prompts/cross-impact.d.ts +1 -0
  217. package/dist/prompts/cross-impact.d.ts.map +1 -1
  218. package/dist/prompts/cross-impact.js +3 -21
  219. package/dist/prompts/generation.d.ts +4 -2
  220. package/dist/prompts/generation.d.ts.map +1 -1
  221. package/dist/prompts/generation.js +201 -45
  222. package/dist/prompts/generation_profile.d.ts +29 -0
  223. package/dist/prompts/generation_profile.d.ts.map +1 -0
  224. package/dist/prompts/generation_profile.js +151 -0
  225. package/dist/prompts/heal.d.ts +3 -1
  226. package/dist/prompts/heal.d.ts.map +1 -1
  227. package/dist/prompts/heal.js +33 -15
  228. package/dist/prompts/impact.d.ts +1 -0
  229. package/dist/prompts/impact.d.ts.map +1 -1
  230. package/dist/prompts/impact.js +3 -22
  231. package/dist/prompts/json_extract.d.ts +14 -0
  232. package/dist/prompts/json_extract.d.ts.map +1 -0
  233. package/dist/prompts/json_extract.js +39 -0
  234. package/dist/prompts/strategist.d.ts.map +1 -1
  235. package/dist/prompts/strategist.js +2 -20
  236. package/dist/prompts/test-designer.d.ts +2 -0
  237. package/dist/prompts/test-designer.d.ts.map +1 -1
  238. package/dist/prompts/test-designer.js +6 -21
  239. package/dist/provider_factory.d.ts.map +1 -1
  240. package/dist/provider_factory.js +6 -4
  241. package/dist/reporters/junit.d.ts +6 -0
  242. package/dist/reporters/junit.d.ts.map +1 -0
  243. package/dist/reporters/junit.js +89 -0
  244. package/dist/reporters/reporter.d.ts +42 -0
  245. package/dist/reporters/reporter.d.ts.map +1 -0
  246. package/dist/reporters/reporter.js +4 -0
  247. package/dist/reporters/sarif.d.ts +7 -0
  248. package/dist/reporters/sarif.d.ts.map +1 -0
  249. package/dist/reporters/sarif.js +134 -0
  250. package/dist/resilience/circuit_breaker.d.ts +36 -0
  251. package/dist/resilience/circuit_breaker.d.ts.map +1 -0
  252. package/dist/resilience/circuit_breaker.js +82 -0
  253. package/dist/resilience/retry.d.ts +11 -0
  254. package/dist/resilience/retry.d.ts.map +1 -0
  255. package/dist/resilience/retry.js +59 -0
  256. package/dist/sanitize.d.ts +15 -0
  257. package/dist/sanitize.d.ts.map +1 -0
  258. package/dist/sanitize.js +71 -0
  259. package/dist/training/kg_scanner.d.ts +13 -0
  260. package/dist/training/kg_scanner.d.ts.map +1 -0
  261. package/dist/training/kg_scanner.js +118 -0
  262. package/dist/training/scanner.d.ts +7 -2
  263. package/dist/training/scanner.d.ts.map +1 -1
  264. package/dist/training/scanner.js +27 -34
  265. package/dist/validation/guardrails.d.ts +2 -0
  266. package/dist/validation/guardrails.d.ts.map +1 -1
  267. package/dist/validation/guardrails.js +5 -0
  268. package/dist/validation/output_schema.d.ts +3 -0
  269. package/dist/validation/output_schema.d.ts.map +1 -1
  270. package/dist/version.d.ts +6 -0
  271. package/dist/version.d.ts.map +1 -0
  272. package/dist/version.js +36 -0
  273. package/package.json +7 -2
  274. package/schemas/route-families.schema.json +31 -1
@@ -0,0 +1,149 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
/**
 * Upper bounds (in seconds) of the histogram buckets, in ascending order.
 * An implicit `+Inf` bucket is appended when metrics are exported.
 */
const DURATION_BUCKETS = [0.1, 0.5, 1, 2, 5, 10, 30, 60, 120, 300];

/**
 * In-memory metrics collector that renders counters, gauges and histograms
 * in the Prometheus text exposition format.
 *
 * Samples are identified by metric name plus their exact label set; recording
 * the same name/labels again updates the existing sample.
 */
export class PrometheusMetrics {
    constructor() {
        this.counters = [];
        this.gauges = [];
        this.histograms = [];
    }

    /**
     * Record an LLM request.
     *
     * @param {string} provider - Provider label value.
     * @param {string} agent - Agent label value.
     * @param {number} durationMs - Request duration in milliseconds (exported in seconds).
     * @param {number} costUSD - Amount added to the cost counter.
     * @param {number} tokens - Amount added to the token counter.
     */
    recordLLMRequest(provider, agent, durationMs, costUSD, tokens) {
        this.incrementCounter('e2e_agents_llm_requests_total', 'Total LLM requests', { provider, agent });
        this.incrementCounter('e2e_agents_llm_tokens_total', 'Total tokens consumed', { provider, agent }, tokens);
        this.incrementCounter('e2e_agents_llm_cost_usd_total', 'Total LLM cost in USD', { provider, agent }, costUSD);
        this.observeHistogram('e2e_agents_llm_request_duration_seconds', 'LLM request duration', { provider, agent }, durationMs / 1000);
    }

    /**
     * Record a crew workflow run.
     *
     * @param {string} workflow - Workflow label value.
     * @param {number} families - Amount added to the families-processed counter.
     * @param {number} durationMs - Run duration in milliseconds (exported in seconds).
     * @param {number} costUSD - Amount added to the crew cost counter.
     */
    recordCrewRun(workflow, families, durationMs, costUSD) {
        this.incrementCounter('e2e_agents_crew_runs_total', 'Total crew workflow runs', { workflow });
        this.incrementCounter('e2e_agents_crew_families_processed_total', 'Total families processed', { workflow }, families);
        this.incrementCounter('e2e_agents_crew_cost_usd_total', 'Total crew cost in USD', { workflow }, costUSD);
        this.observeHistogram('e2e_agents_crew_duration_seconds', 'Crew workflow duration', { workflow }, durationMs / 1000);
    }

    /**
     * Record a budget check event.
     *
     * @param {boolean} exceeded - Stringified into the `exceeded` label.
     * @param {number} currentUSD - Value stored in the budget-used gauge.
     * @param {number} limitUSD - Value stored in the budget-limit gauge.
     */
    recordBudgetCheck(exceeded, currentUSD, limitUSD) {
        this.incrementCounter('e2e_agents_budget_checks_total', 'Total budget checks', { exceeded: String(exceeded) });
        this.setGauge('e2e_agents_budget_used_usd', 'Current budget usage in USD', {}, currentUSD);
        this.setGauge('e2e_agents_budget_limit_usd', 'Budget limit in USD', {}, limitUSD);
    }

    /**
     * Record a circuit breaker state change.
     *
     * @param {string} state - 'closed' maps to 0, 'open' to 1, anything else to 2.
     */
    recordCircuitBreakerState(state) {
        this.setGauge('e2e_agents_circuit_breaker_state', 'Circuit breaker state (0=closed, 1=open, 2=half-open)', {}, state === 'closed' ? 0 : state === 'open' ? 1 : 2);
    }

    /**
     * Record a cache hit or miss.
     *
     * @param {boolean} hit - Truthy is recorded as `result="hit"`, falsy as `result="miss"`.
     * @param {string} agent - Agent label value.
     */
    recordCacheResult(hit, agent) {
        this.incrementCounter('e2e_agents_cache_lookups_total', 'Total cache lookups', { result: hit ? 'hit' : 'miss', agent });
    }

    /**
     * Export all metrics in Prometheus text exposition format.
     *
     * Samples sharing a metric name are emitted as one contiguous group
     * (headed by a single HELP/TYPE pair), as the exposition format requires.
     *
     * @returns {string} Newline-terminated exposition text.
     */
    export() {
        const lines = [];
        const seenHelp = new Set();
        // Emits the HELP/TYPE header the first time a metric name is encountered.
        const writeHeader = (metric, type) => {
            if (seenHelp.has(metric.name)) {
                return;
            }
            lines.push(`# HELP ${metric.name} ${metric.help}`);
            lines.push(`# TYPE ${metric.name} ${type}`);
            seenHelp.add(metric.name);
        };
        // Export counters
        for (const counter of groupByName(this.counters)) {
            writeHeader(counter, 'counter');
            lines.push(`${counter.name}${formatLabels(counter.labels)} ${counter.value}`);
        }
        // Export gauges
        for (const gauge of groupByName(this.gauges)) {
            writeHeader(gauge, 'gauge');
            lines.push(`${gauge.name}${formatLabels(gauge.labels)} ${gauge.value}`);
        }
        // Export histograms
        for (const hist of groupByName(this.histograms)) {
            writeHeader(hist, 'histogram');
            const labelStr = formatLabels(hist.labels);
            for (const bucket of DURATION_BUCKETS) {
                // observeHistogram() already stores cumulative counts per bucket,
                // so the stored value is emitted as-is; summing again would
                // inflate every bucket above the first match.
                const cumulative = hist.buckets.get(bucket) || 0;
                lines.push(`${hist.name}_bucket${formatLabels({ ...hist.labels, le: String(bucket) })} ${cumulative}`);
            }
            lines.push(`${hist.name}_bucket${formatLabels({ ...hist.labels, le: '+Inf' })} ${hist.count}`);
            lines.push(`${hist.name}_sum${labelStr} ${hist.sum}`);
            lines.push(`${hist.name}_count${labelStr} ${hist.count}`);
        }
        return lines.join('\n') + '\n';
    }

    /**
     * Reset all metrics by discarding every recorded sample.
     */
    reset() {
        this.counters = [];
        this.gauges = [];
        this.histograms = [];
    }

    /**
     * Add `value` (default 1) to the counter identified by name + labels,
     * creating it on first use.
     */
    incrementCounter(name, help, labels, value = 1) {
        const existing = this.counters.find((c) => c.name === name && labelsMatch(c.labels, labels));
        if (existing) {
            existing.value += value;
        }
        else {
            this.counters.push({ name, help, labels, value });
        }
    }

    /**
     * Set the gauge identified by name + labels to `value`, creating it on first use.
     */
    setGauge(name, help, labels, value) {
        const existing = this.gauges.find((g) => g.name === name && labelsMatch(g.labels, labels));
        if (existing) {
            existing.value = value;
        }
        else {
            this.gauges.push({ name, help, labels, value });
        }
    }

    /**
     * Record one observation in the histogram identified by name + labels.
     *
     * Bucket counts are stored cumulatively: every bucket whose upper bound
     * is >= `value` is incremented.
     */
    observeHistogram(name, help, labels, value) {
        let existing = this.histograms.find((h) => h.name === name && labelsMatch(h.labels, labels));
        if (!existing) {
            existing = { name, help, labels, sum: 0, count: 0, buckets: new Map() };
            this.histograms.push(existing);
        }
        existing.sum += value;
        existing.count++;
        for (const bucket of DURATION_BUCKETS) {
            if (value <= bucket) {
                existing.buckets.set(bucket, (existing.buckets.get(bucket) || 0) + 1);
            }
        }
    }
}

/**
 * Return the samples reordered so that entries sharing a `name` are adjacent.
 * Groups appear in order of each name's first occurrence, and order within a
 * group is preserved.
 */
function groupByName(samples) {
    const groups = new Map();
    for (const sample of samples) {
        const group = groups.get(sample.name);
        if (group) {
            group.push(sample);
        }
        else {
            groups.set(sample.name, [sample]);
        }
    }
    const ordered = [];
    for (const group of groups.values()) {
        ordered.push(...group);
    }
    return ordered;
}

/**
 * Escape a label value for the exposition format (backslash, double quote, newline).
 * Non-string values are stringified; null/undefined become the empty string
 * so a missing label cannot make export() throw.
 */
function escapeLabel(v) {
    const text = v === null || v === undefined ? '' : String(v);
    return text.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n');
}

/**
 * Render a label object as `{k="v",...}`, or an empty string when there are no labels.
 */
function formatLabels(labels) {
    const entries = Object.entries(labels);
    if (entries.length === 0) {
        return '';
    }
    return `{${entries.map(([k, v]) => `${k}="${escapeLabel(v)}"`).join(',')}}`;
}

/**
 * Return true when both label objects have the same number of keys and every
 * key of `a` maps to a strictly equal value in `b`.
 */
function labelsMatch(a, b) {
    const keysA = Object.keys(a);
    const keysB = Object.keys(b);
    if (keysA.length !== keysB.length) {
        return false;
    }
    return keysA.every((k) => a[k] === b[k]);
}
@@ -0,0 +1,59 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
/**
 * Task-complexity tier assigned to each known agent role.
 */
const AGENT_COMPLEXITY = {
    'impact-analyst': 'classification',
    'coverage-evaluator': 'classification',
    'cross-impact': 'extraction',
    'regression-advisor': 'extraction',
    'strategist': 'classification',
    'test-designer': 'generation',
    'generator': 'generation',
    'executor': 'generation',
    'healer': 'reasoning',
    'explorer': 'reasoning',
};

/**
 * Default model per provider and complexity tier.
 */
const DEFAULT_MODELS = {
    anthropic: {
        classification: 'claude-haiku-4-5-20251001',
        extraction: 'claude-haiku-4-5-20251001',
        // NOTE(review): the previous value 'claude-sonnet-4-5-20250514' combined the
        // Sonnet 4.5 name with the Sonnet 4 snapshot date. The ID below is believed
        // to be the published Sonnet 4.5 snapshot; confirm against the Anthropic
        // models reference.
        generation: 'claude-sonnet-4-5-20250929',
        reasoning: 'claude-sonnet-4-5-20250929',
    },
    openai: {
        classification: 'gpt-4o-mini',
        extraction: 'gpt-4o-mini',
        generation: 'gpt-4o',
        reasoning: 'gpt-4o',
    },
};

/**
 * Look up `key` on `table`, considering own properties only, so that names
 * inherited from Object.prototype (e.g. 'constructor', 'toString') are never
 * mistaken for table entries.
 */
function lookupOwn(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Recommends a model for an agent role based on the role's complexity tier,
 * the provider type, and optional per-tier overrides.
 */
export class ModelRouter {
    /**
     * @param {string} providerType - Key into the provider defaults (e.g. 'anthropic', 'openai').
     * @param {Object<string, string>} [overrides] - Optional map of complexity tier to model name.
     */
    constructor(providerType, overrides) {
        this.providerType = providerType;
        this.overrides = overrides || {};
    }

    /**
     * Get the recommended model for a given agent role.
     * Returns undefined if no routing recommendation (use provider default).
     *
     * @param {string} role - Agent role name.
     * @returns {string|undefined} Model name, or undefined when the role or provider is unknown.
     */
    getModel(role) {
        const complexity = lookupOwn(AGENT_COMPLEXITY, role);
        if (!complexity) {
            return undefined;
        }
        // Check user overrides first
        const override = this.overrides[complexity];
        if (override) {
            return override;
        }
        // Check provider defaults
        const defaults = lookupOwn(DEFAULT_MODELS, this.providerType);
        if (defaults) {
            return defaults[complexity];
        }
        // No recommendation — use provider's default model
        return undefined;
    }

    /**
     * Get the task complexity for an agent role.
     *
     * @param {string} role - Agent role name.
     * @returns {string} The role's complexity tier, or 'generation' for unknown roles.
     */
    getComplexity(role) {
        return lookupOwn(AGENT_COMPLEXITY, role) || 'generation';
    }
}
@@ -122,6 +122,7 @@ export class OllamaProvider extends BaseProvider {
122
122
  this.model = model;
123
123
  }
124
124
  async generateText(prompt, options) {
125
+ this.checkBudget();
125
126
  const startTime = Date.now();
126
127
  try {
127
128
  // SECURITY: Validate prompt length
@@ -46,6 +46,7 @@ export class OpenAIProvider extends BaseProvider {
46
46
  };
47
47
  }
48
48
  async generateText(prompt, options) {
49
+ this.checkBudget();
49
50
  const startTime = Date.now();
50
51
  try {
51
52
  if (prompt.length > 10 * 1024 * 1024) {
@@ -2,7 +2,7 @@
2
2
  // See LICENSE.txt for license information.
3
3
  import { existsSync, mkdirSync, writeFileSync } from 'fs';
4
4
  import { join } from 'path';
5
- import { getChangedFiles } from '../agent/git.js';
5
+ import { getChangedFiles, isTestFile } from '../agent/git.js';
6
6
  import { logger } from '../logger.js';
7
7
  import { preprocess } from './stage0_preprocess.js';
8
8
  import { runImpactStage } from './stage1_impact.js';
@@ -11,6 +11,7 @@ import { runGenerationStage } from './stage3_generation.js';
11
11
  import { runHealStage, resolveHealTargets, renderHealMarkdown } from './stage4_heal.js';
12
12
  import { buildSummary } from '../validation/output_schema.js';
13
13
  import { computeCannotDetermineRatio } from '../validation/guardrails.js';
14
+ import { resolveGenerationProfile } from '../prompts/generation_profile.js';
14
15
  function createRunId() {
15
16
  const ciRunId = process.env.GITHUB_RUN_ID;
16
17
  const entropy = Math.random().toString(36).slice(2, 8);
@@ -20,19 +21,12 @@ function createRunId() {
20
21
  }
21
22
  return `pipeline-local-${ts}-${entropy}`;
22
23
  }
23
- function isTestFile(file) {
24
- const normalized = file.replace(/\\/g, '/');
25
- return /\.(spec|test)\.(ts|tsx|js|jsx)$/.test(normalized) ||
26
- /_test\.go$/.test(normalized) ||
27
- normalized.includes('__tests__/') ||
28
- normalized.includes('/tests/') ||
29
- normalized.includes('/test/');
30
- }
31
24
  export async function runPipeline(config) {
32
25
  const runId = createRunId();
33
26
  const startedAt = new Date().toISOString();
34
27
  const allWarnings = [];
35
28
  const stages = config.stages || ['preprocess', 'impact', 'coverage'];
29
+ const profile = config.profile || resolveGenerationProfile();
36
30
  let generatedSpecs;
37
31
  let healResult;
38
32
  // Step 1: Get changed files
@@ -87,7 +81,7 @@ export async function runPipeline(config) {
87
81
  // Step 4: Coverage stage — AI-powered spec coverage evaluation
88
82
  if (stages.includes('coverage') && decisions.length > 0) {
89
83
  const coverageTimer = logger.timer('coverage');
90
- const coverageResult = await runCoverageStage(decisions, preprocessResult.specIndex, preprocessResult.context, config.testsRoot, config.coverage || {});
84
+ const coverageResult = await runCoverageStage(decisions, preprocessResult.specIndex, preprocessResult.context, config.testsRoot, { ...(config.coverage || {}), profile });
91
85
  decisions = coverageResult.decisions;
92
86
  timings.coverage = coverageTimer.end();
93
87
  allWarnings.push(...coverageResult.warnings);
@@ -95,7 +89,7 @@ export async function runPipeline(config) {
95
89
  // Step 5: Generation stage — AI-powered spec generation for create_spec / add_scenarios
96
90
  if (stages.includes('generation') && decisions.length > 0) {
97
91
  const generationTimer = logger.timer('generation');
98
- const generationResult = await runGenerationStage(decisions, preprocessResult.apiSurface, config.testsRoot, config.generation || {});
92
+ const generationResult = await runGenerationStage(decisions, preprocessResult.apiSurface, config.testsRoot, { ...(config.generation || {}), profile });
99
93
  generatedSpecs = generationResult.generated;
100
94
  timings.generation = generationTimer.end();
101
95
  allWarnings.push(...generationResult.warnings);
@@ -108,7 +102,7 @@ export async function runPipeline(config) {
108
102
  generatedSpecs,
109
103
  }, decisions);
110
104
  if (healTargets.length > 0) {
111
- healResult = await runHealStage(config.testsRoot, healTargets, config.heal || { mcp: true });
105
+ healResult = await runHealStage(config.testsRoot, healTargets, { ...(config.heal || { mcp: true }), profile });
112
106
  allWarnings.push(...healResult.warnings);
113
107
  }
114
108
  else {
@@ -2,7 +2,7 @@
2
2
  // See LICENSE.txt for license information.
3
3
  import { existsSync, readFileSync } from 'fs';
4
4
  import { join } from 'path';
5
- import { bindFilesToFamilies, loadRouteFamilyManifest, } from '../knowledge/route_families.js';
5
+ import { bindFilesToFamilies, buildHeuristicFamilies, loadRouteFamilyManifest, } from '../knowledge/route_families.js';
6
6
  import { loadOrBuildApiSurface } from '../knowledge/api_surface.js';
7
7
  import { buildSpecIndex } from '../knowledge/spec_index.js';
8
8
  import { loadContextDocuments } from '../knowledge/context_loader.js';
@@ -31,10 +31,11 @@ function loadFileSnippet(appPath, filePath) {
31
31
  }
32
32
  export function preprocess(changedFiles, config) {
33
33
  const warnings = [];
34
- // Load route family manifest
35
- const manifest = loadRouteFamilyManifest(config.testsRoot, config.routeFamilies);
34
+ // Load route family manifest, fall back to heuristic families
35
+ let manifest = loadRouteFamilyManifest(config.testsRoot, config.routeFamilies);
36
36
  if (!manifest) {
37
- warnings.push('Route family manifest not found. File-to-family binding will be skipped; AI will operate without route constraints.');
37
+ manifest = buildHeuristicFamilies(changedFiles, config.testsRoot);
38
+ warnings.push('Route family manifest not found. Using directory-based heuristics (lower accuracy).', 'Tip: Run `e2e-ai-agents train` to generate a proper manifest.');
38
39
  }
39
40
  // Load API surface catalog
40
41
  const apiSurface = loadOrBuildApiSurface(config.testsRoot, config.apiSurface);
@@ -46,21 +47,13 @@ export function preprocess(changedFiles, config) {
46
47
  // Load context documents
47
48
  const context = loadContextDocuments(config.testsRoot, config.appPath);
48
49
  warnings.push(...context.warnings);
49
- // Bind files to families
50
- let fileBindings = [];
51
- let unboundFiles = [];
52
- if (manifest) {
53
- fileBindings = bindFilesToFamilies(changedFiles, manifest);
54
- unboundFiles = fileBindings
55
- .filter((fb) => fb.bindings.length === 0)
56
- .map((fb) => fb.file);
57
- if (unboundFiles.length > 0) {
58
- warnings.push(`${unboundFiles.length} changed file(s) did not match any route family: ${unboundFiles.slice(0, 5).join(', ')}${unboundFiles.length > 5 ? '...' : ''}`);
59
- }
60
- }
61
- else {
62
- fileBindings = changedFiles.map((f) => ({ file: f, bindings: [] }));
63
- unboundFiles = changedFiles;
50
+ // Bind files to families (manifest is always non-null now — either real or heuristic)
51
+ const fileBindings = bindFilesToFamilies(changedFiles, manifest);
52
+ const unboundFiles = fileBindings
53
+ .filter((fb) => fb.bindings.length === 0)
54
+ .map((fb) => fb.file);
55
+ if (unboundFiles.length > 0) {
56
+ warnings.push(`${unboundFiles.length} changed file(s) did not match any route family: ${unboundFiles.slice(0, 5).join(', ')}${unboundFiles.length > 5 ? '...' : ''}`);
64
57
  }
65
58
  // Group files by family+feature
66
59
  const groupMap = new Map();
@@ -3,7 +3,8 @@
3
3
  import { LLMProviderFactory } from '../provider_factory.js';
4
4
  import { buildImpactPrompt, parseImpactResponse } from '../prompts/impact.js';
5
5
  import { formatContextForPrompt } from '../knowledge/context_loader.js';
6
- import { getFamilyById } from '../knowledge/route_families.js';
6
+ import { getFamilyById, getAssertionPatternsForBinding } from '../knowledge/route_families.js';
7
+ import { loadFailureHistory, getConfidenceBoost } from '../knowledge/failure_history.js';
7
8
  import { getSpecsForFamily } from '../knowledge/spec_index.js';
8
9
  import { computeConfidence, shouldForceCannotDetermine } from '../validation/guardrails.js';
9
10
  function normalizePriority(value) {
@@ -18,7 +19,7 @@ async function getProvider(config) {
18
19
  }
19
20
  return LLMProviderFactory.createFromEnv();
20
21
  }
21
- export async function runImpactStage(familyGroups, manifest, specIndex, apiSurface, context, config) {
22
+ export async function runImpactStage(familyGroups, manifest, specIndex, apiSurface, context, config, testsRoot) {
22
23
  const warnings = [];
23
24
  const allDecisions = [];
24
25
  if (familyGroups.length === 0) {
@@ -35,6 +36,8 @@ export async function runImpactStage(familyGroups, manifest, specIndex, apiSurfa
35
36
  return { decisions: [], warnings, providerName: 'none' };
36
37
  }
37
38
  const contextBlock = formatContextForPrompt(context);
39
+ // Load historical failure correlations for confidence boosting
40
+ const failureHistory = testsRoot ? loadFailureHistory(testsRoot) : null;
38
41
  for (const group of familyGroups) {
39
42
  const family = manifest ? getFamilyById(manifest, group.familyId) : null;
40
43
  if (!family) {
@@ -83,15 +86,27 @@ export async function runImpactStage(familyGroups, manifest, specIndex, apiSurfa
83
86
  if (!flow.id || !flow.changedFiles || !Array.isArray(flow.changedFiles)) {
84
87
  continue;
85
88
  }
89
+ // Compute confidence with optional historical failure boost
90
+ const changedFilesList = Array.isArray(flow.changedFiles)
91
+ ? flow.changedFiles.filter((f) => typeof f === 'string')
92
+ : [];
93
+ const historyBoost = failureHistory
94
+ ? Math.max(...changedFilesList.map((f) => getConfidenceBoost(failureHistory, f)), 0)
95
+ : 0;
86
96
  const confidence = typeof flow.confidence === 'number'
87
- ? Math.max(0, Math.min(100, flow.confidence))
97
+ ? Math.min(100, Math.max(0, flow.confidence) + historyBoost)
88
98
  : computeConfidence({
89
99
  hasRouteFamily: true,
90
100
  hasSpecificRoute: Boolean(flow.route),
91
101
  hasPageObject: Boolean(flow.pageObjects && flow.pageObjects.length > 0),
92
102
  hasUserAction: Boolean(flow.userActions && flow.userActions.length > 0),
93
103
  hasExistingSpecCited: false,
104
+ historyBoost,
94
105
  });
106
+ // Resolve assertion patterns from manifest for this flow's family/feature
107
+ const assertionPatterns = manifest
108
+ ? getAssertionPatternsForBinding(manifest, { family: group.familyId, feature: group.featureId })
109
+ : [];
95
110
  const decision = {
96
111
  flowId: flow.id,
97
112
  flowName: flow.name || flow.id,
@@ -107,6 +122,7 @@ export async function runImpactStage(familyGroups, manifest, specIndex, apiSurfa
107
122
  blockingReason: shouldForceCannotDetermine(confidence) ? 'Confidence too low to determine action.' : undefined,
108
123
  priority: normalizePriority(flow.priority),
109
124
  userActions: Array.isArray(flow.userActions) ? flow.userActions.filter((a) => typeof a === 'string') : [],
125
+ assertionPatterns: assertionPatterns.length > 0 ? assertionPatterns : undefined,
110
126
  };
111
127
  allDecisions.push(decision);
112
128
  }
@@ -43,13 +43,26 @@ export async function runCoverageStage(decisions, specIndex, context, testsRoot,
43
43
  for (const [familyId, familyDecisions] of byFamily) {
44
44
  // Gather relevant specs
45
45
  const specs = getSpecsForFamily(specIndex, familyId);
46
- const specsWithContent = specs
47
- .map((s) => {
46
+ // Two-tier approach: send all spec titles (compact), full content for top matches only
47
+ const allSpecSummaries = specs.map((s) => ({
48
+ relativePath: s.relativePath,
49
+ testTitles: s.testTitles,
50
+ }));
51
+ // Load full content with a total budget of 200K chars (~50K tokens) to avoid blowing context windows
52
+ const MAX_TOTAL_SPEC_CHARS = 200000;
53
+ let totalSpecChars = 0;
54
+ const specsWithContent = [];
55
+ for (const s of specs) {
56
+ if (specsWithContent.length >= 30)
57
+ break;
48
58
  const content = loadSpecFileContent(testsRoot, s.relativePath, maxSpecChars);
49
- return content ? { relativePath: s.relativePath, content, testTitles: s.testTitles } : null;
50
- })
51
- .filter((s) => s !== null)
52
- .slice(0, 15); // Limit to 15 specs per family to stay within token budget
59
+ if (!content)
60
+ continue;
61
+ if (totalSpecChars + content.length > MAX_TOTAL_SPEC_CHARS)
62
+ break;
63
+ totalSpecChars += content.length;
64
+ specsWithContent.push({ relativePath: s.relativePath, content, testTitles: s.testTitles });
65
+ }
53
66
  if (specsWithContent.length === 0) {
54
67
  // No specs to evaluate — mark all as create_spec
55
68
  for (const d of familyDecisions) {
@@ -70,10 +83,19 @@ export async function runCoverageStage(decisions, specIndex, context, testsRoot,
70
83
  evidence: d.evidence,
71
84
  priority: d.priority,
72
85
  }));
86
+ // Include titles-only summaries for specs beyond the content limit
87
+ const extraSummaries = allSpecSummaries
88
+ .slice(specsWithContent.length)
89
+ .map((s) => ` - ${s.relativePath}: ${s.testTitles.join(', ')}`)
90
+ .join('\n');
91
+ const extraContext = extraSummaries
92
+ ? `\nADDITIONAL SPECS (titles only, no content loaded):\n${extraSummaries}\n`
93
+ : '';
73
94
  const prompt = buildCoveragePrompt({
74
95
  flows,
75
96
  specs: specsWithContent,
76
- contextBlock,
97
+ contextBlock: contextBlock + extraContext,
98
+ profile: config.profile,
77
99
  });
78
100
  try {
79
101
  const response = await provider.generateText(prompt, {
@@ -86,6 +86,7 @@ export async function runGenerationStage(decisions, apiSurface, testsRoot, confi
86
86
  existingSpecContent,
87
87
  specPath,
88
88
  mode,
89
+ profile: config.profile,
89
90
  });
90
91
  try {
91
92
  const response = await provider.generateText(prompt, {
@@ -100,10 +101,29 @@ export async function runGenerationStage(decisions, apiSurface, testsRoot, confi
100
101
  skipped.push(`${decision.flowId}: invalid code returned`);
101
102
  continue;
102
103
  }
103
- // Hallucination detection
104
+ // Hallucination detection — block specs with hallucinated methods
104
105
  const hallucinationWarnings = detectHallucinatedMethods(parsed.code, apiSurface);
105
106
  if (hallucinationWarnings.length > 0) {
106
107
  warnings.push(`Flow ${decision.flowId}: suspected hallucinated methods: ${hallucinationWarnings.join(', ')}`);
108
+ if (!config.warnOnHallucinations) {
109
+ // Block: move to needs-review instead of writing to specs dir
110
+ if (!dryRun) {
111
+ const reviewDir = join(testsRoot, 'generated-needs-review');
112
+ mkdirSync(reviewDir, { recursive: true });
113
+ const safeName = decision.flowId.replace(/[^a-zA-Z0-9_-]/g, '_').toLowerCase();
114
+ const reviewPath = join(reviewDir, `${safeName}-${Date.now().toString(36)}.spec.ts`);
115
+ writeFileSync(reviewPath, `${parsed.code}\n`, 'utf-8');
116
+ warnings.push(`Flow ${decision.flowId}: blocked — moved to ${reviewPath}`);
117
+ }
118
+ generated.push({
119
+ flowId: decision.flowId,
120
+ specPath,
121
+ mode,
122
+ written: false,
123
+ hallucinationWarnings,
124
+ });
125
+ continue;
126
+ }
107
127
  }
108
128
  let written = false;
109
129
  if (!dryRun) {
@@ -0,0 +1,112 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
+ import { EventEmitter } from 'events';
4
/**
 * Emits workflow progress as events and, unless silenced, prints it to stdout.
 *
 * Every reporting method always emits its event ('phase-start', 'agent-start',
 * 'agent-complete', 'phase-complete', 'workflow-complete') before deciding
 * whether to print, so listeners keep working in quiet/JSON mode.
 *
 * On a TTY, agent starts are rendered as a single in-place status line
 * (carriage return + erase-line, no trailing newline); otherwise each update
 * is printed on its own line.
 */
export class ProgressReporter extends EventEmitter {
    /**
     * @param {object} [options]
     * @param {boolean} [options.isTTY] - Force TTY rendering on/off; defaults to process.stdout.isTTY.
     * @param {boolean} [options.quiet] - Suppress all printed output.
     * @param {boolean} [options.jsonMode] - Suppress all printed output (same effect as quiet here).
     */
    constructor(options) {
        super();
        this.isTTY = options?.isTTY ?? (process.stdout.isTTY === true);
        this.silent = (options?.quiet ?? false) || (options?.jsonMode ?? false);
        this.completedAgents = 0;
        this.totalAgents = 0;
        this.currentPhase = '';
        // True while an in-place TTY status line (written without a newline) is on screen.
        this.hasPendingStatusLine = false;
    }

    /**
     * Marks the beginning of a phase and resets the per-phase agent counters.
     * @param {string} phase - Phase label shown in the output.
     * @param {number} agentCount - Number of agents expected to run in this phase.
     */
    phaseStart(phase, agentCount) {
        this.emit('phase-start', { phase, agentCount });
        if (this.silent) {
            return;
        }
        this.currentPhase = phase;
        this.completedAgents = 0;
        this.totalAgents = agentCount;
        const plural = agentCount !== 1 ? 's' : '';
        this.writeLine(`--- Phase: ${phase} (${agentCount} agent${plural}) ---`);
    }

    /**
     * Reports that an agent has started.
     * @param {string} agent - Agent label.
     * @param {string} [family] - Optional family being processed.
     */
    agentStart(agent, family) {
        this.emit('agent-start', { agent, family });
        if (this.silent) {
            return;
        }
        const familyLabel = family ? ` processing ${family}` : '';
        if (this.isTTY) {
            const progress = `[${this.completedAgents}/${this.totalAgents} agents]`;
            const message = `${progress} ${this.currentPhase}: ${agent}${familyLabel}...`;
            // No trailing newline: the next update overwrites this line in place.
            process.stdout.write(`\r${clearLine()}${message}`);
            this.hasPendingStatusLine = true;
        }
        else {
            const message = `[${this.currentPhase}] ${agent} started${familyLabel ? ':' + familyLabel : ''}`;
            this.writeLine(message);
        }
    }

    /**
     * Reports that an agent has finished and bumps the completed counter.
     * @param {string} agent - Agent label.
     * @param {string} [family] - Optional family that was processed.
     * @param {number} tokens - Token count passed to formatTokens.
     * @param {number} cost - Cost passed to formatCost.
     * @param {number} durationMs - Duration in milliseconds passed to formatDuration.
     */
    agentComplete(agent, family, tokens, cost, durationMs) {
        this.emit('agent-complete', { agent, family, tokens, cost, durationMs });
        if (this.silent) {
            return;
        }
        this.completedAgents++;
        const costStr = formatCost(cost);
        const durationStr = formatDuration(durationMs);
        const tokensStr = formatTokens(tokens);
        const familyLabel = family ? ` ${family}` : '';
        if (this.isTTY) {
            const progress = `[${this.completedAgents}/${this.totalAgents} agents]`;
            const message = `${progress} ${this.currentPhase}: ${agent} complete${familyLabel} (${tokensStr}, ${costStr}, ${durationStr})`;
            // Overwrites the in-place status line and terminates it with a newline.
            process.stdout.write(`\r${clearLine()}${message}\n`);
            this.hasPendingStatusLine = false;
        }
        else {
            const message = `[${this.currentPhase}] ${agent} complete:${familyLabel} (${tokensStr}, ${costStr}, ${durationStr})`;
            this.writeLine(message);
        }
    }

    /**
     * Reports that a phase has finished.
     * @param {string} phase - Phase label.
     * @param {number} elapsedMs - Phase duration in milliseconds.
     */
    phaseComplete(phase, elapsedMs) {
        this.emit('phase-complete', { phase, elapsedMs });
        if (this.silent) {
            return;
        }
        this.writeLine(`--- Phase ${phase} complete (${formatDuration(elapsedMs)}) ---`);
    }

    /**
     * Reports the end of the whole workflow with aggregate totals.
     * @param {number} totalCost - Total cost passed to formatCost.
     * @param {number} totalTokens - Total token count passed to formatTokens.
     * @param {number} elapsedMs - Total duration in milliseconds.
     */
    workflowComplete(totalCost, totalTokens, elapsedMs) {
        this.emit('workflow-complete', { totalCost, totalTokens, elapsedMs });
        if (this.silent) {
            return;
        }
        const costStr = formatCost(totalCost);
        const tokensStr = formatTokens(totalTokens);
        const durationStr = formatDuration(elapsedMs);
        this.writeLine(`=== Workflow complete: ${tokensStr}, ${costStr}, ${durationStr} ===`);
    }

    /**
     * Writes one newline-terminated line to stdout.
     *
     * If an in-place TTY status line is still on screen (an agent started but
     * never reported completion), it is erased first so this message does not
     * get appended to the stale status text.
     * @param {string} message - Line to print, without the trailing newline.
     */
    writeLine(message) {
        let prefix = '';
        if (this.hasPendingStatusLine) {
            prefix = `\r${clearLine()}`;
            this.hasPendingStatusLine = false;
        }
        process.stdout.write(`${prefix}${message}\n`);
    }
}
89
/**
 * Returns the ANSI escape sequence (ESC [ 2 K) used to erase the current
 * terminal line before an in-place progress update is written.
 * @returns {string} The raw escape sequence.
 */
function clearLine() {
    const eraseEntireLine = '\x1B[2K';
    return eraseEntireLine;
}
92
/**
 * Renders a cost as a dollar amount with exactly two decimal places.
 * @param {number} cost - Amount to render.
 * @returns {string} The amount prefixed with "$", e.g. "$0.25".
 */
function formatCost(cost) {
    const twoDecimals = cost.toFixed(2);
    return `$${twoDecimals}`;
}
95
/**
 * Renders a token count for display.
 *
 * - One million or more: abbreviated to one decimal with an "M" suffix ("2.5M tokens").
 * - One thousand or more: the exact count, rounded to an integer, with comma
 *   thousands separators ("12,345 tokens").
 * - Otherwise: the value as-is ("999 tokens").
 *
 * The thousands branch previously divided by 1000 before formatting but kept
 * the plain "tokens" label, so 1500 was shown as "2 tokens".
 * @param {number} tokens - Token count to render.
 * @returns {string} Human-readable token count.
 */
function formatTokens(tokens) {
    if (tokens >= 1000000) {
        return `${(tokens / 1000000).toFixed(1)}M tokens`;
    }
    if (tokens >= 1000) {
        // Insert a comma before every group of three digits counted from the right.
        const grouped = String(Math.round(tokens)).replace(/\B(?=(\d{3})+(?!\d))/g, ',');
        return `${grouped} tokens`;
    }
    return `${tokens} tokens`;
}
104
/**
 * Renders a millisecond duration as whole seconds, or minutes plus seconds.
 *
 * The input is rounded to the nearest second first. Durations under a minute
 * are shown as "<n>s"; longer ones as "<m>m<s>s", with the seconds part
 * omitted when it is zero.
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Compact duration such as "42s", "3m" or "3m7s".
 */
function formatDuration(ms) {
    const totalSeconds = Math.round(ms / 1000);
    const spansMinutes = totalSeconds >= 60;
    if (!spansMinutes) {
        return `${totalSeconds}s`;
    }
    const wholeMinutes = Math.floor(totalSeconds / 60);
    const leftoverSeconds = totalSeconds % 60;
    if (leftoverSeconds > 0) {
        return `${wholeMinutes}m${leftoverSeconds}s`;
    }
    return `${wholeMinutes}m`;
}