@yasserkhanorg/e2e-agents 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/README.md +40 -9
  2. package/dist/agent/git.d.ts.map +1 -1
  3. package/dist/agent/git.js +9 -0
  4. package/dist/cli/commands/train.d.ts +3 -0
  5. package/dist/cli/commands/train.d.ts.map +1 -0
  6. package/dist/cli/commands/train.js +307 -0
  7. package/dist/cli/parse_args.d.ts.map +1 -1
  8. package/dist/cli/parse_args.js +7 -1
  9. package/dist/cli/types.d.ts +6 -1
  10. package/dist/cli/types.d.ts.map +1 -1
  11. package/dist/cli/usage.d.ts.map +1 -1
  12. package/dist/cli/usage.js +7 -1
  13. package/dist/cli.js +5 -0
  14. package/dist/esm/agent/git.js +9 -0
  15. package/dist/esm/cli/commands/train.js +271 -0
  16. package/dist/esm/cli/parse_args.js +7 -1
  17. package/dist/esm/cli/usage.js +7 -1
  18. package/dist/esm/cli.js +5 -0
  19. package/dist/esm/index.js +5 -0
  20. package/dist/esm/knowledge/route_families.js +2 -2
  21. package/dist/esm/training/enricher.js +273 -0
  22. package/dist/esm/training/merger.js +137 -0
  23. package/dist/esm/training/scanner.js +386 -0
  24. package/dist/esm/training/types.js +6 -0
  25. package/dist/esm/training/validator.js +153 -0
  26. package/dist/index.d.ts +5 -0
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +15 -1
  29. package/dist/knowledge/route_families.d.ts +2 -0
  30. package/dist/knowledge/route_families.d.ts.map +1 -1
  31. package/dist/knowledge/route_families.js +2 -0
  32. package/dist/training/enricher.d.ts +15 -0
  33. package/dist/training/enricher.d.ts.map +1 -0
  34. package/dist/training/enricher.js +278 -0
  35. package/dist/training/merger.d.ts +5 -0
  36. package/dist/training/merger.d.ts.map +1 -0
  37. package/dist/training/merger.js +141 -0
  38. package/dist/training/scanner.d.ts +5 -0
  39. package/dist/training/scanner.d.ts.map +1 -0
  40. package/dist/training/scanner.js +391 -0
  41. package/dist/training/types.d.ts +109 -0
  42. package/dist/training/types.d.ts.map +1 -0
  43. package/dist/training/types.js +9 -0
  44. package/dist/training/validator.d.ts +16 -0
  45. package/dist/training/validator.d.ts.map +1 -0
  46. package/dist/training/validator.js +160 -0
  47. package/package.json +1 -1
@@ -0,0 +1,271 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
+ import { existsSync, mkdirSync, renameSync, writeFileSync } from 'fs';
4
+ import { dirname, join, resolve } from 'path';
5
+ import * as readline from 'readline';
6
+ import { resolveConfig } from '../../agent/config.js';
7
+ import { loadRouteFamilyManifest } from '../../knowledge/route_families.js';
8
+ import { LLMProviderFactory } from '../../provider_factory.js';
9
+ import { scanProject } from '../../training/scanner.js';
10
+ import { mergeFamilies, detectStaleFamilies } from '../../training/merger.js';
11
+ import { enrichFamilies } from '../../training/enricher.js';
12
+ import { getCommitFiles, validateCommit, buildValidationReport, formatValidationReport } from '../../training/validator.js';
13
/**
 * Error type thrown by the train command for invalid CLI options and for
 * failed prerequisites (for example a missing GitHub CLI).
 */
class TrainError extends Error {
    // Initialised right after super() returns, so every instance carries its
    // own `name` property.
    name = 'TrainError';

    /**
     * @param {string} message - Human-readable description of the problem.
     */
    constructor(message) {
        super(message);
    }
}
19
/** Largest value accepted for --budget-usd; anything above it is rejected. */
const MAX_BUDGET_USD = 10;

/**
 * Returns true when `candidate` is `root` itself or is located below it.
 *
 * Both arguments must already be absolute, normalised paths. The check walks
 * up the directory chain with dirname() instead of comparing string prefixes,
 * so it does not depend on the platform path separator, cannot be fooled by
 * sibling directories sharing a prefix ("/app" vs "/app-evil"), and also
 * works when `root` is a filesystem root.
 *
 * @param {string} candidate - Absolute path to test.
 * @param {string} root - Absolute directory that must contain `candidate`.
 * @returns {boolean}
 */
function isPathWithin(candidate, root) {
    let current = candidate;
    for (;;) {
        if (current === root) {
            return true;
        }
        const parent = dirname(current);
        if (parent === current) {
            // Reached the filesystem root without meeting `root`.
            return false;
        }
        current = parent;
    }
}

/**
 * Resolves train-specific options from CLI args.
 *
 * Unlike other commands that use the shared resolveConfig() for full pipeline
 * configuration, train only needs appPath and testsRoot; resolveConfig() is
 * called solely to pick up testsRoot from the config file.
 *
 * @param {object} args - Parsed CLI arguments. Reads `path`, `testsRoot`,
 *   `trainOutput`, `trainPr`, `gitSince`, `budgetUSD`, `trainEnrich`,
 *   `trainValidate`, `dryRun` and `trainYes`.
 * @param {*} autoConfig - Forwarded to resolveConfig(); when falsy the config
 *   file lookup is skipped entirely.
 * @returns {{appPath: string, testsRoot: string, enrich: boolean,
 *   validate: boolean, since: string, pr: (number|undefined),
 *   outputPath: string, dryRun: boolean, yes: boolean, budgetUSD: number}}
 *   Options with all paths resolved to absolute form.
 * @throws {TrainError} When --pr is not a positive integer, --pr is combined
 *   with --since, the git ref is malformed, the budget is not a positive
 *   number or exceeds MAX_BUDGET_USD, the project root does not exist, or the
 *   output path lies outside both the project root and the tests root.
 */
function resolveTrainOptions(args, autoConfig) {
    const appPath = args.path || '.';
    let testsRoot = args.testsRoot || appPath;
    if (autoConfig) {
        try {
            const { config } = resolveConfig(process.cwd(), autoConfig, {
                path: appPath,
                testsRoot: args.testsRoot,
            });
            testsRoot = config.testsRoot || config.path || appPath;
        }
        catch {
            // Deliberate best effort: an unreadable config must not block
            // training, so the CLI-derived defaults above stay in place.
        }
    }
    const outputPath = args.trainOutput ||
        join(testsRoot, '.e2e-ai-agents', 'route-families.json');
    // Validate --pr is a positive integer
    if (args.trainPr !== undefined && (!Number.isInteger(args.trainPr) || args.trainPr <= 0)) {
        throw new TrainError('--pr must be a positive integer');
    }
    // Validate --pr and --since are mutually exclusive
    if (args.trainPr && args.gitSince) {
        throw new TrainError('--pr and --since are mutually exclusive.');
    }
    // Validate --since format (reject leading '-' to prevent git flag injection)
    const since = args.gitSince || 'HEAD~20';
    if (/^-/.test(since) || !/^[a-zA-Z0-9_.~^@\/-]+$/.test(since)) {
        throw new TrainError(`Invalid git ref: ${since}`);
    }
    // Validate budget bounds. `??` keeps an explicit 0 so it is rejected
    // instead of silently becoming the default, and `!(budget > 0)` also
    // rejects NaN.
    const budget = args.budgetUSD ?? 0.50;
    if (!(budget > 0)) {
        throw new TrainError('--budget-usd must be a positive number');
    }
    if (budget > MAX_BUDGET_USD) {
        throw new TrainError(`Budget exceeds maximum of $${MAX_BUDGET_USD}. Use a lower --budget-usd value.`);
    }
    const resolvedAppPath = resolve(appPath);
    const resolvedTestsRoot = resolve(testsRoot);
    const resolvedOutputPath = resolve(outputPath);
    // Validate --path is a real project
    if (!existsSync(resolvedAppPath)) {
        throw new TrainError(`Project root not found: ${resolvedAppPath}`);
    }
    // Validate --output is within the project boundary
    const inApp = isPathWithin(resolvedOutputPath, resolvedAppPath);
    const inTests = isPathWithin(resolvedOutputPath, resolvedTestsRoot);
    if (!inApp && !inTests) {
        throw new TrainError(`Output path must be within the project root or tests root: ${resolvedOutputPath}`);
    }
    return {
        appPath: resolvedAppPath,
        testsRoot: resolvedTestsRoot,
        enrich: args.trainEnrich !== false,
        validate: args.trainValidate || false,
        since,
        pr: args.trainPr,
        outputPath: resolvedOutputPath,
        dryRun: args.dryRun || false,
        yes: args.trainYes || false,
        budgetUSD: budget,
    };
}
91
/**
 * Asks a single question on the given readline interface.
 *
 * @param {{question: Function}} rl - Interface whose question() is invoked.
 * @param {string} question - Prompt text shown before the ": " separator.
 * @param {string} [defaultValue] - Shown in parentheses after the question
 *   and used when the trimmed answer is empty.
 * @returns {Promise<string>} The trimmed answer, else the default, else ''.
 */
function ask(rl, question, defaultValue) {
    let hint = '';
    if (defaultValue) {
        hint = ` (${defaultValue})`;
    }
    return new Promise((settle) => {
        const promptText = `${question}${hint}: `;
        rl.question(promptText, (answer) => {
            const trimmed = answer.trim();
            settle(trimmed || defaultValue || '');
        });
    });
}
99
/**
 * Serialises a manifest's families as pretty-printed JSON followed by a
 * trailing newline.
 *
 * For each family the known optional list fields are dropped when they are
 * falsy or empty arrays, and `priority` is dropped when falsy, so the file
 * only contains fields that carry information. Only `families` is written;
 * any other manifest property is not part of the output.
 *
 * @param {{families: object[]}} manifest - Manifest to serialise.
 * @returns {string} JSON text (2-space indent) ending in '\n'.
 */
function serializeManifest(manifest) {
    const optionalLists = new Set([
        'pageObjects', 'components', 'webappPaths', 'serverPaths', 'specDirs',
        'cypressSpecDirs', 'tags', 'userFlows', 'features',
    ]);
    const shouldDrop = ([key, value]) => {
        if (key === 'priority') {
            return !value;
        }
        if (!optionalLists.has(key)) {
            return false;
        }
        return !value || (Array.isArray(value) && value.length === 0);
    };
    const families = manifest.families.map((family) => {
        const kept = Object.entries(family).filter((pair) => !shouldDrop(pair));
        return Object.fromEntries(kept);
    });
    return `${JSON.stringify({ families }, null, 2)}\n`;
}
117
/**
 * Entry point of the `train` CLI command.
 *
 * Runs the following phases in order, logging progress to the console:
 *   1. Scan the project for candidate families (returns early if none).
 *   2. Merge the scan result with an existing manifest, if one is found.
 *   3. Report stale families and, on an interactive terminal without --yes or
 *      --dry-run, offer to remove them.
 *   4. Optionally enrich families through an LLM provider; a failure here is
 *      reported as a warning and the deterministic result is kept.
 *   5. Print the manifest (dry run) or write it via a temporary file that is
 *      renamed onto the output path.
 *   6. List up to ten unmatched source and test directories each.
 *   7. Optionally validate the manifest against a PR (via the gh CLI) or
 *      against recent git history.
 *
 * @param {object} args - Parsed CLI arguments, forwarded to resolveTrainOptions.
 * @param {*} autoConfig - Auto-discovered config, forwarded to resolveTrainOptions.
 * @returns {Promise<void>}
 * @throws {TrainError} When options are invalid, the gh CLI is missing, or the
 *   PR file list cannot be fetched.
 */
export async function runTrainCommand(args, autoConfig) {
    // Each unmatched list is previewed independently up to this many entries.
    const UNMATCHED_PREVIEW_LIMIT = 10;
    const opts = resolveTrainOptions(args, autoConfig);
    console.log('');
    console.log(' e2e-ai-agents train');
    console.log(' ===================');
    console.log('');
    // ---------- Phase 1: Deterministic scan ----------
    console.log(' Scanning project structure...');
    const scanResult = scanProject(opts.appPath);
    console.log(` Found ${scanResult.stats.totalSourceFiles} source files, ${scanResult.stats.totalTestFiles} test files`);
    console.log(` Discovered ${scanResult.families.length} candidate families`);
    if (scanResult.families.length === 0) {
        console.log('');
        console.log(' No families discovered. Make sure your project has recognizable');
        console.log(' source directories (src/, server/, app/) and test directories');
        console.log(' (tests/, e2e/, specs/) with matching names.');
        return;
    }
    // ---------- Phase 2: Merge with existing ----------
    const existing = loadRouteFamilyManifest(opts.testsRoot);
    if (existing) {
        console.log(` Found existing manifest with ${existing.families.length} families`);
    }
    // The binding is never reassigned; later phases mutate the object it holds.
    const mergeResult = mergeFamilies(existing, scanResult.families);
    console.log(` Merge: ${mergeResult.summary}`);
    // ---------- Phase 3: Stale detection ----------
    if (mergeResult.manifest.families.length > 0) {
        const stale = detectStaleFamilies(mergeResult.manifest, opts.appPath);
        if (stale.length > 0) {
            console.log('');
            console.log(` Stale families detected (${stale.length}):`);
            for (const id of stale) {
                console.log(` ${id} — paths no longer exist`);
            }
            // Only prompt when a human can answer and the run may change files.
            if (!opts.yes && !opts.dryRun && process.stdin.isTTY) {
                const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
                try {
                    const answer = await ask(rl, ' Remove stale families? [y/N]', 'N');
                    if (answer.toLowerCase() === 'y') {
                        const staleSet = new Set(stale);
                        mergeResult.manifest.families = mergeResult.manifest.families.filter((f) => !staleSet.has(f.id));
                        mergeResult.staleFamilies = stale;
                        console.log(` Removed ${stale.length} stale families`);
                    }
                }
                finally {
                    rl.close();
                }
            }
        }
    }
    // ---------- Phase 4: LLM Enrichment ----------
    if (opts.enrich) {
        console.log('');
        console.log(' Enriching with LLM...');
        try {
            const provider = await LLMProviderFactory.createFromEnv();
            const enrichResult = await enrichFamilies(mergeResult.manifest.families, scanResult.families, opts.appPath, provider, opts.budgetUSD);
            mergeResult.manifest.families = enrichResult.enrichedFamilies;
            console.log(` Enriched ${enrichResult.enrichedFamilies.length} families (${enrichResult.tokensUsed} tokens, ~$${enrichResult.costUSD})`);
            if (enrichResult.skippedFamilies.length > 0) {
                console.log(` Skipped ${enrichResult.skippedFamilies.length} families (budget limit)`);
            }
        }
        catch (error) {
            // Enrichment is optional: report the failure and keep going.
            console.warn(` LLM enrichment failed: ${error instanceof Error ? error.message : String(error)}`);
            console.warn(' Continuing with deterministic results. Use --no-enrich to skip LLM.');
        }
    }
    // ---------- Phase 5: Write manifest ----------
    console.log('');
    const json = serializeManifest(mergeResult.manifest);
    if (opts.dryRun) {
        console.log(' Dry run — proposed manifest:');
        console.log('');
        console.log(json);
    }
    else {
        const dir = dirname(opts.outputPath);
        if (!existsSync(dir)) {
            mkdirSync(dir, { recursive: true });
        }
        // Write to a sibling temp file first, then rename it into place.
        const tmpPath = `${opts.outputPath}.tmp`;
        writeFileSync(tmpPath, json, 'utf-8');
        renameSync(tmpPath, opts.outputPath);
        console.log(` Wrote ${opts.outputPath}`);
        console.log(` ${mergeResult.manifest.families.length} families`);
    }
    // ---------- Phase 6: Report unmatched ----------
    const { unmatchedSourceDirs, unmatchedTestDirs } = scanResult;
    if (unmatchedSourceDirs.length > 0 || unmatchedTestDirs.length > 0) {
        console.log('');
        console.log(' Unmatched (review manually):');
        for (const dir of unmatchedSourceDirs.slice(0, UNMATCHED_PREVIEW_LIMIT)) {
            console.log(` source: ${dir.relativePath}`);
        }
        for (const dir of unmatchedTestDirs.slice(0, UNMATCHED_PREVIEW_LIMIT)) {
            console.log(` test: ${dir.relativePath}`);
        }
        // Each list is truncated on its own, so the hint must appear whenever
        // either list was cut short, not only when the combined total is large.
        if (unmatchedSourceDirs.length > UNMATCHED_PREVIEW_LIMIT ||
            unmatchedTestDirs.length > UNMATCHED_PREVIEW_LIMIT) {
            console.log(' ... and more');
        }
    }
    // ---------- Phase 7: Validation (optional) ----------
    if (opts.validate) {
        if (opts.pr) {
            console.log('');
            console.log(` Validating against PR #${opts.pr}...`);
            // Check for gh CLI
            const { execFileSync } = await import('child_process');
            try {
                execFileSync('gh', ['--version'], { stdio: 'pipe' });
            }
            catch {
                throw new TrainError('--pr requires the GitHub CLI (gh). Install: https://cli.github.com/');
            }
            // Fetch PR changed files via gh CLI
            let prFiles;
            try {
                const output = execFileSync('gh', ['pr', 'view', String(opts.pr), '--json', 'files', '-q', '.files[].path'], {
                    cwd: opts.appPath,
                    encoding: 'utf-8',
                    stdio: ['pipe', 'pipe', 'pipe'],
                });
                prFiles = output.trim().split('\n').filter(Boolean);
            }
            catch (error) {
                throw new TrainError(`Error fetching PR #${opts.pr}: ${error instanceof Error ? error.message : String(error)}`);
            }
            if (prFiles.length === 0) {
                console.log(' No files found in PR.');
            }
            else {
                const validation = validateCommit(mergeResult.manifest, prFiles, `PR#${opts.pr}`, `PR #${opts.pr}`);
                const report = buildValidationReport([validation], mergeResult.manifest);
                console.log('');
                console.log(formatValidationReport(report));
            }
        }
        else {
            console.log('');
            console.log(` Validating against git history (${opts.since})...`);
            const commits = getCommitFiles(opts.appPath, opts.since);
            if (commits.length === 0) {
                console.log(' No commits found in range.');
            }
            else {
                const validations = commits.map((c) => validateCommit(mergeResult.manifest, c.files, c.hash, c.message));
                const report = buildValidationReport(validations, mergeResult.manifest);
                console.log('');
                console.log(formatValidationReport(report));
            }
        }
    }
    console.log('');
}
@@ -61,6 +61,10 @@ const FLAGS = {
61
61
  '--generate': { key: 'analyzeGenerate', type: 'boolean' },
62
62
  '--heal': { key: 'analyzeHeal', type: 'boolean' },
63
63
  '--no-ai': { key: 'noAi', type: 'boolean' },
64
+ '--enrich': { key: 'trainEnrich', type: 'boolean' },
65
+ '--no-enrich': { key: 'trainEnrich', type: 'boolean-false' },
66
+ '--validate': { key: 'trainValidate', type: 'boolean' },
67
+ '--yes': { key: 'trainYes', type: 'boolean', aliases: ['-y'] },
64
68
  '--mattermost': { key: 'profile', type: 'boolean', transform: () => 'mattermost' },
65
69
  // -- string flags --
66
70
  '--config': { key: 'configPath', type: 'string' },
@@ -90,6 +94,7 @@ const FLAGS = {
90
94
  '--generate-output': { key: 'analyzeGenerateOutputDir', type: 'string' },
91
95
  '--heal-report': { key: 'analyzeHealReport', type: 'string' },
92
96
  '--flow-catalog': { key: 'flowCatalogPath', type: 'string' },
97
+ '--output': { key: 'trainOutput', type: 'string' },
93
98
  // -- number flags (with isFinite guard) --
94
99
  '--pipeline-scenarios': { key: 'pipelineScenarios', type: 'number' },
95
100
  '--time': { key: 'timeLimitMinutes', type: 'number' },
@@ -101,6 +106,7 @@ const FLAGS = {
101
106
  '--traceability-min-hits': { key: 'traceabilityMinHits', type: 'number' },
102
107
  '--traceability-max-files-per-test': { key: 'traceabilityMaxFilesPerTest', type: 'number' },
103
108
  '--traceability-max-age-days': { key: 'traceabilityMaxAgeDays', type: 'number' },
109
+ '--pr': { key: 'trainPr', type: 'number' },
104
110
  // -- number-raw flags (no isFinite guard, assigned directly via Number()) --
105
111
  '--max-attempts': { key: 'maxAttempts', type: 'number-raw', transform: (v) => parseInt(v, 10) },
106
112
  '--pipeline-mcp-timeout-ms': { key: 'pipelineMcpTimeoutMs', type: 'number-raw' },
@@ -134,7 +140,7 @@ const COMMANDS = new Set([
134
140
  'init', 'impact', 'plan', 'heal', 'suggest', 'generate',
135
141
  'finalize-generated-tests', 'feedback',
136
142
  'traceability-capture', 'traceability-ingest',
137
- 'analyze', 'llm-health',
143
+ 'analyze', 'llm-health', 'train',
138
144
  ]);
139
145
  // ---------------------------------------------------------------------------
140
146
  // Parser
@@ -14,11 +14,12 @@ export function printUsage() {
14
14
  ' e2e-ai-agents traceability-ingest --path <app-root> --traceability-input <json>',
15
15
  ' e2e-ai-agents generate [--scenarios <path|json>] [--max-attempts <n>] [--dry-run]',
16
16
  ' e2e-ai-agents analyze --path <app-root> [--tests-root <path>] [--since <ref>] [--generate] [--generate-output <dir>] [--heal] [--heal-report <json>]',
17
+ ' e2e-ai-agents train --path <project-root> [--no-enrich] [--validate] [--since <ref>] [--pr <num>]',
17
18
  ' e2e-ai-agents llm-health',
18
19
  '',
19
20
  'Options:',
20
21
  ' --config <path> Path to e2e-ai-agents.config.json (auto-discovered if present)',
21
- ' --path <app-root> Path to the web app (required)',
22
+ ' --path <project-root> Path to the project root (scans both frontend and backend)',
22
23
  ' --profile <name> default | mattermost',
23
24
  ' --mattermost Shortcut for --profile mattermost',
24
25
  ' --tests-root <path> Path to tests root (optional)',
@@ -79,6 +80,11 @@ export function printUsage() {
79
80
  ' --scenarios <path|json> Scenarios file/JSON for generate command',
80
81
  ' --apply Apply data-testid patches and generate tests',
81
82
  ' (legacy shortcut; prefer approve-and-generate)',
83
+ ' --no-enrich Disable LLM enrichment (offline mode, train command)',
84
+ ' --validate Validate manifest against git history (train command)',
85
+ ' --pr <number> GitHub PR number for validation (requires gh CLI)',
86
+ ' --yes, -y Non-interactive mode (train command)',
87
+ ' --output <path> Output path for route-families.json (train command)',
82
88
  ' --help Show help',
83
89
  ].join('\n'));
84
90
  }
package/dist/esm/cli.js CHANGED
@@ -14,6 +14,7 @@ import { runImpactCommand } from './cli/commands/impact.js';
14
14
  import { runPlanCommand } from './cli/commands/plan.js';
15
15
  import { runGenerateCommand } from './cli/commands/generate.js';
16
16
  import { runInitCommand } from './cli/commands/init.js';
17
+ import { runTrainCommand } from './cli/commands/train.js';
17
18
  async function main() {
18
19
  const args = parseArgs(process.argv.slice(2));
19
20
  const autoConfig = resolveAutoConfig(args);
@@ -22,6 +23,10 @@ async function main() {
22
23
  await runInitCommand(hasYes);
23
24
  return;
24
25
  }
26
+ if (args.command === 'train') {
27
+ await runTrainCommand(args, autoConfig);
28
+ return;
29
+ }
25
30
  if (args.help || !args.command) {
26
31
  printUsage();
27
32
  process.exit(args.command ? 0 : 1);
package/dist/esm/index.js CHANGED
@@ -30,3 +30,8 @@ export { buildApiSurface, loadOrBuildApiSurface } from './knowledge/api_surface.
30
30
  export { buildSpecIndex, getSpecsForFamily } from './knowledge/spec_index.js';
31
31
  // Agentic generation
32
32
  export { runAgenticGeneration } from './agentic/runner.js';
33
+ // Training (route-families bootstrap and maintenance)
34
+ export { scanProject } from './training/scanner.js';
35
+ export { mergeFamilies, detectStaleFamilies } from './training/merger.js';
36
+ export { enrichFamilies } from './training/enricher.js';
37
+ export { getCommitFiles, validateCommit, buildValidationReport, formatValidationReport } from './training/validator.js';
@@ -3,7 +3,7 @@
3
3
  import { existsSync, readFileSync, statSync } from 'fs';
4
4
  import { join } from 'path';
5
5
  const manifestCache = new Map();
6
- function matchesGlob(filePath, pattern) {
6
+ export function matchesGlob(filePath, pattern) {
7
7
  const normalized = filePath.replace(/\\/g, '/');
8
8
  const parts = pattern.replace(/\\/g, '/').split('*');
9
9
  if (parts.length === 1) {
@@ -26,7 +26,7 @@ function matchesGlob(filePath, pattern) {
26
26
  }
27
27
  return true;
28
28
  }
29
- function matchesAnyPattern(filePath, patterns) {
29
+ export function matchesAnyPattern(filePath, patterns) {
30
30
  return patterns.some((pattern) => matchesGlob(filePath, pattern));
31
31
  }
32
32
  function validateFamily(family) {
@@ -0,0 +1,273 @@
1
+ // Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
2
+ // See LICENSE.txt for license information.
3
+ import { lstatSync, readdirSync, readFileSync } from 'fs';
4
+ import { join, relative, resolve } from 'path';
5
+ import { isGuessedRoute } from './types.js';
6
// Sampling limits used when collecting source excerpts for a family.
const MAX_FILES_PER_FAMILY = 20;
const MAX_LINES_PER_FILE = 50;

// Time allowed for a single LLM request, in milliseconds.
const LLM_TIMEOUT_MS = 60_000;

// Prompts longer than this many characters are truncated before sending.
const MAX_PROMPT_CHARS = 100_000;

// Entries whose name or relative path matches any of these are never sampled.
const SENSITIVE_PATTERNS = [
    /[._]env/,
    /secret/i,
    /credential/i,
    /\.pem$/,
    /\.key$/,
    /password/i,
    /config\/secrets/,
    /fixtures\/.*auth/i,
    /\.npmrc/,
    /\.netrc/,
    /id_rsa/,
    /id_ed25519/,
    /\.p12$/,
    /\.pfx$/,
    /tokens?\.json/i,
];

// Directory names that are not descended into while sampling.
const SKIP_DIRS = new Set([
    'node_modules',
    '.git',
    'dist',
    'build',
    '.next',
    '.nuxt',
    'coverage',
]);
18
/**
 * Collects short excerpts of source files found under `dir`.
 *
 * The walk skips dot-entries, directories listed in SKIP_DIRS, symbolic
 * links, and anything whose name or path (relative to `dir`) matches
 * SENSITIVE_PATTERNS. Only regular files smaller than 50000 bytes with a
 * recognised source extension are read, and only their first
 * MAX_LINES_PER_FILE lines are kept. Unreadable directories and entries are
 * skipped silently because sampling is best effort.
 *
 * @param {string} dir - Directory to sample from.
 * @param {number} maxFiles - Stop once this many files have been collected.
 * @returns {{path: string, content: string}[]} Collected excerpts; `path` is
 *   the joined path of the file, `content` its leading lines.
 */
function sampleFiles(dir, maxFiles) {
    const sourceExtensions = ['.ts', '.tsx', '.js', '.jsx', '.go', '.py', '.rs'];
    const files = [];
    function walk(d, depth = 0, maxDepth = 10) {
        if (files.length >= maxFiles) {
            return;
        }
        if (depth > maxDepth) {
            return;
        }
        try {
            for (const entry of readdirSync(d)) {
                if (files.length >= maxFiles) {
                    return;
                }
                // Skip dot-dirs and known heavy directories
                if (entry.startsWith('.') || SKIP_DIRS.has(entry)) {
                    continue;
                }
                const full = join(d, entry);
                try {
                    // Skip symlinks
                    const lstat = lstatSync(full);
                    if (lstat.isSymbolicLink()) {
                        continue;
                    }
                    // Skip sensitive files. The patterns are written with
                    // forward slashes, so normalise the relative path first;
                    // otherwise path-based patterns never match on Windows.
                    const relPath = relative(dir, full).replace(/\\/g, '/');
                    if (SENSITIVE_PATTERNS.some((p) => p.test(relPath) || p.test(entry))) {
                        continue;
                    }
                    if (lstat.isDirectory()) {
                        walk(full, depth + 1, maxDepth);
                    }
                    else if (lstat.isFile() && lstat.size < 50000) {
                        const ext = entry.slice(entry.lastIndexOf('.'));
                        if (sourceExtensions.includes(ext)) {
                            const content = readFileSync(full, 'utf-8');
                            const lines = content.split('\n').slice(0, MAX_LINES_PER_FILE).join('\n');
                            files.push({ path: full, content: lines });
                        }
                    }
                }
                catch {
                    // Entry vanished or is unreadable: skip it.
                }
            }
        }
        catch {
            // Directory missing or unreadable: nothing to sample here.
        }
    }
    walk(dir);
    return files;
}
62
/**
 * Builds the LLM prompt used to enrich a group of scanned families.
 *
 * For each family it samples up to MAX_FILES_PER_FAMILY source files from the
 * directories behind its webapp/server path patterns, extracts test titles
 * from up to five files per spec directory, and renders one section. The
 * sections are appended, separated by "---" lines, to fixed instructions
 * that ask for a JSON array with priority, userFlows, routes, pageObjects
 * and components per family.
 *
 * @param {object[]} families - Scanned families; reads `id`, `routes`,
 *   `webappPaths`, `serverPaths`, `specDirs`, `tags` and `features`.
 * @param {string} projectRoot - Root that family paths are relative to.
 * @returns {string} The complete prompt text.
 */
function buildEnrichPrompt(families, projectRoot) {
    // Turns a path pattern into its directory part by cutting at the first '*'.
    const toDirectory = (pattern) => pattern.replace(/\/?\*.*$/, '');

    const renderSection = (family) => {
        const candidateDirs = [
            ...family.webappPaths.map(toDirectory),
            ...family.serverPaths.map(toDirectory),
        ];
        const samples = [];
        for (const candidate of candidateDirs) {
            if (!candidate) {
                continue;
            }
            const absoluteDir = join(resolve(projectRoot), candidate);
            const remaining = MAX_FILES_PER_FAMILY - samples.length;
            samples.push(...sampleFiles(absoluteDir, remaining));
            if (samples.length >= MAX_FILES_PER_FAMILY) {
                break;
            }
        }
        // Sample spec descriptions
        const specSamples = [];
        for (const specDir of family.specDirs) {
            const absoluteDir = join(resolve(projectRoot), specDir);
            for (const specFile of sampleFiles(absoluteDir, 5)) {
                const titles = specFile.content.match(/(?:test|it|describe)\s*\(\s*['"`]([^'"`]+)/g);
                if (titles) {
                    specSamples.push(...titles.map((m) => m.replace(/(?:test|it|describe)\s*\(\s*['"`]/, '')));
                }
            }
        }
        const routesText = JSON.stringify(family.routes);
        const webappText = JSON.stringify(family.webappPaths);
        const serverText = JSON.stringify(family.serverPaths);
        const specText = JSON.stringify(family.specDirs);
        const tagsText = JSON.stringify(family.tags);
        const featuresText = family.features.map((f) => f.id).join(', ') || 'none';
        const sampleBlock = samples
            .map((s) => ['### ' + relative(projectRoot, s.path), '```', s.content, '```'].join('\n'))
            .join('\n');
        const descriptionBlock = specSamples.length > 0
            ? specSamples.map((d) => `- ${d}`).join('\n')
            : '(none found)';
        return [
            `## Family: ${family.id}`,
            `Routes (guessed): ${routesText}`,
            `Webapp paths: ${webappText}`,
            `Server paths: ${serverText}`,
            `Spec dirs: ${specText}`,
            `Tags: ${tagsText}`,
            `Features: ${featuresText}`,
            '',
            `Sample files (${samples.length}):`,
            sampleBlock,
            '',
            'Test descriptions:',
            descriptionBlock,
            '',
        ].join('\n');
    };

    const sections = [];
    for (const family of families) {
        sections.push(renderSection(family));
    }
    return [
        'You are analyzing a codebase to enrich route-family definitions for an E2E test impact analysis tool.',
        '',
        'For each family below, provide:',
        '1. **priority**: P0 (critical user flow), P1 (important), or P2 (nice-to-have)',
        '2. **userFlows**: Array of human-readable flow names (e.g., "Create channel", "Search messages")',
        '3. **routes**: Improved URL patterns (e.g., "/{team}/channels/{channel}" instead of "/channels")',
        '4. **pageObjects**: Array of page object class names found in the code',
        '5. **components**: Array of UI component names relevant to this family',
        '',
        'Respond in JSON format:',
        '```json',
        '[',
        '{',
        '"id": "family_id",',
        '"priority": "P0",',
        '"userFlows": ["Flow name 1", "Flow name 2"],',
        '"routes": ["/improved/route/{param}"],',
        '"pageObjects": ["PageName"],',
        '"components": ["ComponentName"]',
        '}',
        ']',
        '```',
        '',
        sections.join('\n---\n'),
    ].join('\n');
}
130
/**
 * Sanitises enrichment entries parsed from an LLM response.
 *
 * Entries that are falsy or lack a string `id` are dropped. For the rest only
 * the known fields are copied: `priority` is kept when it is P0, P1 or P2,
 * and each list field keeps just its string items shorter than the field's
 * length limit. A field that ends up empty or is not an array becomes
 * undefined.
 *
 * @param {Array<*>} parsed - Raw entries.
 * @returns {{id: string, priority: (string|undefined),
 *   routes: (string[]|undefined), userFlows: (string[]|undefined),
 *   pageObjects: (string[]|undefined), components: (string[]|undefined)}[]}
 */
export function validateEntries(parsed) {
    const allowedPriorities = ['P0', 'P1', 'P2'];
    const shortStrings = (value, limit) => {
        if (!Array.isArray(value)) {
            return undefined;
        }
        const kept = value.filter((item) => typeof item === 'string' && item.length < limit);
        if (kept.length === 0) {
            return undefined;
        }
        return kept;
    };
    return parsed.flatMap((candidate) => {
        if (!candidate || typeof candidate.id !== 'string') {
            return [];
        }
        const priority = allowedPriorities.includes(candidate.priority)
            ? candidate.priority
            : undefined;
        return [{
            id: candidate.id,
            priority,
            routes: shortStrings(candidate.routes, 200),
            userFlows: shortStrings(candidate.userFlows, 500),
            pageObjects: shortStrings(candidate.pageObjects, 200),
            components: shortStrings(candidate.components, 200),
        }];
    });
}
148
/**
 * Extracts the enrichment entries from raw LLM output.
 *
 * Candidates are tried in order: the contents of the first fenced code block
 * (or the whole response when there is none), then the widest
 * "[" ... "]" span found anywhere in the response. The first candidate that
 * parses as a JSON array is passed through validateEntries(). The second
 * candidate is also tried when the first one parses to something other than
 * an array, for example an object wrapping the array.
 *
 * @param {string} response - Raw text returned by the LLM.
 * @returns {object[]} Validated entries, or an empty array when no JSON array
 *   can be recovered.
 */
export function parseEnrichResponse(response) {
    // Returns the parsed array, or undefined when the text is not a JSON array.
    const parseArray = (text) => {
        try {
            const value = JSON.parse(text);
            return Array.isArray(value) ? value : undefined;
        }
        catch {
            return undefined;
        }
    };
    // Extract JSON from response (may be wrapped in markdown code block)
    const fenced = response.match(/```(?:json)?\s*([\s\S]*?)```/);
    const primary = fenced?.[1]?.trim() || response.trim();
    const direct = parseArray(primary);
    if (direct) {
        return validateEntries(direct);
    }
    // Try to find any JSON array in the response
    const bracketed = response.match(/\[[\s\S]*\]/);
    const recovered = bracketed ? parseArray(bracketed[0]) : undefined;
    return recovered ? validateEntries(recovered) : [];
}
175
/**
 * Returns a shallow copy of `family` with LLM suggestions filled in.
 *
 * Suggestions never override existing data: `priority` is applied only when
 * the family has none, the list fields only when the family's list is
 * missing or empty, and `routes` only when the suggestion is non-empty and
 * isGuessedRoute() reports the family's current routes as guesses.
 *
 * @param {object} family - Family to enrich; not modified.
 * @param {object} enriched - Validated suggestion for the same family.
 * @returns {object} The enriched copy.
 */
function applyEnrichment(family, enriched) {
    const isMissing = (list) => !list || list.length === 0;
    const merged = { ...family };
    if (!family.priority && enriched.priority) {
        merged.priority = enriched.priority;
    }
    if (enriched.userFlows && isMissing(family.userFlows)) {
        merged.userFlows = enriched.userFlows;
    }
    // Only replace if current routes look like guesses
    const hasSuggestedRoutes = Boolean(enriched.routes) && enriched.routes.length > 0;
    if (hasSuggestedRoutes && isGuessedRoute(family.routes)) {
        merged.routes = enriched.routes;
    }
    for (const field of ['pageObjects', 'components']) {
        if (enriched[field] && isMissing(family[field])) {
            merged[field] = enriched[field];
        }
    }
    return merged;
}
197
/**
 * Enriches families through an LLM provider, four families per request.
 *
 * Before each chunk the accumulated cost is compared with `budgetUSD`; once
 * the budget is reached all remaining families are passed through unchanged
 * and reported as skipped. A chunk with no matching scan data is passed
 * through without a request. Oversized prompts are truncated, each request
 * is raced against a timeout of LLM_TIMEOUT_MS, and a failed request leaves
 * its chunk unchanged after logging a warning.
 *
 * @param {object[]} families - Families to enrich, in output order.
 * @param {object[]} scanned - Scan results, matched to families by `id`.
 * @param {string} projectRoot - Forwarded to buildEnrichPrompt().
 * @param {{generateText: Function}} provider - LLM provider; its result is
 *   read for `text`, `usage.inputTokens`, `usage.outputTokens` and `cost`.
 * @param {number} budgetUSD - Cost at which further chunks are skipped.
 * @returns {Promise<{enrichedFamilies: object[], tokensUsed: number,
 *   costUSD: number, skippedFamilies: string[]}>} `costUSD` is rounded to
 *   two decimals.
 */
export async function enrichFamilies(families, scanned, projectRoot, provider, budgetUSD) {
    // Process in chunks of 4 families
    const CHUNK_SIZE = 4;
    const scanById = new Map();
    for (const item of scanned) {
        scanById.set(item.id, item);
    }
    const results = [];
    const skippedIds = [];
    let tokensSpent = 0;
    let costSpent = 0;

    // Shortens an oversized prompt, preferring the last complete section
    // boundary to avoid malformed input.
    const fitPrompt = (text) => {
        if (text.length <= MAX_PROMPT_CHARS) {
            return text;
        }
        const boundary = text.lastIndexOf('\n---\n', MAX_PROMPT_CHARS);
        if (boundary > 0) {
            console.warn(`[train] Prompt truncated from ${text.length} chars at section boundary`);
            return text.slice(0, boundary);
        }
        console.warn(`[train] Prompt truncated from ${text.length} to ${MAX_PROMPT_CHARS} chars`);
        return text.slice(0, MAX_PROMPT_CHARS);
    };

    for (let start = 0; start < families.length; start += CHUNK_SIZE) {
        if (costSpent >= budgetUSD) {
            for (const leftover of families.slice(start)) {
                skippedIds.push(leftover.id);
                results.push(leftover);
            }
            break;
        }
        const chunk = families.slice(start, start + CHUNK_SIZE);
        const scannedChunk = [];
        for (const family of chunk) {
            const match = scanById.get(family.id);
            if (match !== undefined) {
                scannedChunk.push(match);
            }
        }
        if (scannedChunk.length === 0) {
            results.push(...chunk);
            continue;
        }
        const prompt = fitPrompt(buildEnrichPrompt(scannedChunk, projectRoot));
        let timer;
        try {
            const timeoutPromise = new Promise((_, reject) => {
                timer = setTimeout(() => reject(new Error('LLM request timed out')), LLM_TIMEOUT_MS);
            });
            const request = provider.generateText(prompt, { maxTokens: 4096, temperature: 0.3 });
            const response = await Promise.race([request, timeoutPromise]);
            tokensSpent += (response.usage?.inputTokens ?? 0) + (response.usage?.outputTokens ?? 0);
            costSpent += response.cost ?? 0;
            const suggestions = new Map();
            for (const entry of parseEnrichResponse(response.text)) {
                suggestions.set(entry.id, entry);
            }
            for (const family of chunk) {
                const suggestion = suggestions.get(family.id);
                results.push(suggestion ? applyEnrichment(family, suggestion) : family);
            }
        }
        catch (error) {
            // On LLM failure, keep families unchanged
            console.warn(`[train] LLM enrichment failed for chunk: ${error instanceof Error ? error.message : String(error)}`);
            results.push(...chunk);
        }
        finally {
            if (timer) {
                clearTimeout(timer);
            }
        }
    }
    return {
        enrichedFamilies: results,
        tokensUsed: tokensSpent,
        costUSD: Math.round(costSpent * 100) / 100,
        skippedFamilies: skippedIds,
    };
}