@skyramp/mcp 0.2.1-rc.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/build/playwright/registerPlaywrightTools.js +10 -0
  2. package/build/prompts/test-maintenance/drift-analysis-prompt.js +98 -87
  3. package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +92 -60
  4. package/build/prompts/test-maintenance/driftAnalysisSections.js +139 -197
  5. package/build/prompts/test-recommendation/scopeAssessment.js +106 -5
  6. package/build/prompts/test-recommendation/scopeAssessment.test.js +128 -1
  7. package/build/prompts/testbot/testbot-prompts.js +6 -9
  8. package/build/prompts/testbot/testbot-prompts.test.js +38 -22
  9. package/build/services/TestDiscoveryService.js +39 -9
  10. package/build/tools/test-management/actionsTool.js +166 -148
  11. package/build/tools/test-management/analyzeChangesTool.js +10 -12
  12. package/build/tools/test-management/analyzeTestHealthTool.js +10 -22
  13. package/build/tools/test-management/uiAnalyzeChangesTool.js +8 -2
  14. package/build/tools/test-management/uiAnalyzeChangesTool.test.js +47 -0
  15. package/build/utils/dartRouteExtractor.js +319 -0
  16. package/build/utils/dartRouteExtractor.test.js +307 -0
  17. package/build/utils/docker.test.js +1 -1
  18. package/build/utils/uiPageEnumerator.js +67 -0
  19. package/build/utils/uiPageEnumerator.test.js +222 -0
  20. package/build/utils/versions.js +1 -1
  21. package/node_modules/playwright/lib/mcp/skyramp/assertApiRequestTool.js +46 -0
  22. package/node_modules/playwright/lib/mcp/skyramp/index.js +10 -0
  23. package/node_modules/playwright/lib/mcp/skyramp/loadTraceTool.js +313 -0
  24. package/node_modules/playwright/lib/mcp/skyramp/skyRampImport.js +146 -0
  25. package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +519 -52
  26. package/node_modules/playwright/lib/mcp/test/skyRampExport.js +32 -14
  27. package/package.json +2 -2
  28. package/node_modules/playwright/lib/mcp/browser/tools/domAnalyzer.js +0 -261
  29. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.3.tgz +0 -0
  30. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
  31. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
  32. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
@@ -2,6 +2,7 @@ import { z } from "zod";
2
2
  import { logger } from "../../utils/logger.js";
3
3
  import { StateManager, } from "../../utils/AnalysisStateManager.js";
4
4
  import { TestSource, DriftAction, RecommendationPriority, EstimatedWork } from "../../types/TestAnalysis.js";
5
+ import { TestType } from "../../types/TestTypes.js";
5
6
  import * as fs from "fs";
6
7
  import * as path from "path";
7
8
  import { AnalyticsService } from "../../services/AnalyticsService.js";
@@ -43,6 +44,22 @@ export function computeRenamedTestFile(testFile, renames) {
43
44
  }
44
45
  return newFilePath;
45
46
  }
47
+ /**
48
+ * Select test types to generate based on HTTP method.
49
+ */
50
+ function selectTestTypesForEndpoint(method) {
51
+ switch (method.toUpperCase()) {
52
+ case "POST":
53
+ case "PUT":
54
+ case "PATCH":
55
+ return [TestType.INTEGRATION, TestType.CONTRACT];
56
+ case "DELETE":
57
+ return [TestType.INTEGRATION, TestType.SMOKE];
58
+ case "GET":
59
+ default:
60
+ return [TestType.CONTRACT, TestType.SMOKE];
61
+ }
62
+ }
46
63
  const recommendationSchema = z.object({
47
64
  testFile: z
48
65
  .string()
@@ -100,17 +117,20 @@ export function registerActionsTool(server) {
100
117
  idempotentHint: false,
101
118
  openWorldHint: true,
102
119
  },
103
- description: `Execute test maintenance actions — final step of the unified Test Health Analysis Flow.
120
+ description: `Execute test maintenance and generation actions — final step of the unified Test Health Analysis Flow.
104
121
 
105
122
  **PREREQUISITE:** Call \`skyramp_analyze_changes\` (produces the stateFile), then \`skyramp_analyze_test_health\` (runs the drift assessment). This tool reads the stateFile from \`skyramp_analyze_changes\`.
106
123
 
124
+ Call this tool after completing the drift assessment. It executes maintenance actions automatically from the stateFile — no user confirmation required.
125
+
107
126
  **EXECUTING ACTIONS:**
108
- - UPDATE: Reads each test file and emits targeted per-file edit instructions driven by \`updateInstructions\` and \`renamedEndpoints\`
109
- - REGENERATE: Reads the existing file for context (endpoint, auth, test type), then instructs the LLM to call the appropriate generation tool (e.g. \`skyramp_integration_test_generation\`) with \`outputDir\` + \`output\` matching the existing file to overwrite it
110
- - IGNORE / VERIFY / DELETE: Passed through and summarised — no file reads, no automated edits
127
+ - UPDATE: Tests with drift emits targeted per-file edit instructions driven by updateInstructions and renamedEndpoints
128
+ - REGENERATE: Emits file-level summary; follow up by calling the appropriate generation tool (e.g. skyramp_integration_test_generation) with the same filename to overwrite
129
+ - VERIFY: Emits file-level summary for human review — no automated edits
130
+ - ADD: Auto-generates tests for new endpoints via LLM instructions
111
131
 
112
132
  **OUTPUT:**
113
- Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM_INSTRUCTIONS\` block for automated execution.
133
+ Comprehensive report with executed actions, summary, and instructions for ADD recommendations
114
134
  `,
115
135
  inputSchema: actionsSchema,
116
136
  }, async (args) => {
@@ -126,22 +146,17 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
126
146
  errorResult = toolError(`State file is empty or invalid: ${args.stateFile}. Call skyramp_analyze_changes first to generate a valid state file.`);
127
147
  return errorResult;
128
148
  }
149
+ // External tests must not be candidates for UPDATE/REGENERATE/DELETE actions.
150
+ // Default source to Skyramp for backwards compat with state files created before the source field existed.
151
+ const testAnalysisResults = (stateData.existingTests || []).filter((t) => (t.source ?? TestSource.Skyramp) !== TestSource.External);
152
+ const newEndpoints = stateData.newEndpoints || [];
129
153
  // Resolve repo root for path normalization and security checks.
130
154
  const repoRoot = repositoryPath ? path.resolve(repositoryPath) : "";
131
- // Relevant external (user-written) tests: UPDATE is permitted; REGENERATE/DELETE
132
- // are report-only (the LLM may recommend them but this tool will not apply them).
133
- // Paths are stored relative to repositoryPath in the state file — re-absolutize.
134
- const relevantExternalPaths = new Set((stateData.repositoryAnalysis?.relevantExternalTestPaths ?? []).map((p) => path.isAbsolute(p) ? p : path.resolve(repoRoot, p)));
135
- // Allowlist: Skyramp-generated tests + relevant external tests.
136
- // Using an allowlist (not a blocklist) catches hallucinated paths the LLM
137
- // may supply that are not in the scanned catalog at all.
138
- const testAnalysisResults = (stateData.existingTests || []);
139
- const skyrampTestFiles = new Set(testAnalysisResults
140
- .filter((t) => (t.source ?? TestSource.Skyramp) !== TestSource.External)
141
- .map((t) => t.testFile));
142
- const externalTestFiles = new Set(testAnalysisResults
143
- .filter((t) => (t.source ?? TestSource.Skyramp) === TestSource.External)
144
- .map((t) => t.testFile));
155
+ // Set of non-external (Skyramp-generated) test file paths — the only files
156
+ // that may receive UPDATE/REGENERATE/DELETE actions. Using the allowlist rather
157
+ // than a blocklist catches both external tests AND hallucinated paths the LLM
158
+ // may supply that are not present in the scanned catalog at all.
159
+ const skyrampTestFiles = new Set(testAnalysisResults.map((t) => t.testFile));
145
160
  // ── Build recommendations from LLM-supplied drift assessment ──
146
161
  // The LLM performs the drift assessment in context after skyramp_analyze_test_health
147
162
  // and passes results here directly — analyzeTestHealthTool never writes assessment
@@ -150,55 +165,23 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
150
165
  (args.recommendations ?? []).forEach((rec) => {
151
166
  // Schema requires absolute paths; resolve any relative paths defensively
152
167
  // against repoRoot in case the LLM sends a relative path despite the schema.
153
- // Normalize via path.resolve to collapse any `..` segments before the
154
- // traversal guard — otherwise "/repo/../etc/passwd" would pass startsWith.
155
- const rawFile = path.isAbsolute(rec.testFile)
168
+ const resolvedFile = path.isAbsolute(rec.testFile)
156
169
  ? rec.testFile
157
170
  : repoRoot
158
171
  ? path.resolve(repoRoot, rec.testFile)
159
172
  : rec.testFile;
160
- const resolvedFile = path.resolve(rawFile);
161
173
  // Reject files outside the repo root (path-traversal guard).
162
- // Exception: files already in the scanned test catalog (externalTestFiles / skyrampTestFiles)
163
- // may legitimately live in a separate testsRepoDir outside repositoryPath — catalog
164
- // membership is a sufficient provenance check for those paths.
165
- const isInCatalog = skyrampTestFiles.has(resolvedFile) || skyrampTestFiles.has(rec.testFile)
166
- || externalTestFiles.has(resolvedFile) || externalTestFiles.has(rec.testFile);
167
- if (repoRoot && !isInCatalog && !resolvedFile.startsWith(repoRoot + path.sep) && resolvedFile !== repoRoot) {
174
+ if (repoRoot && !resolvedFile.startsWith(repoRoot + path.sep) && resolvedFile !== repoRoot) {
168
175
  logger.warning(`Skipping recommendation for path outside repo root: ${rec.testFile}`);
169
176
  return;
170
177
  }
171
- // Guard: only files present in the scanned test catalog may receive any
172
- // recommendation. Hallucinated paths (not in either set) are rejected for
173
- // all actions, including VERIFY and IGNORE, to keep the report consistent
174
- // with what was actually discovered.
175
- const isSkyramp = skyrampTestFiles.has(resolvedFile) || skyrampTestFiles.has(rec.testFile);
176
- const isRelevantExternal = (externalTestFiles.has(resolvedFile) || externalTestFiles.has(rec.testFile)) &&
177
- (relevantExternalPaths.has(resolvedFile) || relevantExternalPaths.has(rec.testFile));
178
- const isInAnyKnownCatalog = isSkyramp || isRelevantExternal
179
- || externalTestFiles.has(resolvedFile) || externalTestFiles.has(rec.testFile);
180
- if (!isInAnyKnownCatalog) {
181
- logger.warning(`Skipping ${rec.action} for unknown test (not in scanned catalog): ${rec.testFile}`);
182
- return;
183
- }
178
+ // Guard: only Skyramp-generated tests may receive UPDATE/REGENERATE/DELETE.
179
+ // Using an allowlist (skyrampTestFiles) rather than a blocklist catches both
180
+ // external tests and hallucinated paths the LLM may supply that are not in
181
+ // the scanned catalog. IGNORE/VERIFY are informational and pass through.
184
182
  const isActionable = [DriftAction.Update, DriftAction.Regenerate, DriftAction.Delete].includes(rec.action);
185
- if (isActionable && !isSkyramp && !isRelevantExternal) {
186
- logger.warning(`Skipping ${rec.action} for irrelevant external test: ${rec.testFile}`);
187
- return;
188
- }
189
- // REGENERATE and DELETE on external tests are report-only — convert to VERIFY so
190
- // the finding surfaces to the developer without touching the file.
191
- if (isRelevantExternal && !isSkyramp &&
192
- (rec.action === DriftAction.Regenerate || rec.action === DriftAction.Delete)) {
193
- recommendations.push({
194
- testFile: resolvedFile,
195
- action: DriftAction.Verify,
196
- priority: rec.priority ?? RecommendationPriority.Medium,
197
- rationale: `[external test — needs manual review] ${rec.rationale ?? ""}`.trimEnd(),
198
- estimatedWork: rec.estimatedWork ?? EstimatedWork.Small,
199
- updateInstructions: "",
200
- renamedEndpoints: [],
201
- });
183
+ if (isActionable && !skyrampTestFiles.has(resolvedFile) && !skyrampTestFiles.has(rec.testFile)) {
184
+ logger.warning(`Skipping ${rec.action} for non-Skyramp or unknown test: ${rec.testFile}`);
202
185
  return;
203
186
  }
204
187
  recommendations.push({
@@ -211,7 +194,7 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
211
194
  renamedEndpoints: rec.renamedEndpoints ?? [],
212
195
  });
213
196
  });
214
- // ── Process UPDATE and REGENERATE recommendations ──
197
+ // ── Process UPDATE recommendations ──
215
198
  // Deduplicate by testFile — keep the highest-priority entry when the LLM
216
199
  // repeats a file. Priority order: high > medium > low.
217
200
  const priorityRank = {
@@ -219,44 +202,28 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
219
202
  [RecommendationPriority.Medium]: 1,
220
203
  [RecommendationPriority.Low]: 0,
221
204
  };
222
- // Build per-file winner maps. REGENERATE beats UPDATE for the same file —
223
- // if the LLM emits both, keep REGENERATE (higher severity) and drop UPDATE.
224
205
  const updateByFile = new Map();
225
- const regenerateByFile = new Map();
226
206
  for (const rec of recommendations) {
227
- if (rec.action === DriftAction.Regenerate) {
228
- const existing = regenerateByFile.get(rec.testFile);
229
- if (!existing || priorityRank[rec.priority] > priorityRank[existing.priority]) {
230
- regenerateByFile.set(rec.testFile, rec);
231
- }
232
- }
233
- else if (rec.action === DriftAction.Update) {
234
- // Only add to updateByFile if no REGENERATE exists for this file.
235
- if (!regenerateByFile.has(rec.testFile)) {
236
- const existing = updateByFile.get(rec.testFile);
237
- if (!existing || priorityRank[rec.priority] > priorityRank[existing.priority]) {
238
- updateByFile.set(rec.testFile, rec);
239
- }
240
- }
207
+ if (rec.action !== DriftAction.Update)
208
+ continue;
209
+ const existing = updateByFile.get(rec.testFile);
210
+ if (!existing || priorityRank[rec.priority] > priorityRank[existing.priority]) {
211
+ updateByFile.set(rec.testFile, rec);
241
212
  }
242
213
  }
243
- // Second pass: drop any UPDATE entries for files that ended up with REGENERATE
244
- // (handles ordering where UPDATE was inserted before REGENERATE was seen).
245
- for (const file of regenerateByFile.keys()) {
246
- updateByFile.delete(file);
247
- }
248
214
  const updateRecommendations = Array.from(updateByFile.values());
249
- const regenerateRecommendations = Array.from(regenerateByFile.values());
250
215
  const fileInstructions = [];
251
216
  const testFilesToUpdate = [];
252
217
  const testFileContentMap = new Map();
253
- // ── UPDATE: read file, emit targeted edit instructions ──
254
218
  for (const rec of updateRecommendations) {
255
219
  if (!rec.testFile) {
256
220
  logger.warning("Recommendation missing testFile", rec);
257
221
  continue;
258
222
  }
259
223
  testFilesToUpdate.push(rec.testFile);
224
+ const testData = testAnalysisResults.find((t) => t.testFile === rec.testFile);
225
+ const driftData = testData?.drift;
226
+ const driftChanges = driftData?.changes || [];
260
227
  let testFileContent = "";
261
228
  try {
262
229
  testFileContent = fs.readFileSync(rec.testFile, "utf-8");
@@ -269,6 +236,9 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
269
236
  const renames = rec.renamedEndpoints || [];
270
237
  const isRenameUpdate = renames.length > 0;
271
238
  let instruction = `\n### ${rec.testFile}\n\n`;
239
+ instruction += `**Priority:** ${rec.priority} | `;
240
+ instruction += `**Estimated Effort:** ${rec.estimatedWork || EstimatedWork.Small}\n\n`;
241
+ instruction += `**Why Update Needed:** ${rec.rationale}\n\n`;
272
242
  if (isRenameUpdate) {
273
243
  instruction += `**Endpoint Rename Detected — Path Substitution Required:**\n\n`;
274
244
  instruction += `| Old Path | New Path | Method |\n`;
@@ -294,41 +264,79 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
294
264
  instruction += `Preserve all existing test logic — only add or adjust what is described above.\n\n`;
295
265
  }
296
266
  else if (!isRenameUpdate) {
297
- const fallbackRationale = rec.rationale ?? "";
298
- if (fallbackRationale) {
299
- instruction += `**Why:** ${fallbackRationale}\n\n`;
300
- }
301
- instruction += `**Action:** Update this test file based on the rationale above. `;
267
+ instruction += `**Action:** Update this test file per the rationale above. `;
302
268
  instruction += `Match the assertion style already used in the file. `;
303
269
  instruction += `Preserve all existing test logic — only add or adjust the minimum required assertions.\n\n`;
304
270
  }
305
- fileInstructions.push(instruction);
306
- }
307
- // ── REGENERATE: read file for context, emit overwrite instructions ──
308
- const regenerateInstructions = [];
309
- const testFilesToRegenerate = [];
310
- const regenerateContentMap = new Map();
311
- for (const rec of regenerateRecommendations) {
312
- if (!rec.testFile) {
313
- logger.warning("Recommendation missing testFile", rec);
314
- continue;
315
- }
316
- testFilesToRegenerate.push(rec.testFile);
317
- let existingContent = "";
318
- try {
319
- existingContent = fs.readFileSync(rec.testFile, "utf-8");
320
- regenerateContentMap.set(rec.testFile, existingContent);
271
+ if (driftData) {
272
+ instruction += `**Analysis:**\n`;
273
+ instruction += `- Changes Detected: ${driftData.changes?.length || 0}\n`;
274
+ instruction += `- Affected Files: ${driftData.affectedFiles.files?.length || 0}\n\n`;
321
275
  }
322
- catch (error) {
323
- logger.warning(`Could not read file for REGENERATE context ${rec.testFile}: ${error.message}`);
276
+ if (driftChanges.length > 0) {
277
+ instruction += `**Changes Detected:**\n`;
278
+ driftChanges.forEach((change) => {
279
+ instruction += `**${change.type}** (Severity: ${change.severity}): ${change.description}\n`;
280
+ if (change.details) {
281
+ instruction += ` └─ ${change.details}\n`;
282
+ }
283
+ if (change.file) {
284
+ instruction += ` └─ In: \`${change.file}\`\n`;
285
+ }
286
+ });
287
+ instruction += `\n`;
324
288
  }
325
- let instruction = `\n### ${rec.testFile}\n\n`;
326
- instruction += `**Action: REGENERATE** — the response shape changed too drastically for targeted edits.\n\n`;
327
- if (rec.updateInstructions) {
328
- instruction += `**What changed:**\n\n${rec.updateInstructions}\n\n`;
289
+ // File content is provided in LLM_INSTRUCTIONS.update_context.current_content — omit here to avoid duplication.
290
+ fileInstructions.push(instruction);
291
+ }
292
+ // ── Build ADD section for new endpoints ──
293
+ const wsBaseUrl = stateData.repositoryAnalysis?.wsBaseUrl || "";
294
+ const wsSchemaPath = stateData.repositoryAnalysis?.wsSchemaPath || "";
295
+ const primaryLanguage = stateData.repositoryAnalysis?.projectMeta?.primaryLanguage ||
296
+ "python";
297
+ const primaryFramework = stateData.repositoryAnalysis?.projectMeta?.primaryFramework ||
298
+ "pytest";
299
+ // Determine output directory from workspace config or repo path
300
+ const outputDir = repositoryPath
301
+ ? path.join(repositoryPath, "tests", "skyramp")
302
+ : "./tests/skyramp";
303
+ const addSummaryLines = [];
304
+ const llmToolCalls = [];
305
+ for (const ep of newEndpoints) {
306
+ const testTypes = selectTestTypesForEndpoint(ep.method);
307
+ const endpointURL = wsBaseUrl
308
+ ? wsBaseUrl.replace(/\/$/, "") + ep.path
309
+ : ep.path;
310
+ addSummaryLines.push(`- ${ep.method} ${ep.path} → ${testTypes.join(", ")} tests`);
311
+ for (const testType of testTypes) {
312
+ let toolName = "";
313
+ switch (testType) {
314
+ case TestType.CONTRACT:
315
+ toolName = "skyramp_contract_test_generation";
316
+ break;
317
+ case TestType.INTEGRATION:
318
+ toolName = "skyramp_integration_test_generation";
319
+ break;
320
+ case TestType.SMOKE:
321
+ toolName = "skyramp_smoke_test_generation";
322
+ break;
323
+ default:
324
+ toolName = "skyramp_contract_test_generation";
325
+ }
326
+ llmToolCalls.push({
327
+ tool: toolName,
328
+ params: {
329
+ endpointURL,
330
+ method: ep.method,
331
+ language: primaryLanguage,
332
+ framework: primaryFramework,
333
+ outputDir,
334
+ ...(wsSchemaPath ? { apiSchema: wsSchemaPath } : {}),
335
+ },
336
+ endpoint: `${ep.method} ${ep.path}`,
337
+ testType: testType,
338
+ });
329
339
  }
330
- instruction += `Call the appropriate generation tool (e.g. \`skyramp_integration_test_generation\`, \`skyramp_contract_test_generation\`) with \`outputDir: "${path.dirname(rec.testFile)}"\` and \`output: "${path.basename(rec.testFile)}"\` to overwrite this file from scratch. Use the existing file content in \`LLM_INSTRUCTIONS.regenerate_context\` for context on the endpoint, auth pattern, and test structure — replicate the test type and language.\n\n`;
331
- regenerateInstructions.push(instruction);
332
340
  }
333
341
  // ── Build response text ──
334
342
  let responseText = `# Test Actions Report\n\n`;
@@ -340,26 +348,37 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
340
348
  responseText += `\n---\n`;
341
349
  responseText += fileInstructions.join("\n---\n");
342
350
  }
343
- if (regenerateRecommendations.length > 0) {
344
- responseText += `\n## Tests Requiring Regeneration (${regenerateRecommendations.length})\n\n`;
345
- testFilesToRegenerate.forEach((file, idx) => {
346
- responseText += `${idx + 1}. \`${file}\`\n`;
351
+ if (newEndpoints.length > 0) {
352
+ responseText += `\n## New Endpoint Tests to Generate (${newEndpoints.length} endpoints)\n\n`;
353
+ addSummaryLines.forEach((line) => {
354
+ responseText += `${line}\n`;
347
355
  });
348
- responseText += `\n---\n`;
349
- responseText += regenerateInstructions.join("\n---\n");
356
+ responseText += `\nThe following tests will be generated automatically.\n`;
350
357
  }
351
- const otherRecs = recommendations.filter((rec) => rec.action !== DriftAction.Update && rec.action !== DriftAction.Regenerate);
352
- if (otherRecs.length > 0) {
353
- responseText += `\n## Other Findings (${otherRecs.length})\n\n`;
354
- otherRecs.forEach((rec) => {
355
- responseText += `- **${rec.testFile}** — Action: ${rec.action}, Priority: ${rec.priority}`;
356
- if (rec.rationale)
357
- responseText += `${rec.rationale}`;
358
- responseText += `\n`;
359
- });
358
+ if (updateRecommendations.length === 0 && newEndpoints.length === 0) {
359
+ const otherRecs = recommendations.filter((rec) => rec.action !== DriftAction.Update);
360
+ if (otherRecs.length > 0) {
361
+ responseText += `## Recommendations (${otherRecs.length})\n\n`;
362
+ otherRecs.forEach((rec) => {
363
+ responseText += `- **${rec.testFile}** — Action: ${rec.action}, Priority: ${rec.priority}\n`;
364
+ responseText += ` ${rec.rationale}\n`;
365
+ });
366
+ }
367
+ else {
368
+ responseText += `No action required. All existing tests appear healthy.\n`;
369
+ }
370
+ }
371
+ responseText += `\n\n## Next Steps\n\n`;
372
+ responseText += `The AI assistant will:\n`;
373
+ let stepNumber = 1;
374
+ if (updateRecommendations.length > 0) {
375
+ responseText += `${stepNumber++}. Review the changes and issues for each test\n`;
376
+ responseText += `${stepNumber++}. Update test files to fix compatibility issues\n`;
377
+ responseText += `${stepNumber++}. Preserve original test logic and structure\n`;
378
+ responseText += `${stepNumber++}. Show you the changes made\n`;
360
379
  }
361
- else if (updateRecommendations.length === 0 && regenerateRecommendations.length === 0) {
362
- responseText += `No action required. All existing tests appear healthy.\n`;
380
+ if (newEndpoints.length > 0) {
381
+ responseText += `${stepNumber++}. Generate new tests for new endpoints\n`;
363
382
  }
364
383
  responseText += `\n**This tool is currently in Early Preview stage. Please verify the results.**\n`;
365
384
  // ── Build LLM instructions for UPDATE ──
@@ -378,8 +397,6 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
378
397
  auto_proceed: true,
379
398
  files_to_update: testFilesToUpdate,
380
399
  update_count: updateRecommendations.length,
381
- files_to_regenerate: testFilesToRegenerate,
382
- regenerate_count: regenerateRecommendations.length,
383
400
  };
384
401
  if (uniqueRenames.length > 0) {
385
402
  llmInstructionsObj.endpoint_renames = uniqueRenames;
@@ -411,22 +428,6 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
411
428
  llmInstructionsObj.update_strategy =
412
429
  "For each file in update_context, apply the changes described in context to the provided current_content. Write the result using the Edit tool. Do NOT re-read the file first. Match the assertion style already used in the file. Preserve all existing test logic. After applying all edits, call skyramp_enhance_assertions with each updated file path to strengthen the assertions.";
413
430
  }
414
- // REGENERATE context: existing file content gives the generation tool the
415
- // endpoint URL, auth pattern, test type, and language to replicate.
416
- const regenerateContextFiles = [];
417
- for (const rec of regenerateRecommendations) {
418
- const existing_content = regenerateContentMap.get(rec.testFile);
419
- regenerateContextFiles.push({
420
- file: rec.testFile,
421
- rationale: rec.updateInstructions || rec.rationale,
422
- ...(existing_content !== undefined && { existing_content }),
423
- });
424
- }
425
- if (regenerateContextFiles.length > 0) {
426
- llmInstructionsObj.regenerate_context = regenerateContextFiles;
427
- llmInstructionsObj.regenerate_strategy =
428
- "For each file in regenerate_context, call the appropriate generation tool (skyramp_integration_test_generation or skyramp_contract_test_generation) with outputDir set to the file's directory and output set to the filename. Use existing_content to determine the test type, endpoint, auth pattern, and language. The generation tool will overwrite the file. Do NOT use skyramp_ui_test_generation here — UI test regeneration requires a recorded trace (playwrightInput) and must be handled separately.";
429
- }
430
431
  const llmInstructions = `<!-- LLM_INSTRUCTIONS:\n${JSON.stringify(llmInstructionsObj, null, 2)}\n-->\n`;
431
432
  const contentBlocks = [
432
433
  {
@@ -444,6 +445,23 @@ Per-file instructions for UPDATE and REGENERATE actions, plus a structured \`LLM
444
445
  },
445
446
  },
446
447
  ];
448
+ // ── Build ADD instructions for new endpoints ──
449
+ if (newEndpoints.length > 0 && llmToolCalls.length > 0) {
450
+ const addInstructionsObj = {
451
+ workflow: "add_tests_for_new_endpoints",
452
+ auto_proceed: true,
453
+ instruction: "Call each tool in tool_calls immediately and in order. Do NOT ask for confirmation. If endpointURL is a bare path, read .skyramp/workspace.yml for baseUrl and prepend it.",
454
+ tool_calls: llmToolCalls,
455
+ };
456
+ const addInstructions = `<!-- LLM_INSTRUCTIONS:\n${JSON.stringify(addInstructionsObj, null, 2)}\n-->\n`;
457
+ contentBlocks.push({
458
+ type: "text",
459
+ text: addInstructions,
460
+ annotations: {
461
+ audience: ["assistant"],
462
+ },
463
+ });
464
+ }
447
465
  return {
448
466
  content: contentBlocks,
449
467
  };
@@ -10,7 +10,7 @@ import { parseWorkspaceAuthType, getDefaultAuthHeader, WorkspaceAuthType, readWo
10
10
  import { AnalyticsService } from "../../services/AnalyticsService.js";
11
11
  import { StateManager, registerSession, storeSessionData, setTestsRepoDir, } from "../../utils/AnalysisStateManager.js";
12
12
  import { buildRecommendationPrompt } from "../../prompts/test-recommendation/test-recommendation-prompt.js";
13
- import { isFrontendFile, isTestFile } from "../../prompts/test-recommendation/scopeAssessment.js";
13
+ import { hasFlutterSdkDep, isFrontendFile, isTestFile } from "../../prompts/test-recommendation/scopeAssessment.js";
14
14
  import { enumerateCandidateUiPages } from "../../utils/uiPageEnumerator.js";
15
15
  import { MAX_RECOMMENDATIONS, MAX_TESTS_TO_GENERATE } from "../../prompts/test-recommendation/recommendationSections.js";
16
16
  import { TestDiscoveryService } from "../../services/TestDiscoveryService.js";
@@ -538,14 +538,6 @@ Combines API endpoint scanning, branch diff computation, and test discovery into
538
538
  }
539
539
  await sendProgress(50, 100, "Discovering existing tests...");
540
540
  // ── Step 3: Discover existing tests ──
541
- // Resolve testDir to scope the file scan — prefer explicit testsRepoDir param,
542
- // then workspace.yml testDirectory. repositoryPath remains the repo root for
543
- // git operations; testDir only limits which files are classified as tests.
544
- const wsConfigEarly = await readWorkspaceConfigRaw(params.repositoryPath);
545
- const wsTestDir = wsConfigEarly?.services?.[0]?.testDirectory;
546
- const testDir = params.testsRepoDir
547
- ?? (params.testDirectory ? path.resolve(params.repositoryPath, params.testDirectory) : undefined)
548
- ?? (wsTestDir ? path.resolve(params.repositoryPath, wsTestDir) : undefined);
549
541
  // Compute changedResources from classified endpoints for test discovery filtering.
550
542
  // undefined → full-repo mode (no diff context)
551
543
  // [] → PR mode, no endpoints found → skip external tests
@@ -585,7 +577,8 @@ Combines API endpoint scanning, branch diff computation, and test discovery into
585
577
  try {
586
578
  const testDiscoveryService = new TestDiscoveryService();
587
579
  setTestsRepoDir(params.testsRepoDir);
588
- const discoveryResult = await testDiscoveryService.discoverTests(testDir ?? params.repositoryPath, { changedResources });
580
+ const testScanPath = params.testsRepoDir ?? params.repositoryPath;
581
+ const discoveryResult = await testDiscoveryService.discoverTests(testScanPath, { changedResources });
589
582
  existingTests = discoveryResult.tests.map((test) => ({
590
583
  testFile: test.testFile,
591
584
  testType: test.testType,
@@ -1130,7 +1123,13 @@ Combines API endpoint scanning, branch diff computation, and test discovery into
1130
1123
  // pass isFrontendFile (any .ts under a frontend directory matches
1131
1124
  // the tier-3 rule) but aren't UI source we'd want to ground page
1132
1125
  // enumeration in.
1133
- const frontendFiles = changedFiles.filter((f) => isFrontendFile(f) && !isTestFile(f));
1126
+ // Compute hasFlutterSdkDep once and pass it through so .dart files
1127
+ // in a Flutter project are recognised as frontend. See Confluence
1128
+ // "Flutter support in Testbot" — this is the second budget-driving
1129
+ // call site that must thread the flag (the other is
1130
+ // uiAnalyzeChangesTool). Both must agree to avoid silent divergence.
1131
+ const flutterSdk = hasFlutterSdkDep(params.repositoryPath);
1132
+ const frontendFiles = changedFiles.filter((f) => isFrontendFile(f, { hasFlutterSdkDep: flutterSdk }) && !isTestFile(f));
1134
1133
  if (frontendFiles.length === 0)
1135
1134
  return undefined;
1136
1135
  const candidateUiPages = await enumerateCandidateUiPages(params.repositoryPath, frontendFiles);
@@ -1189,7 +1188,6 @@ Combines API endpoint scanning, branch diff computation, and test discovery into
1189
1188
  sessionId,
1190
1189
  routerMountContext,
1191
1190
  candidateRouteFiles,
1192
- relevantExternalTestPaths,
1193
1191
  },
1194
1192
  };
1195
1193
  // Clean up old state files (>24 hours) before creating new one
@@ -15,17 +15,16 @@ export function registerAnalyzeTestHealthTool(server) {
15
15
  idempotentHint: true,
16
16
  openWorldHint: false,
17
17
  },
18
- description: `Generate drift assessment instructions for existing tests — second step of the unified Test Health Analysis Flow.
18
+ description: `Generate drift and health assessment instructions for existing tests — second step of the unified Test Health Analysis Flow.
19
19
 
20
20
  **PREREQUISITE:** Call \`skyramp_analyze_changes\` first to get a stateFile.
21
21
 
22
- Returns a structured prompt for the LLM to assess each existing test against the branch diff and assign one of: UPDATE / REGENERATE / VERIFY / DELETE / IGNORE.
22
+ This tool reads existing tests, the branch diff, and scanned endpoints from the stateFile,
23
+ then returns a structured prompt for the LLM to assess each test for drift and health.
23
24
 
24
- Includes both Skyramp-generated tests and user-written (external) tests that are relevant to the PR's changed endpoints. For external tests, UPDATE is applied automatically; REGENERATE and DELETE are surfaced as report-only findings for the developer.
25
+ The LLM follows the returned prompt to assign drift details and actions (UPDATE / REGENERATE / VERIFY / DELETE / IGNORE) for each test, then calls \`skyramp_actions\`.
25
26
 
26
- The LLM follows the returned prompt (Action Decision Tree pre-scan → Endpoint Existence → Response Shape → Additive Fields → Auth/AuthZ → Behavioral Contract → Assign Action → Update Execution Rules), then calls \`skyramp_actions\` with its \`recommendations[]\`.
27
-
28
- (Optional) Execute tests using \`skyramp_execute_test\` with \`stateFile\` before \`skyramp_actions\` to validate live.`,
27
+ (Optional) Execute tests using \`skyramp_execute_test\` with \`stateFile\` parameter before \`skyramp_actions\` to validate tests live.`,
29
28
  inputSchema: {
30
29
  stateFile: z
31
30
  .string()
@@ -46,24 +45,14 @@ The LLM follows the returned prompt (Action Decision Tree pre-scan → Endpoint
46
45
  if (!stateData) {
47
46
  return toolError(`State file is empty or invalid: ${args.stateFile}. Call skyramp_analyze_changes first to generate a valid state file.`);
48
47
  }
48
+ // Only Skyramp tests are candidates for drift analysis and maintenance actions.
49
+ // External (user-written) tests are used only for recommendation deduplication.
50
+ // Default source to Skyramp for backwards compat with state files created before the source field existed.
51
+ const existingTests = (stateData.existingTests || []).filter((t) => (t.source ?? TestSource.Skyramp) !== TestSource.External);
52
+ logger.info(`Loaded ${existingTests.length} existing Skyramp tests from state file (excluded external)`);
49
53
  if (!repositoryPath || typeof repositoryPath !== "string") {
50
54
  return toolError(`repositoryPath not found in state file metadata. The state file was likely created by an older version — re-run skyramp_analyze_changes to regenerate it.`);
51
55
  }
52
- // Skyramp tests: full drift analysis + all actions permitted.
53
- // Relevant external tests (user-written, relevant to this PR's endpoints): drift analysis
54
- // + UPDATE only — REGENERATE and DELETE are report-only (enforced in skyramp_actions).
55
- // Other external tests: excluded entirely (deduplication only, not analysed).
56
- // relevantExternalTestPaths are stored relative to repositoryPath in the state file.
57
- // Re-absolutize here so has() comparisons against t.testFile (absolute) work correctly.
58
- const relevantExternalPaths = new Set((stateData.repositoryAnalysis?.relevantExternalTestPaths ?? []).map((p) => path.isAbsolute(p) ? p : path.resolve(repositoryPath, p)));
59
- const existingTests = (stateData.existingTests || []).filter((t) => {
60
- if ((t.source ?? TestSource.Skyramp) !== TestSource.External)
61
- return true;
62
- return relevantExternalPaths.has(t.testFile);
63
- });
64
- const skyrampCount = existingTests.filter((t) => (t.source ?? TestSource.Skyramp) !== TestSource.External).length;
65
- const externalCount = existingTests.length - skyrampCount;
66
- logger.info(`Loaded ${skyrampCount} Skyramp + ${externalCount} relevant external tests from state file`);
67
56
  const absoluteRepoPath = path.resolve(repositoryPath);
68
57
  const scannedEndpoints = stateData.repositoryAnalysis?.skeletonEndpoints || [];
69
58
  const routerMountContext = stateData.repositoryAnalysis?.routerMountContext;
@@ -87,7 +76,6 @@ The LLM follows the returned prompt (Action Decision Tree pre-scan → Endpoint
87
76
  routerMountContext,
88
77
  candidateRouteFiles,
89
78
  diffFilePath,
90
- relevantExternalTestPaths: [...relevantExternalPaths],
91
79
  });
92
80
  return {
93
81
  structuredContent: { prompt: promptText },
@@ -3,7 +3,7 @@ import * as path from "path";
3
3
  import { z } from "zod";
4
4
  import { logger } from "../../utils/logger.js";
5
5
  import { enumerateCandidateUiPages } from "../../utils/uiPageEnumerator.js";
6
- import { isFrontendFile, isTestFile } from "../../prompts/test-recommendation/scopeAssessment.js";
6
+ import { hasFlutterSdkDep, isFrontendFile, isTestFile } from "../../prompts/test-recommendation/scopeAssessment.js";
7
7
  import { parseChangedFilesFromDiff } from "../../utils/branchDiff.js";
8
8
  import { toolText } from "../../utils/utils.js";
9
9
  import { isTestbotEnabled } from "../../utils/featureFlags.js";
@@ -49,7 +49,13 @@ export async function runUiAnalyzeChanges(params) {
49
49
  instructions: DIFF_FILE_MISSING_INSTRUCTIONS,
50
50
  };
51
51
  }
52
- const frontendFiles = changedFiles.filter((f) => isFrontendFile(f) && !isTestFile(f));
52
+ // Compute hasFlutterSdkDep once at the tool boundary; pass into isFrontendFile
53
+ // so .dart files are recognised as frontend in Flutter repos. See Confluence
54
+ // "Flutter support in Testbot" — this is one of two budget-driving call sites
55
+ // that must thread the flag (the other is analyzeChangesTool). Without it,
56
+ // a Flutter PR shows zero frontend files and never enters the UI pipeline.
57
+ const flutterSdk = hasFlutterSdkDep(repoPath);
58
+ const frontendFiles = changedFiles.filter((f) => isFrontendFile(f, { hasFlutterSdkDep: flutterSdk }) && !isTestFile(f));
53
59
  if (frontendFiles.length === 0) {
54
60
  const uiContext = {
55
61
  changedFrontendFiles: [],