@skyramp/mcp 0.1.8 → 0.2.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/build/index.js +4 -2
  2. package/build/playwright/registerPlaywrightTools.js +12 -0
  3. package/build/playwright/traceRecordingPrompt.js +15 -0
  4. package/build/prompts/code-reuse.js +106 -7
  5. package/build/prompts/pom-aware-code-reuse.js +106 -7
  6. package/build/prompts/startTraceCollectionPrompts.js +37 -15
  7. package/build/prompts/test-maintenance/drift-analysis-prompt.js +26 -31
  8. package/build/prompts/test-maintenance/drift-analysis-prompt.test.js +40 -1
  9. package/build/prompts/test-maintenance/driftAnalysisSections.js +90 -86
  10. package/build/prompts/test-recommendation/analysisOutputPrompt.js +286 -163
  11. package/build/prompts/test-recommendation/analysisOutputPrompt.test.js +154 -45
  12. package/build/prompts/test-recommendation/diffExecutionPlan.js +246 -117
  13. package/build/prompts/test-recommendation/promptPlan.js +290 -0
  14. package/build/prompts/test-recommendation/promptPlan.test.js +336 -0
  15. package/build/prompts/test-recommendation/recommendationSections.js +4 -3
  16. package/build/prompts/test-recommendation/recommendationShared.js +23 -1
  17. package/build/prompts/test-recommendation/scopeAssessment.js +65 -14
  18. package/build/prompts/test-recommendation/scopeAssessment.test.js +93 -2
  19. package/build/prompts/test-recommendation/test-recommendation-prompt.js +36 -12
  20. package/build/prompts/test-recommendation/test-recommendation-prompt.test.js +316 -1
  21. package/build/prompts/testbot/testbot-prompts.js +73 -13
  22. package/build/prompts/testbot/testbot-prompts.test.js +114 -1
  23. package/build/resources/testbotResource.js +1 -1
  24. package/build/services/ScenarioGenerationService.integration.test.js +158 -0
  25. package/build/services/ScenarioGenerationService.js +47 -4
  26. package/build/services/ScenarioGenerationService.test.js +158 -22
  27. package/build/services/TestExecutionService.js +73 -15
  28. package/build/services/TestExecutionService.test.js +105 -0
  29. package/build/services/TestGenerationService.js +11 -1
  30. package/build/tools/executeSkyrampTestTool.js +1 -10
  31. package/build/tools/generate-tests/generateBatchScenarioRestTool.js +16 -4
  32. package/build/tools/generate-tests/generateIntegrationRestTool.js +2 -0
  33. package/build/tools/generate-tests/generateUIRestTool.js +2 -0
  34. package/build/tools/test-management/actionsTool.js +152 -63
  35. package/build/tools/test-management/analyzeChangesTool.js +178 -64
  36. package/build/tools/test-management/analyzeChangesTool.test.js +103 -16
  37. package/build/tools/test-management/analyzeTestHealthTool.js +30 -81
  38. package/build/tools/test-management/index.js +1 -0
  39. package/build/tools/test-management/uiAnalyzeChangesTool.js +149 -0
  40. package/build/tools/test-management/uiAnalyzeChangesTool.test.js +100 -0
  41. package/build/tools/trace/resolveSaveStoragePath.js +16 -0
  42. package/build/tools/trace/resolveSaveStoragePath.test.js +17 -0
  43. package/build/tools/trace/resolveSessionPaths.js +39 -0
  44. package/build/tools/trace/resolveSessionPaths.test.js +103 -0
  45. package/build/tools/trace/sessionState.js +14 -0
  46. package/build/tools/trace/sessionState.test.js +17 -0
  47. package/build/tools/trace/startTraceCollectionTool.js +84 -14
  48. package/build/tools/trace/stopTraceCollectionTool.js +9 -2
  49. package/build/types/TestAnalysis.js +50 -0
  50. package/build/types/TestRecommendation.js +6 -58
  51. package/build/types/TestTypes.js +1 -1
  52. package/build/utils/AnalysisStateManager.js +22 -11
  53. package/build/utils/branchDiff.js +11 -2
  54. package/build/utils/docker.test.js +1 -1
  55. package/build/utils/gitStaging.js +52 -3
  56. package/build/utils/gitStaging.test.js +19 -1
  57. package/build/utils/repoScanner.js +18 -10
  58. package/build/utils/repoScanner.test.js +92 -0
  59. package/build/utils/routeParsers.js +180 -25
  60. package/build/utils/routeParsers.test.js +180 -1
  61. package/build/utils/scenarioDrafting.js +220 -17
  62. package/build/utils/scenarioDrafting.test.js +182 -9
  63. package/build/utils/sourceRouteExtractor.js +806 -0
  64. package/build/utils/sourceRouteExtractor.test.js +565 -0
  65. package/build/utils/uiPageEnumerator.js +319 -0
  66. package/build/utils/uiPageEnumerator.test.js +422 -0
  67. package/build/utils/utils.js +27 -0
  68. package/build/utils/versions.js +1 -1
  69. package/build/utils/workspaceAuth.js +33 -4
  70. package/node_modules/playwright/ThirdPartyNotices.txt +6 -6
  71. package/node_modules/playwright/lib/dom-analyzer/analyze.js +111 -0
  72. package/node_modules/playwright/lib/dom-analyzer/blueprint.js +1210 -0
  73. package/node_modules/playwright/lib/dom-analyzer/blueprint.test.js +396 -0
  74. package/node_modules/playwright/lib/dom-analyzer/blueprintCache.js +57 -0
  75. package/node_modules/playwright/lib/dom-analyzer/blueprintCache.test.js +57 -0
  76. package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.js +254 -0
  77. package/node_modules/playwright/lib/dom-analyzer/blueprintDiff.test.js +304 -0
  78. package/node_modules/playwright/lib/dom-analyzer/crawler.js +384 -0
  79. package/node_modules/playwright/lib/dom-analyzer/curatedWidgets.js +73 -0
  80. package/node_modules/playwright/lib/dom-analyzer/dynamicId.js +43 -0
  81. package/node_modules/playwright/lib/dom-analyzer/dynamicId.test.js +85 -0
  82. package/node_modules/playwright/lib/dom-analyzer/fingerprint.js +90 -0
  83. package/node_modules/playwright/lib/dom-analyzer/fingerprint.test.js +231 -0
  84. package/node_modules/playwright/lib/dom-analyzer/fingerprintAblation.fixtures.js +145 -0
  85. package/node_modules/playwright/lib/dom-analyzer/fingerprintAblation.test.js +41 -0
  86. package/node_modules/playwright/lib/dom-analyzer/graph.js +36 -0
  87. package/node_modules/playwright/lib/dom-analyzer/liveFingerprints.js +43 -0
  88. package/node_modules/playwright/lib/dom-analyzer/logicalNameResolver.js +72 -0
  89. package/node_modules/playwright/lib/dom-analyzer/logicalNameResolver.test.js +182 -0
  90. package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.js +150 -0
  91. package/node_modules/playwright/lib/dom-analyzer/possibleAssertions.test.js +470 -0
  92. package/node_modules/playwright/lib/dom-analyzer/sectionGrouper.js +169 -0
  93. package/node_modules/playwright/lib/dom-analyzer/sectionGrouper.test.js +269 -0
  94. package/node_modules/playwright/lib/dom-analyzer/serialization.js +75 -0
  95. package/node_modules/playwright/lib/dom-analyzer/slug.js +30 -0
  96. package/node_modules/playwright/lib/dom-analyzer/slug.test.js +84 -0
  97. package/node_modules/playwright/lib/dom-analyzer/widgetContract.js +127 -0
  98. package/node_modules/playwright/lib/dom-analyzer/widgetContract.test.js +212 -0
  99. package/node_modules/playwright/lib/mcp/browser/browserContextFactory.js +3 -1
  100. package/node_modules/playwright/lib/mcp/browser/config.js +1 -1
  101. package/node_modules/playwright/lib/mcp/browser/context.js +17 -1
  102. package/node_modules/playwright/lib/mcp/browser/tab.js +38 -0
  103. package/node_modules/playwright/lib/mcp/browser/tools/domAnalyzer.js +261 -0
  104. package/node_modules/playwright/lib/mcp/browser/tools/keyboard.js +3 -3
  105. package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.js +146 -0
  106. package/node_modules/playwright/lib/mcp/browser/tools/pageBlueprint.test.js +140 -0
  107. package/node_modules/playwright/lib/mcp/browser/tools/sitemap.js +226 -0
  108. package/node_modules/playwright/lib/mcp/browser/tools/snapshot.js +2 -2
  109. package/node_modules/playwright/lib/mcp/browser/tools/widgetContract.js +168 -0
  110. package/node_modules/playwright/lib/mcp/browser/tools.js +6 -0
  111. package/node_modules/playwright/lib/mcp/skyramp/traceRecordingBackend.js +52 -12
  112. package/node_modules/playwright/lib/mcp/test/skyRampExport.js +64 -13
  113. package/node_modules/playwright/package.json +1 -1
  114. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.3.tgz +0 -0
  115. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.4.tgz +0 -0
  116. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.5.tgz +0 -0
  117. package/node_modules/playwright/skyramp-playwright-1.58.2-skyramp.8.9.6.tgz +0 -0
  118. package/package.json +3 -3
  119. package/build/services/TestHealthService.js +0 -694
  120. package/build/services/TestHealthService.test.js +0 -241
  121. package/build/types/TestDriftAnalysis.js +0 -1
  122. package/build/types/TestHealth.js +0 -4
@@ -1,11 +1,14 @@
1
1
  import { z } from "zod";
2
+ import fs from "fs";
3
+ import path from "path";
2
4
  import { SkyrampClient } from "@skyramp/skyramp";
3
5
  import openProxyTerminalTracked from "../../utils/proxy-terminal.js";
4
6
  import { getEntryPoint } from "../../utils/telemetry.js";
5
7
  import { logger } from "../../utils/logger.js";
6
- import { basePlaywrightSchema, baseSchema, } from "../../types/TestTypes.js";
8
+ import { basePlaywrightSchema, baseSchema, SESSION_STORAGE_FILENAME, } from "../../types/TestTypes.js";
7
9
  import { AnalyticsService } from "../../services/AnalyticsService.js";
8
- import path from "path";
10
+ import { resolveSessionPaths } from "./resolveSessionPaths.js";
11
+ import { setSavedSessionPath } from "./sessionState.js";
9
12
  const TOOL_NAME = "skyramp_start_trace_collection";
10
13
  export function registerTraceTool(server) {
11
14
  server.registerTool(TOOL_NAME, {
@@ -21,12 +24,42 @@ WORKFLOW:
21
24
  3. Stop trace collection to save captured data
22
25
  4. Use traces to generate test scenarios
23
26
 
27
+ SESSION HANDLING:
28
+ Pass \`sessionMode\` to declare what you want to do with the workspace's session file. Defaults to \`auto\`, which does the right thing for most prompts.
29
+
30
+ \`sessionMode: "capture"\` — Use when the user wants to log in once and SAVE a session for reuse later. Triggers: "save my session", "log in once", "store the session", "create a new session", "capture login".
31
+ Workflow:
32
+ 1. Start trace collection (sessionMode=capture).
33
+ 2. USER logs in.
34
+ 3. Stop trace collection AFTER the post-login page is fully loaded — wait a few seconds for the app's initial authenticated API calls to fire, then stop. Do not interact further; those actions would pollute the trace and dilute the saved cookies.
35
+ 4. The session file lands at \`outputDir/${SESSION_STORAGE_FILENAME}\`.
36
+
37
+ \`sessionMode: "reuse"\` — Use when the user wants to RECORD a test flow against an existing authenticated session. Triggers: "load my session", "use my saved session", "skip login", "reuse the session", "I'm already logged in".
38
+ Workflow:
39
+ 1. Start trace collection (sessionMode=reuse). The recorder loads \`outputDir/${SESSION_STORAGE_FILENAME}\` so the browser starts already authenticated.
40
+ 2. USER walks through the test flow (no login needed).
41
+ 3. Stop trace collection when the flow is complete.
42
+ 4. The session file is NOT overwritten — this preserves the captured session for the next recording or test run.
43
+
44
+ \`sessionMode: "ignore"\` — Use when the user explicitly does NOT want session handling. Triggers: "fresh trace", "without session", "ignore my session".
45
+
46
+ \`sessionMode: "auto"\` (default) — Decides between capture and reuse based on whether a session file already exists in \`outputDir\`. Use when the user does not signal an intent either way. If a session file is present, behaves like \`reuse\`; otherwise like \`capture\`.
47
+
24
48
  For detailed documentation visit: https://www.skyramp.dev/docs/load-test/advanced-generation#start-trace-collection`,
25
49
  inputSchema: {
26
50
  playwright: z
27
51
  .boolean()
28
52
  .describe("Whether to enable Playwright for trace collection. Set to true for UI interactions, false for API-only tracing")
29
53
  .default(true),
54
+ sessionMode: z
55
+ .enum(["auto", "capture", "reuse", "ignore"])
56
+ .default("auto")
57
+ .describe(`Controls how Playwright session storage is handled for this trace. ` +
58
+ `"capture" = save a fresh session at outputDir/${SESSION_STORAGE_FILENAME} (use when user wants to log in once and save). ` +
59
+ `"reuse" = load that file into the browser and DO NOT overwrite it (use when user wants to record a test flow against an existing session). ` +
60
+ `"ignore" = neither load nor save. ` +
61
+ `"auto" (default) = reuse if the session file already exists in outputDir, otherwise capture. ` +
62
+ `Explicit playwrightStoragePath / playwrightSaveStoragePath overrides still win when provided.`),
30
63
  browser: basePlaywrightSchema.shape.browser,
31
64
  device: basePlaywrightSchema.shape.device,
32
65
  playwrightStoragePath: basePlaywrightSchema.shape.playwrightStoragePath,
@@ -95,14 +128,32 @@ For detailed documentation visit: https://www.skyramp.dev/docs/load-test/advance
95
128
  outputDir: params.outputDir,
96
129
  prompt: params.prompt,
97
130
  });
98
- let saveStoragePath = params.playwrightSaveStoragePath;
99
- if (saveStoragePath) {
100
- // If saveStoragePath is just a filename (no directory separators), use outputDir
101
- if (params.outputDir && !saveStoragePath.includes(path.sep) && !path.isAbsolute(saveStoragePath)) {
102
- saveStoragePath = path.join(params.outputDir, saveStoragePath);
131
+ const defaultSessionFile = path.join(params.outputDir, SESSION_STORAGE_FILENAME);
132
+ const sessionExists = (() => {
133
+ try {
134
+ return fs.existsSync(defaultSessionFile);
103
135
  }
104
- logger.info("Session storage will be saved to:", { saveStoragePath });
105
- }
136
+ catch {
137
+ return false;
138
+ }
139
+ })();
140
+ const { loadPath, savePath } = resolveSessionPaths({
141
+ mode: params.sessionMode,
142
+ loadOverride: params.playwrightStoragePath,
143
+ saveOverride: params.playwrightSaveStoragePath,
144
+ outputDir: params.outputDir,
145
+ sessionExists,
146
+ });
147
+ logger.info("Resolved session paths", {
148
+ sessionMode: params.sessionMode,
149
+ sessionExists,
150
+ loadPath: loadPath ?? "(not loading)",
151
+ savePath: savePath ?? "(not saving)",
152
+ });
153
+ // Carry the save path forward so the stop tool can name it in its message.
154
+ // When savePath is undefined (e.g. reuse mode), the stop tool just won't
155
+ // reference one — which is correct: the session file isn't being touched.
156
+ setSavedSessionPath(savePath);
106
157
  try {
107
158
  // Send initial progress
108
159
  await sendProgress(0, 100, "Initializing trace collection...");
@@ -119,13 +170,16 @@ For detailed documentation visit: https://www.skyramp.dev/docs/load-test/advance
119
170
  playwright: params.playwright,
120
171
  browser: params.browser,
121
172
  device: params.device,
122
- playwrightStoragePath: params.playwrightStoragePath,
123
173
  playwrightViewportSize: params.playwrightViewportSize,
124
174
  entrypoint: getEntryPoint(),
125
175
  };
126
- if (saveStoragePath) {
127
- generateOptions.playwrightSaveStoragePath = saveStoragePath;
128
- }
176
+ // Only pass the storage options when sessionMode actually wants them.
177
+ // Leaving them undefined tells the underlying recorder to skip load /
178
+ // save entirely — that's what prevents the silent-overwrite trap.
179
+ if (loadPath)
180
+ generateOptions.playwrightStoragePath = loadPath;
181
+ if (savePath)
182
+ generateOptions.playwrightSaveStoragePath = savePath;
129
183
  // Send progress for configuration
130
184
  const traceMode = params.playwright ? "UI + Backend" : "Backend-only";
131
185
  await sendProgress(30, 100, `Configuring ${traceMode} trace collection...`);
@@ -151,6 +205,8 @@ For detailed documentation visit: https://www.skyramp.dev/docs/load-test/advance
151
205
  clearInterval(progressInterval);
152
206
  }
153
207
  if (result.toLowerCase().includes("failed")) {
208
+ // Clear stashed session path so a failed start does not leak into a later unrelated stop.
209
+ setSavedSessionPath(undefined);
154
210
  errorResult = {
155
211
  content: [
156
212
  {
@@ -167,17 +223,31 @@ For detailed documentation visit: https://www.skyramp.dev/docs/load-test/advance
167
223
  await openProxyTerminalTracked();
168
224
  // Send completion progress
169
225
  await sendProgress(100, 100, "Trace collection started successfully");
226
+ const sessionGuidance = (() => {
227
+ if (loadPath && savePath) {
228
+ return `\n\nSession ${loadPath} loaded into the browser; will be re-saved to ${savePath} on stop.`;
229
+ }
230
+ if (loadPath) {
231
+ return `\n\nSession ${loadPath} loaded into the browser — the browser starts already authenticated. The session file will NOT be overwritten on stop.`;
232
+ }
233
+ if (savePath) {
234
+ return `\n\nPlaywright session storage will be saved to ${savePath}. If the goal is to capture an authenticated session, ask the user to log in and then call skyramp_stop_trace_collection a few seconds AFTER the post-login page is fully loaded — long enough for the app's initial authenticated API calls to fire, short enough that no test-flow interactions are captured.`;
235
+ }
236
+ return "";
237
+ })();
170
238
  errorResult = {
171
239
  content: [
172
240
  {
173
241
  type: "text",
174
- text: `Trace collection started: ${result}. Please let me know when you are ready to stop the trace collection.`,
242
+ text: `Trace collection started: ${result}. Please let me know when you are ready to stop the trace collection.${sessionGuidance}`,
175
243
  },
176
244
  ],
177
245
  };
178
246
  return errorResult;
179
247
  }
180
248
  catch (error) {
249
+ // Clear stashed session path so a failed start does not leak into a later unrelated stop.
250
+ setSavedSessionPath(undefined);
181
251
  errorResult = {
182
252
  content: [
183
253
  {
@@ -6,6 +6,7 @@ import { logger } from "../../utils/logger.js";
6
6
  import { baseSchema } from "../../types/TestTypes.js";
7
7
  import { existsSync, mkdirSync } from "fs";
8
8
  import { AnalyticsService } from "../../services/AnalyticsService.js";
9
+ import { consumeSavedSessionPath } from "./sessionState.js";
9
10
  const TOOL_NAME = "skyramp_stop_trace_collection";
10
11
  export function registerTraceStopTool(server) {
11
12
  server.registerTool(TOOL_NAME, {
@@ -103,14 +104,20 @@ For detailed documentation visit: https://www.skyramp.dev/docs/load-test/advance
103
104
  };
104
105
  return errorResult;
105
106
  }
107
+ const savedSession = consumeSavedSessionPath();
108
+ const sessionAppendix = savedSession
109
+ ? `\n\nPlaywright session storage saved to: ${savedSession}\nRe-use it by:\n` +
110
+ `• Pass \`playwrightStoragePath: "${savedSession}"\` to skyramp_start_trace_collection for future recordings (skips login).\n` +
111
+ `• Generated tests that reference \`storageState: "${savedSession}"\` will auto-mount the file when run via skyramp_execute_test.`
112
+ : "";
106
113
  errorResult = {
107
114
  content: [
108
115
  {
109
116
  type: "text",
110
117
  text: `Trace collection is stopped: ${result}. Trace is generated to given output file
111
-
118
+
112
119
  **IMPORTANT: GO THROUGH THE TRACE AND LET THE USER KNOW THE ENDPOINT DOMAINS CAPTURED AND MAKE SURE USER WANTS TO INCLUDE THEN FOR INTEGRATION/E2E/LOAD TEST GENERATION.
113
- UI TESTS CAN BE GENERATED USING PLAYWRIGHT FILES ONLY.**`,
120
+ UI TESTS CAN BE GENERATED USING PLAYWRIGHT FILES ONLY.**${sessionAppendix}`,
114
121
  },
115
122
  ],
116
123
  };
@@ -1,6 +1,56 @@
1
/**
 * String-valued enum of recommendation priorities.
 * Keys are the symbolic names; values are the lowercase strings used at runtime.
 */
export const RecommendationPriority = {
    High: "high",
    Medium: "medium",
    Low: "low",
};
7
/**
 * String-valued enum of issue severities, listed from least to most severe.
 */
export const IssueSeverity = {
    Low: "low",
    Medium: "medium",
    High: "high",
    Critical: "critical",
};
14
/**
 * String-valued enum naming the kinds of change a drift analysis can report.
 * Values are the snake_case identifiers used at runtime.
 */
export const DriftChangeType = {
    // API endpoint changes
    EndpointAdded: "endpoint_added",
    EndpointRemoved: "endpoint_removed",
    EndpointRenamed: "endpoint_renamed",
    EndpointModified: "endpoint_modified",
    AuthenticationChanged: "authentication_changed",
    SchemaChanges: "schema_changes",
    // Route changes
    RouteChanged: "route_changed",
    RouteAdded: "route_added",
    RouteRemoved: "route_removed",
    // UI component changes
    UiComponentAdded: "ui_component_added",
    UiComponentRemoved: "ui_component_removed",
    UiComponentModified: "ui_component_modified",
    UiComponentRestructured: "ui_component_restructured",
    // Code-level changes
    DependencyChanged: "dependency_changed",
    FunctionChanged: "function_changed",
    ClassChanged: "class_changed",
    BreakingChange: "breaking_change",
    CodeChange: "code_change",
};
1
35
  /** Origin of a test file — whether it was generated by Skyramp or is user/third-party maintained. */
2
36
  export var TestSource;
3
37
  (function (TestSource) {
4
38
  TestSource["Skyramp"] = "skyramp";
5
39
  TestSource["External"] = "external";
6
40
  })(TestSource || (TestSource = {}));
41
/**
 * Action the LLM health assessment assigns to an existing test after drift
 * analysis. Values are the upper-case tokens used at runtime.
 */
export const DriftAction = {
    Update: "UPDATE",
    Regenerate: "REGENERATE",
    Delete: "DELETE",
    Verify: "VERIFY",
    Ignore: "IGNORE",
};
50
/**
 * Rough size of the effort needed to apply a drift UPDATE action.
 * Unlike the other enums in this module, the values are capitalised.
 */
export const EstimatedWork = {
    Small: "Small",
    Medium: "Medium",
    Large: "Large",
};
@@ -1,14 +1,12 @@
1
- import { z } from "zod";
2
- import { TestType } from "./TestTypes.js";
3
1
  /** Internal-only categories (not submitted to tools). */
4
2
  const INTERNAL_CATEGORIES = [
5
3
  "new_endpoint", // CRITICAL - diff-direct scenarios always fill GENERATE slots first
4
+ "bug_caught", // CRITICAL - tests targeting a specific <bug_found> flaw identified during enrichment
6
5
  ];
7
6
  /** External categories valid for tool submissions, ordered by priority. */
8
7
  const CATEGORIES = [
9
- // CRITICAL priority
10
- "business_rule", // formula bugs, unique constraints, state machines — most common production failures
11
8
  // HIGH priority
9
+ "business_rule", // formula bugs, unique constraints, state machines — most common production failures
12
10
  "security_boundary", // auth, permission, cross-user isolation, idempotency
13
11
  "data_integrity", // cascade deletes, orphan prevention, referential integrity
14
12
  "breaking_change", // route renames, auth migration, response shape changes
@@ -27,7 +25,8 @@ export const TEST_CATEGORIES = CATEGORIES;
27
25
  /** Priority assignment for each category. */
28
26
  export const CATEGORY_PRIORITY = {
29
27
  new_endpoint: "CRITICAL",
30
- business_rule: "CRITICAL", // formula/business-logic bugs are the most common production failures
28
+ bug_caught: "CRITICAL", // tests targeting a <bug_found> flaw always in GENERATE
29
+ business_rule: "HIGH", // formula/business-logic bugs are high priority but CRITICAL is reserved for new-endpoint diff-direct scenarios
31
30
  security_boundary: "HIGH",
32
31
  data_integrity: "HIGH",
33
32
  breaking_change: "HIGH",
@@ -41,58 +40,7 @@ export const CATEGORY_PRIORITY = {
41
40
  export function externalCategory(cat) {
42
41
  if (cat === "new_endpoint")
43
42
  return "crud";
43
+ if (cat === "bug_caught")
44
+ return "business_rule";
44
45
  return cat;
45
46
  }
46
- // Test type to documentation URL mapping
47
- export const TEST_TYPE_DOCS = {
48
- [TestType.SMOKE]: "https://www.skyramp.dev/docs/smoke-tests",
49
- [TestType.CONTRACT]: "https://www.skyramp.dev/docs/contract-tests",
50
- [TestType.FUZZ]: "https://www.skyramp.dev/docs/fuzz-tests",
51
- [TestType.INTEGRATION]: "https://www.skyramp.dev/docs/integration-tests",
52
- [TestType.LOAD]: "https://www.skyramp.dev/docs/load-tests",
53
- [TestType.E2E]: "https://www.skyramp.dev/docs/e2e-tests",
54
- [TestType.UI]: "https://www.skyramp.dev/docs/ui-tests",
55
- [TestType.MOCK]: "https://www.skyramp.dev/docs/mocks",
56
- };
57
- // Zod schemas for validation
58
- export const specificTestSchema = z.object({
59
- testName: z.string(),
60
- description: z.string(),
61
- targetEndpoint: z.string().optional(),
62
- targetFlow: z.string().optional(),
63
- // generationPrompt: z.string(),
64
- requiredInputs: z.object({
65
- available: z.array(z.object({
66
- name: z.string(),
67
- path: z.string(),
68
- })),
69
- missing: z.array(z.object({
70
- name: z.string(),
71
- guidance: z.string(),
72
- })),
73
- }),
74
- estimatedValue: z.string(),
75
- });
76
- export const testTypeRecommendationSchema = z.object({
77
- priority: z.enum(["high", "medium", "low"]),
78
- testType: z.nativeEnum(TestType),
79
- category: z.enum(TEST_CATEGORIES),
80
- rationale: z.string(),
81
- reasoning: z.string(),
82
- specificTests: z.array(specificTestSchema),
83
- gettingStarted: z.object({
84
- prerequisites: z.array(z.string()),
85
- quickStartCommand: z.string().optional(),
86
- documentationUrl: z.string(),
87
- }),
88
- });
89
- export const testRecommendationSchema = z.object({
90
- summary: z.object({
91
- totalRecommended: z.number(),
92
- highPriorityCount: z.number(),
93
- estimatedEffort: z.string(),
94
- quickWins: z.array(z.string()),
95
- }),
96
- recommendations: z.array(testTypeRecommendationSchema),
97
- nextSteps: z.array(z.string()),
98
- });
@@ -141,7 +141,7 @@ export const basePlaywrightSchema = z.object({
141
141
  playwrightSaveStoragePath: z
142
142
  .string()
143
143
  .optional()
144
- .describe(`Path to SAVE Playwright session storage after trace collection. ONLY provide this when user explicitly says 'with session storage', 'save session', or similar. If user specifies this without a path, defaults to '${SESSION_STORAGE_FILENAME}' in the outputDir. This SAVES authentication state (cookies, localStorage, sessionStorage) when the browser closes. To LOAD existing auth state, use playwrightStoragePath instead. Can be relative (e.g., 'auth.json') or absolute path.`),
144
+ .describe(`Path to SAVE Playwright session storage after trace collection. Omit this argument to use the default \`${SESSION_STORAGE_FILENAME}\` resolved against the active outputDir, which produces an absolute path like '/abs/outputDir/${SESSION_STORAGE_FILENAME}'. SAVES authentication state (cookies, localStorage, sessionStorage) when the browser closes. To LOAD existing auth state, use playwrightStoragePath instead. Can be a bare filename (joined with outputDir), a relative path, or an absolute path.`),
145
145
  playwrightViewportSize: z
146
146
  .union([
147
147
  z.enum(["", "hd", "full-hd", "2k"]),
@@ -10,6 +10,18 @@ import { logger } from "./logger.js";
10
10
  * multiple MCP clients share the same filesystem (e.g. /tmp).
11
11
  */
12
12
  const processSessionRegistry = new Map();
13
/**
 * Cross-repo test directory.
 *
 * Set by skyramp_analyze_changes when testsRepoDir is provided. Test
 * generation tools read it to rewrite outputDir so that generated files land
 * in the test repo clone rather than in the source repo. Stays undefined
 * until a caller stores a value.
 */
let _testsRepoDir;

/**
 * Store the cross-repo test directory.
 * @param dir Directory to remember; the value is stored as given, so passing undefined clears it.
 */
export function setTestsRepoDir(dir) {
    _testsRepoDir = dir;
}

/**
 * Read back the cross-repo test directory.
 * @returns The value last passed to setTestsRepoDir, or undefined if none was stored.
 */
export function getTestsRepoDir() {
    return _testsRepoDir;
}
13
25
  /**
14
26
  * In-memory session store: sessionId → { data, storedAt }.
15
27
  * Eliminates the need for the LLM to read/write state files on disk.
@@ -247,34 +259,33 @@ export class StateManager {
247
259
  * @param stateTypes Which state types to clean (defaults to all)
248
260
  * @returns Number of files deleted
249
261
  */
250
- static async cleanupOldStateFiles(maxAgeHours = 24, stateDir, stateTypes) {
262
+ static async cleanupOldFiles(maxAgeHours = 24, stateDir, stateTypes) {
251
263
  const baseDir = stateDir || os.tmpdir();
252
- const files = await fs.promises.readdir(baseDir);
253
- // Get prefixes to clean
254
- const prefixesToClean = stateTypes
264
+ const files = await fs.promises.readdir(baseDir).catch(() => []);
265
+ const statePrefixes = stateTypes
255
266
  ? stateTypes.map((t) => STATE_FILE_PREFIXES[t])
256
267
  : Object.values(STATE_FILE_PREFIXES);
257
- const stateFiles = files.filter((f) => prefixesToClean.some((prefix) => f.startsWith(prefix)));
268
+ const candidates = files.filter((f) => statePrefixes.some((prefix) => f.startsWith(prefix)) ||
269
+ (f.startsWith("skyramp-diff-") && f.endsWith(".diff")));
258
270
  let deletedCount = 0;
259
271
  const now = Date.now();
260
272
  const maxAge = maxAgeHours * 60 * 60 * 1000;
261
- for (const file of stateFiles) {
273
+ for (const file of candidates) {
262
274
  const filePath = path.join(baseDir, file);
263
275
  try {
264
276
  const stats = await fs.promises.stat(filePath);
265
- const age = now - stats.mtimeMs;
266
- if (age > maxAge) {
277
+ if (now - stats.mtimeMs > maxAge) {
267
278
  await fs.promises.unlink(filePath);
268
279
  deletedCount++;
269
- logger.debug(`Deleted old state file: ${filePath}`);
280
+ logger.debug(`Deleted old temp file: ${filePath}`);
270
281
  }
271
282
  }
272
283
  catch (error) {
273
- logger.error(`Failed to delete state file ${filePath}: ${error.message}`);
284
+ logger.error(`Failed to delete temp file ${filePath}: ${error.message}`);
274
285
  }
275
286
  }
276
287
  if (deletedCount > 0) {
277
- logger.info(`Cleaned up ${deletedCount} old state files`);
288
+ logger.info(`Cleaned up ${deletedCount} old temp files`);
278
289
  }
279
290
  return deletedCount;
280
291
  }
@@ -1,9 +1,18 @@
1
1
  import { simpleGit } from "simple-git";
2
2
  import { logger } from "./logger.js";
3
3
  /**
4
- * Try a git diff against the given ref. Returns undefined if the ref doesn't exist
5
- * or the diff fails, so the caller can try the next candidate.
4
+ * Extract every file path mentioned in a unified-diff `diff --git` header.
5
+ * Always uses the `b/` form so renames return the new path.
6
6
  */
7
export function parseChangedFilesFromDiff(rawDiff) {
    // Purpose: collect the `b/` (post-change) path from every `diff --git`
    // header in a unified diff, in order of appearance.
    //
    // @param rawDiff  Unified diff text. Nullish input yields an empty list.
    // @returns        Array of paths (may contain duplicates if the diff does).
    //
    // Paths may contain spaces, which git writes unquoted, so the header
    // cannot be split on whitespace. Headers that git wraps in double quotes
    // (paths with tabs, newlines, quotes, ...) are not matched, exactly as
    // before this change.
    const text = String(rawDiff ?? "");
    const changed = [];
    // `.` never matches a line terminator and `$` (with /m) stops before
    // "\r" as well as "\n", so CRLF diffs are handled without extra trimming.
    const headerLine = /^diff --git (a\/.+)$/gm;
    let header;
    while ((header = headerLine.exec(text)) !== null) {
        const pathsPart = header[1];
        // Common case: no rename, so both sides name the same path. Matching
        // that symmetry first resolves paths that themselves contain " b/".
        // Rename headers fall back to splitting at the first " b/".
        const parsed = /^a\/(.+) b\/\1$/.exec(pathsPart) ??
            /^a\/.+? b\/(.+)$/.exec(pathsPart);
        if (parsed) {
            changed.push(parsed[1]);
        }
    }
    return changed;
}
7
16
  /** Parse diff headers to find newly created and deleted files. */
8
17
  function parseNewAndDeletedFiles(rawDiff) {
9
18
  const newFiles = [];
@@ -54,7 +54,7 @@ describe("dockerImageExistsLocally", () => {
54
54
  });
55
55
  });
56
56
  describe("pullDockerImage", () => {
57
- const IMAGE = "skyramp/executor:v1.3.24";
57
+ const IMAGE = "skyramp/executor:v1.3.25";
58
58
  beforeEach(() => jest.clearAllMocks());
59
59
  describe("on amd64 host", () => {
60
60
  const originalArch = process.arch;
@@ -1,18 +1,67 @@
1
+ import { execFileSync } from "child_process";
1
2
  import { execFile } from "child_process";
2
3
  import { promisify } from "util";
4
+ import fs from "fs";
5
+ import path from "path";
3
6
  import { logger } from "./logger.js";
4
7
  import { isTestbotEnabled } from "./featureFlags.js";
8
/**
 * Check whether `child` is `parent` itself or located somewhere beneath it.
 *
 * Both paths are resolved to absolute form and compared by whole path
 * segments, so a sibling that merely shares a name prefix does not match
 * (e.g. /tmp/test-repo2 is not inside /tmp/test-repo).
 * Comparing via `path.relative` also works when `parent` is a filesystem
 * root, where appending a separator to the resolved path would produce "//"
 * and reject every child.
 *
 * @param {string} child - Path to test; relative paths resolve against the cwd.
 * @param {string} parent - Directory that should contain `child`.
 * @returns {boolean} True when `child` equals `parent` or is nested inside it.
 */
export function isInsideDir(child, parent) {
    const rel = path.relative(path.resolve(parent), path.resolve(child));
    if (rel === "") {
        // Same directory counts as inside.
        return true;
    }
    if (path.isAbsolute(rel)) {
        // No relative route exists (e.g. a different drive on Windows).
        return false;
    }
    // Only a leading ".." segment means escaping; a name like "..cache" does not.
    return rel !== ".." && !rel.startsWith(`..${path.sep}`);
}
18
/**
 * Relocate an output directory into the test repo clone (cross-repo mode).
 *
 * The path is returned untouched when `testsRepoDir` is not set or when
 * `outputDir` already lies inside it. Otherwise an absolute `outputDir`
 * contributes only its final segment, while a relative one is appended to
 * `testsRepoDir` as-is.
 *
 * @param {string} outputDir - Requested output directory.
 * @param {string} [testsRepoDir] - Root of the test repo clone, if any.
 * @returns {string} The directory to write to.
 */
export function resolveOutputDir(outputDir, testsRepoDir) {
    const needsRedirect = Boolean(testsRepoDir) && !isInsideDir(outputDir, testsRepoDir);
    if (!needsRedirect) {
        return outputDir;
    }
    if (path.isAbsolute(outputDir)) {
        return path.join(testsRepoDir, path.basename(outputDir));
    }
    return path.join(testsRepoDir, outputDir);
}
5
31
  const execFileAsync = promisify(execFile);
32
/**
 * Detect the git repository root for a given file or directory path.
 *
 * Runs `git rev-parse --show-toplevel` in the path itself when it is a
 * directory, otherwise in its parent directory.
 *
 * @param {string} filePath - Absolute path of a file or directory.
 * @returns {string|undefined} The repository root, or undefined when the
 *   path is not an absolute string, cannot be stat'ed, or git fails
 *   (for example because the path is not inside a git repo).
 */
function detectGitRoot(filePath) {
    if (typeof filePath !== "string" || !path.isAbsolute(filePath))
        return undefined;
    try {
        const dir = fs.statSync(filePath).isDirectory() ? filePath : path.dirname(filePath);
        const stdout = execFileSync("git", ["rev-parse", "--show-toplevel"], {
            cwd: dir,
            encoding: "utf8",
            // Failure is an expected outcome here; without this, git's
            // "fatal: not a git repository" is written to our own stderr.
            stdio: ["ignore", "pipe", "ignore"],
        });
        return stdout.trim() || undefined;
    }
    catch {
        return undefined;
    }
}
6
48
  /**
7
49
  * Stages a file path an MCP tool just wrote into the git index by
8
50
  * running `git add -- <path>`.
9
51
  *
52
+ * Automatically detects the git root of the target path so that staging
53
+ * works correctly in cross-repo mode (test repo clone separate from the
54
+ * source repo).
55
+ *
10
56
  * Gated by the SKYRAMP_FEATURE_TESTBOT=1 env var, which is set only
11
57
  * inside a testbot CI run.
12
58
  */
13
- export async function stageGeneratedPaths(path, cwd) {
59
+ export async function stageGeneratedPaths(filePath, cwd) {
14
60
  if (!isTestbotEnabled())
15
61
  return;
16
- await execFileAsync("git", ["add", "--", path], { cwd });
17
- logger.info("Staged generated file", { path });
62
+ const effectiveCwd = cwd ?? detectGitRoot(filePath);
63
+ await execFileAsync("git", ["add", "--", filePath], {
64
+ cwd: effectiveCwd,
65
+ });
66
+ logger.info("Staged generated file", { path: filePath, cwd: effectiveCwd });
18
67
  }
@@ -6,15 +6,25 @@ jest.mock("./logger.js", () => ({
6
6
  error: jest.fn(),
7
7
  },
8
8
  }));
9
+ jest.mock("fs", () => ({
10
+ statSync: (p) => ({
11
+ isDirectory: () => !p.includes("."),
12
+ }),
13
+ }));
9
14
  const execFileMock = jest.fn();
15
+ const execFileSyncMock = jest.fn();
10
16
  jest.mock("child_process", () => ({
11
17
  execFile: (cmd, args, opts, cb) => {
12
18
  const result = execFileMock(cmd, args, opts);
13
19
  const cbErr = result && typeof result === "object" && "err" in result
14
20
  ? result.err
15
21
  : null;
16
- cb(cbErr, "", "");
22
+ const cbStdout = result && typeof result === "object" && "stdout" in result
23
+ ? result.stdout
24
+ : "";
25
+ cb(cbErr, cbStdout, "");
17
26
  },
27
+ execFileSync: (cmd, args, opts) => execFileSyncMock(cmd, args, opts),
18
28
  }));
19
29
  import { stageGeneratedPaths } from "./gitStaging.js";
20
30
  import { logger } from "./logger.js";
@@ -33,6 +43,7 @@ afterAll(() => {
33
43
  });
34
44
  beforeEach(() => {
35
45
  execFileMock.mockReset();
46
+ execFileSyncMock.mockReset();
36
47
  loggerInfoMock.mockReset();
37
48
  });
38
49
  afterEach(() => {
@@ -49,6 +60,13 @@ describe("stageGeneratedPaths", () => {
49
60
  expect(execFileMock).toHaveBeenCalledWith("git", ["add", "--", "tests/a.py"], expect.objectContaining({}));
50
61
  expect(loggerInfoMock).toHaveBeenCalledWith("Staged generated file", expect.objectContaining({ path: "tests/a.py" }));
51
62
  });
63
+ it("auto-detects git root for absolute paths and uses it as cwd", async () => {
64
+ execFileSyncMock.mockReturnValue("/tmp/test-repo\n");
65
+ await stageGeneratedPaths("/tmp/test-repo/tests/a.py");
66
+ // Uses path.dirname for the cwd since the path is a file
67
+ expect(execFileSyncMock).toHaveBeenCalledWith("git", ["rev-parse", "--show-toplevel"], expect.objectContaining({ cwd: "/tmp/test-repo/tests" }));
68
+ expect(execFileMock).toHaveBeenCalledWith("git", ["add", "--", "/tmp/test-repo/tests/a.py"], expect.objectContaining({ cwd: "/tmp/test-repo" }));
69
+ });
52
70
  it("passes through the cwd option when provided", async () => {
53
71
  await stageGeneratedPaths("tests/a.py", "/repo/root");
54
72
  expect(execFileMock).toHaveBeenCalledWith("git", ["add", "--", "tests/a.py"], expect.objectContaining({ cwd: "/repo/root" }));
@@ -104,11 +104,13 @@ export function grepRouterMountingContext(repositoryPath) {
104
104
  function addEndpointToMap(endpointMap, apiPath, method, sourceFile, repositoryPath) {
105
105
  const relative = sourceFile.startsWith(repositoryPath)
106
106
  ? sourceFile.slice(repositoryPath.length + 1) : sourceFile;
107
- const existing = endpointMap.get(apiPath);
107
+ const normalizedPath = apiPath.startsWith("/") ? apiPath : `/${apiPath}`;
108
+ const key = `${relative}::${normalizedPath}`;
109
+ const existing = endpointMap.get(key);
108
110
  if (existing)
109
111
  existing.methods.add(method);
110
112
  else
111
- endpointMap.set(apiPath, { methods: new Set([method]), sourceFile: relative });
113
+ endpointMap.set(key, { path: normalizedPath, methods: new Set([method]), sourceFile: relative });
112
114
  }
113
115
  function scanNextjsFile(file, repositoryPath, endpointMap) {
114
116
  const relative = file.startsWith(repositoryPath)
@@ -140,7 +142,13 @@ function scanNextjsFile(file, repositoryPath, endpointMap) {
140
142
  return true;
141
143
  }
142
144
  /** Filename pattern used to identify candidate route/handler files. */
143
- const ROUTE_FILE_PATTERN = /route|controller|endpoint|handler|view|urls|api|router/i;
145
+ // Extended to cover NestJS (service, gateway, resolver) and other frameworks.
146
+ // Tested against the full relative path — terms here are specific enough not to over-match.
147
+ const ROUTE_FILE_PATTERN = /route|controller|endpoint|handler|view|urls|api|router|service|gateway|resolver|\bserver\b/i;
148
+ // Generic terms like "app" and "main" must only match the basename (filename),
149
+ // not directory names — otherwise every file under src/app/ would match by-name
150
+ // and fill MAX_CANDIDATE_FILES, potentially skipping the content pass.
151
+ const ROUTE_FILE_BASENAME_PATTERN = /\bapp\b|\bmain\b/i;
144
152
  /**
145
153
  * Content-based routing signature: a file is a route file if it contains BOTH
146
154
  * a URL-path-like string literal AND an HTTP method registration, regardless of
@@ -178,7 +186,7 @@ export function findCandidateRouteFiles(repositoryPath) {
178
186
  if (/test/i.test(f))
179
187
  continue;
180
188
  const relative = f.startsWith(repositoryPath) ? f.slice(repositoryPath.length + 1) : f;
181
- if (ROUTE_FILE_PATTERN.test(relative)) {
189
+ if (ROUTE_FILE_PATTERN.test(relative) || ROUTE_FILE_BASENAME_PATTERN.test(path.basename(relative))) {
182
190
  byName.push(relative);
183
191
  }
184
192
  else {
@@ -225,7 +233,7 @@ export function scanAllRepoEndpoints(repositoryPath) {
225
233
  continue;
226
234
  const relative = file.startsWith(repositoryPath)
227
235
  ? file.slice(repositoryPath.length + 1) : file;
228
- if (!ROUTE_FILE_PATTERN.test(relative))
236
+ if (!ROUTE_FILE_PATTERN.test(relative) && !ROUTE_FILE_BASENAME_PATTERN.test(path.basename(relative)))
229
237
  continue;
230
238
  const content = safeReadFile(file);
231
239
  if (content === null)
@@ -234,8 +242,8 @@ export function scanAllRepoEndpoints(repositoryPath) {
234
242
  addEndpointToMap(endpointMap, ep.path, ep.method, file, repositoryPath);
235
243
  }
236
244
  }
237
- return Array.from(endpointMap.entries()).map(([apiPath, data]) => ({
238
- path: apiPath,
245
+ return Array.from(endpointMap.values()).map((data) => ({
246
+ path: data.path,
239
247
  methods: Array.from(data.methods),
240
248
  sourceFile: data.sourceFile,
241
249
  }));
@@ -259,7 +267,7 @@ export function scanRelatedEndpoints(repositoryPath, changedFiles) {
259
267
  continue;
260
268
  const relative = file.startsWith(repositoryPath)
261
269
  ? file.slice(repositoryPath.length + 1) : file;
262
- if (!ROUTE_FILE_PATTERN.test(relative))
270
+ if (!ROUTE_FILE_PATTERN.test(relative) && !ROUTE_FILE_BASENAME_PATTERN.test(path.basename(relative)))
263
271
  continue;
264
272
  const fileContent = safeReadFile(file);
265
273
  if (fileContent === null)
@@ -269,8 +277,8 @@ export function scanRelatedEndpoints(repositoryPath, changedFiles) {
269
277
  }
270
278
  }
271
279
  }
272
- return Array.from(endpointMap.entries()).map(([apiPath, data]) => ({
273
- path: apiPath,
280
+ return Array.from(endpointMap.values()).map((data) => ({
281
+ path: data.path,
274
282
  methods: Array.from(data.methods),
275
283
  sourceFile: data.sourceFile,
276
284
  }));