retestkit 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238)
  1. package/README.md +59 -40
  2. package/dist/config.js +8 -8
  3. package/dist/config.js.map +1 -1
  4. package/dist/logger.js +1 -1
  5. package/dist/logger.js.map +1 -1
  6. package/dist/prompts/index.d.ts +1 -1
  7. package/dist/prompts/index.d.ts.map +1 -1
  8. package/dist/prompts/index.js +21 -21
  9. package/dist/prompts/index.js.map +1 -1
  10. package/dist/prompts/templates/mcp/retest-crawl.md +7 -0
  11. package/{src/prompts/templates/mcp/webtest-discover-flows.md → dist/prompts/templates/mcp/retest-discover-flows.md} +1 -1
  12. package/{src/prompts/templates/mcp/webtest-discover.md → dist/prompts/templates/mcp/retest-discover.md} +2 -2
  13. package/dist/prompts/templates/mcp/retest-full-workflow.md +12 -0
  14. package/{src/prompts/templates/mcp/webtest-generate-tests.md → dist/prompts/templates/mcp/retest-generate-tests.md} +1 -1
  15. package/{src/prompts/templates/mcp/webtest-run-test.md → dist/prompts/templates/mcp/retest-run-test.md} +1 -1
  16. package/{src/prompts/templates/mcp/webtest-start.md → dist/prompts/templates/mcp/retest-start.md} +1 -1
  17. package/{src → dist}/prompts/templates/sampling/system-prefix.md +1 -1
  18. package/dist/resources/index.js +7 -7
  19. package/dist/resources/index.js.map +1 -1
  20. package/dist/schemas/config.js +2 -2
  21. package/dist/schemas/config.js.map +1 -1
  22. package/dist/security/index.js +1 -1
  23. package/dist/security/index.js.map +1 -1
  24. package/dist/server.js +3 -3
  25. package/dist/server.js.map +1 -1
  26. package/dist/test-utils/mock-context.js +22 -22
  27. package/dist/test-utils/mock-context.js.map +1 -1
  28. package/dist/tools/index.d.ts +1 -1
  29. package/dist/tools/index.d.ts.map +1 -1
  30. package/dist/tools/index.js +5 -5
  31. package/dist/tools/index.js.map +1 -1
  32. package/dist/tools/retest/crawl.d.ts.map +1 -0
  33. package/dist/tools/{webtest → retest}/crawl.js +7 -7
  34. package/dist/tools/retest/crawl.js.map +1 -0
  35. package/dist/tools/retest/discover-features.d.ts.map +1 -0
  36. package/dist/tools/{webtest → retest}/discover-features.js +6 -6
  37. package/dist/tools/retest/discover-features.js.map +1 -0
  38. package/dist/tools/retest/discover-flows.d.ts.map +1 -0
  39. package/dist/tools/{webtest → retest}/discover-flows.js +6 -6
  40. package/dist/tools/retest/discover-flows.js.map +1 -0
  41. package/dist/tools/retest/generate-tests.d.ts.map +1 -0
  42. package/dist/tools/{webtest → retest}/generate-tests.js +5 -5
  43. package/dist/tools/retest/generate-tests.js.map +1 -0
  44. package/dist/tools/retest/index.d.ts.map +1 -0
  45. package/dist/tools/retest/index.js.map +1 -0
  46. package/dist/tools/retest/run-test-case.d.ts.map +1 -0
  47. package/dist/tools/{webtest → retest}/run-test-case.js +3 -3
  48. package/dist/tools/retest/run-test-case.js.map +1 -0
  49. package/dist/tools/retest/schemas.d.ts.map +1 -0
  50. package/dist/tools/retest/schemas.js.map +1 -0
  51. package/dist/tools/retest/start-analysis.d.ts.map +1 -0
  52. package/dist/tools/{webtest → retest}/start-analysis.js +5 -5
  53. package/dist/tools/retest/start-analysis.js.map +1 -0
  54. package/dist/workspace/index.js +8 -8
  55. package/dist/workspace/index.js.map +1 -1
  56. package/dist/workspace/types.d.ts +2 -2
  57. package/dist/workspace/types.d.ts.map +1 -1
  58. package/package.json +6 -2
  59. package/.claude/commands/openspec/apply.md +0 -23
  60. package/.claude/commands/openspec/archive.md +0 -27
  61. package/.claude/commands/openspec/proposal.md +0 -28
  62. package/.gemini/commands/openspec/apply.toml +0 -21
  63. package/.gemini/commands/openspec/archive.toml +0 -25
  64. package/.gemini/commands/openspec/proposal.toml +0 -26
  65. package/.github/prompts/openspec-apply.prompt.md +0 -22
  66. package/.github/prompts/openspec-archive.prompt.md +0 -26
  67. package/.github/prompts/openspec-proposal.prompt.md +0 -27
  68. package/.github/workflows/release.yml +0 -33
  69. package/.kilocode/workflows/openspec-apply.md +0 -17
  70. package/.kilocode/workflows/openspec-archive.md +0 -21
  71. package/.kilocode/workflows/openspec-proposal.md +0 -22
  72. package/.mcp.json +0 -23
  73. package/.opencode/command/openspec-apply.md +0 -25
  74. package/.opencode/command/openspec-archive.md +0 -28
  75. package/.opencode/command/openspec-proposal.md +0 -30
  76. package/.roo/commands/openspec-apply.md +0 -20
  77. package/.roo/commands/openspec-archive.md +0 -24
  78. package/.roo/commands/openspec-proposal.md +0 -25
  79. package/.vscode/mcp.json +0 -23
  80. package/AGENTS.md +0 -18
  81. package/CLAUDE.md +0 -18
  82. package/dist/tools/webtest/crawl.d.ts.map +0 -1
  83. package/dist/tools/webtest/crawl.js.map +0 -1
  84. package/dist/tools/webtest/discover-features.d.ts.map +0 -1
  85. package/dist/tools/webtest/discover-features.js.map +0 -1
  86. package/dist/tools/webtest/discover-flows.d.ts.map +0 -1
  87. package/dist/tools/webtest/discover-flows.js.map +0 -1
  88. package/dist/tools/webtest/generate-tests.d.ts.map +0 -1
  89. package/dist/tools/webtest/generate-tests.js.map +0 -1
  90. package/dist/tools/webtest/index.d.ts.map +0 -1
  91. package/dist/tools/webtest/index.js.map +0 -1
  92. package/dist/tools/webtest/run-test-case.d.ts.map +0 -1
  93. package/dist/tools/webtest/run-test-case.js.map +0 -1
  94. package/dist/tools/webtest/schemas.d.ts.map +0 -1
  95. package/dist/tools/webtest/schemas.js.map +0 -1
  96. package/dist/tools/webtest/start-analysis.d.ts.map +0 -1
  97. package/dist/tools/webtest/start-analysis.js.map +0 -1
  98. package/openspec/AGENTS.md +0 -456
  99. package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/proposal.md +0 -33
  100. package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/specs/webtest-resources/spec.md +0 -27
  101. package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/specs/webtest-tools/spec.md +0 -304
  102. package/openspec/changes/archive/2025-12-18-add-hybrid-artifact-paths/tasks.md +0 -43
  103. package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/design.md +0 -209
  104. package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/proposal.md +0 -41
  105. package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/specs/mcp-server-core/spec.md +0 -183
  106. package/openspec/changes/archive/2025-12-18-add-mcp-server-foundation/tasks.md +0 -112
  107. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/design.md +0 -333
  108. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/proposal.md +0 -66
  109. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/mcp-server-core/spec.md +0 -129
  110. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-lifecycle/spec.md +0 -138
  111. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-logging/spec.md +0 -211
  112. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-prompts/spec.md +0 -157
  113. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-resources/spec.md +0 -213
  114. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-sampling/spec.md +0 -257
  115. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/specs/webtest-tools/spec.md +0 -501
  116. package/openspec/changes/archive/2025-12-18-add-webtest-orchestrator/tasks.md +0 -264
  117. package/openspec/changes/archive/2025-12-18-allow-analysis-of-incomplete-crawls/proposal.md +0 -24
  118. package/openspec/changes/archive/2025-12-18-allow-analysis-of-incomplete-crawls/specs/webtest-tools/spec.md +0 -80
  119. package/openspec/changes/archive/2025-12-18-allow-analysis-of-incomplete-crawls/tasks.md +0 -8
  120. package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/design.md +0 -90
  121. package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/proposal.md +0 -28
  122. package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/specs/webtest-sampling/spec.md +0 -90
  123. package/openspec/changes/archive/2025-12-18-fix-crawl-loop-stability/tasks.md +0 -33
  124. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/design.md +0 -558
  125. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/proposal.md +0 -119
  126. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/specs/webtest-resources/spec.md +0 -109
  127. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/specs/webtest-tools/spec.md +0 -121
  128. package/openspec/changes/archive/2025-12-18-use-markdown-artifacts/tasks.md +0 -133
  129. package/openspec/changes/extract-prompts-to-markdown/design.md +0 -86
  130. package/openspec/changes/extract-prompts-to-markdown/proposal.md +0 -50
  131. package/openspec/changes/extract-prompts-to-markdown/specs/webtest-prompts/spec.md +0 -74
  132. package/openspec/changes/extract-prompts-to-markdown/tasks.md +0 -40
  133. package/openspec/changes/refactor-webtest-naming/design.md +0 -95
  134. package/openspec/changes/refactor-webtest-naming/proposal.md +0 -66
  135. package/openspec/changes/refactor-webtest-naming/specs/webtest-prompts/spec.md +0 -79
  136. package/openspec/changes/refactor-webtest-naming/specs/webtest-resources/spec.md +0 -80
  137. package/openspec/changes/refactor-webtest-naming/specs/webtest-sampling/spec.md +0 -122
  138. package/openspec/changes/refactor-webtest-naming/specs/webtest-tools/spec.md +0 -113
  139. package/openspec/changes/refactor-webtest-naming/tasks.md +0 -119
  140. package/openspec/changes/rename-package-to-retest/proposal.md +0 -52
  141. package/openspec/changes/rename-package-to-retest/specs/mcp-server-core/spec.md +0 -53
  142. package/openspec/changes/rename-package-to-retest/specs/retest-lifecycle/spec.md +0 -68
  143. package/openspec/changes/rename-package-to-retest/specs/retest-logging/spec.md +0 -35
  144. package/openspec/changes/rename-package-to-retest/specs/retest-prompts/spec.md +0 -159
  145. package/openspec/changes/rename-package-to-retest/specs/retest-resources/spec.md +0 -251
  146. package/openspec/changes/rename-package-to-retest/specs/retest-sampling/spec.md +0 -99
  147. package/openspec/changes/rename-package-to-retest/specs/retest-tools/spec.md +0 -295
  148. package/openspec/changes/rename-package-to-retest/tasks.md +0 -71
  149. package/openspec/project.md +0 -31
  150. package/openspec/specs/mcp-server-core/spec.md +0 -178
  151. package/openspec/specs/webtest-lifecycle/spec.md +0 -136
  152. package/openspec/specs/webtest-logging/spec.md +0 -209
  153. package/openspec/specs/webtest-prompts/spec.md +0 -155
  154. package/openspec/specs/webtest-resources/spec.md +0 -248
  155. package/openspec/specs/webtest-sampling/spec.md +0 -344
  156. package/openspec/specs/webtest-tools/spec.md +0 -282
  157. package/release.config.js +0 -9
  158. package/src/config.test.ts +0 -96
  159. package/src/config.ts +0 -32
  160. package/src/elicitation/index.test.ts +0 -399
  161. package/src/elicitation/index.ts +0 -171
  162. package/src/elicitation/types.ts +0 -68
  163. package/src/index.ts +0 -83
  164. package/src/lifecycle/index.test.ts +0 -260
  165. package/src/lifecycle/index.ts +0 -101
  166. package/src/logger.redaction.test.ts +0 -322
  167. package/src/logger.test.ts +0 -123
  168. package/src/logger.ts +0 -229
  169. package/src/playwright-client/index.ts +0 -392
  170. package/src/playwright-client/types.ts +0 -99
  171. package/src/progress/index.test.ts +0 -327
  172. package/src/progress/index.ts +0 -170
  173. package/src/progress/types.ts +0 -25
  174. package/src/prompts/index.test.ts +0 -451
  175. package/src/prompts/index.ts +0 -246
  176. package/src/prompts/loader.test.ts +0 -100
  177. package/src/prompts/loader.ts +0 -59
  178. package/src/prompts/templates/mcp/webtest-crawl.md +0 -7
  179. package/src/prompts/templates/mcp/webtest-full-workflow.md +0 -12
  180. package/src/resources/index.ts +0 -250
  181. package/src/resources/subscriptions.ts +0 -37
  182. package/src/sampling/index.test.ts +0 -414
  183. package/src/sampling/index.ts +0 -286
  184. package/src/sampling/prompts.ts +0 -194
  185. package/src/sampling/types.ts +0 -60
  186. package/src/schemas/config.ts +0 -39
  187. package/src/security/index.test.ts +0 -441
  188. package/src/security/index.ts +0 -361
  189. package/src/security/security-scenarios.test.ts +0 -468
  190. package/src/server.ts +0 -211
  191. package/src/test-utils/index.ts +0 -6
  192. package/src/test-utils/mock-context.ts +0 -426
  193. package/src/test-utils/mock-playwright-client.ts +0 -422
  194. package/src/tools/index.ts +0 -11
  195. package/src/tools/webtest/crawl.test.ts +0 -834
  196. package/src/tools/webtest/crawl.ts +0 -901
  197. package/src/tools/webtest/discover-features.ts +0 -412
  198. package/src/tools/webtest/discover-flows.ts +0 -408
  199. package/src/tools/webtest/generate-tests.test.ts +0 -532
  200. package/src/tools/webtest/generate-tests.ts +0 -425
  201. package/src/tools/webtest/index.ts +0 -7
  202. package/src/tools/webtest/integration.test.ts +0 -536
  203. package/src/tools/webtest/run-test-case.test.ts +0 -659
  204. package/src/tools/webtest/run-test-case.ts +0 -508
  205. package/src/tools/webtest/schemas.ts +0 -201
  206. package/src/tools/webtest/start-analysis.test.ts +0 -151
  207. package/src/tools/webtest/start-analysis.ts +0 -158
  208. package/src/transports/http.ts +0 -19
  209. package/src/transports/index.ts +0 -30
  210. package/src/transports/stdio.ts +0 -7
  211. package/src/types/capabilities.test.ts +0 -193
  212. package/src/types/capabilities.ts +0 -50
  213. package/src/types/context.ts +0 -21
  214. package/src/types/tool.ts +0 -11
  215. package/src/workspace/index.ts +0 -945
  216. package/src/workspace/markdown.ts +0 -272
  217. package/src/workspace/types.ts +0 -186
  218. package/tests/integration/server.test.ts +0 -89
  219. package/tests/integration/tools.test.ts +0 -99
  220. package/tsconfig.json +0 -20
  221. package/vitest.config.ts +0 -9
  222. package/vitest.integration.config.ts +0 -10
  223. /package/{src → dist}/prompts/templates/sampling/crawl-action.md +0 -0
  224. /package/{src → dist}/prompts/templates/sampling/feature-discovery.md +0 -0
  225. /package/{src → dist}/prompts/templates/sampling/flow-discovery.md +0 -0
  226. /package/{src → dist}/prompts/templates/sampling/page-content-wrapper.md +0 -0
  227. /package/{src → dist}/prompts/templates/sampling/test-evaluation.md +0 -0
  228. /package/{src → dist}/prompts/templates/sampling/test-generation.md +0 -0
  229. /package/dist/tools/{webtest → retest}/crawl.d.ts +0 -0
  230. /package/dist/tools/{webtest → retest}/discover-features.d.ts +0 -0
  231. /package/dist/tools/{webtest → retest}/discover-flows.d.ts +0 -0
  232. /package/dist/tools/{webtest → retest}/generate-tests.d.ts +0 -0
  233. /package/dist/tools/{webtest → retest}/index.d.ts +0 -0
  234. /package/dist/tools/{webtest → retest}/index.js +0 -0
  235. /package/dist/tools/{webtest → retest}/run-test-case.d.ts +0 -0
  236. /package/dist/tools/{webtest → retest}/schemas.d.ts +0 -0
  237. /package/dist/tools/{webtest → retest}/schemas.js +0 -0
  238. /package/dist/tools/{webtest → retest}/start-analysis.d.ts +0 -0
@@ -1,264 +0,0 @@
1
- # Tasks: Add Dynamic Web Testing Orchestrator
2
-
3
- ## Phase 1: Core Infrastructure
4
-
5
- - [x] 1.1 Add Playwright MCP client dependency and types
6
- - [x] 1.2 Create `src/playwright-client/` module for Playwright MCP subprocess management
7
- - [x] 1.2.1 Implement spawn/connect/disconnect lifecycle
8
- - [x] 1.2.2 Implement tool discovery via `tools/list`
9
- - [x] 1.2.3 Implement capability adapter (canonical name → actual tool name mapping)
10
- - [x] 1.2.4 Handle tool name variants (`browser_*`, `playwright_*`, unprefixed)
11
- - [x] 1.2.5 Log detected Playwright MCP implementation variant
12
- - [x] 1.2.6 Implement tool call wrapper with error handling
13
- - [x] 1.3 Create `src/lifecycle/` module for MCP lifecycle management
14
- - [x] 1.3.1 Implement protocol version negotiation (require 2025-06-18+)
15
- - [x] 1.3.2 Implement capability negotiation on initialize
16
- - [x] 1.3.3 Record capabilities: sampling, elicitation, logging, progress, resources.listChanged, resources.subscribe
17
- - [x] 1.3.4 Store capabilities in server context
18
- - [x] 1.3.5 Add capability query helpers (`hasSampling()`, `hasElicitation()`, etc.)
19
- - [x] 1.3.6 Log warning when running in degraded mode (older protocol)
20
- - [x] 1.4 Create `src/sampling/` module for MCP Sampling client
21
- - [x] 1.4.1 Implement `sampling/createMessage` request wrapper
22
- - [x] 1.4.2 Implement JSON schema enforcement in prompts
23
- - [x] 1.4.3 Implement response validation against schemas
24
- - [x] 1.4.4 Implement retry on validation failure (once, with error feedback)
25
- - [x] 1.4.5 Add fallback mode (return prompt resource when sampling unavailable)
26
- - [x] 1.5 Create `src/elicitation/` module for MCP Elicitation client
27
- - [x] 1.5.1 Implement `elicitation/create` request wrapper
28
- - [x] 1.5.2 Define allowed elicitation types (enum)
29
- - [x] 1.5.3 Add fallback mode (return questions in tool output)
30
- - [x] 1.6 Create `src/progress/` module for progress and cancellation
31
- - [x] 1.6.1 Implement progress notification emitter (with budget status)
32
- - [x] 1.6.2 Implement cancellation registry
33
- - [x] 1.6.3 Add `checkCancelled()` helper for loops
34
- - [x] 1.7 Update `src/logger.ts` for structured logging with MCP notifications
35
- - [x] 1.7.1 Emit `notifications/message` when client supports logging
36
- - [x] 1.7.2 Support `logging/setLevel` for dynamic level control
37
- - [x] 1.7.3 Add correlation ID support (analysisId, crawlId, testRunId, iteration, requestId)
38
- - [x] 1.7.4 Implement sensitive data redaction (URL params, cookies, passwords)
39
- - [x] 1.7.5 Truncate HTML content in logs
40
- - [x] 1.8 Write unit tests for all Phase 1 modules
41
-
42
- ## Phase 2: Workspace and Resources
43
-
44
- - [x] 2.1 Create `src/workspace/` module for analysis workspaces
45
- - [x] 2.1.1 Implement workspace directory creation
46
- - [x] 2.1.2 Implement `index.json` metadata management
47
- - [x] 2.1.3 Add workspace path resolution helpers
48
- - [x] 2.2 Create `src/resources/` module for MCP Resource management
49
- - [x] 2.2.1 Register resource templates with server
50
- - [x] 2.2.2 Implement `webtest://` URI scheme handler
51
- - [x] 2.2.3 Implement resource listing for analysis artifacts
52
- - [x] 2.2.4 Implement resource read for individual artifacts
53
- - [x] 2.2.5 Implement `resources/subscribe` handler
54
- - [x] 2.2.6 Emit `notifications/resources/list_changed` on new resource creation
55
- - [x] 2.2.7 Emit `notifications/resources/updated` for subscribed resources
56
- - [x] 2.2.8 Handle missing listChanged/subscribe capabilities gracefully
57
- - [x] 2.3 Create artifact capture utilities
58
- - [x] 2.3.1 Screenshot capture and storage
59
- - [x] 2.3.2 Snapshot JSON storage
60
- - [x] 2.3.3 HTML DOM capture and storage
61
- - [x] 2.3.4 Markdown report generation helpers
62
- - [x] 2.4 Add configuration for workspace location (`WEBTEST_WORKSPACE_DIR`)
63
- - [x] 2.5 Write unit tests for workspace and resource modules
64
-
65
- ## Phase 3: webtest_init Tool
66
-
67
- - [x] 3.1 Create `src/tools/webtest/start-analysis.ts`
68
- - [x] 3.1.1 Define input schema (url, focus, limits)
69
- - [x] 3.1.2 Validate URL and normalize domain
70
- - [x] 3.1.3 Generate `analysisId`
71
- - [x] 3.1.4 Create workspace directory structure
72
- - [x] 3.1.5 Write initial `index.json` resource
73
- - [x] 3.1.6 Return `analysisId`, `workspaceRootUri`, `statusUri`
74
- - [x] 3.2 Register tool in `src/tools/index.ts`
75
- - [x] 3.3 Write unit tests for start_analysis
76
- - [x] 3.4 Write integration test: start_analysis creates workspace
77
-
78
- ## Phase 4: webtest_crawl_app Tool
79
-
80
- - [x] 4.1 Create `src/tools/webtest/crawl.ts`
81
- - [x] 4.1.1 Define input schema (analysisId, goal, strategy, limits, artifacts)
82
- - [x] 4.1.2 Validate analysisId exists
83
- - [x] 4.2 Implement crawl loop core
84
- - [x] 4.2.1 Navigate to starting URL
85
- - [x] 4.2.2 Capture initial state (snapshot, screenshot, HTML)
86
- - [x] 4.2.3 Build sampling prompt with goal, history, current state
87
- - [x] 4.2.4 Request next action via sampling
88
- - [x] 4.2.5 Validate and execute Playwright actions
89
- - [x] 4.2.6 Record action result and update history
90
- - [x] 4.2.7 Check termination conditions (goal met, limits, cancellation)
91
- - [x] 4.2.8 Loop or finalize
92
- - [x] 4.3 Implement elicitation integration
93
- - [x] 4.3.1 Detect elicitation triggers (cookie banner, modal, ambiguity, auth)
94
- - [x] 4.3.2 Call elicitation or fallback
95
- - [x] 4.3.3 Incorporate user decision into crawl
96
- - [x] 4.4 Implement progress reporting
97
- - [x] 4.4.1 Emit progress on each iteration
98
- - [x] 4.4.2 Include step count, pages, current intent
99
- - [x] 4.5 Implement cancellation handling
100
- - [x] 4.5.1 Check cancellation registry each iteration
101
- - [x] 4.5.2 On cancel, finalize partial crawl index
102
- - [x] 4.6 Implement checkpointing
103
- - [x] 4.6.1 Write checkpoint every N steps (configurable, default 5)
104
- - [x] 4.6.2 Include: step count, visited pages, action history, goal progress, DOM signatures
105
- - [x] 4.6.3 Support `resume: true` input to continue from checkpoint
106
- - [x] 4.6.4 Update crawl index immediately on each page capture
107
- - [x] 4.7 Implement loop detection and prevention
108
- - [x] 4.7.1 Track DOM signatures (hash of key structural elements)
109
- - [x] 4.7.2 Detect same-state loops (3 consecutive same signatures)
110
- - [x] 4.7.3 Detect URL cycles (same URL visited more than 3 times)
111
- - [x] 4.7.4 Detect action repeats (same tool+args 3 times consecutively)
112
- - [x] 4.7.5 Include loop state in sampling prompts
113
- - [x] 4.8 Implement budget enforcement
114
- - [x] 4.8.1 Enforce maxSteps limit
115
- - [x] 4.8.2 Enforce maxMinutes timeout
116
- - [x] 4.8.3 Enforce maxPages limit
117
- - [x] 4.8.4 Include budget status in progress notifications
118
- - [x] 4.9 Implement fallback mode (no sampling)
119
- - [x] 4.9.1 Return prompt resource for manual execution
120
- - [x] 4.9.2 Accept `manualNextActions` input to continue
121
- - [x] 4.10 Implement security checks
122
- - [x] 4.10.1 Validate actions target allowed domains
123
- - [x] 4.10.2 Block data exfiltration patterns (POST to external, URL params)
124
- - [x] 4.10.3 Log all sampling inputs/outputs for audit
125
- - [x] 4.11 Write crawl output resources
126
- - [x] 4.11.1 `crawlIndexUri` (JSON)
127
- - [x] 4.11.2 Per-page artifact URIs
128
- - [x] 4.11.3 `summaryUri` (markdown)
129
- - [x] 4.12 Register tool in `src/tools/index.ts`
130
- - [x] 4.13 Write unit tests for crawl logic
131
- - [x] 4.14 Write integration test: crawl with mock Playwright MCP
132
-
133
- ## Phase 5: webtest_analyze_app Tool
134
-
135
- - [x] 5.1 Create `src/tools/webtest/analyze-app.ts`
136
- - [x] 5.1.1 Define input schema (analysisId, crawlId)
137
- - [x] 5.1.2 Load crawl index and artifacts
138
- - [x] 5.2 Build analysis sampling prompt
139
- - [x] 5.2.1 Include crawl summary, page snapshots
140
- - [x] 5.2.2 Request: app purpose, entities, user flows, assertions
141
- - [x] 5.2.3 Define output JSON schema
142
- - [x] 5.3 Execute sampling and validate response
143
- - [x] 5.4 Write analysis outputs
144
- - [x] 5.4.1 `app-analysis.md` resource
145
- - [x] 5.4.2 `flows.json` resource
146
- - [x] 5.5 Implement fallback mode (prompt resource)
147
- - [x] 5.6 Register tool in `src/tools/index.ts`
148
- - [x] 5.7 Write unit tests for analyze_app
149
-
150
- ## Phase 6: webtest_generate_tests Tool
151
-
152
- - [x] 6.1 Create `src/tools/webtest/generate-tests.ts`
153
- - [x] 6.1.1 Define input schema (analysisId, appAnalysisUri, testStrategy)
154
- - [x] 6.1.2 Load app analysis and flows
155
- - [x] 6.2 Build test generation sampling prompt
156
- - [x] 6.2.1 Include analysis, flows, strategy preferences
157
- - [x] 6.2.2 Define test case output schema (id, name, purpose, preconditions, steps, expected)
158
- - [x] 6.3 Execute sampling and validate response
159
- - [x] 6.4 Write test outputs
160
- - [x] 6.4.1 `tests.md` resource
161
- - [x] 6.4.2 `tests.json` resource
162
- - [x] 6.5 Implement fallback mode (prompt resource)
163
- - [x] 6.6 Register tool in `src/tools/index.ts`
164
- - [x] 6.7 Write unit tests for generate_tests
165
-
166
- ## Phase 7: webtest_run_tests Tool
167
-
168
- - [x] 7.1 Create `src/tools/webtest/run-test-case.ts`
169
- - [x] 7.1.1 Define input schema (analysisId, testCaseId, testsUri, runOptions)
170
- - [x] 7.1.2 Load test case from tests index
171
- - [x] 7.2 Implement test execution loop
172
- - [x] 7.2.1 For each step: capture state
173
- - [x] 7.2.2 Use sampling to translate step to Playwright actions
174
- - [x] 7.2.3 Execute actions, capture evidence
175
- - [x] 7.2.4 Evaluate pass/fail via sampling
176
- - [x] 7.2.5 Record step result
177
- - [x] 7.3 Implement progress reporting per step
178
- - [x] 7.4 Implement cancellation handling
179
- - [x] 7.5 Write run outputs
180
- - [x] 7.5.1 `report.md` resource (pass/fail summary, evidence links)
181
- - [x] 7.5.2 `artifacts.json` resource (per-step artifacts)
182
- - [x] 7.6 Implement fallback mode
183
- - [x] 7.7 Register tool in `src/tools/index.ts`
184
- - [x] 7.8 Write unit tests for run_test_case
185
- - [x] 7.9 Write integration test: full test execution with mock Playwright
186
-
187
- ## Phase 8: Prompts
188
-
189
- - [x] 8.1 Create `src/prompts/` module
190
- - [x] 8.2 Implement prompt: "Start web testing analysis"
191
- - [x] 8.2.1 Gather URL, focus, limits
192
- - [x] 8.2.2 Call `webtest_init`
193
- - [x] 8.3 Implement prompt: "Crawl to satisfy focus"
194
- - [x] 8.3.1 Gather analysisId, goal, strategy
195
- - [x] 8.3.2 Call `webtest_crawl_app`
196
- - [x] 8.4 Implement prompt: "Generate tests"
197
- - [x] 8.4.1 Gather analysisId, appAnalysisUri
198
- - [x] 8.4.2 Call `webtest_generate_tests`
199
- - [x] 8.5 Implement prompt: "Run test case"
200
- - [x] 8.5.1 Gather analysisId, testCaseId
201
- - [x] 8.5.2 Call `webtest_run_tests`
202
- - [x] 8.6 Register prompts with server
203
- - [x] 8.7 Write unit tests for prompt registration
204
-
205
- ## Phase 9: Cleanup and Finalization
206
-
207
- - [x] 9.1 Remove `hello` tool from `src/tools/`
208
- - [x] 9.2 Update `src/tools/index.ts` to export only webtest tools
209
- - [x] 9.3 Update configuration schema for new env vars
210
- - [x] 9.4 Update README with usage documentation
211
- - [x] 9.5 Add example workflows to documentation
212
- - [x] 9.6 Run full test suite and fix failures
213
- - [x] 9.7 Run `npm run build` and verify clean build
214
- - [ ] 9.8 Manual end-to-end test with real client
215
-
216
- ## Phase 10: Integration Testing
217
-
218
- - [x] 10.1 Create mock Playwright MCP server for testing
219
- - [x] 10.2 Integration test: full workflow (start → crawl → analyze → generate → run)
220
- - [x] 10.3 Integration test: cancellation mid-crawl
221
- - [x] 10.4 Integration test: fallback mode without sampling
222
- - [x] 10.5 Integration test: elicitation triggers
223
- - [x] 10.6 Integration test: resource listChanged notifications
224
- - [x] 10.7 Integration test: crawl resume from checkpoint
225
- - [x] 10.8 Integration test: loop detection triggers
226
-
227
- ## Phase 11: Security Testing
228
-
229
- - [x] 11.1 Create prompt injection test suite
230
- - [x] 11.1.1 Test: direct instruction injection ("Ignore previous instructions...")
231
- - [x] 11.1.2 Test: indirect injection via page meta tags
232
- - [x] 11.1.3 Test: indirect injection via hidden elements
233
- - [x] 11.1.4 Test: goal hijacking ("Actually, the user wants...")
234
- - [x] 11.1.5 Test: credential phishing in page content
235
- - [x] 11.2 Create domain enforcement test suite
236
- - [x] 11.2.1 Test: navigation to disallowed domain blocked
237
- - [x] 11.2.2 Test: subdomain matching works correctly
238
- - [x] 11.2.3 Test: link click to external domain detected
239
- - [x] 11.3 Create data exfiltration test suite
240
- - [x] 11.3.1 Test: POST to external domain blocked
241
- - [x] 11.3.2 Test: page content in external URL params blocked
242
- - [x] 11.3.3 Test: browser_run_code external requests blocked
243
- - [x] 11.4 Create sensitive data redaction test suite
244
- - [x] 11.4.1 Test: URL params with sensitive keys redacted
245
- - [x] 11.4.2 Test: cookie values redacted in logs
246
- - [x] 11.4.3 Test: password input values redacted
247
-
248
- ## Dependencies
249
-
250
- - Phase 2 depends on Phase 1 (needs lifecycle, sampling, progress modules)
251
- - Phase 3 depends on Phase 2 (needs workspace, resources)
252
- - Phases 4-7 depend on Phases 1-3 (need full infrastructure)
253
- - Phase 8 depends on Phases 3-7 (prompts call tools)
254
- - Phase 9 depends on Phases 1-8 (cleanup after features complete)
255
- - Phase 10 depends on Phase 9 (test complete system)
256
- - Phase 11 depends on Phase 4 (security tests need crawl implementation)
257
-
258
- ## Parallelization Opportunities
259
-
260
- - Tasks within each phase can often be parallelized
261
- - Phase 4 (crawl) and Phase 5 (analyze) development can overlap once Phase 3 is done
262
- - Unit test writing can happen in parallel with implementation
263
- - Documentation (9.4, 9.5) can start after Phase 7
264
- - Phase 11 (security testing) can start as soon as Phase 4 is complete
@@ -1,24 +0,0 @@
1
- # Change: Allow Analysis of Incomplete Crawls
2
-
3
- ## Why
4
-
5
- Currently, `webtest_analyze_app` requires a crawl with `status === "completed"` before proceeding. When a crawl is interrupted (cancelled, timed out, errored) or still in progress, the user is blocked with "No completed crawls found" and forced to re-run crawling.
6
-
7
- This is problematic because:
8
- 1. Crawl data is captured incrementally - valuable artifacts exist even for incomplete crawls
9
- 2. Users may intentionally stop a crawl early after sufficient data is gathered
10
- 3. Re-running a crawl wastes time and resources when existing data is sufficient
11
- 4. The current behavior removes user agency to decide when enough data exists
12
-
13
- ## What Changes
14
-
15
- - `webtest_analyze_app` SHALL accept crawls of any status, provided they have captured pages
16
- - When no `crawlId` is provided, the tool SHALL select the most recent crawl with captured data, regardless of its status
17
- - Analysis output SHALL indicate if the source crawl was incomplete
18
- - Users can still explicitly specify a `crawlId` to analyze any specific crawl
19
-
20
- ## Impact
21
-
22
- - Affected specs: `webtest-tools`
23
- - Affected code: `src/tools/webtest/analyze-app.ts`
24
- - Non-breaking: Existing behavior (analyzing completed crawls) still works
@@ -1,80 +0,0 @@
1
- ## MODIFIED Requirements
2
-
3
- ### Requirement: webtest_analyze_app Tool
4
-
5
- The system SHALL provide a `webtest_analyze_app` tool that reverse-engineers application structure from crawl data.
6
-
7
- #### Scenario: Analyze app loads crawl data
8
-
9
- - **GIVEN** the tool is called with valid analysisId and crawlId
10
- - **WHEN** execution begins
11
- - **THEN** it SHALL load crawl index and artifact references
12
- - **AND** load page snapshots for key pages
13
-
14
- #### Scenario: Analyze app selects most recent crawl with data
15
-
16
- - **GIVEN** the tool is called with valid analysisId but no crawlId
17
- - **WHEN** crawl selection occurs
18
- - **THEN** it SHALL select the most recent crawl that has captured at least one page
19
- - **AND** crawl status (completed, in_progress, failed, cancelled) SHALL NOT prevent selection
20
-
21
- #### Scenario: Analyze app checks crawl index when workspace metadata is stale
22
-
23
- - **GIVEN** the workspace index shows pagesVisited as 0 for a crawl
24
- - **AND** the actual crawl index contains captured pages
25
- - **WHEN** crawl selection occurs
26
- - **THEN** it SHALL read the crawl index to verify actual page count
27
- - **AND** select the crawl if pages exist in the index
28
-
29
- #### Scenario: Analyze app warns about incomplete crawl
30
-
31
- - **GIVEN** the selected crawl has status other than "completed"
32
- - **WHEN** analysis output is generated
33
- - **THEN** it SHALL include a warning indicating the crawl was incomplete
34
- - **AND** the warning SHALL include the crawl status and pages captured count
35
-
36
- #### Scenario: Analyze app rejects crawl with no captured pages
37
-
38
- - **GIVEN** the tool is called with analysisId
39
- - **WHEN** no crawls exist with captured pages
40
- - **THEN** it SHALL return an error: "No crawls with captured data found. Run webtest_crawl_app first."
41
-
42
- #### Scenario: Analyze app uses sampling for analysis
43
-
44
- - **GIVEN** crawl data is loaded
45
- - **WHEN** analysis is performed
46
- - **THEN** it SHALL construct sampling prompt with crawl summary and snapshots
47
- - **AND** request structured analysis via `sampling/createMessage`
48
-
49
- #### Scenario: Analyze app extracts application purpose
50
-
51
- - **GIVEN** analysis sampling completes
52
- - **WHEN** results are processed
53
- - **THEN** output SHALL include identified app purpose
54
- - **AND** key entities (users, products, orders, etc.)
55
-
56
- #### Scenario: Analyze app identifies user flows
57
-
58
- - **GIVEN** analysis sampling completes
59
- - **WHEN** results are processed
60
- - **THEN** output SHALL include discovered user flows
61
- - **AND** each flow SHALL have id, name, description, steps
62
-
63
- #### Scenario: Analyze app suggests assertions
64
-
65
- - **GIVEN** analysis sampling completes
66
- - **WHEN** results are processed
67
- - **THEN** output SHALL include suggested assertions for testing
68
- - **AND** potential risks or edge cases
69
-
70
- #### Scenario: Analyze app writes markdown report
71
-
72
- - **GIVEN** analysis is complete
73
- - **WHEN** output is generated
74
- - **THEN** it SHALL write `app-analysis.md` resource to workspace
75
-
76
- #### Scenario: Analyze app outputs URIs
77
-
78
- - **GIVEN** analysis is complete
79
- - **WHEN** tool returns
80
- - **THEN** it SHALL include `appAnalysisUri` and `flowsIndexUri`
@@ -1,8 +0,0 @@
1
- ## 1. Implementation
2
-
3
- - [x] 1.1 Update crawl selection logic in `analyze-app.ts` to accept any crawl with captured pages
4
- - [x] 1.2 Add warning to analysis output when source crawl status is not "completed"
5
- - [x] 1.3 Update error message to be more helpful when no crawls have captured data
6
- - [x] 1.4 Add fallback to check crawl index when workspace metadata shows 0 pages
7
- - [x] 1.5 Add unit tests for analyzing incomplete crawls
8
- - [x] 1.6 Update existing tests to reflect new behavior
@@ -1,90 +0,0 @@
1
- ## Context
2
-
3
- The crawl process uses AI sampling to decide next actions at each step. When loop detection mechanisms trigger warnings, the AI model may interpret "try a different action" as "start over from homepage" rather than "try a different element on the current page."
4
-
5
- This is exacerbated by:
6
- - DOM signatures that only capture structural elements, causing false positives on structurally similar pages
7
- - Loss of loop detection history when resuming from checkpoints
8
- - Limited action history context (10 actions) insufficient for complex multi-step flows
9
-
10
- ## Goals / Non-Goals
11
-
12
- **Goals:**
13
- - Prevent AI model from navigating back to start URL mid-flow
14
- - Reduce false positive loop detection on e-commerce sites
15
- - Maintain crawl progress context across checkpoint resume
16
- - Provide sufficient action history for complex flows (15-25 steps)
17
-
18
- **Non-Goals:**
19
- - Complete rewrite of loop detection mechanism
20
- - Adding new external dependencies
21
- - Changing the checkpoint file format significantly (additive changes only)
22
-
23
- ## Decisions
24
-
25
- ### Decision 1: Enhanced Sampling Prompt with Anti-Reset Guidance
26
-
27
- Add explicit instructions in the prompt:
28
- ```
29
- CRITICAL: Do NOT navigate back to the homepage or start URL to "reset" or "try again".
30
- If you seem stuck, try different elements on the current page first.
31
- Navigation to start URL is only allowed if the goal explicitly requires it.
32
- ```
33
-
34
- **Rationale:** The AI model responds well to explicit constraints. Making the anti-reset rule clear prevents the misinterpretation of loop warnings.
35
-
36
- ### Decision 2: Semantic DOM Signature
37
-
38
- Include in signature:
39
- - URL pathname (not query params)
40
- - Page `<title>` or first `<h1>` text
41
- - Button text content
42
- - Data attributes (`data-testid`, `data-page`, etc.)
43
- - Link hrefs
44
- - Input types
45
-
46
- **Rationale:** E-commerce pages like product listings, cart, and checkout share similar structural elements (forms, buttons, inputs) but differ in semantic content. Including text and data attributes differentiates them.
47
-
48
- ### Decision 3: Checkpoint State Preservation
49
-
50
- Serialize loop detection state to checkpoint:
51
- ```typescript
52
- interface CrawlCheckpoint {
53
- // existing fields...
54
- loopDetection?: {
55
- domSignatures: Array<[string, number]>; // Map entries
56
- urlVisits: Array<[string, number]>;
57
- recentActions: string[];
58
- };
59
- }
60
- ```
61
-
62
- **Rationale:** Maps can be serialized as arrays of entries. This is backwards-compatible (optional field).
63
-
64
- ### Decision 4: Navigation Guard
65
-
66
- Track `startUrl` in crawl state. After step 3, if AI requests navigation to start URL:
67
- 1. Log warning
68
- 2. Include warning in next prompt: "Navigation to start URL was blocked. Continue from current page."
69
- 3. Skip the action (don't execute it)
70
-
71
- **Rationale:** Legitimate flows rarely need to return to start URL mid-execution. Blocking this prevents restart loops while allowing initial navigation.
72
-
73
- ## Risks / Trade-offs
74
-
75
- | Risk | Mitigation |
76
- |------|------------|
77
- | Semantic signature may be too strict | Include only stable elements (title, h1, data-testid) |
78
- | Checkpoint file size increase | Loop detection data is small (~1KB) |
79
- | Navigation guard too restrictive | Only activate after step 3, allow explicit override |
80
-
81
- ## Migration Plan
82
-
83
- 1. All changes are additive and backwards-compatible
84
- 2. Old checkpoints without `loopDetection` field work as before (fresh state)
85
- 3. No breaking changes to existing APIs
86
-
87
- ## Open Questions
88
-
89
- - Should navigation guard threshold (step 3) be configurable?
90
- - Should DOM signature include visible text content (may be unstable)?
@@ -1,28 +0,0 @@
1
- # Change: Fix crawl loop stability and prevent restart from homepage
2
-
3
- ## Why
4
-
5
- During E2E testing on saucedemo.com, the crawl process exhibits unstable behavior: instead of progressing through a purchase flow (login → add to cart → checkout), it repeatedly restarts from the homepage. This is caused by:
6
-
7
- 1. **AI model misinterpreting loop warnings** as signals to "reset" by navigating home
8
- 2. **DOM signature collisions** between structurally similar pages triggering false loop detection
9
- 3. **Lost loop state on checkpoint resume** causing the AI to lose context
10
- 4. **Limited action history** (only 10 actions) insufficient for multi-step flows
11
- 5. **No explicit anti-reset guidance** in sampling prompts
12
-
13
- ## What Changes
14
-
15
- - **Enhance sampling prompt** with explicit anti-reset guidance preventing homepage navigation mid-flow
16
- - **Improve DOM signature algorithm** to include semantic context (text content, data attributes, URL path)
17
- - **Preserve loop detection state** across checkpoint resume
18
- - **Increase action history window** from 10 to 20 for complex flows
19
- - **Add navigation guard** that blocks navigation to start URL after initial steps
20
- - **Include flow stage tracking** in sampling prompt to maintain progress awareness
21
-
22
- ## Impact
23
-
24
- - Affected specs: `webtest-sampling`
25
- - Affected code:
26
- - `src/sampling/prompts.ts` - Enhanced prompt construction
27
- - `src/security/index.ts` - Improved DOM signature
28
- - `src/tools/webtest/crawl.ts` - Checkpoint state preservation, navigation guard
@@ -1,90 +0,0 @@
1
- ## ADDED Requirements
2
-
3
- ### Requirement: Anti-Reset Navigation Guidance
4
-
5
- The system SHALL include explicit guidance in crawl sampling prompts to prevent the AI model from navigating back to the start URL mid-flow.
6
-
7
- #### Scenario: Prompt includes anti-reset instruction
8
-
9
- - **GIVEN** a crawl sampling prompt is constructed
10
- - **WHEN** the prompt text is built
11
- - **THEN** it SHALL include an explicit instruction stating navigation to start URL is prohibited unless the goal requires it
12
- - **AND** the instruction SHALL advise trying different elements on the current page when stuck
13
-
14
- #### Scenario: Start URL navigation is blocked
15
-
16
- - **GIVEN** a crawl is in progress past step 3
17
- - **WHEN** the AI model requests navigation to the start URL
18
- - **THEN** the system SHALL log a warning
19
- - **AND** skip the navigation action
20
- - **AND** include a warning in the next sampling prompt explaining the action was blocked
21
-
22
- ### Requirement: Extended Action History Context
23
-
24
- The system SHALL provide sufficient action history context for complex multi-step flows.
25
-
26
- #### Scenario: Action history window is extended
27
-
28
- - **GIVEN** a crawl sampling prompt is constructed
29
- - **WHEN** action history is included
30
- - **THEN** it SHALL include the last 20 actions (increased from 10)
31
- - **AND** each action SHALL include step number, tool, args, and reasoning
32
-
33
- #### Scenario: Flow progress indicator is included
34
-
35
- - **GIVEN** a crawl sampling prompt is constructed
36
- - **WHEN** progress information is included
37
- - **THEN** it SHALL include a flow stage indicator showing:
38
- - Current step number
39
- - Total steps taken
40
- - Percentage of budget used
41
- - Goal progress summary from previous iteration
42
-
43
- ### Requirement: Semantic DOM Signature
44
-
45
- The system SHALL use semantic content in DOM signatures to differentiate structurally similar pages.
46
-
47
- #### Scenario: DOM signature includes semantic elements
48
-
49
- - **GIVEN** a page DOM needs to be fingerprinted for loop detection
50
- - **WHEN** the DOM signature is created
51
- - **THEN** the signature SHALL include:
52
- - URL pathname (without query parameters)
53
- - Page title or first h1 heading text
54
- - Button text content
55
- - Data attributes (data-testid, data-page, etc.)
56
- - Link hrefs
57
- - Input types
58
-
59
- #### Scenario: Similar structure pages have different signatures
60
-
61
- - **GIVEN** two e-commerce pages with similar HTML structure
62
- - **WHEN** one is a product listing and another is a cart page
63
- - **THEN** their DOM signatures SHALL be different due to semantic content differences
64
-
65
- ### Requirement: Loop Detection State Preservation
66
-
67
- The system SHALL preserve loop detection state across checkpoint resume to maintain crawl context.
68
-
69
- #### Scenario: Checkpoint includes loop detection state
70
-
71
- - **GIVEN** a crawl checkpoint is saved
72
- - **WHEN** the checkpoint data is written
73
- - **THEN** it SHALL include serialized loop detection data:
74
- - DOM signature visit counts
75
- - URL visit counts
76
- - Recent actions list
77
-
78
- #### Scenario: Checkpoint resume restores loop detection state
79
-
80
- - **GIVEN** a crawl resumes from checkpoint
81
- - **WHEN** the checkpoint data is loaded
82
- - **THEN** the loop detection state SHALL be restored from checkpoint
83
- - **AND** the crawl SHALL continue with full context of previous iterations
84
-
85
- #### Scenario: Missing loop detection data uses fresh state
86
-
87
- - **GIVEN** a crawl resumes from an old checkpoint without loop detection data
88
- - **WHEN** the checkpoint data is loaded
89
- - **THEN** the system SHALL initialize fresh loop detection state
90
- - **AND** log a warning about missing historical context
@@ -1,33 +0,0 @@
1
- ## 1. Sampling Prompt Enhancements
2
-
3
- - [x] 1.1 Add explicit anti-reset guidance to `buildCrawlActionPrompt` in `src/sampling/prompts.ts`
4
- - [x] 1.2 Increase action history window from 10 to 20 actions
5
- - [x] 1.3 Add flow stage indicator showing progress percentage and steps completed
6
- - [x] 1.4 Add explicit instruction: "NEVER navigate back to homepage/start URL to 'reset'"
7
-
8
- ## 2. DOM Signature Improvements
9
-
10
- - [x] 2.1 Update `createDomSignature` in `src/security/index.ts` to include:
11
- - Page title/heading text
12
- - Data attributes (data-testid, data-*)
13
- - URL path component
14
- - Key text content from buttons and labels
15
- - [x] 2.2 Add tests for DOM signature differentiation between similar e-commerce pages
16
-
17
- ## 3. Checkpoint State Preservation
18
-
19
- - [x] 3.1 Modify checkpoint save in `crawl.ts` to include `loopDetection` state
20
- - [x] 3.2 Modify checkpoint resume to restore `loopDetection.domSignatures`, `urlVisits`, and `recentActions`
21
- - [x] 3.3 Update `CrawlCheckpoint` type to include loop detection data
22
-
23
- ## 4. Navigation Guard
24
-
25
- - [x] 4.1 Add `startUrl` tracking to crawl state
26
- - [x] 4.2 Implement navigation guard that logs a warning and blocks navigation to the start URL after step 3
27
- - [x] 4.3 Include guard warning in sampling prompt when blocked
28
-
29
- ## 5. Testing
30
-
31
- - [x] 5.1 Add unit tests for enhanced DOM signature
32
- - [x] 5.2 Add unit tests for checkpoint state preservation (covered by existing tests)
33
- - [x] 5.3 All 458 tests pass with no regressions