@plaited/acp-harness 0.2.6 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +175 -34
  3. package/bin/cli.ts +105 -636
  4. package/bin/tests/cli.spec.ts +218 -51
  5. package/package.json +21 -5
  6. package/src/acp-client.ts +5 -4
  7. package/src/acp-transport.ts +14 -7
  8. package/src/adapter-check.ts +542 -0
  9. package/src/adapter-scaffold.ts +934 -0
  10. package/src/balance.ts +257 -0
  11. package/src/calibrate.ts +319 -0
  12. package/src/capture.ts +457 -0
  13. package/src/constants.ts +94 -0
  14. package/src/grader-loader.ts +174 -0
  15. package/src/harness.ts +35 -0
  16. package/src/schemas-cli.ts +239 -0
  17. package/src/schemas.ts +567 -0
  18. package/src/summarize.ts +259 -0
  19. package/src/tests/adapter-check.spec.ts +70 -0
  20. package/src/tests/adapter-scaffold.spec.ts +112 -0
  21. package/src/tests/balance-helpers.spec.ts +279 -0
  22. package/src/tests/calibrate-helpers.spec.ts +226 -0
  23. package/src/tests/capture-helpers.spec.ts +553 -0
  24. package/src/tests/fixtures/grader-bad-module.ts +5 -0
  25. package/src/tests/fixtures/grader-exec-fail.py +9 -0
  26. package/src/tests/fixtures/grader-exec-invalid.py +6 -0
  27. package/src/tests/fixtures/grader-exec.py +29 -0
  28. package/src/tests/fixtures/grader-module.ts +14 -0
  29. package/src/tests/grader-loader.spec.ts +153 -0
  30. package/src/tests/summarize-helpers.spec.ts +339 -0
  31. package/src/tests/trials-calculations.spec.ts +209 -0
  32. package/src/trials.ts +407 -0
  33. package/src/validate-refs.ts +188 -0
  34. package/.claude/rules/accuracy.md +0 -43
  35. package/.claude/rules/bun-apis.md +0 -80
  36. package/.claude/rules/code-review.md +0 -254
  37. package/.claude/rules/git-workflow.md +0 -37
  38. package/.claude/rules/github.md +0 -154
  39. package/.claude/rules/testing.md +0 -172
  40. package/.claude/skills/acp-harness/SKILL.md +0 -310
  41. package/.claude/skills/acp-harness/assets/Dockerfile.acp +0 -25
  42. package/.claude/skills/acp-harness/assets/docker-compose.acp.yml +0 -19
  43. package/.claude/skills/acp-harness/references/downstream.md +0 -288
  44. package/.claude/skills/acp-harness/references/output-formats.md +0 -221
  45. package/.claude-plugin/marketplace.json +0 -15
  46. package/.claude-plugin/plugin.json +0 -16
  47. package/.github/CODEOWNERS +0 -6
  48. package/.github/workflows/ci.yml +0 -63
  49. package/.github/workflows/publish.yml +0 -146
  50. package/.mcp.json +0 -20
  51. package/CLAUDE.md +0 -92
  52. package/Dockerfile.test +0 -23
  53. package/biome.json +0 -96
  54. package/bun.lock +0 -513
  55. package/docker-compose.test.yml +0 -21
  56. package/scripts/bun-test-wrapper.sh +0 -46
  57. package/src/acp.constants.ts +0 -56
  58. package/src/acp.schemas.ts +0 -161
  59. package/src/acp.types.ts +0 -28
  60. package/src/tests/fixtures/.claude/settings.local.json +0 -8
  61. package/src/tests/fixtures/.claude/skills/greeting/SKILL.md +0 -17
  62. package/tsconfig.json +0 -32
package/src/trials.ts ADDED
@@ -0,0 +1,407 @@
1
+ /**
2
+ * Multi-run trials command for pass@k/pass^k analysis.
3
+ *
4
+ * @remarks
5
+ * Runs each prompt k times to measure non-determinism.
6
+ * Without a grader, captures raw trials. With a grader, computes:
7
+ * - passRate: Simple pass rate (passes / k)
8
+ * - passAtK: Probability of at least one pass in k samples
9
+ * - passExpK: Probability of all k samples passing
10
+ *
11
+ * @packageDocumentation
12
+ */
13
+
14
+ import { appendFile } from 'node:fs/promises'
15
+ import { parseArgs } from 'node:util'
16
+ import { createACPClient } from './acp-client.ts'
17
+ import { createPrompt } from './acp-helpers.ts'
18
+ import { extractOutput, extractTrajectory, loadPrompts } from './capture.ts'
19
+ import { DEFAULT_HARNESS_TIMEOUT, DEFAULT_TRIAL_COUNT } from './constants.ts'
20
+ import { loadGrader } from './grader-loader.ts'
21
+ import type { Grader, TrialEntry, TrialResult } from './schemas.ts'
22
+ import { McpServerSchema } from './schemas.ts'
23
+
24
+ // ============================================================================
25
+ // Pass@k/Pass^k Calculation
26
+ // ============================================================================
27
+
28
/**
 * Estimate pass@k: the chance that at least one of k samples passes.
 *
 * @remarks
 * The observed pass rate (`passes / k`) is used as the per-sample success
 * probability, so the value returned is `1 - (1 - passes / k) ** k`.
 * Two cases are answered without evaluating the formula:
 * - `passes >= k` yields `1`
 * - `passes === 0` yields `0`
 *
 * @param passes - Number of passing trials
 * @param k - Total number of trials
 * @returns Probability of at least one pass
 *
 * @public
 */
export const calculatePassAtK = (passes: number, k: number): number => {
  if (passes >= k) return 1

  // Chance that a single sample fails, derived from the observed pass rate
  const failRate = 1 - passes / k
  return passes === 0 ? 0 : 1 - failRate ** k
}
52
+
53
/**
 * Estimate pass^k: the chance that all k samples pass.
 *
 * @remarks
 * Evaluates `(passes / k) ** k`, with the two boundary cases
 * (`passes === k` and `passes === 0`) answered directly as `1` and `0`.
 *
 * @param passes - Number of passing trials
 * @param k - Total number of trials
 * @returns Probability of all k samples passing
 *
 * @public
 */
export const calculatePassExpK = (passes: number, k: number): number => {
  switch (passes) {
    case k:
      // Every trial passed
      return 1
    case 0:
      // No trial passed
      return 0
    default:
      return (passes / k) ** k
  }
}
72
+
73
+ // ============================================================================
74
+ // Types
75
+ // ============================================================================
76
+
77
/**
 * Options accepted by the trials runner.
 */
export type TrialsConfig = {
  /** Location of the prompts.jsonl file to load */
  promptsPath: string
  /** Command (and its arguments) used to launch the ACP agent */
  agentCommand: string[]
  /** How many times each prompt is run */
  k: number
  /** Destination file for results; results go to stdout when omitted */
  outputPath?: string
  /** Directory the agent runs in */
  cwd?: string
  /** Per-prompt timeout, in milliseconds */
  timeout?: number
  /** When true, progress messages are written to stderr */
  progress?: boolean
  /** When true, results are added to the output file instead of replacing it */
  append?: boolean
  /** Raw MCP server configurations */
  mcpServers?: unknown[]
  /** Grader applied to each trial, if any */
  grader?: Grader
}
100
+
101
+ // ============================================================================
102
+ // Helpers
103
+ // ============================================================================
104
+
105
/** Return `path` unchanged when it starts with '/', otherwise anchor it at process.cwd() */
const resolvePath = (path: string): string => {
  const isRooted = path.startsWith('/')
  return isRooted ? path : `${process.cwd()}/${path}`
}
110
+
111
/**
 * Emit one output line.
 *
 * With no `outputPath` the line is printed to stdout. Otherwise the line plus a
 * trailing newline is appended to the file when `append` is truthy, or written
 * with `Bun.write` when it is not.
 */
const writeOutput = async (line: string, outputPath?: string, append?: boolean): Promise<void> => {
  if (!outputPath) {
    // biome-ignore lint/suspicious/noConsole: CLI stdout output
    console.log(line)
    return
  }

  const record = `${line}\n`
  if (append) {
    await appendFile(outputPath, record)
    return
  }
  await Bun.write(outputPath, record)
}
124
+
125
/** Write `message` to stderr, but only when `showProgress` is true */
const logProgress = (message: string, showProgress: boolean): void => {
  if (!showProgress) return
  console.error(message)
}
131
+
132
+ // ============================================================================
133
+ // Trials Implementation
134
+ // ============================================================================
135
+
136
/**
 * Execute trials with configuration object.
 *
 * @remarks
 * Connects one ACP client, then runs every prompt `k` times, creating a fresh
 * session for each trial. A trial whose prompt call or grader throws is
 * recorded as a failed entry (`pass: false`, reasoning prefixed with `Error:`)
 * instead of aborting the run; errors from connecting or from creating a
 * session are not caught here and propagate to the caller. The client is
 * always disconnected before returning or rethrowing.
 *
 * Each prompt's result is emitted as one JSON line as soon as its trials
 * finish. When `outputPath` is set and `append` is false the file is truncated
 * once up front; every result is then appended, so `append: true` keeps
 * whatever the file already contained.
 *
 * @param config - Trials configuration
 * @returns Array of trial results, one per prompt
 */
export const runTrials = async (config: TrialsConfig): Promise<TrialResult[]> => {
  const {
    promptsPath,
    agentCommand,
    k,
    outputPath,
    cwd,
    timeout = DEFAULT_HARNESS_TIMEOUT,
    progress = false,
    append = false,
    mcpServers = [],
    grader,
  } = config

  // Parse MCP server configurations
  const parsedMcpServers = mcpServers.map((s) => McpServerSchema.parse(s))

  // Load prompts
  const prompts = await loadPrompts(promptsPath)

  // Resolve output path
  const resolvedOutputPath = outputPath ? resolvePath(outputPath) : undefined

  // Log progress info
  logProgress(`Loaded ${prompts.length} prompts from ${promptsPath}`, progress)
  logProgress(`Running ${k} trials per prompt`, progress)
  logProgress(`Command: ${agentCommand.join(' ')}`, progress)
  if (grader) {
    logProgress('Grader: enabled (will compute pass@k metrics)', progress)
  }

  // Create ACP client
  const client = createACPClient({
    command: agentCommand,
    cwd,
    timeout,
  })

  // Start from an empty file unless the caller asked to keep existing content
  if (resolvedOutputPath && !append) {
    await Bun.write(resolvedOutputPath, '')
  }

  // Session params
  const sessionParams = {
    cwd: cwd ?? process.cwd(),
    mcpServers: parsedMcpServers,
  }

  const results: TrialResult[] = []

  try {
    logProgress('Connecting to agent...', progress)
    await client.connect()
    logProgress('Connected!', progress)

    // Run evaluations
    for (let i = 0; i < prompts.length; i++) {
      const promptCase = prompts[i]
      if (!promptCase) continue

      logProgress(`[${i + 1}/${prompts.length}] ${promptCase.id}: Running ${k} trials...`, progress)

      const trialEntries: TrialEntry[] = []

      for (let trialNum = 1; trialNum <= k; trialNum++) {
        // Create fresh session for each trial
        const session = await client.createSession(sessionParams)
        const startTime = Date.now()

        try {
          const prompt = createPrompt(promptCase.input)
          const { updates } = await client.promptSync(session.id, prompt)

          const endTime = Date.now()
          const trajectory = extractTrajectory(updates, startTime)
          const output = extractOutput(trajectory)

          const entry: TrialEntry = {
            trialNum,
            output,
            trajectory,
            duration: endTime - startTime,
          }

          // Apply grader if provided
          if (grader) {
            const graderResult = await grader({
              input: promptCase.input,
              output,
              expected: promptCase.expected,
              trajectory,
            })
            entry.pass = graderResult.pass
            entry.score = graderResult.score
            entry.reasoning = graderResult.reasoning
          }

          trialEntries.push(entry)
          logProgress(
            ` Trial ${trialNum}/${k}: ${entry.pass !== undefined ? (entry.pass ? '✓' : '✗') : '?'}`,
            progress,
          )
        } catch (error) {
          // A failing trial is recorded, not fatal: keep going with the remaining trials
          const endTime = Date.now()
          const message = error instanceof Error ? error.message : String(error)

          trialEntries.push({
            trialNum,
            output: '',
            trajectory: [],
            duration: endTime - startTime,
            pass: false,
            reasoning: `Error: ${message}`,
          })
          logProgress(` Trial ${trialNum}/${k}: ! (error)`, progress)
        }
      }

      // Build result
      const result: TrialResult = {
        id: promptCase.id,
        input: promptCase.input,
        ...(promptCase.expected && { expected: promptCase.expected }),
        k,
        trials: trialEntries,
      }

      // Calculate metrics if grader was used
      if (grader) {
        const passes = trialEntries.filter((t) => t.pass).length
        result.passRate = passes / k
        result.passAtK = calculatePassAtK(passes, k)
        result.passExpK = calculatePassExpK(passes, k)
      }

      results.push(result)

      // Write result immediately. Always append: the file was already truncated
      // above when not appending, and overwriting here would discard the
      // existing content that `append: true` is meant to preserve.
      const formatted = JSON.stringify(result)
      await writeOutput(formatted, resolvedOutputPath, true)

      if (grader) {
        logProgress(
          ` → passRate=${(result.passRate ?? 0).toFixed(2)}, pass@${k}=${(result.passAtK ?? 0).toFixed(2)}`,
          progress,
        )
      }
    }
  } finally {
    logProgress('Disconnecting...', progress)
    await client.disconnect()
  }

  logProgress('Done!', progress)
  return results
}
301
+
302
+ // ============================================================================
303
+ // CLI Entry Point
304
+ // ============================================================================
305
+
306
/**
 * Trials command CLI handler.
 *
 * @remarks
 * Parses the command line, prints help when `--help` is given, and otherwise
 * validates the arguments before handing them to `runTrials`. Any invalid
 * argument (missing prompts path, missing agent command, non-numeric `-k` or
 * `--timeout`, malformed `--mcp-server` JSON, grader that fails to load) is
 * reported on stderr as `Error: …` and the process exits with status 1.
 *
 * @param args - Command line arguments (after 'trials')
 */
export const trials = async (args: string[]): Promise<void> => {
  const { values, positionals } = parseArgs({
    args,
    options: {
      output: { type: 'string', short: 'o' },
      k: { type: 'string', short: 'k', default: String(DEFAULT_TRIAL_COUNT) },
      cwd: { type: 'string', short: 'c' },
      timeout: { type: 'string', short: 't', default: String(DEFAULT_HARNESS_TIMEOUT) },
      progress: { type: 'boolean', default: false },
      append: { type: 'boolean', default: false },
      'mcp-server': { type: 'string', multiple: true },
      grader: { type: 'string', short: 'g' },
      help: { type: 'boolean', short: 'h' },
    },
    allowPositionals: true,
  })

  if (values.help) {
    // biome-ignore lint/suspicious/noConsole: CLI help output
    console.log(`
Usage: acp-harness trials <prompts.jsonl> <command> [args...] [options]

Arguments:
prompts.jsonl Input file with evaluation prompts
command [args] ACP agent command to execute

Options:
-o, --output Output file (default: stdout)
-k Number of trials per prompt (default: ${DEFAULT_TRIAL_COUNT})
-c, --cwd Working directory for agent
-t, --timeout Request timeout in ms (default: ${DEFAULT_HARNESS_TIMEOUT})
--progress Show progress to stderr
--append Append to output file
--mcp-server MCP server config JSON (repeatable)
-g, --grader Path to grader (.ts/.js module or executable script)
-h, --help Show this help message

Output Format:
Without grader: Raw trials with trajectories
With grader: Trials plus pass@k metrics (passRate, passAtK, passExpK)

Graders:
TS/JS modules must export a 'grade' function.
Executable scripts (Python, etc.) use stdin/stdout JSON protocol.

Examples:
# Capture only
acp-harness trials prompts.jsonl bunx claude-code-acp -k 5 -o trials.jsonl

# With TypeScript grader
acp-harness trials prompts.jsonl bunx claude-code-acp -k 5 --grader ./grader.ts -o trials.jsonl

# With Python grader
acp-harness trials prompts.jsonl bunx claude-code-acp -k 5 --grader ./grader.py -o trials.jsonl
`)
    return
  }

  const promptsPath = positionals[0]
  if (!promptsPath) {
    console.error('Error: prompts.jsonl path is required')
    process.exit(1)
  }

  const agentCommand = positionals.slice(1)
  if (agentCommand.length === 0) {
    console.error('Error: ACP agent command is required')
    process.exit(1)
  }

  // Reject non-numeric values up front: a NaN trial count would otherwise run
  // zero trials and report NaN metrics without any error.
  const k = Number.parseInt(values.k ?? String(DEFAULT_TRIAL_COUNT), 10)
  if (!Number.isInteger(k) || k < 1) {
    console.error(`Error: -k must be a positive integer (received "${values.k}")`)
    process.exit(1)
  }

  const timeout = Number.parseInt(values.timeout ?? String(DEFAULT_HARNESS_TIMEOUT), 10)
  if (!Number.isInteger(timeout) || timeout < 0) {
    console.error(`Error: --timeout must be a non-negative integer in ms (received "${values.timeout}")`)
    process.exit(1)
  }

  // Load grader if specified
  let grader: Grader | undefined
  if (values.grader) {
    try {
      grader = await loadGrader(values.grader)
    } catch (error) {
      console.error(`Error: ${error instanceof Error ? error.message : error}`)
      process.exit(1)
    }
  }

  // Parse MCP server configurations, reporting malformed JSON like every other
  // argument error instead of letting a raw SyntaxError escape.
  const mcpServers: unknown[] = []
  for (const json of values['mcp-server'] ?? []) {
    try {
      mcpServers.push(JSON.parse(json))
    } catch (error) {
      console.error(`Error: invalid --mcp-server JSON: ${error instanceof Error ? error.message : error}`)
      process.exit(1)
    }
  }

  await runTrials({
    promptsPath,
    agentCommand,
    k,
    outputPath: values.output,
    cwd: values.cwd,
    timeout,
    progress: values.progress ?? false,
    append: values.append ?? false,
    mcpServers,
    grader,
  })
}
package/src/validate-refs.ts ADDED
@@ -0,0 +1,188 @@
1
+ /**
2
+ * Validate-refs command - check reference solutions against grader.
3
+ *
4
+ * @remarks
5
+ * Validates that reference solutions in prompts.jsonl pass the grader.
6
+ * Helps identify prompts with broken or incorrect reference solutions.
7
+ *
8
+ * @packageDocumentation
9
+ */
10
+
11
+ import { parseArgs } from 'node:util'
12
+ import { loadPrompts } from './capture.ts'
13
+ import { loadGrader } from './grader-loader.ts'
14
+ import type { Grader, ValidationResult } from './schemas.ts'
15
+
16
+ // ============================================================================
17
+ // Types
18
+ // ============================================================================
19
+
20
/**
 * Options accepted by the validate-refs runner.
 */
export type ValidateRefsConfig = {
  /** Location of the prompts.jsonl file to load */
  promptsPath: string
  /** Destination file for results; results go to stdout when omitted */
  outputPath?: string
  /** Grader each reference solution is checked against */
  grader: Grader
}
29
+
30
+ // ============================================================================
31
+ // Helpers
32
+ // ============================================================================
33
+
34
/** Leave paths beginning with '/' untouched; prefix anything else with process.cwd() */
const resolvePath = (path: string): string => (path.startsWith('/') ? path : `${process.cwd()}/${path}`)
39
+
40
+ // ============================================================================
41
+ // Validate-Refs Implementation
42
+ // ============================================================================
43
+
44
/**
 * Run the grader over every reference solution in a prompts file.
 *
 * @remarks
 * Prompts without a `reference` field are ignored. Each remaining prompt is
 * graded with its reference as the output and an empty trajectory. Results are
 * written as newline-joined JSON to `outputPath` (or stdout when it is not
 * set); per-prompt status and a pass/fail summary go to stderr.
 *
 * @param config - Validate-refs configuration
 * @returns Array of validation results (empty when no prompt has a reference)
 */
export const runValidateRefs = async (config: ValidateRefsConfig): Promise<ValidationResult[]> => {
  const { promptsPath, outputPath, grader } = config

  // Keep only the prompts that carry a reference solution
  const allPrompts = await loadPrompts(promptsPath)
  const candidates = allPrompts.filter((candidate) => candidate.reference !== undefined)

  if (candidates.length === 0) {
    console.error('No prompts with reference solutions found')
    return []
  }

  console.error(`Validating ${candidates.length} reference solutions...`)

  const results: ValidationResult[] = []

  for (const prompt of candidates) {
    const referenceText = prompt.reference as string

    // The reference stands in for the agent output; there is no trajectory to grade
    const graderResult = await grader({
      input: prompt.input,
      output: referenceText,
      expected: prompt.expected,
      trajectory: [],
    })

    results.push({
      id: prompt.id,
      reference: referenceText,
      passes: graderResult.pass,
      graderResult,
    })

    console.error(` ${graderResult.pass ? '✓' : '✗'} ${prompt.id}`)
  }

  // One JSON document per line
  const serialized: string[] = []
  for (const entry of results) {
    serialized.push(JSON.stringify(entry))
  }
  const output = serialized.join('\n')

  if (!outputPath) {
    // biome-ignore lint/suspicious/noConsole: CLI stdout output
    console.log(output)
  } else {
    await Bun.write(resolvePath(outputPath), output)
  }

  // Summary
  const failing = results.filter((entry) => !entry.passes)
  const failed = failing.length
  const passed = results.length - failed
  console.error(`\nResults: ${passed} passed, ${failed} failed`)

  if (failed > 0) {
    console.error('\nFailing references:')
    for (const entry of failing) {
      console.error(` - ${entry.id}: ${entry.graderResult.reasoning ?? 'No reasoning'}`)
    }
  }

  return results
}
112
+
113
+ // ============================================================================
114
+ // CLI Entry Point
115
+ // ============================================================================
116
+
117
/**
 * CLI entry point for the validate-refs command.
 *
 * @remarks
 * Prints usage when `--help` is given. Otherwise requires a prompts path and a
 * `--grader`, loads the grader, and delegates to `runValidateRefs`. Missing
 * arguments and grader load failures are reported on stderr and exit with
 * status 1.
 *
 * @param args - Command line arguments (after 'validate-refs')
 */
export const validateRefs = async (args: string[]): Promise<void> => {
  const { values: flags, positionals } = parseArgs({
    args,
    allowPositionals: true,
    options: {
      output: { type: 'string', short: 'o' },
      grader: { type: 'string', short: 'g' },
      help: { type: 'boolean', short: 'h' },
    },
  })

  if (flags.help) {
    // biome-ignore lint/suspicious/noConsole: CLI help output
    console.log(`
Usage: acp-harness validate-refs <prompts.jsonl> --grader <grader.ts> [options]

Arguments:
prompts.jsonl Input file with prompts (must have 'reference' field)

Options:
-o, --output Output file (default: stdout)
-g, --grader Path to grader (.ts/.js module or executable script, required)
-h, --help Show this help message

Output:
JSONL with validation results for each reference solution.

Prompt Format:
{
"id": "test-001",
"input": "What is 2+2?",
"expected": "4",
"reference": "The answer is 4."
}

Examples:
acp-harness validate-refs prompts.jsonl --grader ./grader.ts -o validation.jsonl
`)
    return
  }

  // First positional is the prompts file
  const promptsPath = positionals[0]
  if (!promptsPath) {
    console.error('Error: prompts.jsonl path is required')
    process.exit(1)
  }

  // Unlike other commands, a grader is mandatory here
  if (!flags.grader) {
    console.error('Error: --grader is required for validate-refs')
    process.exit(1)
  }

  let grader: Grader
  try {
    grader = await loadGrader(flags.grader)
  } catch (error) {
    console.error(`Error: ${error instanceof Error ? error.message : error}`)
    process.exit(1)
  }

  await runValidateRefs({
    promptsPath,
    outputPath: flags.output,
    grader,
  })
}
@@ -1,43 +0,0 @@
1
- # Accuracy and Confidence Standards
2
-
3
- **Confidence Threshold**: 95% - Report uncertainty rather than guess
4
-
5
- ## Verification Protocol
6
-
7
- 1. **Verification First**: Before stating any specific implementation detail (function signature, file path, API schema), use the `typescript-lsp` skill to verify types and signatures, then read the relevant file in real-time to verify accuracy.
8
-
9
- 2. **Handling Uncertainty**: If you cannot verify information or find contradictions between instructions and live code, you must NOT provide speculative answers.
10
- - **Action**: Clearly state you cannot answer with high confidence and explain the discrepancy.
11
- - Example: "I cannot confirm [detail] because my instructions indicate [X], but the current file shows [Y]. My knowledge may be outdated."
12
-
13
- 3. **Dynamic Exploration**:
14
- - **PREFER typescript-lsp over Grep/Glob** for `.ts`, `.tsx`, `.js`, `.jsx` files
15
- - Use `lsp-find` to search for symbols, types, and patterns across the workspace
16
- - Use `lsp-references` to find all usages of a symbol
17
- - Use `lsp-hover` to verify type signatures
18
- - Only fall back to Grep/Glob for non-TypeScript files or when LSP is unavailable
19
- - Use Read for other file types. Always prioritize live code over instructions.
20
-
21
- 4. **Tool-Assisted Verification**: Use these skills to enhance verification accuracy:
22
- - **`typescript-lsp` skill**: Use `lsp-hover` to verify type signatures, `lsp-references` to find all usages before modifying, `lsp-symbols` for file structure, and `lsp-find` to search for patterns across the workspace.
23
- - **WebFetch**: Retrieve current documentation from authoritative sources (MDN Web Docs, WHATWG specs) when using web platform APIs.
24
- - These skills complement (but do not replace) reading live code - always verify outputs against actual implementation.
25
-
26
- ## Certainty Requirements
27
-
28
- You may only propose a specific change if you are **at least 95% certain** it is correct, based on direct comparison with current code.
29
-
30
- **When uncertain:**
31
- - Report the discrepancy clearly
32
- - State why you cannot confidently recommend a fix
33
- - Present the issue to the user for manual resolution
34
- - DO NOT invent solutions or infer changes
35
-
36
- ## For Agent-Specific Applications
37
-
38
- Agents should apply these standards to their specific domain:
39
-
40
- - **Documentation agents**: Only update TSDoc if parameter names/types match current code
41
- - **Architecture agents**: Verify referenced patterns exist in current codebase
42
- - **Code review agents**: Read files before commenting on implementation details
43
- - **Pattern agents**: Confirm examples reflect actual usage in codebase