@plaited/acp-harness 0.2.6 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +120 -16
  3. package/bin/cli.ts +105 -636
  4. package/bin/tests/cli.spec.ts +218 -51
  5. package/package.json +20 -4
  6. package/src/acp-client.ts +5 -4
  7. package/src/acp-transport.ts +14 -7
  8. package/src/adapter-check.ts +542 -0
  9. package/src/adapter-scaffold.ts +934 -0
  10. package/src/balance.ts +232 -0
  11. package/src/calibrate.ts +300 -0
  12. package/src/capture.ts +457 -0
  13. package/src/constants.ts +94 -0
  14. package/src/grader-loader.ts +174 -0
  15. package/src/harness.ts +35 -0
  16. package/src/schemas-cli.ts +239 -0
  17. package/src/schemas.ts +567 -0
  18. package/src/summarize.ts +245 -0
  19. package/src/tests/adapter-check.spec.ts +70 -0
  20. package/src/tests/adapter-scaffold.spec.ts +112 -0
  21. package/src/tests/fixtures/grader-bad-module.ts +5 -0
  22. package/src/tests/fixtures/grader-exec-fail.py +9 -0
  23. package/src/tests/fixtures/grader-exec-invalid.py +6 -0
  24. package/src/tests/fixtures/grader-exec.py +29 -0
  25. package/src/tests/fixtures/grader-module.ts +14 -0
  26. package/src/tests/grader-loader.spec.ts +153 -0
  27. package/src/trials.ts +395 -0
  28. package/src/validate-refs.ts +188 -0
  29. package/.claude/rules/accuracy.md +0 -43
  30. package/.claude/rules/bun-apis.md +0 -80
  31. package/.claude/rules/code-review.md +0 -254
  32. package/.claude/rules/git-workflow.md +0 -37
  33. package/.claude/rules/github.md +0 -154
  34. package/.claude/rules/testing.md +0 -172
  35. package/.claude/skills/acp-harness/SKILL.md +0 -310
  36. package/.claude/skills/acp-harness/assets/Dockerfile.acp +0 -25
  37. package/.claude/skills/acp-harness/assets/docker-compose.acp.yml +0 -19
  38. package/.claude/skills/acp-harness/references/downstream.md +0 -288
  39. package/.claude/skills/acp-harness/references/output-formats.md +0 -221
  40. package/.claude-plugin/marketplace.json +0 -15
  41. package/.claude-plugin/plugin.json +0 -16
  42. package/.github/CODEOWNERS +0 -6
  43. package/.github/workflows/ci.yml +0 -63
  44. package/.github/workflows/publish.yml +0 -146
  45. package/.mcp.json +0 -20
  46. package/CLAUDE.md +0 -92
  47. package/Dockerfile.test +0 -23
  48. package/biome.json +0 -96
  49. package/bun.lock +0 -513
  50. package/docker-compose.test.yml +0 -21
  51. package/scripts/bun-test-wrapper.sh +0 -46
  52. package/src/acp.constants.ts +0 -56
  53. package/src/acp.schemas.ts +0 -161
  54. package/src/acp.types.ts +0 -28
  55. package/src/tests/fixtures/.claude/settings.local.json +0 -8
  56. package/src/tests/fixtures/.claude/skills/greeting/SKILL.md +0 -17
  57. package/tsconfig.json +0 -32
package/src/trials.ts ADDED
@@ -0,0 +1,395 @@
1
+ /**
2
+ * Multi-run trials command for pass@k/pass^k analysis.
3
+ *
4
+ * @remarks
5
+ * Runs each prompt k times to measure non-determinism.
6
+ * Without a grader, captures raw trials. With a grader, computes:
7
+ * - passRate: Simple pass rate (passes / k)
8
+ * - passAtK: Probability of at least one pass in k samples
9
+ * - passExpK: Probability of all k samples passing
10
+ *
11
+ * @packageDocumentation
12
+ */
13
+
14
+ import { appendFile } from 'node:fs/promises'
15
+ import { parseArgs } from 'node:util'
16
+ import { createACPClient } from './acp-client.ts'
17
+ import { createPrompt } from './acp-helpers.ts'
18
+ import { extractOutput, extractTrajectory, loadPrompts } from './capture.ts'
19
+ import { DEFAULT_HARNESS_TIMEOUT, DEFAULT_TRIAL_COUNT } from './constants.ts'
20
+ import { loadGrader } from './grader-loader.ts'
21
+ import type { Grader, TrialEntry, TrialResult } from './schemas.ts'
22
+ import { McpServerSchema } from './schemas.ts'
23
+
24
+ // ============================================================================
25
+ // Pass@k/Pass^k Calculation
26
+ // ============================================================================
27
+
28
/**
 * Estimate pass@k: the probability that at least one of k samples passes.
 *
 * @remarks
 * The per-sample pass rate is estimated as `passes / k` and plugged into
 * `1 - (1 - passRate)^k`.
 *
 * This is a plug-in estimate, not the unbiased estimator
 * `1 - C(n-c, k) / C(n, k)` (n = total samples, c = correct samples). With
 * n = k, as is the case here, that estimator evaluates to 1 whenever at least
 * one sample passes.
 *
 * @param passes - Number of trials that passed
 * @param k - Number of trials that were run
 * @returns A value in [0, 1]; 0 when `k` is not a positive number
 */
const calculatePassAtK = (passes: number, k: number): number => {
  // Zero samples (or an unparsable k) cannot contain a pass; also avoids 0/0.
  if (!(k > 0)) return 0
  if (passes >= k) return 1
  if (passes === 0) return 0

  const passRate = passes / k
  return 1 - (1 - passRate) ** k
}
46
+
47
/**
 * Calculate pass^k: the probability that all k samples pass.
 *
 * @remarks
 * Computed as `(passes / k) ^ k`. The two exact outcomes (every trial passed,
 * no trial passed) are returned directly without doing the division.
 *
 * @param passes - Number of trials that passed
 * @param k - Number of trials that were run
 * @returns The estimated probability that k out of k samples pass
 */
const calculatePassExpK = (passes: number, k: number): number => {
  switch (passes) {
    case k:
      return 1
    case 0:
      return 0
    default:
      return (passes / k) ** k
  }
}
60
+
61
+ // ============================================================================
62
+ // Types
63
+ // ============================================================================
64
+
65
/**
 * Options accepted by {@link runTrials}.
 *
 * @remarks
 * Only `promptsPath`, `agentCommand` and `k` are required; every other field
 * falls back to a default inside `runTrials`.
 */
export type TrialsConfig = {
  /** Location of the prompts.jsonl file to load */
  promptsPath: string
  /** ACP agent command, split into executable and arguments */
  agentCommand: string[]
  /** How many trials to run for each prompt */
  k: number
  /** File to write results to; results go to stdout when omitted */
  outputPath?: string
  /** Working directory handed to the agent */
  cwd?: string
  /** Per-prompt timeout, in milliseconds */
  timeout?: number
  /** When true, progress messages are written to stderr */
  progress?: boolean
  /** When true, results are appended to the output file */
  append?: boolean
  /** Raw MCP server configurations (validated before use) */
  mcpServers?: unknown[]
  /** Grader applied to each trial; enables the pass@k metrics */
  grader?: Grader
}
88
+
89
+ // ============================================================================
90
+ // Helpers
91
+ // ============================================================================
92
+
93
/**
 * Turn a possibly-relative path into one anchored at the current working directory.
 *
 * @param path - Path to resolve; returned untouched when it starts with `/`
 * @returns `path` itself, or `<process.cwd()>/<path>`
 */
const resolvePath = (path: string): string => {
  const alreadyAbsolute = path.startsWith('/')
  return alreadyAbsolute ? path : [process.cwd(), path].join('/')
}
98
+
99
/**
 * Emit one output line, either to a file or to stdout.
 *
 * @param line - Text to emit; a trailing newline is added for file output
 * @param outputPath - Destination file; when omitted the line goes to stdout
 * @param append - Append to the file when true, otherwise replace its contents
 */
const writeOutput = async (line: string, outputPath?: string, append?: boolean): Promise<void> => {
  if (!outputPath) {
    // biome-ignore lint/suspicious/noConsole: CLI stdout output
    console.log(line)
    return
  }

  const record = `${line}\n`
  if (append) {
    await appendFile(outputPath, record)
    return
  }
  await Bun.write(outputPath, record)
}
112
+
113
/**
 * Write a progress message to stderr, but only when progress output is enabled.
 *
 * @param message - Text to report
 * @param showProgress - When false the call is a no-op
 */
const logProgress = (message: string, showProgress: boolean): void => {
  if (!showProgress) return
  console.error(message)
}
119
+
120
+ // ============================================================================
121
+ // Trials Implementation
122
+ // ============================================================================
123
+
124
/**
 * Execute trials with configuration object.
 *
 * @remarks
 * Connects a single ACP client, then runs every prompt `k` times, creating a
 * fresh session for each trial. A trial whose prompt call or grader throws is
 * recorded as a failed entry (`pass: false`, reasoning `Error: …`) instead of
 * aborting the run. One JSON line per prompt is written as soon as that
 * prompt's trials finish. The client is always disconnected, even on failure.
 *
 * When a grader is supplied, each result also carries `passRate`, `passAtK`
 * and `passExpK`.
 *
 * @param config - Trials configuration
 * @returns Array of trial results, one per prompt
 */
export const runTrials = async (config: TrialsConfig): Promise<TrialResult[]> => {
  const {
    promptsPath,
    agentCommand,
    k,
    outputPath,
    cwd,
    timeout = DEFAULT_HARNESS_TIMEOUT,
    progress = false,
    append = false,
    mcpServers = [],
    grader,
  } = config

  // Validate the raw MCP server configurations before anything is started
  const parsedMcpServers = mcpServers.map((s) => McpServerSchema.parse(s))

  // Load prompts
  const prompts = await loadPrompts(promptsPath)

  // Resolve output path
  const resolvedOutputPath = outputPath ? resolvePath(outputPath) : undefined

  // Log progress info
  logProgress(`Loaded ${prompts.length} prompts from ${promptsPath}`, progress)
  logProgress(`Running ${k} trials per prompt`, progress)
  logProgress(`Command: ${agentCommand.join(' ')}`, progress)
  if (grader) {
    logProgress('Grader: enabled (will compute pass@k metrics)', progress)
  }

  // Create ACP client
  const client = createACPClient({
    command: agentCommand,
    cwd,
    timeout,
  })

  // Clear output file if not appending
  if (resolvedOutputPath && !append) {
    await Bun.write(resolvedOutputPath, '')
  }

  // Session params
  const sessionParams = {
    cwd: cwd ?? process.cwd(),
    mcpServers: parsedMcpServers,
  }

  const results: TrialResult[] = []

  try {
    logProgress('Connecting to agent...', progress)
    await client.connect()
    logProgress('Connected!', progress)

    // Run evaluations
    for (let i = 0; i < prompts.length; i++) {
      const promptCase = prompts[i]
      if (!promptCase) continue

      logProgress(`[${i + 1}/${prompts.length}] ${promptCase.id}: Running ${k} trials...`, progress)

      const trialEntries: TrialEntry[] = []

      for (let trialNum = 1; trialNum <= k; trialNum++) {
        // Create fresh session for each trial
        const session = await client.createSession(sessionParams)
        const startTime = Date.now()

        try {
          const prompt = createPrompt(promptCase.input)
          const { updates } = await client.promptSync(session.id, prompt)

          const endTime = Date.now()
          const trajectory = extractTrajectory(updates, startTime)
          const output = extractOutput(trajectory)

          const entry: TrialEntry = {
            trialNum,
            output,
            trajectory,
            duration: endTime - startTime,
          }

          // Apply grader if provided
          if (grader) {
            const graderResult = await grader({
              input: promptCase.input,
              output,
              expected: promptCase.expected,
              trajectory,
            })
            entry.pass = graderResult.pass
            entry.score = graderResult.score
            entry.reasoning = graderResult.reasoning
          }

          trialEntries.push(entry)
          logProgress(
            ` Trial ${trialNum}/${k}: ${entry.pass !== undefined ? (entry.pass ? '✓' : '✗') : '?'}`,
            progress,
          )
        } catch (error) {
          // Record the failure as a failed trial so the remaining trials still run
          const endTime = Date.now()
          const message = error instanceof Error ? error.message : String(error)

          trialEntries.push({
            trialNum,
            output: '',
            trajectory: [],
            duration: endTime - startTime,
            pass: false,
            reasoning: `Error: ${message}`,
          })
          logProgress(` Trial ${trialNum}/${k}: ! (error)`, progress)
        }
      }

      // Build result
      const result: TrialResult = {
        id: promptCase.id,
        input: promptCase.input,
        ...(promptCase.expected && { expected: promptCase.expected }),
        k,
        trials: trialEntries,
      }

      // Calculate metrics if grader was used
      if (grader) {
        const passes = trialEntries.filter((t) => t.pass).length
        result.passRate = passes / k
        result.passAtK = calculatePassAtK(passes, k)
        result.passExpK = calculatePassExpK(passes, k)
      }

      results.push(result)

      // Write result immediately. The file was already emptied above when not
      // appending, so every line is appended; overwriting on the first write
      // would destroy existing content when `append` is requested.
      const formatted = JSON.stringify(result)
      await writeOutput(formatted, resolvedOutputPath, true)

      if (grader) {
        logProgress(
          ` → passRate=${(result.passRate ?? 0).toFixed(2)}, pass@${k}=${(result.passAtK ?? 0).toFixed(2)}`,
          progress,
        )
      }
    }
  } finally {
    logProgress('Disconnecting...', progress)
    await client.disconnect()
  }

  logProgress('Done!', progress)
  return results
}
289
+
290
+ // ============================================================================
291
+ // CLI Entry Point
292
+ // ============================================================================
293
+
294
/**
 * Trials command CLI handler.
 *
 * @remarks
 * Parses the arguments, prints usage for `--help`, and otherwise validates the
 * inputs before delegating to `runTrials`. Any invalid input (missing prompts
 * path or agent command, grader that fails to load, non-numeric `-k` or
 * `--timeout`, malformed `--mcp-server` JSON) is reported on stderr and the
 * process exits with status 1.
 *
 * @param args - Command line arguments (after 'trials')
 */
export const trials = async (args: string[]): Promise<void> => {
  const { values, positionals } = parseArgs({
    args,
    options: {
      output: { type: 'string', short: 'o' },
      k: { type: 'string', short: 'k', default: String(DEFAULT_TRIAL_COUNT) },
      cwd: { type: 'string', short: 'c' },
      timeout: { type: 'string', short: 't', default: String(DEFAULT_HARNESS_TIMEOUT) },
      progress: { type: 'boolean', default: false },
      append: { type: 'boolean', default: false },
      'mcp-server': { type: 'string', multiple: true },
      grader: { type: 'string', short: 'g' },
      help: { type: 'boolean', short: 'h' },
    },
    allowPositionals: true,
  })

  if (values.help) {
    // biome-ignore lint/suspicious/noConsole: CLI help output
    console.log(`
Usage: acp-harness trials <prompts.jsonl> <command> [args...] [options]

Arguments:
prompts.jsonl Input file with evaluation prompts
command [args] ACP agent command to execute

Options:
-o, --output Output file (default: stdout)
-k Number of trials per prompt (default: ${DEFAULT_TRIAL_COUNT})
-c, --cwd Working directory for agent
-t, --timeout Request timeout in ms (default: ${DEFAULT_HARNESS_TIMEOUT})
--progress Show progress to stderr
--append Append to output file
--mcp-server MCP server config JSON (repeatable)
-g, --grader Path to grader (.ts/.js module or executable script)
-h, --help Show this help message

Output Format:
Without grader: Raw trials with trajectories
With grader: Trials plus pass@k metrics (passRate, passAtK, passExpK)

Graders:
TS/JS modules must export a 'grade' function.
Executable scripts (Python, etc.) use stdin/stdout JSON protocol.

Examples:
# Capture only
acp-harness trials prompts.jsonl bunx claude-code-acp -k 5 -o trials.jsonl

# With TypeScript grader
acp-harness trials prompts.jsonl bunx claude-code-acp -k 5 --grader ./grader.ts -o trials.jsonl

# With Python grader
acp-harness trials prompts.jsonl bunx claude-code-acp -k 5 --grader ./grader.py -o trials.jsonl
`)
    return
  }

  const promptsPath = positionals[0]
  if (!promptsPath) {
    console.error('Error: prompts.jsonl path is required')
    process.exit(1)
  }

  const agentCommand = positionals.slice(1)
  if (agentCommand.length === 0) {
    console.error('Error: ACP agent command is required')
    process.exit(1)
  }

  // A NaN, zero or negative k would silently run no trials and yield NaN metrics
  const k = Number.parseInt(values.k ?? String(DEFAULT_TRIAL_COUNT), 10)
  if (!Number.isInteger(k) || k < 1) {
    console.error('Error: -k must be a positive integer')
    process.exit(1)
  }

  const timeout = Number.parseInt(values.timeout ?? String(DEFAULT_HARNESS_TIMEOUT), 10)
  if (Number.isNaN(timeout) || timeout < 0) {
    console.error('Error: --timeout must be a non-negative number of milliseconds')
    process.exit(1)
  }

  // Load grader if specified
  let grader: Grader | undefined
  if (values.grader) {
    try {
      grader = await loadGrader(values.grader)
    } catch (error) {
      console.error(`Error: ${error instanceof Error ? error.message : error}`)
      process.exit(1)
    }
  }

  // Parse MCP server configurations; report malformed JSON like any other CLI error
  let mcpServers: unknown[]
  try {
    mcpServers = (values['mcp-server'] ?? []).map((json): unknown => JSON.parse(json))
  } catch (error) {
    console.error(`Error: invalid --mcp-server JSON: ${error instanceof Error ? error.message : error}`)
    process.exit(1)
  }

  await runTrials({
    promptsPath,
    agentCommand,
    k,
    outputPath: values.output,
    cwd: values.cwd,
    timeout,
    progress: values.progress ?? false,
    append: values.append ?? false,
    mcpServers,
    grader,
  })
}
package/src/validate-refs.ts ADDED
@@ -0,0 +1,188 @@
1
+ /**
2
+ * Validate-refs command - check reference solutions against grader.
3
+ *
4
+ * @remarks
5
+ * Validates that reference solutions in prompts.jsonl pass the grader.
6
+ * Helps identify prompts with broken or incorrect reference solutions.
7
+ *
8
+ * @packageDocumentation
9
+ */
10
+
11
+ import { parseArgs } from 'node:util'
12
+ import { loadPrompts } from './capture.ts'
13
+ import { loadGrader } from './grader-loader.ts'
14
+ import type { Grader, ValidationResult } from './schemas.ts'
15
+
16
+ // ============================================================================
17
+ // Types
18
+ // ============================================================================
19
+
20
/**
 * Options accepted by {@link runValidateRefs}.
 */
export type ValidateRefsConfig = {
  /** Location of the prompts.jsonl file to load */
  promptsPath: string
  /** File to write results to; results go to stdout when omitted */
  outputPath?: string
  /** Grader that each reference solution is checked against */
  grader: Grader
}
29
+
30
+ // ============================================================================
31
+ // Helpers
32
+ // ============================================================================
33
+
34
/**
 * Anchor a relative path at the current working directory.
 *
 * @param path - Path to resolve; paths starting with `/` are returned as-is
 * @returns `path` itself, or `<process.cwd()>/<path>`
 */
const resolvePath = (path: string): string => {
  if (!path.startsWith('/')) {
    return process.cwd() + '/' + path
  }
  return path
}
39
+
40
+ // ============================================================================
41
+ // Validate-Refs Implementation
42
+ // ============================================================================
43
+
44
/**
 * Execute validate-refs with configuration object.
 *
 * @remarks
 * Loads the prompts, keeps those that define a `reference`, and grades each
 * reference as if it were the agent's output (with an empty trajectory).
 * Results are emitted as JSONL to the output file or stdout; per-prompt
 * status, the totals and the list of failing references go to stderr. When no
 * prompt has a reference, nothing is written and an empty array is returned.
 *
 * @param config - Validate-refs configuration
 * @returns Array of validation results
 */
export const runValidateRefs = async (config: ValidateRefsConfig): Promise<ValidationResult[]> => {
  const { promptsPath, outputPath, grader } = config

  const allPrompts = await loadPrompts(promptsPath)
  const candidates = allPrompts.filter((p) => p.reference !== undefined)

  if (candidates.length === 0) {
    console.error('No prompts with reference solutions found')
    return []
  }

  console.error(`Validating ${candidates.length} reference solutions...`)

  // Graded one at a time so the stderr status lines follow prompt order
  const results: ValidationResult[] = []
  for (const { id, input, expected, reference } of candidates) {
    const referenceText = reference as string
    const graderResult = await grader({
      input,
      output: referenceText,
      expected,
      trajectory: [], // No trajectory for reference validation
    })

    results.push({ id, reference: referenceText, passes: graderResult.pass, graderResult })
    console.error(` ${graderResult.pass ? '✓' : '✗'} ${id}`)
  }

  // One JSON document per line
  const serialized = results.map((entry) => JSON.stringify(entry)).join('\n')
  if (outputPath) {
    await Bun.write(resolvePath(outputPath), serialized)
  } else {
    // biome-ignore lint/suspicious/noConsole: CLI stdout output
    console.log(serialized)
  }

  // Summary
  const failing = results.filter((entry) => !entry.passes)
  const passedCount = results.length - failing.length
  console.error(`\nResults: ${passedCount} passed, ${failing.length} failed`)

  if (failing.length > 0) {
    console.error('\nFailing references:')
    for (const { id, graderResult } of failing) {
      console.error(` - ${id}: ${graderResult.reasoning ?? 'No reasoning'}`)
    }
  }

  return results
}
112
+
113
+ // ============================================================================
114
+ // CLI Entry Point
115
+ // ============================================================================
116
+
117
/**
 * Validate-refs command CLI handler.
 *
 * @remarks
 * Prints usage for `--help`. Otherwise requires a prompts path and a
 * `--grader`, loads the grader and hands off to `runValidateRefs`. A missing
 * argument or a grader that fails to load is reported on stderr and the
 * process exits with status 1.
 *
 * @param args - Command line arguments (after 'validate-refs')
 */
export const validateRefs = async (args: string[]): Promise<void> => {
  const parsed = parseArgs({
    args,
    options: {
      output: { type: 'string', short: 'o' },
      grader: { type: 'string', short: 'g' },
      help: { type: 'boolean', short: 'h' },
    },
    allowPositionals: true,
  })
  const { values } = parsed
  const [promptsPath] = parsed.positionals

  if (values.help) {
    const usage = `
Usage: acp-harness validate-refs <prompts.jsonl> --grader <grader.ts> [options]

Arguments:
prompts.jsonl Input file with prompts (must have 'reference' field)

Options:
-o, --output Output file (default: stdout)
-g, --grader Path to grader (.ts/.js module or executable script, required)
-h, --help Show this help message

Output:
JSONL with validation results for each reference solution.

Prompt Format:
{
"id": "test-001",
"input": "What is 2+2?",
"expected": "4",
"reference": "The answer is 4."
}

Examples:
acp-harness validate-refs prompts.jsonl --grader ./grader.ts -o validation.jsonl
`
    // biome-ignore lint/suspicious/noConsole: CLI help output
    console.log(usage)
    return
  }

  if (!promptsPath) {
    console.error('Error: prompts.jsonl path is required')
    process.exit(1)
  }

  const graderPath = values.grader
  if (!graderPath) {
    console.error('Error: --grader is required for validate-refs')
    process.exit(1)
  }

  // Load grader; a load failure is a usage error, not a crash
  let grader: Grader
  try {
    grader = await loadGrader(graderPath)
  } catch (error) {
    const detail = error instanceof Error ? error.message : error
    console.error(`Error: ${detail}`)
    process.exit(1)
  }

  await runValidateRefs({ promptsPath, outputPath: values.output, grader })
}
package/.claude/rules/accuracy.md DELETED
@@ -1,43 +0,0 @@
1
- # Accuracy and Confidence Standards
2
-
3
- **Confidence Threshold**: 95% - Report uncertainty rather than guess
4
-
5
- ## Verification Protocol
6
-
7
- 1. **Verification First**: Before stating any specific implementation detail (function signature, file path, API schema), use the `typescript-lsp` skill to verify types and signatures, then read the relevant file in real-time to verify accuracy.
8
-
9
- 2. **Handling Uncertainty**: If you cannot verify information or find contradictions between instructions and live code, you must NOT provide speculative answers.
10
- - **Action**: Clearly state you cannot answer with high confidence and explain the discrepancy.
11
- - Example: "I cannot confirm [detail] because my instructions indicate [X], but the current file shows [Y]. My knowledge may be outdated."
12
-
13
- 3. **Dynamic Exploration**:
14
- - **PREFER typescript-lsp over Grep/Glob** for `.ts`, `.tsx`, `.js`, `.jsx` files
15
- - Use `lsp-find` to search for symbols, types, and patterns across the workspace
16
- - Use `lsp-references` to find all usages of a symbol
17
- - Use `lsp-hover` to verify type signatures
18
- - Only fall back to Grep/Glob for non-TypeScript files or when LSP is unavailable
19
- - Use Read for other file types. Always prioritize live code over instructions.
20
-
21
- 4. **Tool-Assisted Verification**: Use these skills to enhance verification accuracy:
22
- - **`typescript-lsp` skill**: Use `lsp-hover` to verify type signatures, `lsp-references` to find all usages before modifying, `lsp-symbols` for file structure, and `lsp-find` to search for patterns across the workspace.
23
- - **WebFetch**: Retrieve current documentation from authoritative sources (MDN Web Docs, WHATWG specs) when using web platform APIs.
24
- - These skills complement (but do not replace) reading live code - always verify outputs against actual implementation.
25
-
26
- ## Certainty Requirements
27
-
28
- You may only propose a specific change if you are **at least 95% certain** it is correct, based on direct comparison with current code.
29
-
30
- **When uncertain:**
31
- - Report the discrepancy clearly
32
- - State why you cannot confidently recommend a fix
33
- - Present the issue to the user for manual resolution
34
- - DO NOT invent solutions or infer changes
35
-
36
- ## For Agent-Specific Applications
37
-
38
- Agents should apply these standards to their specific domain:
39
-
40
- - **Documentation agents**: Only update TSDoc if parameter names/types match current code
41
- - **Architecture agents**: Verify referenced patterns exist in current codebase
42
- - **Code review agents**: Read files before commenting on implementation details
43
- - **Pattern agents**: Confirm examples reflect actual usage in codebase
package/.claude/rules/bun-apis.md DELETED
@@ -1,80 +0,0 @@
1
- # Bun Platform APIs
2
-
3
- **IMPORTANT**: Prefer Bun's native APIs over Node.js equivalents when running in the Bun environment.
4
-
5
- ## File System Operations
6
-
7
- - ✅ Use `Bun.file(path).exists()` instead of `fs.existsSync()`
8
- - ✅ Use `Bun.file(path)` API for reading/writing files
9
- - ✅ Use `Bun.write()` for efficient file writes
10
-
11
- ```typescript
12
- // ✅ Good: Bun APIs
13
- const exists = await Bun.file('config.json').exists()
14
- const content = await Bun.file('data.txt').text()
15
- await Bun.write('output.json', JSON.stringify(data))
16
-
17
- // ❌ Avoid: Node.js equivalents
18
- import { existsSync, readFileSync, writeFileSync } from 'node:fs'
19
- const exists = existsSync('config.json')
20
- ```
21
-
22
- ## Shell Commands
23
-
24
- - ✅ Use `Bun.$` template literal for shell commands
25
- - ❌ Avoid `child_process.spawn()` or `child_process.exec()`
26
-
27
- ```typescript
28
- // ✅ Good: Bun shell
29
- await Bun.$`npm install`
30
- const result = await Bun.$`git status`.text()
31
-
32
- // ❌ Avoid: Node.js child_process
33
- import { spawn } from 'node:child_process'
34
- spawn('npm', ['install'])
35
- ```
36
-
37
- ## Path Resolution
38
-
39
- - ✅ Use `Bun.resolveSync()` for module resolution
40
- - ✅ Use `import.meta.dir` for current directory
41
- - ⚠️ Keep `node:path` utilities for path manipulation (join, resolve, dirname)
42
-
43
- ```typescript
44
- // ✅ Good: Bun + node:path combo
45
- import { join } from 'node:path'
46
- const configPath = join(import.meta.dir, 'config.json')
47
- const resolved = Bun.resolveSync('./module', import.meta.dir)
48
- ```
49
-
50
- ## Package Management
51
-
52
- - ✅ Use `Bun.which(cmd)` to check for executables
53
- - ⚠️ No programmatic package manager API yet - use CLI commands via `Bun.$`
54
-
55
- ```typescript
56
- // ✅ Good: Check for executable
57
- const bunPath = Bun.which('bun')
58
- if (!bunPath) throw new Error('bun not found')
59
-
60
- // Install packages via shell
61
- await Bun.$`bun add zod`
62
- ```
63
-
64
- ## Environment Detection
65
-
66
- - ✅ Check `typeof Bun !== 'undefined'` for Bun runtime
67
- - ✅ Use `Bun.which('bun')` to verify bun executable exists
68
-
69
- ## When to Use Node.js APIs
70
-
71
- - Interactive input (readline)
72
- - Complex path manipulation (prefer `node:path` utilities)
73
- - APIs without Bun equivalents
74
-
75
- ## Documentation
76
-
77
- - Main docs: https://bun.sh/docs
78
- - Shell API: https://bun.sh/docs/runtime/shell
79
- - File I/O: https://bun.sh/docs/api/file-io
80
- - Runtime APIs: https://bun.sh/docs/runtime/bun-apis