@kodrunhq/opencode-autopilot 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/assets/commands/brainstorm.md +7 -0
  2. package/assets/commands/stocktake.md +7 -0
  3. package/assets/commands/tdd.md +7 -0
  4. package/assets/commands/update-docs.md +7 -0
  5. package/assets/commands/write-plan.md +7 -0
  6. package/assets/skills/brainstorming/SKILL.md +295 -0
  7. package/assets/skills/code-review/SKILL.md +241 -0
  8. package/assets/skills/e2e-testing/SKILL.md +266 -0
  9. package/assets/skills/git-worktrees/SKILL.md +296 -0
  10. package/assets/skills/go-patterns/SKILL.md +240 -0
  11. package/assets/skills/plan-executing/SKILL.md +258 -0
  12. package/assets/skills/plan-writing/SKILL.md +278 -0
  13. package/assets/skills/python-patterns/SKILL.md +255 -0
  14. package/assets/skills/rust-patterns/SKILL.md +293 -0
  15. package/assets/skills/strategic-compaction/SKILL.md +217 -0
  16. package/assets/skills/systematic-debugging/SKILL.md +299 -0
  17. package/assets/skills/tdd-workflow/SKILL.md +311 -0
  18. package/assets/skills/typescript-patterns/SKILL.md +278 -0
  19. package/assets/skills/verification/SKILL.md +240 -0
  20. package/package.json +1 -1
  21. package/src/index.ts +72 -1
  22. package/src/observability/context-monitor.ts +102 -0
  23. package/src/observability/event-emitter.ts +136 -0
  24. package/src/observability/event-handlers.ts +322 -0
  25. package/src/observability/event-store.ts +226 -0
  26. package/src/observability/index.ts +53 -0
  27. package/src/observability/log-reader.ts +152 -0
  28. package/src/observability/log-writer.ts +93 -0
  29. package/src/observability/mock/mock-provider.ts +72 -0
  30. package/src/observability/mock/types.ts +31 -0
  31. package/src/observability/retention.ts +57 -0
  32. package/src/observability/schemas.ts +83 -0
  33. package/src/observability/session-logger.ts +63 -0
  34. package/src/observability/summary-generator.ts +209 -0
  35. package/src/observability/token-tracker.ts +97 -0
  36. package/src/observability/types.ts +24 -0
  37. package/src/orchestrator/skill-injection.ts +38 -0
  38. package/src/review/sanitize.ts +1 -1
  39. package/src/skills/adaptive-injector.ts +122 -0
  40. package/src/skills/dependency-resolver.ts +88 -0
  41. package/src/skills/linter.ts +113 -0
  42. package/src/skills/loader.ts +88 -0
  43. package/src/templates/skill-template.ts +4 -0
  44. package/src/tools/create-skill.ts +12 -0
  45. package/src/tools/logs.ts +178 -0
  46. package/src/tools/mock-fallback.ts +100 -0
  47. package/src/tools/pipeline-report.ts +148 -0
  48. package/src/tools/session-stats.ts +185 -0
  49. package/src/tools/stocktake.ts +170 -0
  50. package/src/tools/update-docs.ts +116 -0
@@ -0,0 +1,278 @@
1
+ ---
2
+ name: typescript-patterns
3
+ description: TypeScript and Bun runtime patterns, testing idioms, type-level programming, and performance best practices
4
+ stacks:
5
+ - typescript
6
+ - bun
7
+ requires:
8
+ - coding-standards
9
+ ---
10
+
11
+ # TypeScript & Bun Patterns
12
+
13
+ TypeScript-specific patterns for projects running on the Bun runtime. Covers type-level programming, Bun-specific APIs, testing with bun:test, error handling, module design, and immutability idioms. Apply these when writing, reviewing, or refactoring TypeScript code.
14
+
15
+ ## 1. Type-Level Patterns
16
+
17
+ **DO:** Use the type system to make invalid states unrepresentable.
18
+
19
+ - Prefer `interface` over `type` for object shapes -- better error messages, declaration merging, and extensibility
20
+ - Use discriminated unions for state machines:
21
+ ```ts
22
+ type RequestState =
23
+ | { status: "idle" }
24
+ | { status: "loading" }
25
+ | { status: "success"; data: Response }
26
+ | { status: "error"; error: Error }
27
+ ```
28
+ - Use `readonly` arrays and properties by default. Only remove `readonly` when mutation is explicitly required
29
+ - Use `as const` for literal types and frozen configuration objects
30
+ - Use branded types for domain identifiers to prevent mixing:
31
+ ```ts
32
+ type UserId = string & { readonly __brand: "UserId" }
33
+ type OrderId = string & { readonly __brand: "OrderId" }
34
+
35
+ function fetchUser(id: UserId): Promise<User> { ... }
36
+ // fetchUser(orderId) is now a compile error
37
+ ```
38
+ - Use template literal types for string patterns:
39
+ ```ts
40
+ type ApiRoute = `/api/${string}`
41
+ type EventName = `on${Capitalize<string>}`
42
+ ```
43
+ - Use `satisfies` to validate types without widening:
44
+ ```ts
45
+ const config = {
46
+ port: 3000,
47
+ host: "localhost",
48
+ } satisfies ServerConfig
49
+ // config keeps its own inferred type instead of being widened to `ServerConfig`; add `as const` if you need literal types such as `3000`
50
+ ```
51
+
52
+ **DON'T:**
53
+
54
+ - Use `any` -- use `unknown` and narrow with type guards. If `any` is unavoidable, add a `// biome-ignore lint/suspicious/noExplicitAny: [reason]` comment
55
+ - Use `enum` -- use `as const` objects or union types instead (enums have runtime cost and quirky behavior)
56
+ - Use `!` non-null assertion -- handle the null case explicitly or use optional chaining
57
+ - Cast with `as` when a type guard or conditional check is possible
58
+ - Use `Function` type -- use specific signatures: `(arg: string) => void`
59
+
60
+ ## 2. Bun Runtime Patterns
61
+
62
+ **DO:** Use Bun-native APIs where they provide clear advantages.
63
+
64
+ - Use `bun test` for testing -- built-in, fast, Jest-compatible API, no configuration needed
65
+ - Use `node:fs/promises` for all file I/O -- not `Bun.file()` or `Bun.write()` (portability and testability per project constraints)
66
+ - Use `Bun.serve()` for HTTP servers -- not Express or other Node frameworks
67
+ - Import from `bun:sqlite` for SQLite -- zero-dependency, built into the runtime
68
+ - Use `Bun.spawn()` for subprocesses -- streams stdout/stderr natively
69
+ - Use `Bun.hash()` for fast hashing -- faster than Node's crypto for non-cryptographic hashes
70
+ - Use `Bun.env` for environment variables -- typed access with auto-completion
71
+
72
+ **DON'T:**
73
+
74
+ - Install `jest`, `vitest`, or `mocha` -- `bun test` covers all standard test patterns
75
+ - Use `Bun.file()` or `Bun.write()` in library code -- prefer `node:fs/promises` for portability
76
+ - Use `node:child_process` when `Bun.spawn()` is available
77
+ - Mix CommonJS `require()` with ES module `import` -- use `import` exclusively
78
+
79
+ ## 3. Error Handling
80
+
81
+ **DO:** Use result types for expected failures. Reserve exceptions for unexpected bugs.
82
+
83
+ - Return result types instead of throwing:
84
+ ```ts
85
+ type Result<T, E = string> =
86
+ | { success: true; data: T }
87
+ | { success: false; error: E }
88
+ ```
89
+ - Catch at boundaries (HTTP handlers, CLI entry points), not in business logic
90
+ - Use `isEnoentError()` pattern for filesystem errors -- check error code, not message:
91
+ ```ts
92
+ function isEnoentError(error: unknown): boolean {
93
+ return error instanceof Error && "code" in error && error.code === "ENOENT"
94
+ }
95
+ ```
96
+ - Use `unknown` for catch clause variables and narrow before accessing properties:
97
+ ```ts
98
+ try { ... } catch (error: unknown) {
99
+ if (error instanceof ValidationError) { ... }
100
+ throw error // re-throw unknown errors
101
+ }
102
+ ```
103
+ - Validate external data at system boundaries with Zod schemas:
104
+ ```ts
105
+ const result = schema.safeParse(input)
106
+ if (!result.success) return { success: false, error: result.error.message }
107
+ ```
108
+
109
+ **DON'T:**
110
+
111
+ - Catch without re-throwing or logging -- silent catch is a bug
112
+ - Throw strings -- always throw `Error` instances or custom error classes
113
+ - Use `try/catch` for control flow -- use conditional checks or result types
114
+ - Ignore the return value of `safeParse` -- always check `.success`
115
+
116
+ ## 4. Module Patterns
117
+
118
+ **DO:** Design modules for composability and testability.
119
+
120
+ - Export pure functions and interfaces, not classes (unless state encapsulation is genuinely needed)
121
+ - Use barrel files (`index.ts`) only for public API surfaces -- internal modules import directly
122
+ - Follow strict top-down dependency flow -- no cycles. Use the dependency tree:
123
+ ```
124
+ entry point -> tools -> templates + utils -> Node built-ins
125
+ ```
126
+ - Follow the `*Core` function pattern: export a testable core function that accepts dependencies, and a thin wrapper that supplies defaults:
127
+ ```ts
128
+ // Testable core
129
+ export async function createAgentCore(name: string, baseDir: string): Promise<Result> { ... }
130
+
131
+ // Thin wrapper for production
132
+ export function tool() {
133
+ return { execute: (args) => createAgentCore(args.name, getGlobalConfigDir()) }
134
+ }
135
+ ```
136
+ - Target 200-400 lines per file, hard maximum of 800
137
+
138
+ **DON'T:**
139
+
140
+ - Create circular dependencies -- if A imports B and B imports A, extract shared types to C
141
+ - Use dynamic `import()` for modules that can be statically imported
142
+ - Re-export everything from a barrel file -- explicitly list public API
143
+ - Put multiple unrelated exports in a single file
144
+
145
+ ## 5. Testing Patterns
146
+
147
+ **DO:** Write focused tests that verify behavior, not implementation.
148
+
149
+ - Use `describe`/`test` (not `it`) for consistency across the project:
150
+ ```ts
151
+ describe("validateAssetName", () => {
152
+ test("accepts lowercase with hyphens", () => {
153
+ expect(validateAssetName("my-agent")).toEqual({ valid: true })
154
+ })
155
+
156
+ test("rejects uppercase characters", () => {
157
+ const result = validateAssetName("MyAgent")
158
+ expect(result.valid).toBe(false)
159
+ })
160
+ })
161
+ ```
162
+ - Test pure functions: input goes in, output comes out, no mocks needed
163
+ - Test side effects: mock the boundary (filesystem, network), verify the interaction:
164
+ ```ts
165
+ import { mock } from "bun:test"
166
+ const writeMock = mock(() => Promise.resolve())
167
+ // inject mock, call function, verify writeMock was called with expected args
168
+ ```
169
+ - Use `beforeEach` for test isolation, `afterEach` for cleanup
170
+ - Use `expect().toThrow()` for exception testing:
171
+ ```ts
172
+ expect(() => parseConfig(invalid)).toThrow("Invalid config")
173
+ ```
174
+ - Use `expect().toMatchSnapshot()` only for complex output where manual assertion is impractical
175
+
176
+ **DON'T:**
177
+
178
+ - Test implementation details (private methods, internal state)
179
+ - Use `it` instead of `test` -- project convention is `describe`/`test`
180
+ - Write tests that depend on execution order or shared mutable state
181
+ - Skip tests with `.skip` without a tracking comment (`// TODO(#123): flaky on CI`)
182
+ - Use `any` in test files to bypass type checking -- tests should be as typed as production code
183
+
184
+ ## 6. Immutability Patterns
185
+
186
+ **DO:** Build new objects instead of mutating existing ones.
187
+
188
+ - Use object spread for updates:
189
+ ```ts
190
+ const updated = { ...existing, status: "active" }
191
+ ```
192
+ - Use array spread for additions:
193
+ ```ts
194
+ const withNew = [...existing, newItem]
195
+ ```
196
+ - Use `Object.freeze()` for constants and configuration:
197
+ ```ts
198
+ const DEFAULTS = Object.freeze({
199
+ maxRetries: 3,
200
+ timeoutMs: 5000,
201
+ })
202
+ ```
203
+ - Use `ReadonlyArray<T>` and `Readonly<Record<K, V>>` for function parameters:
204
+ ```ts
205
+ function process(items: ReadonlyArray<Item>): Result { ... }
206
+ ```
207
+ - Use `map`, `filter`, `reduce` instead of mutating loops:
208
+ ```ts
209
+ const active = users.filter(u => u.isActive)
210
+ const names = active.map(u => u.name)
211
+ ```
212
+
213
+ **DON'T:**
214
+
215
+ - Push to arrays: `items.push(x)` -- use `[...items, x]`
216
+ - Reassign properties: `obj.status = "done"` -- use `{ ...obj, status: "done" }`
217
+ - Use `splice`, `pop`, `shift` on shared arrays
218
+ - Mutate function arguments -- always return new values
219
+
220
+ **Exception:** Mutation is acceptable when an API explicitly requires it (OpenCode config hooks, database transaction builders, stream writers). Document the mutation with a comment.
221
+
222
+ ## 7. Anti-Pattern Catalog
223
+
224
+ **Anti-Pattern: Over-typed Generics**
225
+ Writing `function get<T extends Record<string, unknown>, K extends keyof T>(obj: T, key: K): T[K]` when `function get(obj: Record<string, unknown>, key: string): unknown` suffices. Generics should earn their complexity by providing caller-site type narrowing.
226
+
227
+ **Anti-Pattern: Barrel File Hell**
228
+ Every directory gets an `index.ts` that re-exports everything. This creates implicit coupling, breaks tree-shaking, and makes imports ambiguous. Use barrel files only for the package's public API surface.
229
+
230
+ **Anti-Pattern: Type Assertion Chains**
231
+ `(value as unknown as TargetType)` is a code smell. If you need two casts, the types are wrong. Fix the source type or add a proper type guard.
232
+
233
+ **Anti-Pattern: Promise Constructor Anti-Pattern**
234
+ Wrapping an async function in `new Promise()` when you can just return the promise directly. If the function returns a promise, use `async/await` -- don't wrap it.
235
+
236
+ **Anti-Pattern: Callback-Style Error Handling**
237
+ Passing `(error, result)` tuples in TypeScript. Use `Result<T, E>` types or throw -- callbacks are a Node.js legacy, not a TypeScript pattern.
238
+
239
+ **Anti-Pattern: Default Export Confusion**
240
+ Using `export default` in library code makes imports inconsistent across consumers (each file names it differently). Use named exports: `export function createAgent()` not `export default function()`. Default exports are acceptable only for plugin/framework entry points that require them.
241
+
242
+ ## 8. Performance Patterns
243
+
244
+ **DO:** Write efficient TypeScript that leverages Bun's runtime characteristics.
245
+
246
+ - Pre-compute values at module level for constants used in hot paths:
247
+ ```ts
248
+ // Module level -- computed once
249
+ const VALID_PATTERN = /^[a-z0-9]+(-[a-z0-9]+)*$/
250
+ const DEFAULT_CONFIG = Object.freeze({ maxRetries: 3, timeoutMs: 5000 })
251
+
252
+ // Not inside the function -- recomputed every call
253
+ ```
254
+ - Use `Map` and `Set` for frequent lookups instead of plain objects and arrays:
255
+ ```ts
256
+ const BUILT_IN_COMMANDS = new Set(["help", "quit", "config"])
257
+ // O(1) lookup vs O(n) array.includes()
258
+ ```
259
+ - Use `structuredClone()` for deep copies -- built into the runtime, handles circular references
260
+ - Avoid unnecessary `await` in return position:
261
+ ```ts
262
+ // DO: Return the promise directly
263
+ function fetchUser(id: string): Promise<User> {
264
+ return db.query("SELECT * FROM users WHERE id = ?", [id])
265
+ }
266
+
267
+ // DON'T: Unnecessary await
268
+ async function fetchUser(id: string): Promise<User> {
269
+ return await db.query("SELECT * FROM users WHERE id = ?", [id])
270
+ }
271
+ ```
272
+
273
+ **DON'T:**
274
+
275
+ - Create regex objects inside loops or frequently-called functions
276
+ - Use `JSON.parse(JSON.stringify(obj))` for deep cloning -- use `structuredClone()`
277
+ - Allocate in hot paths -- pre-compute, cache, or use pooling for frequently created objects
278
+ - Use `Array.from()` when spread `[...iterable]` works -- spread is faster in Bun
@@ -0,0 +1,240 @@
1
+ ---
2
+ name: verification
3
+ description: Pre-completion verification checklist methodology to catch issues before marking work as done
4
+ stacks: []
5
+ requires: []
6
+ ---
7
+
8
+ # Verification
9
+
10
+ A systematic pre-completion checklist methodology. Apply this before marking any task, feature, or PR as complete. Every step is a gate — if it fails, the work is not done.
11
+
12
+ ## When to Use
13
+
14
+ - Before marking any task as complete
15
+ - Before committing code
16
+ - Before opening or merging a pull request
17
+ - Before deploying to any environment
18
+ - Before saying "this is done" to anyone
19
+ - After refactoring existing code
20
+ - After fixing a bug (to verify the fix and check for regressions)
21
+
22
+ Catching issues before completion is 10x cheaper than catching them after merge and 100x cheaper than catching them in production. This checklist exists because developers consistently overestimate the completeness of their own work.
23
+
24
+ ## The Verification Checklist
25
+
26
+ ### Step 1: Requirements Check
27
+
28
+ Re-read the original requirement, task description, or issue. Do not rely on your memory of what was asked.
29
+
30
+ **Process:**
31
+ 1. Open the original requirement (ticket, issue, plan task, PR description)
32
+ 2. List every stated requirement — each one is a checkbox
33
+ 3. For each requirement, identify the specific code that satisfies it
34
+ 4. Mark each requirement as satisfied or not
35
+ 5. If any requirement is not satisfied, the work is not done
36
+
37
+ **What to check:**
38
+ - Every explicit requirement has a corresponding implementation
39
+ - Edge cases mentioned in the requirement are handled
40
+ - Acceptance criteria (if provided) are met
41
+ - The implementation does not introduce behavior that contradicts the requirement
42
+ - Optional requirements are either implemented or explicitly deferred with a reason
43
+
44
+ **Red flags:**
45
+ - You cannot point to specific code for a requirement — it is missing
46
+ - You implemented something adjacent to the requirement but not the requirement itself
47
+ - You added features that were not requested (scope creep)
48
+
49
+ ### Step 2: Code Quality Check
50
+
51
+ Run automated quality checks. Do not skip these because "it is a small change."
52
+
53
+ **Process:**
54
+ 1. Run the linter: `bun run lint` (or the project equivalent)
55
+ 2. Run the type checker: `bunx tsc --noEmit` (for TypeScript projects)
56
+ 3. Run the formatter: `bun run format` (or the project equivalent)
57
+ 4. Search for debug artifacts: `console.log`, `debugger`, `print()` statements
58
+ 5. Search for deferred work: `TODO`, `FIXME`, `HACK`, `XXX` comments that should be resolved
59
+ 6. Check file sizes — no file should exceed 800 lines
60
+
61
+ **What to check:**
62
+ - Zero linter errors (warnings are acceptable only if pre-existing)
63
+ - Zero type errors
64
+ - No formatting violations
65
+ - No debug statements left in production code
66
+ - No new TODO/FIXME comments that should be resolved before merge
67
+ - All new files are under 400 lines (target), none over 800 lines (hard limit)
68
+
69
+ **Red flags:**
70
+ - Suppressing linter rules with inline comments (`// biome-ignore`, `// eslint-disable`) without justification
71
+ - Type assertions (`as any`, `as unknown`) used to silence type errors instead of fixing them
72
+ - Large functions (over 50 lines) or deeply nested code (over 4 levels)
73
+
74
+ ### Step 3: Test Verification
75
+
76
+ Run the test suite. No exceptions.
77
+
78
+ **Process:**
79
+ 1. Run the full test suite: `bun test`
80
+ 2. Check that all existing tests pass (zero regressions)
81
+ 3. Verify that new code has test coverage
82
+ 4. If new functionality has no tests, write them before proceeding
83
+ 5. Check test quality — are tests testing behavior or implementation details?
84
+
85
+ **What to check:**
86
+ - All tests pass (not just the ones you think are relevant)
87
+ - New public functions and endpoints have at least one test
88
+ - Error paths are tested (not just the happy path)
89
+ - Edge cases identified in Step 1 have corresponding tests
90
+ - Tests are deterministic — no flaky tests introduced
91
+
92
+ **Red flags:**
93
+ - Skipped tests (`test.skip`, `it.skip`, `xit`, `@pytest.mark.skip`) without a tracking issue
94
+ - Tests that pass by coincidence (testing the wrong thing)
95
+ - Tests that mock so heavily they do not test real behavior
96
+ - Missing tests for error handling paths
97
+
98
+ **Reference:** Use the tdd-workflow skill for writing tests when coverage is missing.
99
+
100
+ ### Step 4: Integration Check
101
+
102
+ Does the change work with the rest of the system? Unit tests passing is necessary but not sufficient.
103
+
104
+ **Process:**
105
+ 1. Trace all imports — are new exports consumed correctly by their callers?
106
+ 2. Check type compatibility at module boundaries — do interfaces match between producer and consumer?
107
+ 3. Run the application and manually verify the feature works end-to-end
108
+ 4. Check that the feature integrates with existing features without breaking them
109
+ 5. Verify configuration — are all required config values, environment variables, and feature flags set?
110
+
111
+ **What to check:**
112
+ - No broken imports or missing exports
113
+ - Type interfaces match between modules (producer returns what consumer expects)
114
+ - The feature works when invoked through its actual entry point (not just in isolation)
115
+ - Existing features that interact with the changed code still work
116
+ - Database migrations (if any) apply cleanly and are reversible
117
+
118
+ **Red flags:**
119
+ - The feature works in tests but fails when run for real
120
+ - You only tested the feature in isolation, never with the full system
121
+ - New environment variables or configuration are undocumented
122
+
123
+ ### Step 5: Edge Case Review
124
+
125
+ Think adversarially. What inputs or conditions could break this?
126
+
127
+ **Process:**
128
+ 1. For each input, consider: empty, null/undefined, very large, malformed, special characters
129
+ 2. For each external call, consider: timeout, network failure, unexpected response, rate limiting
130
+ 3. For each concurrent operation, consider: race conditions, duplicate submissions, stale data
131
+ 4. For each state transition, consider: invalid state, repeated transitions, partial failure
132
+
133
+ **What to check:**
134
+ - Empty input does not crash (returns appropriate error or default)
135
+ - Very large input does not cause memory issues or timeouts
136
+ - Null/undefined values are handled (not passed through to crash later)
137
+ - Concurrent access is safe (no race conditions on shared state)
138
+ - Network failures are handled gracefully (retry, timeout, fallback)
139
+ - Partial failures do not leave the system in an inconsistent state
140
+
141
+ **Red flags:**
142
+ - Functions that assume input is always valid without checking
143
+ - No timeout on external calls (HTTP requests, database queries)
144
+ - Shared mutable state without synchronization
145
+ - Error handling that swallows the error and continues with bad data
146
+
147
+ ### Step 6: Security Scan
148
+
149
+ Check for common security issues. This is not a full security audit — it is a pre-commit sanity check.
150
+
151
+ **Process:**
152
+ 1. Search for hardcoded secrets: API keys, passwords, tokens, connection strings
153
+ 2. Verify all user inputs are validated before use
154
+ 3. Check that error messages do not leak sensitive data (stack traces, SQL queries, internal paths)
155
+ 4. Verify authentication and authorization on new endpoints or tools
156
+ 5. Check for injection risks: SQL injection, XSS, command injection, path traversal
157
+
158
+ **What to check:**
159
+ - No secrets in source code (use environment variables or secret managers)
160
+ - All user input is validated at the boundary (schema validation preferred)
161
+ - Error messages are safe for end users (no internal details)
162
+ - New endpoints require authentication
163
+ - Authorized actions check permissions (not just authentication)
164
+ - Dynamic queries use parameterized statements (never string concatenation)
165
+
166
+ **Red flags:**
167
+ - API keys or tokens in source code or committed config files
168
+ - User input passed directly to database queries, shell commands, or HTML output
169
+ - Detailed error messages exposed to end users
170
+ - New endpoints accessible without authentication
171
+
172
+ ## Integration with Our Tools
173
+
174
+ Use these tools as part of the verification process:
175
+
176
+ - **`oc_review`** — Invoke before marking any task as complete. Provides automated code review that catches issues you might miss reviewing your own code. This is the single most important verification step.
177
+ - **`oc_doctor`** — Run to verify plugin health and configuration integrity. Catches broken tool registrations, missing assets, and config corruption.
178
+ - **`oc_session_stats`** — Check for error patterns in the current session. If the session shows repeated errors, investigate before declaring the work complete.
179
+ - **`oc_forensics`** — When a verification step fails and the root cause is not obvious, use forensics to trace the issue systematically.
180
+
181
+ ## Anti-Pattern Catalog
182
+
183
+ ### Anti-Pattern: "Works on My Machine"
184
+
185
+ **What goes wrong:** You test only in your local environment and miss environment-specific issues (different OS, different Node/Bun version, different config, missing env vars).
186
+
187
+ **Instead:** Check for environment-specific assumptions. Hardcoded paths, OS-specific APIs, version-specific features. If CI exists, verify it passes there too.
188
+
189
+ ### Anti-Pattern: Skipping Tests for Small Changes
190
+
191
+ **What goes wrong:** "It is just a one-line change" — and that one line breaks three other things. Small changes cause big bugs because they slip through review.
192
+
193
+ **Instead:** Always run the full test suite. The smaller the change, the faster the tests run anyway.
194
+
195
+ ### Anti-Pattern: Reviewing Your Own Code
196
+
197
+ **What goes wrong:** You will miss the same things you missed when writing the code. Confirmation bias means you see what you expect to see, not what is actually there.
198
+
199
+ **Instead:** Use `oc_review` for an independent automated review. For critical changes, request a human review as well.
200
+
201
+ ### Anti-Pattern: Verifying Only the Happy Path
202
+
203
+ **What goes wrong:** The feature works perfectly with valid input. It crashes spectacularly with empty input, null values, or unexpected types.
204
+
205
+ **Instead:** Step 5 (Edge Case Review) exists for this reason. Test the boundaries, not just the center.
206
+
207
+ ### Anti-Pattern: Deferring Verification to Later
208
+
209
+ **What goes wrong:** "I will add tests later" or "I will check security before release." Later never comes, and the technical debt compounds.
210
+
211
+ **Instead:** Verify now. Every step of this checklist should pass before the work leaves your hands.
212
+
213
+ ## Failure Modes
214
+
215
+ ### Linter Fails
216
+ Fix the issues before proceeding. If a linter rule is genuinely wrong for your case, add a justified inline suppression comment — but question whether the rule is actually wrong or your code needs restructuring.
217
+
218
+ ### Tests Fail
219
+ Do not comment out or skip the failing test. Diagnose the failure using the systematic-debugging skill. The test may be wrong (update it), or your code may be wrong (fix it). Determine which before changing anything.
220
+
221
+ ### Type Errors
222
+ Trace the type mismatch to its source. Do not use `as any` to suppress the error. The type system is telling you something — usually that your mental model of the data does not match reality.
223
+
224
+ ### Security Issue Found
225
+ Stop and fix it immediately. Do not defer security issues. If the fix requires significant changes, that is a sign the code needs restructuring, not that the security issue should be ignored.
226
+
227
+ ### Integration Failure
228
+ If the feature works in isolation but fails in integration, the issue is at a module boundary. Check: are you producing the data the consumer expects? Are interfaces aligned? Is the contract documented?
229
+
230
+ ## Quick Reference
231
+
232
+ For a fast pre-commit check, verify at minimum:
233
+
234
+ 1. `bun run lint` passes
235
+ 2. `bun test` passes
236
+ 3. No hardcoded secrets
237
+ 4. `oc_review` has no CRITICAL findings
238
+ 5. Every requirement has corresponding code
239
+
240
+ The full 6-step checklist is for marking work as complete. The quick reference is for every commit.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kodrunhq/opencode-autopilot",
3
- "version": "1.3.0",
3
+ "version": "1.5.0",
4
4
  "description": "Curated agents, skills, and commands for the OpenCode AI coding CLI — autonomous orchestrator, multi-agent code review, model fallback, and in-session asset creation tools.",
5
5
  "main": "src/index.ts",
6
6
  "keywords": [
package/src/index.ts CHANGED
@@ -3,6 +3,16 @@ import { configHook } from "./agents";
3
3
  import { isFirstLoad, loadConfig } from "./config";
4
4
  import { runHealthChecks } from "./health/runner";
5
5
  import { installAssets } from "./installer";
6
+ import { ContextMonitor } from "./observability/context-monitor";
7
+ import {
8
+ createObservabilityEventHandler,
9
+ createToolExecuteAfterHandler as createObsToolAfterHandler,
10
+ createToolExecuteBeforeHandler,
11
+ } from "./observability/event-handlers";
12
+ import { SessionEventStore } from "./observability/event-store";
13
+ import { writeSessionLog } from "./observability/log-writer";
14
+ import { pruneOldLogs } from "./observability/retention";
15
+ import type { SessionEvent } from "./observability/types";
6
16
  import type { SdkOperations } from "./orchestrator/fallback";
7
17
  import {
8
18
  createChatMessageHandler,
@@ -24,12 +34,18 @@ import { ocCreateCommand } from "./tools/create-command";
24
34
  import { ocCreateSkill } from "./tools/create-skill";
25
35
  import { ocDoctor } from "./tools/doctor";
26
36
  import { ocForensics } from "./tools/forensics";
37
+ import { ocLogs } from "./tools/logs";
38
+ import { ocMockFallback } from "./tools/mock-fallback";
27
39
  import { ocOrchestrate } from "./tools/orchestrate";
28
40
  import { ocPhase } from "./tools/phase";
41
+ import { ocPipelineReport } from "./tools/pipeline-report";
29
42
  import { ocPlan } from "./tools/plan";
30
43
  import { ocQuick } from "./tools/quick";
31
44
  import { ocReview } from "./tools/review";
45
+ import { ocSessionStats } from "./tools/session-stats";
32
46
  import { ocState } from "./tools/state";
47
+ import { ocStocktake } from "./tools/stocktake";
48
+ import { ocUpdateDocs } from "./tools/update-docs";
33
49
 
34
50
  let openCodeConfig: Config | null = null;
35
51
 
@@ -70,6 +86,15 @@ const plugin: Plugin = async (input) => {
70
86
  // Health check failures are non-fatal — oc_doctor provides manual diagnostics
71
87
  });
72
88
 
89
+ // --- Observability subsystem initialization ---
90
+ const eventStore = new SessionEventStore();
91
+ const contextMonitor = new ContextMonitor();
92
+
93
+ // Retention pruning on load (non-blocking per D-14)
94
+ pruneOldLogs().catch((err) => {
95
+ console.error("[opencode-autopilot]", err);
96
+ });
97
+
73
98
  // --- Fallback subsystem initialization ---
74
99
  const sdkOps: SdkOperations = {
75
100
  abortSession: async (sessionID) => {
@@ -123,6 +148,32 @@ const plugin: Plugin = async (input) => {
123
148
  const chatMessageHandler = createChatMessageHandler(manager);
124
149
  const toolExecuteAfterHandler = createToolExecuteAfterHandler(manager);
125
150
 
151
+ // --- Observability handlers ---
152
+ const toolStartTimes = new Map<string, number>();
153
+ const observabilityEventHandler = createObservabilityEventHandler({
154
+ eventStore,
155
+ contextMonitor,
156
+ showToast: sdkOps.showToast,
157
+ writeSessionLog: async (sessionData) => {
158
+ if (!sessionData) return;
159
+ // Filter to schema-valid event types that match SessionEvent discriminated union
160
+ const schemaEvents: SessionEvent[] = sessionData.events.filter(
161
+ (e): e is SessionEvent =>
162
+ e.type === "fallback" ||
163
+ e.type === "error" ||
164
+ e.type === "decision" ||
165
+ e.type === "model_switch",
166
+ );
167
+ await writeSessionLog({
168
+ sessionId: sessionData.sessionId,
169
+ startedAt: sessionData.startedAt,
170
+ events: schemaEvents,
171
+ });
172
+ },
173
+ });
174
+ const obsToolBeforeHandler = createToolExecuteBeforeHandler(toolStartTimes);
175
+ const obsToolAfterHandler = createObsToolAfterHandler(eventStore, toolStartTimes);
176
+
126
177
  return {
127
178
  tool: {
128
179
  oc_configure: ocConfigure,
@@ -138,8 +189,18 @@ const plugin: Plugin = async (input) => {
138
189
  oc_quick: ocQuick,
139
190
  oc_forensics: ocForensics,
140
191
  oc_review: ocReview,
192
+ oc_logs: ocLogs,
193
+ oc_session_stats: ocSessionStats,
194
+ oc_pipeline_report: ocPipelineReport,
195
+ oc_mock_fallback: ocMockFallback,
196
+ oc_stocktake: ocStocktake,
197
+ oc_update_docs: ocUpdateDocs,
141
198
  },
142
199
  event: async ({ event }) => {
200
+ // 1. Observability: collect (pure observer, no side effects on session)
201
+ await observabilityEventHandler({ event });
202
+
203
+ // 2. First-load toast
143
204
  if (event.type === "session.created" && isFirstLoad(config)) {
144
205
  await sdkOps.showToast(
145
206
  "Welcome to OpenCode Autopilot!",
@@ -148,7 +209,7 @@ const plugin: Plugin = async (input) => {
148
209
  );
149
210
  }
150
211
 
151
- // Fallback event handling (runs for all events)
212
+ // 3. Fallback event handling
152
213
  if (fallbackConfig.enabled) {
153
214
  await fallbackEventHandler({ event });
154
215
  }
@@ -173,6 +234,12 @@ const plugin: Plugin = async (input) => {
173
234
  await chatMessageHandler(hookInput, output);
174
235
  }
175
236
  },
237
+ "tool.execute.before": async (
238
+ input: { tool: string; sessionID: string; callID: string },
239
+ output: { args: unknown },
240
+ ) => {
241
+ obsToolBeforeHandler({ ...input, args: output.args });
242
+ },
176
243
  "tool.execute.after": async (
177
244
  hookInput: {
178
245
  readonly tool: string;
@@ -182,6 +249,10 @@ const plugin: Plugin = async (input) => {
182
249
  },
183
250
  output: { title: string; output: string; metadata: unknown },
184
251
  ) => {
252
+ // Observability: record tool execution (pure observer)
253
+ obsToolAfterHandler(hookInput, output);
254
+
255
+ // Fallback handling
185
256
  if (fallbackConfig.enabled) {
186
257
  await toolExecuteAfterHandler(hookInput, output);
187
258
  }