opencastle 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,343 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { parseYaml, parseTimeout, validateSpec, applyDefaults } from './schema.js'
3
+
4
+ // ── parseYaml ──────────────────────────────────────────────────
5
+
6
+ describe('parseYaml', () => {
7
+ it('parses valid YAML mapping', () => {
8
+ const result = parseYaml('name: test\nvalue: 42')
9
+ expect(result).toEqual({ name: 'test', value: 42 })
10
+ })
11
+
12
+ it('throws on scalar top-level value', () => {
13
+ expect(() => parseYaml('hello')).toThrow('YAML must be a mapping')
14
+ })
15
+
16
+ it('throws on array top-level value', () => {
17
+ expect(() => parseYaml('- a\n- b')).toThrow('YAML must be a mapping')
18
+ })
19
+
20
+ it('throws on empty input', () => {
21
+ expect(() => parseYaml('')).toThrow('YAML must be a mapping')
22
+ })
23
+
24
+ it('handles nested objects', () => {
25
+ const result = parseYaml('parent:\n child: value')
26
+ expect(result).toEqual({ parent: { child: 'value' } })
27
+ })
28
+
29
+ it('handles multiline strings', () => {
30
+ const result = parseYaml('text: |\n line one\n line two')
31
+ expect(result.text).toContain('line one')
32
+ expect(result.text).toContain('line two')
33
+ })
34
+
35
+ it('parses a __proto__ key without polluting Object.prototype', () => {
36
+ // The document must parse (not throw); the yaml npm package is relied on to treat `__proto__` as ordinary data.
37
+ const result = parseYaml('__proto__:\n polluted: true')
38
+ expect(result).toBeDefined()
39
+ // A freshly created object must not have inherited `polluted` via Object.prototype.
40
+ expect(({} as Record<string, unknown>).polluted).toBeUndefined()
41
+ })
42
+ })
43
+
44
+ // ── parseTimeout ───────────────────────────────────────────────
45
+
46
+ describe('parseTimeout', () => {
47
+ it('parses seconds', () => {
48
+ expect(parseTimeout('30s')).toBe(30_000)
49
+ })
50
+
51
+ it('parses minutes', () => {
52
+ expect(parseTimeout('10m')).toBe(600_000)
53
+ })
54
+
55
+ it('parses hours', () => {
56
+ expect(parseTimeout('2h')).toBe(7_200_000)
57
+ })
58
+
59
+ it('returns NaN for invalid format', () => {
60
+ expect(parseTimeout('abc')).toBeNaN()
61
+ })
62
+
63
+ it('returns NaN for missing unit', () => {
64
+ expect(parseTimeout('10')).toBeNaN()
65
+ })
66
+
67
+ it('returns NaN for empty string', () => {
68
+ expect(parseTimeout('')).toBeNaN()
69
+ })
70
+
71
+ it('handles single digit', () => {
72
+ expect(parseTimeout('1s')).toBe(1_000)
73
+ })
74
+
75
+ it('handles large values', () => {
76
+ expect(parseTimeout('999m')).toBe(999 * 60 * 1000)
77
+ })
78
+ })
79
+
80
+ // ── validateSpec ───────────────────────────────────────────────
81
+
82
+ describe('validateSpec', () => {
83
+ const validSpec = {
84
+ name: 'test-run',
85
+ tasks: [
86
+ { id: 'task-1', prompt: 'Do something' },
87
+ { id: 'task-2', prompt: 'Do another thing', depends_on: ['task-1'] },
88
+ ],
89
+ }
90
+
91
+ it('accepts a valid minimal spec', () => {
92
+ const result = validateSpec(validSpec)
93
+ expect(result.valid).toBe(true)
94
+ expect(result.errors).toHaveLength(0)
95
+ })
96
+
97
+ it('rejects null input', () => {
98
+ const result = validateSpec(null)
99
+ expect(result.valid).toBe(false)
100
+ expect(result.errors[0]).toMatch(/must be a YAML object/)
101
+ })
102
+
103
+ it('rejects non-object input', () => {
104
+ const result = validateSpec('string')
105
+ expect(result.valid).toBe(false)
106
+ })
107
+
108
+ it('requires name field', () => {
109
+ const result = validateSpec({ tasks: [{ id: 'a', prompt: 'b' }] })
110
+ expect(result.valid).toBe(false)
111
+ expect(result.errors).toContainEqual(expect.stringContaining('name'))
112
+ })
113
+
114
+ it('requires tasks array', () => {
115
+ const result = validateSpec({ name: 'test' })
116
+ expect(result.valid).toBe(false)
117
+ expect(result.errors).toContainEqual(expect.stringContaining('tasks'))
118
+ })
119
+
120
+ it('rejects empty tasks array', () => {
121
+ const result = validateSpec({ name: 'test', tasks: [] })
122
+ expect(result.valid).toBe(false)
123
+ expect(result.errors).toContainEqual(expect.stringContaining('non-empty'))
124
+ })
125
+
126
+ it('rejects tasks without id', () => {
127
+ const result = validateSpec({ name: 'test', tasks: [{ prompt: 'x' }] })
128
+ expect(result.valid).toBe(false)
129
+ expect(result.errors).toContainEqual(expect.stringContaining('id'))
130
+ })
131
+
132
+ it('rejects tasks without prompt', () => {
133
+ const result = validateSpec({ name: 'test', tasks: [{ id: 'a' }] })
134
+ expect(result.valid).toBe(false)
135
+ expect(result.errors).toContainEqual(expect.stringContaining('prompt'))
136
+ })
137
+
138
+ it('rejects duplicate task ids', () => {
139
+ const result = validateSpec({
140
+ name: 'test',
141
+ tasks: [
142
+ { id: 'same', prompt: 'a' },
143
+ { id: 'same', prompt: 'b' },
144
+ ],
145
+ })
146
+ expect(result.valid).toBe(false)
147
+ expect(result.errors).toContainEqual(expect.stringContaining('duplicate'))
148
+ })
149
+
150
+ it('validates concurrency is a positive integer', () => {
151
+ const result = validateSpec({ ...validSpec, concurrency: -1 })
152
+ expect(result.valid).toBe(false)
153
+ expect(result.errors).toContainEqual(expect.stringContaining('concurrency'))
154
+ })
155
+
156
+ it('validates concurrency rejects zero', () => {
157
+ const result = validateSpec({ ...validSpec, concurrency: 0 })
158
+ expect(result.valid).toBe(false)
159
+ })
160
+
161
+ it('accepts valid concurrency', () => {
162
+ const result = validateSpec({ ...validSpec, concurrency: 3 })
163
+ expect(result.valid).toBe(true)
164
+ })
165
+
166
+ it('validates on_failure values', () => {
167
+ const result = validateSpec({ ...validSpec, on_failure: 'invalid' })
168
+ expect(result.valid).toBe(false)
169
+ expect(result.errors).toContainEqual(expect.stringContaining('on_failure'))
170
+ })
171
+
172
+ it('accepts valid on_failure values', () => {
173
+ expect(validateSpec({ ...validSpec, on_failure: 'continue' }).valid).toBe(true)
174
+ expect(validateSpec({ ...validSpec, on_failure: 'stop' }).valid).toBe(true)
175
+ })
176
+
177
+ it('validates adapter is a string', () => {
178
+ const result = validateSpec({ ...validSpec, adapter: 123 })
179
+ expect(result.valid).toBe(false)
180
+ expect(result.errors).toContainEqual(expect.stringContaining('adapter'))
181
+ })
182
+
183
+ it('validates invalid timeout format', () => {
184
+ const result = validateSpec({
185
+ name: 'test',
186
+ tasks: [{ id: 'a', prompt: 'x', timeout: 'bad' }],
187
+ })
188
+ expect(result.valid).toBe(false)
189
+ expect(result.errors).toContainEqual(expect.stringContaining('timeout'))
190
+ })
191
+
192
+ it('validates depends_on must be an array', () => {
193
+ const result = validateSpec({
194
+ name: 'test',
195
+ tasks: [{ id: 'a', prompt: 'x', depends_on: 'task-1' }],
196
+ })
197
+ expect(result.valid).toBe(false)
198
+ expect(result.errors).toContainEqual(expect.stringContaining('depends_on'))
199
+ })
200
+
201
+ it('validates depends_on references exist', () => {
202
+ const result = validateSpec({
203
+ name: 'test',
204
+ tasks: [{ id: 'a', prompt: 'x', depends_on: ['nonexistent'] }],
205
+ })
206
+ expect(result.valid).toBe(false)
207
+ expect(result.errors).toContainEqual(expect.stringContaining('unknown task'))
208
+ })
209
+
210
+ it('validates files must be an array', () => {
211
+ const result = validateSpec({
212
+ name: 'test',
213
+ tasks: [{ id: 'a', prompt: 'x', files: 'not-array' }],
214
+ })
215
+ expect(result.valid).toBe(false)
216
+ expect(result.errors).toContainEqual(expect.stringContaining('files'))
217
+ })
218
+
219
+ it('detects simple circular dependency (A→B→A)', () => {
220
+ const result = validateSpec({
221
+ name: 'test',
222
+ tasks: [
223
+ { id: 'a', prompt: 'x', depends_on: ['b'] },
224
+ { id: 'b', prompt: 'y', depends_on: ['a'] },
225
+ ],
226
+ })
227
+ expect(result.valid).toBe(false)
228
+ expect(result.errors).toContainEqual(expect.stringContaining('Circular'))
229
+ })
230
+
231
+ it('detects 3-node cycle (A→B→C→A)', () => {
232
+ const result = validateSpec({
233
+ name: 'test',
234
+ tasks: [
235
+ { id: 'a', prompt: 'x', depends_on: ['c'] },
236
+ { id: 'b', prompt: 'y', depends_on: ['a'] },
237
+ { id: 'c', prompt: 'z', depends_on: ['b'] },
238
+ ],
239
+ })
240
+ expect(result.valid).toBe(false)
241
+ expect(result.errors).toContainEqual(expect.stringContaining('Circular'))
242
+ })
243
+
244
+ it('accepts a valid DAG without cycles', () => {
245
+ const result = validateSpec({
246
+ name: 'test',
247
+ tasks: [
248
+ { id: 'a', prompt: 'x' },
249
+ { id: 'b', prompt: 'y', depends_on: ['a'] },
250
+ { id: 'c', prompt: 'z', depends_on: ['a', 'b'] },
251
+ ],
252
+ })
253
+ expect(result.valid).toBe(true)
254
+ })
255
+
256
+ it('accepts a self-contained task (no depends_on)', () => {
257
+ const result = validateSpec({
258
+ name: 'test',
259
+ tasks: [{ id: 'solo', prompt: 'do it' }],
260
+ })
261
+ expect(result.valid).toBe(true)
262
+ })
263
+
264
+ it('collects multiple errors at once', () => {
265
+ const result = validateSpec({
266
+ tasks: [{ id: 123, timeout: 'bad' }],
267
+ })
268
+ expect(result.valid).toBe(false)
269
+ expect(result.errors.length).toBeGreaterThan(1)
270
+ })
271
+ })
272
+
273
+ // ── applyDefaults ──────────────────────────────────────────────
274
+
275
+ describe('applyDefaults', () => {
276
+ it('applies default concurrency', () => {
277
+ const spec = applyDefaults({
278
+ name: 'test',
279
+ tasks: [{ id: 'a', prompt: 'x' }],
280
+ })
281
+ expect(spec.concurrency).toBe(1)
282
+ })
283
+
284
+ it('applies default on_failure', () => {
285
+ const spec = applyDefaults({
286
+ name: 'test',
287
+ tasks: [{ id: 'a', prompt: 'x' }],
288
+ })
289
+ expect(spec.on_failure).toBe('continue')
290
+ })
291
+
292
+ it('applies default adapter', () => {
293
+ const spec = applyDefaults({
294
+ name: 'test',
295
+ tasks: [{ id: 'a', prompt: 'x' }],
296
+ })
297
+ expect(spec.adapter).toBe('claude-code')
298
+ })
299
+
300
+ it('preserves user-specified values', () => {
301
+ const spec = applyDefaults({
302
+ name: 'test',
303
+ concurrency: 3,
304
+ on_failure: 'stop',
305
+ adapter: 'copilot',
306
+ tasks: [{ id: 'a', prompt: 'x' }],
307
+ })
308
+ expect(spec.concurrency).toBe(3)
309
+ expect(spec.on_failure).toBe('stop')
310
+ expect(spec.adapter).toBe('copilot')
311
+ })
312
+
313
+ it('applies task-level defaults', () => {
314
+ const spec = applyDefaults({
315
+ name: 'test',
316
+ tasks: [{ id: 'a', prompt: 'x' }],
317
+ })
318
+ const task = spec.tasks[0]
319
+ expect(task.agent).toBe('developer')
320
+ expect(task.timeout).toBe('30m')
321
+ expect(task.depends_on).toEqual([])
322
+ expect(task.files).toEqual([])
323
+ })
324
+
325
+ it('preserves user-specified task values', () => {
326
+ const spec = applyDefaults({
327
+ name: 'test',
328
+ tasks: [{
329
+ id: 'a',
330
+ prompt: 'x',
331
+ agent: 'ui-ux-expert',
332
+ timeout: '15m',
333
+ depends_on: ['b'],
334
+ files: ['src/'],
335
+ }],
336
+ })
337
+ const task = spec.tasks[0]
338
+ expect(task.agent).toBe('ui-ux-expert')
339
+ expect(task.timeout).toBe('15m')
340
+ expect(task.depends_on).toEqual(['b'])
341
+ expect(task.files).toEqual(['src/'])
342
+ })
343
+ })
@@ -8,7 +8,7 @@ import type { TaskSpec, ValidationResult } from '../types.js'
8
8
  */
9
9
  export function parseYaml(text: string): Record<string, unknown> {
10
10
  const result = yamlParse(text)
11
- if (!result || typeof result !== 'object') {
11
+ if (!result || typeof result !== 'object' || Array.isArray(result)) {
12
12
  throw new Error('YAML must be a mapping at the top level')
13
13
  }
14
14
  return result as Record<string, unknown>
@@ -1,25 +1,25 @@
1
1
  {
2
- "hash": "69236408",
2
+ "hash": "d4030ca8",
3
3
  "configHash": "30f8ea04",
4
- "lockfileHash": "9a76027c",
5
- "browserHash": "31f52ba6",
4
+ "lockfileHash": "30f69af6",
5
+ "browserHash": "5c8328e9",
6
6
  "optimized": {
7
7
  "astro > cssesc": {
8
8
  "src": "../../../../../node_modules/cssesc/cssesc.js",
9
9
  "file": "astro___cssesc.js",
10
- "fileHash": "1714fa2c",
10
+ "fileHash": "5802e199",
11
11
  "needsInterop": true
12
12
  },
13
13
  "astro > aria-query": {
14
14
  "src": "../../../../../node_modules/aria-query/lib/index.js",
15
15
  "file": "astro___aria-query.js",
16
- "fileHash": "24f1b1e8",
16
+ "fileHash": "3b8b6441",
17
17
  "needsInterop": true
18
18
  },
19
19
  "astro > axobject-query": {
20
20
  "src": "../../../../../node_modules/axobject-query/lib/index.js",
21
21
  "file": "astro___axobject-query.js",
22
- "fileHash": "415129f4",
22
+ "fileHash": "12a97af5",
23
23
  "needsInterop": true
24
24
  }
25
25
  },
@@ -0,0 +1,113 @@
1
+ ---
2
+ description: 'Lightweight compliance agent called by Team Lead as its final action. Verifies observability logs, lessons, and quality gates — then provides ready-to-run fix commands for any gaps.'
3
+ name: 'Session Guard'
4
+ model: Claude Opus 4.6
5
+ tools: [read/readFile, search/textSearch, search/fileSearch, execute/runInTerminal, execute/getTerminalOutput, read/terminalLastCommand]
6
+ ---
7
+
8
+ # Session Guard
9
+
10
+ You are a **compliance verification agent**. The Team Lead calls you as its **last action before responding to the user**. Your sole job: verify that all quality gates are satisfied and provide fix commands for any gaps.
11
+
12
+ You do NOT create or modify log entries yourself. You verify and report.
13
+
14
+ ## Input
15
+
16
+ The Team Lead provides a **session summary** with:
17
+
18
+ - **Task description** — what was accomplished
19
+ - **Delegations** — list of `(agent, task, mechanism)` for each delegation made
20
+ - **Reviews** — whether fast reviews or panel reviews were run (and for which delegations)
21
+ - **Retries** — whether any agent retried with a different approach
22
+ - **Discovered issues** — any pre-existing bugs found during work
23
+ - **Files changed** — count and key paths
24
+ - **Commits/branch** — whether changes were committed and to which branch
25
+
26
+ ## Checks
27
+
28
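+ Run ALL checks. Report each as ✅ or ❌; check 6 (uncommitted changes) uses ⚠️ instead of ❌, because uncommitted files are not always a failure.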
29
+
30
+ ### 1. Delegation Records
31
+
32
+ For each delegation in the session summary, verify a matching record exists in `.github/customizations/logs/delegations.ndjson`.
33
+
34
+ **How:** `tail -20 .github/customizations/logs/delegations.ndjson` and match agent + task against the summary.
35
+
36
+ **Fix command template:**
37
+ ```bash
38
+ echo '{"timestamp":"<ISO>","session_id":"<branch>","agent":"<name>","model":"<model>","tier":"<tier>","mechanism":"<sub-agent|background>","outcome":"<success|failure>","retries":0,"phase":<N>,"file_partition":["<paths>"]}' >> .github/customizations/logs/delegations.ndjson
39
+ ```
40
+
41
+ Also verify each delegation record includes `session_id` (branch name). Records missing `session_id` should be flagged.
42
+
43
+ ### 2. Session Record
44
+
45
+ Verify a session record exists in `.github/customizations/logs/sessions.ndjson` for the current task.
46
+
47
+ **How:** `tail -5 .github/customizations/logs/sessions.ndjson` and match task description.
48
+
49
+ **Fix command template:**
50
+ ```bash
51
+ echo '{"timestamp":"<ISO>","agent":"Team Lead","model":"<model>","task":"<description>","outcome":"success","duration_min":<N>,"files_changed":<N>,"retries":0,"lessons_added":[],"discoveries":[]}' >> .github/customizations/logs/sessions.ndjson
52
+ ```
53
+
54
+ ### 3. Lessons Captured
55
+
56
+ If the session summary indicates retries occurred, verify new entries exist in `.github/customizations/LESSONS-LEARNED.md`.
57
+
58
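+ **How:** `grep -c "^### LES-" .github/customizations/LESSONS-LEARNED.md` — compare the count with the number of lessons expected after this session. A bare count cannot show which entries are new, so also confirm that an entry covering this session's retries is present.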
59
+
60
+ ### 4. Discovered Issues Tracked
61
+
62
+ If the session summary lists discovered issues, verify they appear in:
63
+ - `.github/customizations/KNOWN-ISSUES.md`, OR
64
+ - A task tracker ticket referenced in the summary
65
+
66
+ ### 5. Review & Panel Records
67
+
68
+ If the session summary mentions fast reviews or panel reviews, verify matching records exist in `.github/customizations/logs/reviews.ndjson` and/or `.github/customizations/logs/panels.ndjson`.
69
+
70
+ **How:** `tail -10 .github/customizations/logs/reviews.ndjson` and/or `tail -5 .github/customizations/logs/panels.ndjson`.
71
+
72
+ **Fix command templates:**
73
+ ```bash
74
+ # Fast review
75
+ echo '{"timestamp":"<ISO>","agent":"<reviewed-agent>","reviewer_model":"<model>","verdict":"<pass|fail>","attempt":1,"issues_critical":0,"issues_major":0,"issues_minor":0,"confidence":"high","escalated":false,"duration_sec":<N>}' >> .github/customizations/logs/reviews.ndjson
76
+ # Panel review
77
+ echo '{"timestamp":"<ISO>","panel_key":"<key>","verdict":"<pass|block>","pass_count":<N>,"block_count":<N>,"must_fix":0,"should_fix":0,"reviewer_model":"<model>","weighted":false,"attempt":1,"artifacts_count":<N>}' >> .github/customizations/logs/panels.ndjson
78
+ ```
79
+
80
+ ### 6. Uncommitted Changes
81
+
82
+ **How:** `git status --short`
83
+
84
+ Only flag if the session produced code changes that should have been committed. Research-only or analysis sessions may not produce commits.
85
+
86
+ ## Output
87
+
88
+ Return a structured report:
89
+
90
+ ```
91
+ ## Session Guard Report
92
+
93
+ **Verdict:** PASS | FAIL
94
+
95
+ ### Checks
96
+ 1. Delegation records: ✅ N/N found | ❌ M/N missing
97
+ 2. Session record: ✅ found | ❌ missing
98
+ 3. Lessons captured: ✅ N/A (no retries) | ❌ retries occurred, no lesson added
99
+ 4. Discovered issues: ✅ all tracked | ❌ untracked issues
100
+ 5. Review/panel records: ✅ N/A | ✅ N/N found | ❌ M/N missing
101
+ 6. Uncommitted changes: ✅ clean | ⚠️ N files uncommitted
102
+
103
+ ### Fix Commands (only if FAIL)
104
+ <ready-to-run echo commands with filled-in values from the session summary>
105
+ ```
106
+
107
+ ## Rules
108
+
109
+ - **Complete in under 2 minutes** — this is fast verification, not an audit
110
+ - **Never modify files** — only read and report
111
+ - **Fill in fix commands completely** — use real values from the session summary, not placeholders
112
+ - **When in doubt, flag it** — false positives are better than missed gaps
113
+ - **No delegation records needed for research-only sub-agents** that produced no code changes