openspecpm 0.1.0-alpha.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cli/src/audit.js CHANGED
@@ -9,17 +9,21 @@ export function auditPath(cwd = process.cwd()) {
9
9
  return join(cwd, DIR, FILE);
10
10
  }
11
11
 
12
- export async function record({ command, args = {}, result = null, error = null, cwd = process.cwd() } = {}) {
12
+ export async function record({ command, args = {}, result = null, error = null, meta = null, cwd = process.cwd() } = {}) {
13
13
  if (!command) return;
14
14
  const path = auditPath(cwd);
15
15
  await mkdir(dirname(path), { recursive: true });
16
+ const errorText = error ? (typeof error === 'string' ? error : error.message ?? String(error)) : null;
16
17
  const entry = {
17
18
  ts: new Date().toISOString(),
18
19
  command,
19
20
  args: scrub(args),
20
- result: result ? truncate(result, 500) : null,
21
- error: error ? truncate(typeof error === 'string' ? error : error.message ?? String(error), 500) : null,
21
+ // result + error can carry user-supplied strings (e.g. a failing-fetch
22
+ // message containing a webhook URL). Run them through scrubValue too.
23
+ result: result ? truncate(scrubValue(String(result)), 500) : null,
24
+ error: errorText ? truncate(scrubValue(errorText), 500) : null,
22
25
  };
26
+ if (meta && typeof meta === 'object') entry.meta = scrub(meta);
23
27
  await appendFile(path, JSON.stringify(entry) + '\n', 'utf8');
24
28
  }
25
29
 
@@ -33,21 +37,49 @@ export async function tail(n = 50, cwd = process.cwd()) {
33
37
  });
34
38
  }
35
39
 
36
- const SECRET_KEYS = /token|secret|password|pat|api[_-]?key|auth|credential/i;
40
// Whole-word segments that mark an object key as secret-bearing. Matching is
// per segment (not per substring) so that keys such as `path` or `author` are
// not redacted just because they contain `pat` / `auth`.
const SECRET_SEGMENTS = new Set([
  // Original set.
  'token', 'secret', 'password', 'pat', 'auth', 'credential',
  // Added: real-world key naming a CLI accumulates over time.
  //   bearer/cookie/session — bearer credentials by name.
  //   webhook               — Slack/Teams URLs ARE the credential.
  //   signature             — webhook HMAC sigs, request-signing headers.
  //   assertion             — SAML / OIDC.
  'bearer', 'cookie', 'session', 'webhook', 'signature', 'assertion',
]);

// Webhook URLs that act as bearer credentials. Anyone holding the URL can
// post to the channel. Redact in any string value so accidental logging
// (e.g. a failing-fetch error message embedding the URL) never leaks.
const WEBHOOK_URL_RE = /https:\/\/(?:hooks\.slack\.com\/services|[^\/\s"'`]*\.webhook\.office(?:365)?\.com|outlook\.office(?:365)?\.com\/webhook)[^\s"'`]+/gi;

/**
 * Decide whether an object key names a secret whose value must be redacted.
 *
 * The key is broken into alphabetic segments and each segment is looked up in
 * SECRET_SEGMENTS. Segments are derived twice: once from the key as written
 * (snake_case, kebab-case, dotted) and once after inserting breaks at
 * camelCase / acronym boundaries, so `accessToken`, `clientSecret` and
 * `OAuthToken` are caught as well. Any spelling of `api key` (`apiKey`,
 * `api_key`, `API-KEY`) always matches.
 *
 * @param {string} k - Property name to classify.
 * @returns {boolean} true when the value stored under `k` should be redacted.
 */
function isSecretKey(k) {
  if (/api[_-]?key/i.test(k)) return true;
  // Insert a separator at "HTTPCookie" -> "HTTP Cookie" and at
  // "accessToken" -> "access Token" before lowercasing; lowercasing first
  // would fuse the words into a single unknown segment.
  const caseSplit = k
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .replace(/([a-z0-9])([A-Z])/g, '$1 $2');
  for (const form of [k, caseSplit]) {
    for (const seg of form.toLowerCase().split(/[^a-z]+/)) {
      if (seg && SECRET_SEGMENTS.has(seg)) return true;
    }
  }
  return false;
}

/**
 * Replace credential-bearing webhook URLs inside a string.
 *
 * @param {*} s - Candidate value; anything that is not a string is returned untouched.
 * @returns {*} The string with every WEBHOOK_URL_RE match replaced by
 *   `<redacted-webhook>`, or the original non-string value.
 */
function scrubValue(s) {
  if (typeof s !== 'string') return s;
  return s.replace(WEBHOOK_URL_RE, '<redacted-webhook>');
}
37
68
 
38
69
  function scrub(obj) {
70
+ if (typeof obj === 'string') return scrubValue(obj);
39
71
  if (!obj || typeof obj !== 'object') return obj;
40
72
  if (Array.isArray(obj)) return obj.map(scrub);
41
73
  const out = {};
42
74
  for (const [k, v] of Object.entries(obj)) {
43
- if (SECRET_KEYS.test(k)) {
75
+ if (isSecretKey(k)) {
44
76
  out[k] = '<redacted>';
45
77
  } else if (v && typeof v === 'object') {
46
78
  out[k] = scrub(v);
47
79
  } else if (typeof v === 'string' && v.length > 200) {
48
- out[k] = v.slice(0, 200) + '…';
80
+ out[k] = scrubValue(v).slice(0, 200) + '…';
49
81
  } else {
50
- out[k] = v;
82
+ out[k] = scrubValue(v);
51
83
  }
52
84
  }
53
85
  return out;
@@ -0,0 +1,216 @@
1
+ import { readFile, readdir } from 'node:fs/promises';
2
+ import { existsSync } from 'node:fs';
3
+ import { join } from 'node:path';
4
+
5
// Model used by the judge when the caller does not pick one.
export const DEFAULT_MODEL = 'claude-haiku-4-5';
// Default number of findings the judge is asked to report per spec file.
export const DEFAULT_MAX_FINDINGS_PER_SPEC = 8;
// Upper bound on spec files judged at the same time.
const MAX_CONCURRENT = 5;

// Rule ids the judge is allowed to emit. Findings carrying any other rule are
// dropped when the tool call is validated.
const ALLOWED_RULES = new Set([
  'bdd/llm-contradiction',
  'bdd/llm-missing-coverage',
  'bdd/llm-vague-then',
]);

// Severities the judge is allowed to emit.
const ALLOWED_SEVERITY = new Set(['error', 'warning']);

// Tool definition handed to the model. The enum lists are derived from the
// ALLOWED_* sets above so the schema advertised to the model and the
// validation applied to its answer cannot drift apart.
const REPORT_TOOL = {
  name: 'report_findings',
  description:
    'Report BDD scenario findings as a structured list. Each finding flags a specific defect that the heuristic linter cannot catch: cross-spec contradictions, missing coverage against success criteria, or vague Then predicates that pass regex checks but state no observable outcome.',
  input_schema: {
    type: 'object',
    additionalProperties: false,
    properties: {
      findings: {
        type: 'array',
        items: {
          type: 'object',
          additionalProperties: false,
          properties: {
            severity: { type: 'string', enum: [...ALLOWED_SEVERITY] },
            line: { type: 'integer', minimum: 1 },
            scenario: { type: 'string' },
            rule: {
              type: 'string',
              enum: [...ALLOWED_RULES],
            },
            message: { type: 'string' },
          },
          required: ['severity', 'scenario', 'rule', 'message'],
        },
      },
    },
    required: ['findings'],
  },
};
47
+
48
// Reviewer instructions sent as the first system text block of every judge
// request. The three rule ids named here are the same ones the tool schema
// and the response validation accept; keep all three in step when editing.
const SYSTEM_PROMPT = `You are a BDD scenario reviewer. You augment a heuristic linter by catching defects it cannot see: cross-spec contradictions, missing coverage of declared success criteria, and Then predicates that state no observable outcome. You are reviewing one spec file at a time, with the full feature proposal as context.

Rules:
- Use the report_findings tool exactly once.
- Only emit findings for the three rules: bdd/llm-contradiction, bdd/llm-missing-coverage, bdd/llm-vague-then.
- Each finding must name the specific scenario by title and include the line number where the issue appears.
- bdd/llm-contradiction: a scenario contradicts another scenario in the same file or another spec referenced in the proposal.
- bdd/llm-missing-coverage: the proposal's success criteria contain a requirement with no scenario covering it.
- bdd/llm-vague-then: a Then predicate uses an observable verb but its outcome is not actually checkable (e.g. "Then the user receives confirmation" with no detail on what confirmation).
- Severity error for contradictions and uncovered hard requirements; severity warning for vague Thens and uncovered nice-to-haves.
- Empty findings array is the correct output when the spec is clean.
- Never invent rule names. Never include findings outside the three rules above.`;
60
+
61
/**
 * Run the LLM judge over every `.md` file in `<featureDir>/specs` and return
 * the combined findings.
 *
 * @param {string} featureDir - Directory of the change; specs are read from its `specs` subdirectory.
 * @param {object} [opts]
 * @param {object} opts.client - Client exposing `messages.create(...)`. Required.
 * @param {string} [opts.model=DEFAULT_MODEL] - Model id passed to the client.
 * @param {string} [opts.proposal=''] - Proposal text shared as context for every spec.
 * @param {number} [opts.maxFindingsPerSpec=DEFAULT_MAX_FINDINGS_PER_SPEC] - Findings budget per spec file.
 * @param {Function} [opts.onUsage] - Optional callback receiving token usage per request.
 * @returns {Promise<object[]>} Findings of all spec files, flattened, in
 *   filename order. Empty when there is no specs directory or no `.md` file.
 * @throws {Error} When `opts.client` is missing.
 */
export async function judgeChange(featureDir, opts = {}) {
  const {
    client,
    model = DEFAULT_MODEL,
    proposal = '',
    maxFindingsPerSpec = DEFAULT_MAX_FINDINGS_PER_SPEC,
    onUsage,
  } = opts;

  if (!client) throw new Error('judge: client is required');

  const specsDir = join(featureDir, 'specs');
  if (!existsSync(specsDir)) return [];

  // readdir gives no ordering guarantee; sort so the report lists findings in
  // the same order on every run and platform.
  const files = (await readdir(specsDir)).filter((f) => f.endsWith('.md')).sort();
  if (!files.length) return [];

  // Thunks, not promises: runBounded decides when each request starts.
  const tasks = files.map((f) => () =>
    judgeSpec(join(specsDir, f), { client, model, proposal, maxFindingsPerSpec, onUsage }),
  );

  const results = await runBounded(tasks, MAX_CONCURRENT);
  return results.flat();
}
85
+
86
/**
 * Judge a single spec file.
 *
 * Reads the file, sends it to the model together with the shared proposal,
 * forces a `report_findings` tool call, reports token usage through
 * `onUsage`, and returns the validated findings. Failures to read the file
 * or to reach the model do not throw: they come back as a single
 * `bdd/llm-parse-error` warning so one bad spec cannot abort the whole run.
 *
 * @param {string} file - Path of the spec file.
 * @param {object} ctx
 * @param {object} ctx.client - Client exposing `messages.create(...)`.
 * @param {string} ctx.model - Model id.
 * @param {string} ctx.proposal - Proposal text; a placeholder is sent when empty.
 * @param {number} ctx.maxFindingsPerSpec - Findings budget. Stated in the
 *   prompt and, when it is a positive integer, enforced on the result.
 * @param {Function} [ctx.onUsage] - Receives `{ file, model, ...token counts }`.
 * @returns {Promise<object[]>} Findings for this file.
 */
async function judgeSpec(file, { client, model, proposal, maxFindingsPerSpec, onUsage }) {
  let specSource;
  try {
    specSource = await readFile(file, 'utf8');
  } catch (err) {
    return [{
      severity: 'warning',
      file,
      line: 1,
      scenario: '(read failed)',
      rule: 'bdd/llm-parse-error',
      message: `Could not read spec file: ${err?.message ?? String(err)}`,
    }];
  }

  const userPrompt = `Review the following BDD spec file. Use report_findings to report up to ${maxFindingsPerSpec} findings.

<spec file="${file}">
${specSource}
</spec>`;

  let response;
  try {
    response = await client.messages.create({
      model,
      max_tokens: 4096,
      tools: [REPORT_TOOL],
      tool_choice: { type: 'tool', name: 'report_findings' },
      system: [
        {
          type: 'text',
          text: SYSTEM_PROMPT,
        },
        {
          // Identical for every spec of the feature, hence the cache marker.
          type: 'text',
          text: `Feature proposal (shared context across every spec in this feature):\n\n${proposal || '(no proposal.md available)'}`,
          cache_control: { type: 'ephemeral' },
        },
      ],
      messages: [{ role: 'user', content: userPrompt }],
    });
  } catch (err) {
    return [{
      severity: 'warning',
      file,
      line: 1,
      scenario: '(judge failed)',
      rule: 'bdd/llm-parse-error',
      message: `LLM judge call failed: ${err?.message ?? String(err)}`,
    }];
  }

  if (onUsage && response?.usage) {
    try {
      onUsage({
        file,
        model,
        input_tokens: response.usage.input_tokens ?? 0,
        output_tokens: response.usage.output_tokens ?? 0,
        cache_creation_input_tokens: response.usage.cache_creation_input_tokens ?? 0,
        cache_read_input_tokens: response.usage.cache_read_input_tokens ?? 0,
      });
    } catch { /* never break the judge on telemetry */ }
  }

  const findings = extractFindings(response, file);
  // The prompt only asks the model to stay within the budget; enforce it here
  // so an over-eager answer cannot flood the report.
  if (Number.isInteger(maxFindingsPerSpec) && maxFindingsPerSpec > 0) {
    return findings.slice(0, maxFindingsPerSpec);
  }
  return findings;
}
153
+
154
/**
 * Pull the validated findings out of a model response.
 *
 * Looks for the `report_findings` tool call among the response content
 * blocks. When the call is absent, or its `findings` input is not an array,
 * a single `bdd/llm-parse-error` warning is returned instead. Entries whose
 * rule or severity is not allowed, or whose scenario / message is not a
 * non-empty string, are dropped. `line` is kept only when it is a positive
 * integer; otherwise it is `undefined`.
 *
 * @param {object} response - Response returned by the client.
 * @param {string} file - Spec file path stamped on every finding.
 * @returns {object[]} Findings for `file`.
 */
function extractFindings(response, file) {
  // Single-element result describing why the response could not be used.
  const parseFailure = (scenario, message) => [{
    severity: 'warning',
    file,
    line: 1,
    scenario,
    rule: 'bdd/llm-parse-error',
    message,
  }];

  const blocks = response?.content ?? [];
  const call = blocks.find(
    (block) => block.type === 'tool_use' && block.name === 'report_findings',
  );
  if (!call) {
    return parseFailure('(no findings reported)', 'LLM did not call report_findings tool.');
  }

  const reported = call.input?.findings;
  if (!Array.isArray(reported)) {
    return parseFailure('(malformed response)', 'report_findings input was not a findings array.');
  }

  const isNonEmptyString = (value) => typeof value === 'string' && value !== '';
  const isUsable = (item) =>
    Boolean(item)
    && typeof item === 'object'
    && ALLOWED_RULES.has(item.rule)
    && ALLOWED_SEVERITY.has(item.severity)
    && isNonEmptyString(item.scenario)
    && isNonEmptyString(item.message);

  return reported.filter(isUsable).map((item) => ({
    severity: item.severity,
    file,
    line: Number.isInteger(item.line) && item.line > 0 ? item.line : undefined,
    scenario: item.scenario,
    rule: item.rule,
    message: item.message,
  }));
}
197
+
198
/**
 * Run async task thunks with at most `limit` of them in flight at once.
 *
 * @param {Array<() => Promise<*>>} tasks - Functions that start the work when called.
 * @param {number} limit - Maximum number of concurrently running tasks.
 *   Fractions are rounded down; anything that does not yield a positive
 *   integer (0, negative, NaN, undefined) is treated as 1 so every task still
 *   runs instead of being skipped or throwing a RangeError.
 * @returns {Promise<Array<*>>} Task results, in the order of `tasks`.
 *   Rejects with the first task error.
 */
async function runBounded(tasks, limit) {
  const results = new Array(tasks.length);
  const requested = Math.floor(limit) || 1;
  const width = Math.max(1, Math.min(requested, tasks.length));

  // Shared cursor: each worker claims the next unclaimed index. The claim is
  // synchronous, so two workers can never pick the same task.
  let next = 0;
  const worker = async () => {
    while (next < tasks.length) {
      const idx = next++;
      results[idx] = await tasks[idx]();
    }
  };

  await Promise.all(Array.from({ length: width }, () => worker()));
  return results;
}
211
+
212
/**
 * Build the default judge client.
 *
 * The SDK is imported lazily, so it is only loaded when this function is
 * called.
 *
 * @returns {Promise<object>} A new Anthropic client created with
 *   `process.env.ANTHROPIC_API_KEY` as its `apiKey`.
 */
export async function defaultClient() {
  const { default: Anthropic } = await import('@anthropic-ai/sdk');
  const apiKey = process.env.ANTHROPIC_API_KEY;
  return new Anthropic({ apiKey });
}
@@ -31,6 +31,11 @@ export async function runSyncAll({ dryRun = false, force = false, yes = false }
31
31
  }
32
32
  }
33
33
  process.stdout.write(`\nSummary: ${synced} synced, ${failed} failed.\n`);
34
+ if (failed > 0) {
35
+ const err = new Error(`${failed} change(s) failed to sync.`);
36
+ err.remediation = 'See per-change errors above; re-run for affected features after fixing.';
37
+ throw err;
38
+ }
34
39
  }
35
40
 
36
41
  export async function runShipAllReady({ yes = false, skipArchive = false } = {}) {
@@ -64,4 +69,9 @@ export async function runShipAllReady({ yes = false, skipArchive = false } = {})
64
69
  }
65
70
  }
66
71
  process.stdout.write(`\nSummary: ${shipped} shipped, ${failed} failed.\n`);
72
+ if (failed > 0) {
73
+ const err = new Error(`${failed} change(s) failed to ship.`);
74
+ err.remediation = 'See per-change errors above; re-run for affected features after fixing.';
75
+ throw err;
76
+ }
67
77
  }
@@ -33,6 +33,17 @@ export async function runDoctor({ adapter, install = false, setupAuth = false }
33
33
  if (install) suggestAdapterInstall(name);
34
34
  if (setupAuth) suggestAuth(name);
35
35
  }
36
+
37
+ process.stdout.write('\n[judge]\n');
38
+ if (process.env.ANTHROPIC_API_KEY) {
39
+ line(true, 'ANTHROPIC_API_KEY is set (LLM BDD judge available)');
40
+ } else {
41
+ line(
42
+ false,
43
+ 'ANTHROPIC_API_KEY not set',
44
+ 'Create a key at https://console.anthropic.com/settings/keys, then set ANTHROPIC_API_KEY in your shell. Required for `openspecpm propose --llm` and `sync --llm`.',
45
+ );
46
+ }
36
47
  }
37
48
 
38
49
  function line(ok, msg, remediation) {
@@ -1,11 +1,14 @@
1
- import { mkdir, writeFile } from 'node:fs/promises';
1
+ import { mkdir, readFile, writeFile } from 'node:fs/promises';
2
2
  import { existsSync } from 'node:fs';
3
3
  import { join } from 'node:path';
4
4
  import { propose, changeExists, changeDir, OpenSpecError } from '../openspec-bridge.js';
5
5
  import { lintChange, summarize, formatFindings } from '../bdd/linter.js';
6
+ import { judgeChange, defaultClient, DEFAULT_MODEL } from '../bdd/judge.js';
6
7
  import { CHANGE_TYPES, proposalTemplate, specsTemplate, STARTER_TASKS } from '../bdd/templates.js';
8
+ import { readConfig } from '../config.js';
9
+ import { record } from '../audit.js';
7
10
 
8
- export async function runPropose({ feature, prompt, type = 'feature', offline = false } = {}) {
11
+ export async function runPropose({ feature, prompt, type = 'feature', offline = false, llm = false } = {}) {
9
12
  if (!feature) throw new Error('feature name is required');
10
13
  if (!CHANGE_TYPES.includes(type)) {
11
14
  const err = new Error(`Unknown change type "${type}".`);
@@ -15,14 +18,14 @@ export async function runPropose({ feature, prompt, type = 'feature', offline =
15
18
 
16
19
  if (changeExists(feature)) {
17
20
  process.stdout.write(`Change "${feature}" already exists at ${changeDir(feature)}. Skipping propose.\n`);
18
- await softLint(changeDir(feature));
21
+ await softLint(changeDir(feature), { llm, feature });
19
22
  return changeDir(feature);
20
23
  }
21
24
 
22
25
  if (offline) {
23
26
  const dir = await scaffoldOffline(feature, type);
24
27
  process.stdout.write(`\nProposal scaffolded offline at ${dir} (type=${type}).\n`);
25
- await softLint(dir);
28
+ await softLint(dir, { llm, feature });
26
29
  process.stdout.write(`Next: refine the templates, then run \`openspecpm sync ${feature}\`.\n`);
27
30
  return dir;
28
31
  }
@@ -31,7 +34,7 @@ export async function runPropose({ feature, prompt, type = 'feature', offline =
31
34
  try {
32
35
  const dir = await propose(feature, seed);
33
36
  process.stdout.write(`\nProposal created at ${dir}.\n`);
34
- await softLint(dir);
37
+ await softLint(dir, { llm, feature });
35
38
  process.stdout.write(`Next: review proposal.md + specs/, then run \`openspecpm sync ${feature}\`.\n`);
36
39
  return dir;
37
40
  } catch (err) {
@@ -57,11 +60,43 @@ async function scaffoldOffline(feature, type) {
57
60
  return dir;
58
61
  }
59
62
 
60
- async function softLint(dir) { // eslint-disable-line
63
+ async function softLint(dir, { llm = false, feature } = {}) { // eslint-disable-line
61
64
  const findings = await lintChange(dir);
65
+ const judgeEnabled = await isJudgeEnabled(llm);
66
+ if (judgeEnabled) {
67
+ const extra = await runJudgeSoft(dir, feature);
68
+ findings.push(...extra);
69
+ }
62
70
  const sum = summarize(findings);
63
71
  if (!sum.total) return;
64
72
  process.stdout.write(`\nBDD lint (soft): ${sum.errors} errors, ${sum.warnings} warnings\n`);
65
73
  process.stdout.write(formatFindings(findings));
66
74
  process.stdout.write('These will block `sync` unless you pass --force. Refine scenarios before pushing.\n');
67
75
  }
76
+
77
/**
 * Tell whether the LLM judge should run.
 *
 * @param {boolean} llm - Explicit opt-in from the caller; when truthy the
 *   config is not consulted.
 * @returns {Promise<boolean>} true when `llm` is truthy or the config has
 *   `judge.enabled` set to a truthy value.
 */
async function isJudgeEnabled(llm) {
  if (!llm) {
    const settings = await readConfig();
    return Boolean(settings?.judge?.enabled);
  }
  return true;
}
82
+
83
/**
 * Run the LLM judge for a change without ever failing the caller.
 *
 * Uses the model from `judge.model` in the config (falling back to
 * DEFAULT_MODEL), passes the contents of `proposal.md` when that file exists,
 * and records token usage to the audit log under the `judge` command. Any
 * error is reported on stdout as a skip notice and turns into an empty result.
 *
 * @param {string} dir - Directory of the change.
 * @param {string} feature - Feature name stored with the usage record.
 * @returns {Promise<object[]>} Judge findings, or `[]` when the judge could not run.
 */
async function runJudgeSoft(dir, feature) {
  // Audit write failures are deliberately ignored: usage logging is best-effort.
  const logUsage = (usage) => {
    record({ command: 'judge', args: { feature }, meta: usage }).catch(() => {});
  };

  try {
    const settings = await readConfig();
    const modelName = settings?.judge?.model ?? DEFAULT_MODEL;

    const proposalFile = join(dir, 'proposal.md');
    let proposalText = '';
    if (existsSync(proposalFile)) {
      proposalText = await readFile(proposalFile, 'utf8');
    }

    const llmClient = await defaultClient();
    return await judgeChange(dir, {
      client: llmClient,
      model: modelName,
      proposal: proposalText,
      onUsage: logUsage,
    });
  } catch (err) {
    process.stdout.write(` (LLM judge skipped: ${err.message})\n`);
    return [];
  }
}
@@ -4,6 +4,7 @@ import { readConfig } from '../config.js';
4
4
  import { loadAdapter } from '../adapters/index.js';
5
5
  import { changeDir, changeExists } from '../openspec-bridge.js';
6
6
  import * as fm from '../frontmatter.js';
7
+ import { coerceItems, safeParseFrontmatter } from '../tracking.js';
7
8
 
8
9
  export async function runReconcile({ feature, dryRun = false } = {}) {
9
10
  if (!feature) throw new Error('feature name is required');
@@ -17,10 +18,22 @@ export async function runReconcile({ feature, dryRun = false } = {}) {
17
18
 
18
19
  const dir = changeDir(feature);
19
20
  const tasksPath = join(dir, 'tasks.md');
20
- let tasksRaw = '';
21
- try { tasksRaw = await readFile(tasksPath, 'utf8'); } catch { /* missing */ }
22
- const { data: tdata, body: tbody } = fm.parse(tasksRaw);
23
- const items = tdata.items ?? [];
21
+ // Read + validate through the same helpers loadChange uses, so a non-array
22
+ // items: (or malformed YAML) raises a clear error instead of iterating
23
+ // character-by-character.
24
+ let tdata = {};
25
+ let tbody = '';
26
+ try {
27
+ ({ data: tdata, body: tbody } = await safeParseFrontmatter(tasksPath, feature, 'tasks.md'));
28
+ } catch (err) {
29
+ if (err.code === 'ENOENT' || /no such file/i.test(err.message)) {
30
+ // tasks.md missing — nothing to reconcile.
31
+ process.stdout.write('No items in tasks.md to reconcile.\n');
32
+ return;
33
+ }
34
+ throw err;
35
+ }
36
+ const items = coerceItems(tdata.items, tbody, feature);
24
37
  if (!items.length) {
25
38
  process.stdout.write('No items in tasks.md to reconcile.\n');
26
39
  return;
@@ -5,9 +5,12 @@ import { readConfig } from '../config.js';
5
5
  import { loadAdapter } from '../adapters/index.js';
6
6
  import { changeDir, changeExists } from '../openspec-bridge.js';
7
7
  import { lintChange, summarize, formatFindings } from '../bdd/linter.js';
8
+ import { judgeChange, defaultClient, DEFAULT_MODEL } from '../bdd/judge.js';
8
9
  import * as fm from '../frontmatter.js';
10
+ import { coerceItems, safeParseFrontmatter } from '../tracking.js';
11
+ import { record } from '../audit.js';
9
12
 
10
- export async function runSync({ feature, dryRun = false, force = false, diff = false } = {}) {
13
+ export async function runSync({ feature, dryRun = false, force = false, diff = false, llm = false } = {}) {
11
14
  if (!feature) throw new Error('feature name is required');
12
15
  const config = await readConfig();
13
16
  if (!config) {
@@ -23,6 +26,30 @@ export async function runSync({ feature, dryRun = false, force = false, diff = f
23
26
 
24
27
  const dir = changeDir(feature);
25
28
  const findings = await lintChange(dir);
29
+ if (llm || config?.judge?.enabled) {
30
+ try {
31
+ const model = config?.judge?.model ?? DEFAULT_MODEL;
32
+ const proposalPath = join(dir, 'proposal.md');
33
+ const proposalForJudge = existsSync(proposalPath) ? await readFile(proposalPath, 'utf8') : '';
34
+ const client = await defaultClient();
35
+ const judgeFindings = await judgeChange(dir, {
36
+ client,
37
+ model,
38
+ proposal: proposalForJudge,
39
+ onUsage: (u) => {
40
+ record({ command: 'judge', args: { feature }, meta: u }).catch(() => {});
41
+ },
42
+ });
43
+ findings.push(...judgeFindings);
44
+ } catch (err) {
45
+ if (!force) {
46
+ const e = new Error(`LLM judge failed: ${err.message}`);
47
+ e.remediation = 'Run `openspecpm doctor` to check ANTHROPIC_API_KEY, or pass --force to skip the LLM judge.';
48
+ throw e;
49
+ }
50
+ process.stdout.write(` (LLM judge skipped under --force: ${err.message})\n`);
51
+ }
52
+ }
26
53
  const sum = summarize(findings);
27
54
  if (sum.errors > 0 && !force) {
28
55
  process.stderr.write(`BDD lint: ${sum.errors} errors, ${sum.warnings} warnings\n`);
@@ -76,9 +103,11 @@ export async function runSync({ feature, dryRun = false, force = false, diff = f
76
103
  out('No tasks.md found — only the epic was synced.');
77
104
  return;
78
105
  }
79
- const tasksRaw = await readFile(tasksPath, 'utf8');
80
- const { data: tdata, body: tbody } = fm.parse(tasksRaw);
81
- const items = tdata.items ?? parseChecklist(tbody);
106
+ // Route through the same parse+coerce helpers loadChange uses, so a
107
+ // non-array items: (or malformed YAML) is rejected here too — sync is the
108
+ // primary command and bypassing the validator was the H3 regression.
109
+ const { data: tdata, body: tbody } = await safeParseFrontmatter(tasksPath, feature, 'tasks.md');
110
+ const items = coerceItems(tdata.items, tbody, feature);
82
111
  const updatedItems = [];
83
112
 
84
113
  for (const task of items) {
@@ -106,16 +135,52 @@ export async function runSync({ feature, dryRun = false, force = false, diff = f
106
135
  const patched = fm.serialize({ ...tdata, items: updatedItems }, tbody);
107
136
  await writeFile(tasksPath, patched, 'utf8');
108
137
  }
138
+
139
+ // Exit with a non-zero status if any task failed, so CI invocations like
140
+ // `openspecpm sync feature && deploy` don't proceed on silent partial sync.
141
+ // The tasks.md patch above already persisted last_error per failed task.
142
+ const failed = updatedItems.filter((t) => t.sync_state === 'failed');
143
+ if (failed.length) {
144
+ const err = new Error(`${failed.length} task(s) failed to sync in "${feature}".`);
145
+ err.remediation = 'Inspect last_error in tasks.md frontmatter and re-run sync to retry only failed items.';
146
+ throw err;
147
+ }
109
148
  }
110
149
 
111
150
  function out(s) {
112
151
  process.stdout.write(s + '\n');
113
152
  }
114
153
 
154
// Strip C0/C1 control chars (except common whitespace), bidi controls, and
// zero-width / invisible format chars from text we forward to a remote
// tracker as an issue body. A proposal author could intentionally or
// accidentally include these and they show up confusingly (or as
// homograph-attack vectors) in GitHub/Jira issue UIs. Implemented as a
// codepoint predicate rather than a regex literal so the source file stays
// pure ASCII (a regex with literal control chars makes git treat the file as
// binary).
/**
 * @param {number} cp - Unicode code point to classify.
 * @returns {boolean} true when the character may be kept, false when it must
 *   be stripped.
 */
function isPrintableChar(cp) {
  if (cp === 0x09 || cp === 0x0A || cp === 0x0D) return true; // keep TAB / LF / CR
  if (cp <= 0x1F) return false; // C0 controls
  if (cp === 0x7F) return false; // DEL
  if (cp >= 0x80 && cp <= 0x9F) return false; // C1 controls
  if (cp === 0x061C) return false; // ALM, the Arabic counterpart of LRM/RLM
  if (cp >= 0x200B && cp <= 0x200F) return false; // zero-width + joiners + LRM/RLM
  if (cp >= 0x202A && cp <= 0x202E) return false; // LRE/RLE/PDF/LRO/RLO bidi overrides
  if (cp >= 0x2060 && cp <= 0x2064) return false; // word joiner + invisible operators
  if (cp >= 0x2066 && cp <= 0x2069) return false; // LRI/RLI/FSI/PDI isolates
  if (cp === 0xFEFF) return false; // zero-width no-break space / BOM
  return true;
}
171
+
172
/**
 * Remove every character rejected by isPrintableChar from a piece of text.
 *
 * The input is coerced with String() and walked by code point, so characters
 * outside the Basic Multilingual Plane are tested as a whole rather than as
 * two surrogate halves.
 *
 * @param {*} s - Text to clean.
 * @returns {string} The text with rejected characters removed.
 */
function sanitizeText(s) {
  return Array.from(String(s))
    .filter((glyph) => isPrintableChar(glyph.codePointAt(0)))
    .join('');
}
179
+
115
180
  function extractSummary(md) {
116
181
  const { body } = fm.parse(md);
117
182
  const firstPara = body.split(/\r?\n\r?\n/).find((p) => p.trim() && !p.startsWith('#'));
118
- return (firstPara ?? '').trim().slice(0, 1000);
183
+ return sanitizeText((firstPara ?? '').trim()).slice(0, 1000);
119
184
  }
120
185
 
121
186
  function parseChecklist(body) {
@@ -1,13 +1,21 @@
1
1
  import { existsSync } from 'node:fs';
2
+ import { readFile } from 'node:fs/promises';
2
3
  import { join } from 'node:path';
3
4
  import { listChanges } from '../tracking.js';
4
5
  import { lintChange, summarize } from '../bdd/linter.js';
6
+ import { judgeChange, defaultClient, DEFAULT_MODEL } from '../bdd/judge.js';
7
+ import { readConfig } from '../config.js';
8
+ import { record } from '../audit.js';
5
9
 
6
10
  const REQUIRED_PROPOSAL = ['name'];
7
11
  const TASK_STATES = ['pending', 'created', 'failed'];
8
12
 
9
- export async function runValidate() {
13
+ export async function runValidate({ llm = false } = {}) {
10
14
  const changes = await listChanges();
15
+ const config = await readConfig();
16
+ const judgeEnabled = llm || Boolean(config?.judge?.enabled);
17
+ const model = config?.judge?.model ?? DEFAULT_MODEL;
18
+ const client = judgeEnabled ? await defaultClient().catch(() => null) : null;
11
19
  out(`openspecpm validate — ${changes.length} change(s)\n`);
12
20
  let totalIssues = 0;
13
21
 
@@ -52,6 +60,29 @@ export async function runValidate() {
52
60
 
53
61
  // BDD lint
54
62
  const findings = await lintChange(change.dir);
63
+ if (judgeEnabled && client) {
64
+ try {
65
+ const proposalPath = join(change.dir, 'proposal.md');
66
+ const proposal = existsSync(proposalPath) ? await readFile(proposalPath, 'utf8') : '';
67
+ const judgeFindings = await judgeChange(change.dir, {
68
+ client,
69
+ model,
70
+ proposal,
71
+ onUsage: (u) => {
72
+ record({ command: 'judge', args: { feature: change.name }, meta: u }).catch(() => {});
73
+ },
74
+ });
75
+ findings.push(...judgeFindings);
76
+ } catch (err) {
77
+ findings.push({
78
+ severity: 'warning',
79
+ file: change.dir,
80
+ scenario: '(judge failed)',
81
+ rule: 'bdd/llm-parse-error',
82
+ message: `LLM judge failed: ${err.message}`,
83
+ });
84
+ }
85
+ }
55
86
  const { errors, warnings } = summarize(findings);
56
87
 
57
88
  const total = issues.length + errors;
package/cli/src/http.js CHANGED
@@ -1,13 +1,16 @@
1
1
  import { AdapterError } from './adapters/base.js';
2
2
 
3
+ const DEFAULT_TIMEOUT_MS = 30_000;
4
+
3
5
  export class HttpClient {
4
6
  #baseUrl;
5
7
  #authHeader;
6
8
  #fetchImpl;
7
9
  #defaultHeaders;
8
10
  #remediationHint;
11
+ #timeoutMs;
9
12
 
10
- constructor({ baseUrl, auth, fetch: fetchImpl = globalThis.fetch, defaultHeaders = {}, remediationHint } = {}) {
13
+ constructor({ baseUrl, auth, fetch: fetchImpl = globalThis.fetch, defaultHeaders = {}, remediationHint, timeoutMs = DEFAULT_TIMEOUT_MS } = {}) {
11
14
  if (!baseUrl) throw new Error('HttpClient requires baseUrl');
12
15
  if (typeof fetchImpl !== 'function') throw new Error('global fetch not available; pass {fetch} explicitly');
13
16
  this.#baseUrl = baseUrl.replace(/\/+$/, '');
@@ -15,6 +18,7 @@ export class HttpClient {
15
18
  this.#fetchImpl = fetchImpl;
16
19
  this.#defaultHeaders = defaultHeaders;
17
20
  this.#remediationHint = remediationHint;
21
+ this.#timeoutMs = timeoutMs;
18
22
  }
19
23
 
20
24
  async request(method, path, { query, body, headers, contentType = 'application/json', accept = 'application/json' } = {}) {
@@ -35,10 +39,18 @@ export class HttpClient {
35
39
  finalHeaders['Content-Type'] = contentType;
36
40
  }
37
41
 
42
+ // Bound the request with an abort signal so a hung backend can't wedge sync --all.
43
+ const signal = AbortSignal.timeout(this.#timeoutMs);
38
44
  let res;
39
45
  try {
40
- res = await this.#fetchImpl(url, { method, headers: finalHeaders, body: payload });
46
+ res = await this.#fetchImpl(url, { method, headers: finalHeaders, body: payload, signal });
41
47
  } catch (err) {
48
+ if (err?.name === 'TimeoutError' || err?.name === 'AbortError') {
49
+ throw new AdapterError(`${method} ${url} timed out after ${this.#timeoutMs}ms`, {
50
+ remediation: 'Backend did not respond in time. Retry, or raise HttpClient timeoutMs if the endpoint is known-slow.',
51
+ cause: err,
52
+ });
53
+ }
42
54
  throw new AdapterError(`Network error calling ${method} ${url}: ${err.message}`, {
43
55
  remediation: this.#remediationHint ?? 'Check connectivity and base URL.',
44
56
  cause: err,