@besales/ops-framework 0.1.31 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.32
4
+
5
+ - Required explicit `review-budget-approval.json` before `--force-review-budget` can bypass Check/Verify review budgets.
6
+ - Added external CLI provider timeouts from `reviewBudgets.*.providerTimeoutMs` so Check/Verify provider calls cannot silently exceed the stage SLA.
7
+ - Recorded denied force attempts in Check/Verify timelines as `review_budget_force_denied`.
8
+ - Updated review budget docs and prompts so force review is treated as a human-approved exception, not a normal retry path.
9
+
3
10
  ## 0.1.31
4
11
 
5
12
  - Added bounded review budgets for Check and Verify: default 3 minute stage SLA and one external provider run per stage.
package/README.md CHANGED
@@ -194,11 +194,22 @@ External `run-check` and `run-verify` are bounded by default:
194
194
 
195
195
  - stage SLA: `180000ms`;
196
196
  - max external provider runs per stage: `1`.
197
+ - external provider timeout: `180000ms`.
197
198
 
198
199
  When the budget is exceeded, the framework writes `human_arbitration_required`
199
200
  instead of starting another provider loop. Consolidate the remaining findings in
200
201
  task artifacts, or rerun with `--force-review-budget` only after explicit human
201
- approval.
202
+ approval recorded in `review-budget-approval.json`:
203
+
204
+ ```json
205
+ {
206
+ "approved": true,
207
+ "stage": "check",
208
+ "reason": "Human approved one extra external review after consolidated remediation.",
209
+ "approvedBy": "human",
210
+ "expiresAt": "2026-06-05T12:00:00.000Z"
211
+ }
212
+ ```
202
213
 
203
214
  ## Learning Loop
204
215
 
@@ -1,7 +1,11 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+
1
4
  export function resolveStageReviewBudget(config, stage) {
2
5
  const defaults = {
3
6
  stageSlaMs: 180000,
4
7
  maxExternalRunsPerStage: 1,
8
+ providerTimeoutMs: 180000,
5
9
  };
6
10
  const reviewBudgets = config.reviewBudgets || {};
7
11
  return {
@@ -68,6 +72,108 @@ export function evaluateReviewBudget({ budget, summary, force = false }) {
68
72
  return { ok: true, reason: null };
69
73
  }
70
74
 
75
/**
 * Load and validate the human approval artifact that must exist before
 * `--force-review-budget` may bypass a stage review budget.
 *
 * The artifact is `review-budget-approval.json` inside `taskDir`. Validation
 * fails closed: anything that is not a well-formed, approved, unexpired JSON
 * object for the requested stage yields a denial instead of an exception.
 *
 * @param {object} options
 * @param {string} options.taskDir - Directory that holds the approval artifact.
 * @param {string} options.stage - Stage being forced; the artifact's `stage`
 *   must equal this value or be `'both'`.
 * @param {Date} [options.now] - Reference time for the expiry comparison.
 * @returns {{ok: boolean, path: string, reason?: string, message?: string, approval?: object}}
 *   `{ ok: true, path, approval }` when the artifact is valid; otherwise
 *   `{ ok: false, reason, message, path }`, plus the parsed `approval` once the
 *   artifact has been read as a JSON object.
 */
export function readReviewBudgetApproval({ taskDir, stage, now = new Date() }) {
  const artifactName = 'review-budget-approval.json';
  const allowedStages = ['check', 'verify', 'both'];
  // Kept outside the try block so an invalid taskDir still throws to the caller.
  const artifactPath = path.join(taskDir, artifactName);

  // Builds a denial; `approval` is attached only after a JSON object was parsed.
  const deny = (reason, message, approval) => {
    const denial = { ok: false, reason, message, path: artifactName };
    if (approval !== undefined) {
      denial.approval = approval;
    }
    return denial;
  };

  // Read in a single step instead of existsSync + readFileSync, so a file that
  // disappears between the two calls is reported as missing rather than as
  // invalid JSON.
  let rawText;
  try {
    rawText = fs.readFileSync(artifactPath, 'utf8');
  } catch (error) {
    if (error.code === 'ENOENT' || error.code === 'ENOTDIR') {
      return deny(
        'missing_review_budget_approval',
        'Using --force-review-budget requires review-budget-approval.json with approved=true.',
      );
    }
    return deny(
      'invalid_review_budget_approval',
      `review-budget-approval.json could not be read: ${error.message}`,
    );
  }

  let approval;
  try {
    approval = JSON.parse(rawText);
  } catch (error) {
    return deny(
      'invalid_review_budget_approval',
      `review-budget-approval.json is invalid JSON: ${error.message}`,
    );
  }

  // JSON.parse accepts `null`, numbers, strings and arrays; dereferencing
  // `null` below would throw, and none of these can be a valid approval.
  if (approval === null || typeof approval !== 'object' || Array.isArray(approval)) {
    return deny(
      'invalid_review_budget_approval',
      'review-budget-approval.json must contain a JSON object.',
    );
  }

  if (approval.approved !== true) {
    return deny(
      'review_budget_approval_not_approved',
      'review-budget-approval.json must contain approved=true.',
      approval,
    );
  }

  // Require an actual string so values such as ["both"] are not coerced into
  // looking like a valid stage.
  if (typeof approval.stage !== 'string' || !allowedStages.includes(approval.stage)) {
    return deny(
      'review_budget_approval_stage_invalid',
      'review-budget-approval.json stage must be check, verify or both.',
      approval,
    );
  }

  if (approval.stage !== stage && approval.stage !== 'both') {
    return deny(
      'review_budget_approval_stage_mismatch',
      `review-budget-approval.json is for stage=${approval.stage}, not ${stage}.`,
      approval,
    );
  }

  if (!approval.reason || typeof approval.reason !== 'string') {
    return deny(
      'review_budget_approval_reason_missing',
      'review-budget-approval.json must contain a human-readable reason.',
      approval,
    );
  }

  if (!approval.approvedBy || typeof approval.approvedBy !== 'string') {
    return deny(
      'review_budget_approval_approver_missing',
      'review-budget-approval.json must contain approvedBy.',
      approval,
    );
  }

  // Only an absent or null expiresAt means "no expiry". Other falsy values
  // ('' / 0 / false) are validated instead of silently skipping the check.
  if (approval.expiresAt !== undefined && approval.expiresAt !== null) {
    const expiresAt = new Date(approval.expiresAt);
    if (Number.isNaN(expiresAt.getTime())) {
      return deny(
        'review_budget_approval_expiry_invalid',
        'review-budget-approval.json expiresAt must be a valid ISO timestamp when present.',
        approval,
      );
    }
    if (expiresAt.getTime() <= now.getTime()) {
      return deny(
        'review_budget_approval_expired',
        `review-budget-approval.json expired at ${expiresAt.toISOString()}.`,
        approval,
      );
    }
  }

  return {
    ok: true,
    path: artifactName,
    approval,
  };
}
176
+
71
177
  function firstValidDate(values) {
72
178
  for (const value of values) {
73
179
  const date = new Date(value);
@@ -1,6 +1,10 @@
1
1
  import { describe, expect, it } from 'vitest';
2
+ import fs from 'node:fs';
3
+ import os from 'node:os';
4
+ import path from 'node:path';
2
5
  import {
3
6
  evaluateReviewBudget,
7
+ readReviewBudgetApproval,
4
8
  resolveStageReviewBudget,
5
9
  summarizeReviewBudgetWindow,
6
10
  } from './review-budget-utils.mjs';
@@ -80,11 +84,62 @@ describe('review budget utils', () => {
80
84
  verify: {
81
85
  stageSlaMs: 120000,
82
86
  maxExternalRunsPerStage: 2,
87
+ providerTimeoutMs: 90000,
83
88
  },
84
89
  },
85
90
  }, 'verify')).toEqual({
86
91
  stageSlaMs: 120000,
87
92
  maxExternalRunsPerStage: 2,
93
+ providerTimeoutMs: 90000,
94
+ });
95
+ });
96
+
97
+ it('requires a valid force approval artifact', () => {
98
+ const taskDir = fs.mkdtempSync(path.join(os.tmpdir(), 'review-budget-approval-'));
99
+
100
+ expect(readReviewBudgetApproval({ taskDir, stage: 'verify' })).toMatchObject({
101
+ ok: false,
102
+ reason: 'missing_review_budget_approval',
103
+ });
104
+
105
+ fs.writeFileSync(path.join(taskDir, 'review-budget-approval.json'), JSON.stringify({
106
+ approved: true,
107
+ stage: 'verify',
108
+ reason: 'Human approved one extra external Verify after consolidated remediation.',
109
+ approvedBy: 'human',
110
+ expiresAt: '2026-06-04T12:30:00.000Z',
111
+ }, null, 2));
112
+
113
+ expect(readReviewBudgetApproval({
114
+ taskDir,
115
+ stage: 'verify',
116
+ now: new Date('2026-06-04T12:00:00.000Z'),
117
+ })).toMatchObject({
118
+ ok: true,
119
+ approval: {
120
+ approved: true,
121
+ stage: 'verify',
122
+ },
123
+ });
124
+ });
125
+
126
+ it('rejects expired force approval artifacts', () => {
127
+ const taskDir = fs.mkdtempSync(path.join(os.tmpdir(), 'review-budget-expired-'));
128
+ fs.writeFileSync(path.join(taskDir, 'review-budget-approval.json'), JSON.stringify({
129
+ approved: true,
130
+ stage: 'both',
131
+ reason: 'Expired approval',
132
+ approvedBy: 'human',
133
+ expiresAt: '2026-06-04T12:00:00.000Z',
134
+ }, null, 2));
135
+
136
+ expect(readReviewBudgetApproval({
137
+ taskDir,
138
+ stage: 'check',
139
+ now: new Date('2026-06-04T12:00:01.000Z'),
140
+ })).toMatchObject({
141
+ ok: false,
142
+ reason: 'review_budget_approval_expired',
88
143
  });
89
144
  });
90
145
  });
@@ -47,6 +47,7 @@ export async function runExternalCliChecker({
47
47
  reasoningEffort,
48
48
  prompt,
49
49
  cwd,
50
+ timeoutMs,
50
51
  }) {
51
52
  if (!providerConfig) {
52
53
  const error = new Error(`Unknown external CLI provider: ${providerName}`);
@@ -68,10 +69,11 @@ export async function runExternalCliChecker({
68
69
  input: providerConfig.input === 'stdin' ? prompt : undefined,
69
70
  encoding: 'utf8',
70
71
  maxBuffer: 1024 * 1024 * 20,
72
+ timeout: timeoutMs || undefined,
71
73
  });
72
74
 
73
75
  if (result.error) {
74
- result.error.failureReason = 'provider_unavailable';
76
+ result.error.failureReason = result.error.code === 'ETIMEDOUT' ? 'timeout' : 'provider_unavailable';
75
77
  throw result.error;
76
78
  }
77
79
  if (result.status !== 0) {
package/bin/run-check.mjs CHANGED
@@ -47,6 +47,7 @@ import {
47
47
  } from './lib/task-manifest-utils.mjs';
48
48
  import {
49
49
  evaluateReviewBudget,
50
+ readReviewBudgetApproval,
50
51
  resolveStageReviewBudget,
51
52
  summarizeReviewBudgetWindow,
52
53
  } from './lib/review-budget-utils.mjs';
@@ -239,6 +240,57 @@ async function runMain() {
239
240
  return;
240
241
  }
241
242
 
243
+ const forceApproval = forceReviewBudget
244
+ ? readReviewBudgetApproval({ taskDir, stage: 'check' })
245
+ : { ok: true, approval: null };
246
+ if (!forceApproval.ok) {
247
+ const budget = resolveStageReviewBudget(readAgentsConfig(), 'check');
248
+ const summary = summarizeReviewBudgetWindow({
249
+ timeline: readTimeline(taskDir, 'check-timeline.json'),
250
+ stage: 'check',
251
+ now: new Date(),
252
+ });
253
+ writeReviewBudgetReturn({
254
+ taskDir,
255
+ taskId,
256
+ checkContext,
257
+ checkerConfig,
258
+ checkerPromptSha,
259
+ cacheKey,
260
+ reason: forceApproval.reason,
261
+ message: forceApproval.message,
262
+ budget,
263
+ summary,
264
+ startedAt: runStartedAt,
265
+ approval: forceApproval,
266
+ });
267
+ appendCheckTimeline(taskDir, {
268
+ event: 'review_budget_force_denied',
269
+ verdict: 'human_arbitration_required',
270
+ reason: forceApproval.reason,
271
+ message: forceApproval.message,
272
+ budget,
273
+ summary,
274
+ timing: buildTiming(runStartedAt),
275
+ });
276
+ recordLlmInputUsage({
277
+ taskDir,
278
+ stage: 'check',
279
+ packMeta: promptPayload.pack.meta,
280
+ attempts: [
281
+ ...llmInputAttempts,
282
+ buildAttemptRecord(promptPayload.pack.meta, `review_budget_force_denied:${forceApproval.reason}`),
283
+ ],
284
+ rerunCount,
285
+ timing: buildTiming(runStartedAt),
286
+ });
287
+ refreshTaskManifestAfterCheck(taskDir);
288
+ runValidator(taskArg);
289
+ console.log(`Checker force review denied ${taskId}: human_arbitration_required`);
290
+ console.log(`- reason: ${forceApproval.reason}`);
291
+ return;
292
+ }
293
+
242
294
  const reviewBudget = evaluateCurrentReviewBudget({
243
295
  taskDir,
244
296
  stage: 'check',
@@ -258,6 +310,7 @@ async function runMain() {
258
310
  budget: reviewBudget.budget,
259
311
  summary: reviewBudget.summary,
260
312
  startedAt: runStartedAt,
313
+ approval: forceApproval,
261
314
  });
262
315
  appendCheckTimeline(taskDir, {
263
316
  event: 'review_budget_blocked',
@@ -304,6 +357,7 @@ async function runMain() {
304
357
  checkerConfig,
305
358
  messages: promptPayload.messages,
306
359
  prompt: promptPayload.prompt,
360
+ timeoutMs: reviewBudget.budget.providerTimeoutMs,
307
361
  });
308
362
  appendCheckTimeline(taskDir, {
309
363
  event: 'provider_completed',
@@ -434,6 +488,7 @@ function writeReviewBudgetReturn({
434
488
  budget,
435
489
  summary,
436
490
  startedAt,
491
+ approval = null,
437
492
  }) {
438
493
  const result = {
439
494
  taskId,
@@ -468,6 +523,7 @@ function writeReviewBudgetReturn({
468
523
  reason,
469
524
  budget,
470
525
  summary,
526
+ approval,
471
527
  forceFlag: '--force-review-budget',
472
528
  },
473
529
  readyForHumanGate: false,
@@ -489,13 +545,13 @@ function writeReviewBudgetReturn({
489
545
  '## Budget',
490
546
  '',
491
547
  '```json',
492
- JSON.stringify({ reason, budget, summary }, null, 2),
548
+ JSON.stringify({ reason, budget, summary, approval }, null, 2),
493
549
  '```',
494
550
  '',
495
551
  '## Required decision',
496
552
  '',
497
553
  '- Consolidate all remaining Check findings into `plan.md`, `status.md`, and `check-resolution.md`, then run one fresh Check after the window resets; or',
498
- '- Ask the human to approve an extra external review and rerun with `--force-review-budget`.',
554
+ '- Ask the human to approve an extra external review by writing `review-budget-approval.json`, then rerun with `--force-review-budget`.',
499
555
  '',
500
556
  '## Timing',
501
557
  '',
@@ -510,7 +566,7 @@ function writeReviewBudgetReturn({
510
566
  checkVerdict: '`human_arbitration_required`',
511
567
  checkResult: '- `check.result.json`: current; review budget blocked external Checker invocation',
512
568
  supervisorAction: 'Check review budget blocked another external provider loop.',
513
- nextStep: 'Human Arbitration: approve one extra external review with `--force-review-budget` or consolidate remaining findings before a fresh Check.',
569
+ nextStep: 'Human Arbitration: write `review-budget-approval.json` before using `--force-review-budget`, or consolidate remaining findings before a fresh Check.',
514
570
  humanApproval: 'yes',
515
571
  });
516
572
  appendOrchestrationLog(taskDir, `Check review budget blocked external checker; reason=${reason}; elapsedMs=${summary.elapsedMs}; providerStarted=${summary.providerStarted}; maxExternalRuns=${budget.maxExternalRunsPerStage}; stageSlaMs=${budget.stageSlaMs}`);
@@ -927,7 +983,7 @@ function buildCheckerPromptPayload({
927
983
  };
928
984
  }
929
985
 
930
- async function runProvider({ checkerConfig, messages, prompt }) {
986
+ async function runProvider({ checkerConfig, messages, prompt, timeoutMs }) {
931
987
  if (checkerConfig.provider === 'openai') {
932
988
  return runOpenAiChecker({
933
989
  apiKey: process.env.OPENAI_API_KEY,
@@ -944,6 +1000,7 @@ async function runProvider({ checkerConfig, messages, prompt }) {
944
1000
  reasoningEffort: checkerConfig.reasoningEffort,
945
1001
  prompt,
946
1002
  cwd: repoRoot,
1003
+ timeoutMs,
947
1004
  });
948
1005
  }
949
1006
 
@@ -30,6 +30,7 @@ import {
30
30
  import { recordLlmInputUsage } from './lib/task-manifest-utils.mjs';
31
31
  import {
32
32
  evaluateReviewBudget,
33
+ readReviewBudgetApproval,
33
34
  resolveStageReviewBudget,
34
35
  summarizeReviewBudgetWindow,
35
36
  } from './lib/review-budget-utils.mjs';
@@ -210,6 +211,54 @@ async function runMain() {
210
211
  return;
211
212
  }
212
213
 
214
+ const forceApproval = forceReviewBudget
215
+ ? readReviewBudgetApproval({ taskDir, stage: 'verify' })
216
+ : { ok: true, approval: null };
217
+ if (!forceApproval.ok) {
218
+ const budget = resolveStageReviewBudget(readAgentsConfig(), 'verify');
219
+ const summary = summarizeReviewBudgetWindow({
220
+ timeline: readTimeline(taskDir, 'verify-timeline.json'),
221
+ stage: 'verify',
222
+ now: new Date(),
223
+ });
224
+ writeVerifyReviewBudgetReturn({
225
+ taskDir,
226
+ taskId,
227
+ verifierConfig,
228
+ verifierRunId,
229
+ planSha,
230
+ executionSha,
231
+ reason: forceApproval.reason,
232
+ message: forceApproval.message,
233
+ budget,
234
+ summary,
235
+ approval: forceApproval,
236
+ });
237
+ appendVerifyTimeline(taskDir, {
238
+ event: 'review_budget_force_denied',
239
+ verdict: 'human_arbitration_required',
240
+ reason: forceApproval.reason,
241
+ message: forceApproval.message,
242
+ budget,
243
+ summary,
244
+ timing: buildTiming(runStartedAt),
245
+ });
246
+ recordLlmInputUsage({
247
+ taskDir,
248
+ stage: 'verify',
249
+ packMeta: promptPayload.pack.meta,
250
+ attempts: [
251
+ ...llmInputAttempts,
252
+ buildAttemptRecord(promptPayload.pack.meta, `review_budget_force_denied:${forceApproval.reason}`),
253
+ ],
254
+ rerunCount,
255
+ timing: buildTiming(runStartedAt),
256
+ });
257
+ console.log(`Verifier force review denied ${taskId}: human_arbitration_required`);
258
+ console.log(`- reason: ${forceApproval.reason}`);
259
+ return;
260
+ }
261
+
213
262
  const reviewBudget = evaluateCurrentReviewBudget({
214
263
  taskDir,
215
264
  stage: 'verify',
@@ -228,6 +277,7 @@ async function runMain() {
228
277
  message: reviewBudget.message,
229
278
  budget: reviewBudget.budget,
230
279
  summary: reviewBudget.summary,
280
+ approval: forceApproval,
231
281
  });
232
282
  appendVerifyTimeline(taskDir, {
233
283
  event: 'review_budget_blocked',
@@ -275,6 +325,7 @@ async function runMain() {
275
325
  reasoningEffort: verifierConfig.reasoningEffort,
276
326
  prompt: promptPayload.prompt,
277
327
  cwd: repoRoot,
328
+ timeoutMs: reviewBudget.budget.providerTimeoutMs,
278
329
  });
279
330
  appendVerifyTimeline(taskDir, {
280
331
  event: 'provider_completed',
@@ -412,6 +463,7 @@ function writeVerifyReviewBudgetReturn({
412
463
  message,
413
464
  budget,
414
465
  summary,
466
+ approval = null,
415
467
  }) {
416
468
  const verifyMarkdown = [
417
469
  '# Verify',
@@ -429,13 +481,13 @@ function writeVerifyReviewBudgetReturn({
429
481
  '## Budget',
430
482
  '',
431
483
  '```json',
432
- JSON.stringify({ reason, budget, summary }, null, 2),
484
+ JSON.stringify({ reason, budget, summary, approval }, null, 2),
433
485
  '```',
434
486
  '',
435
487
  '## Required decision',
436
488
  '',
437
489
  '- Consolidate remaining Verify findings in `execution.md` / evidence artifacts, then run one fresh Verify after the window resets; or',
438
- '- Ask the human to approve an extra external review and rerun with `--force-review-budget`.',
490
+ '- Ask the human to approve an extra external review by writing `review-budget-approval.json`, then rerun with `--force-review-budget`.',
439
491
  ].join('\n');
440
492
  const result = {
441
493
  schemaVersion: 1,
@@ -474,6 +526,7 @@ function writeVerifyReviewBudgetReturn({
474
526
  reason,
475
527
  budget,
476
528
  summary,
529
+ approval,
477
530
  forceFlag: '--force-review-budget',
478
531
  },
479
532
  };
@@ -21,11 +21,13 @@
21
21
  "reviewBudgets": {
22
22
  "check": {
23
23
  "stageSlaMs": 180000,
24
- "maxExternalRunsPerStage": 1
24
+ "maxExternalRunsPerStage": 1,
25
+ "providerTimeoutMs": 180000
25
26
  },
26
27
  "verify": {
27
28
  "stageSlaMs": 180000,
28
- "maxExternalRunsPerStage": 1
29
+ "maxExternalRunsPerStage": 1,
30
+ "providerTimeoutMs": 180000
29
31
  }
30
32
  },
31
33
  "checkerProviders": {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@besales/ops-framework",
3
- "version": "0.1.31",
3
+ "version": "0.1.32",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "ops-agent": "bin/ops-agent.mjs"
@@ -75,6 +75,7 @@ Project-specific context приходит только через task artifacts
75
75
  27. Если plan/task/checker-context показывает golden set/eval/regression fixtures/label cards/ground truth, Checker должен требовать `## Label Card Schema`, `## Coverage Matrix`, `## Negative / Edge Cases` и `## Harness Boundary`. Golden set без schema/coverage/negative cases/source evidence/non-goals/manual-vs-automated boundary является `return_to_plan`, даже если есть общий текст про expected outputs.
76
76
  28. Если remaining issue является процессной ясностью, wording polish или удобством статуса, а план уже содержит executable scope, acceptance, risk gates and verification evidence path, не возвращай `return_to_plan`; запиши как non-blocking note или human question. Цель Check - предотвратить дорогие ошибки до Execute, а не создавать повторные внешние циклы ради косметики.
77
77
  29. Если видишь несколько related blockers, объедини их в один consolidated finding с полным checklist. Не выдавай только первый найденный blocker, если следующий внешний Check очевидно найдет соседний.
78
+ 30. Если review budget уже требует Human Arbitration, не предлагай `--force-review-budget` как обычный retry. Он допустим только при наличии human-approved `review-budget-approval.json`.
78
79
 
79
80
  ## Контракт выхода
80
81
 
@@ -54,7 +54,7 @@
54
54
  18. Plan должен назвать risk tier (`R0`-`R5`), execution target and execution budget. Для `R1/R2` можно разрешить fast loop inside approved scope, но обязательно назвать stop rules.
55
55
  19. План проверки должен быть ladder-based: micro-verify during Execute, slice-verify before completion and external Verify requirement for closeout/high-risk claims.
56
56
  20. После `return_to_plan` Planner обязан выполнить один consolidated remediation pass: закрыть все blocking findings, precheck checklist и obvious adjacent gaps в `plan.md`/`check-resolution.md` до следующего Check. Не запускай внешний Check после единичной мелкой правки, если другие known blockers остаются открыты.
57
- 21. Если Check остановлен review budget gate (`human_arbitration_required` с `reviewBudget.reason`), Planner не должен пытаться обойти это повторным запуском. Нужно либо запросить human approval на `--force-review-budget`, либо укрупнить remediation и вернуться к Check после явного решения.
57
+ 21. Если Check остановлен review budget gate (`human_arbitration_required` с `reviewBudget.reason`), Planner не должен пытаться обойти это повторным запуском. Нужно либо запросить human approval и записать `review-budget-approval.json` перед `--force-review-budget`, либо укрупнить remediation и вернуться к Check после явного решения.
58
58
  20. План должен описывать meaningful slice. Не дроби локальную работу на отдельный Plan/Check/Verify для каждого микрофикса, если риски и target остаются внутри одного approved tier.
59
59
  21. Если risk triggers или `checker-context-pack.md` показывают O2/O3 hot-path work, Planner обязан добавить `## Optimization Strategy`: tier, hot paths, expected data size, chosen efficient approach, anti-patterns avoided and bounded optimizer budget/stop rule. Цель gate — предотвратить очевидно неэффективное решение до Execute, а не запускать бесконечную оптимизацию.
60
60
  22. Если задача создает golden set/eval/regression fixtures/label cards/ground truth, Planner обязан добавить `## Label Card Schema`, `## Coverage Matrix`, `## Negative / Edge Cases` и `## Harness Boundary`. Golden set должен быть test contract with expected outputs, non-goals, source refs, missing coverage policy and manual-vs-automated boundary.
@@ -61,8 +61,9 @@ Supervisor является code-level orchestrator по контракту: rou
61
61
  27. Если external verifier/checker/browser tooling начинает тратить непропорционально много времени или блокируется окружением, Supervisor обязан остановить loop и вынести human decision: принять internal verify/evidence, запустить external escalation вручную или изменить scope.
62
62
  28. Если deterministic Check preflight создал `precheck-remediation.md`, Supervisor не должен запускать повторный Check после точечной правки одного пункта. Сначала Planner/Executor должен закрыть весь checklist или явно отметить not-applicable с evidence/human decision в `plan.md`/`status.md`, затем допускается один fresh Check.
63
63
  29. Перед повторным Check после deterministic precheck Supervisor обязан сверить, что `precheck-remediation.md` был использован как consolidated checklist: все listed gates отражены в plan/research/status, а не закрывались по одному через серию precheck loops.
64
- 30. External Check и external Verify имеют stage SLA по умолчанию 3 минуты и максимум один external provider run на фазу. Если `check.result.json` или `verify.result.json` вернул `human_arbitration_required` с `reviewBudget.reason`, Supervisor не запускает еще один внешний review без явного human approval и `--force-review-budget`.
64
+ 30. External Check и external Verify имеют stage SLA по умолчанию 3 минуты, максимум один external provider run на фазу и hard provider timeout 3 минуты. Если `check.result.json` или `verify.result.json` вернул `human_arbitration_required` с `reviewBudget.reason`, Supervisor не запускает еще один внешний review без явного human approval, записанного в `review-budget-approval.json`, и `--force-review-budget`.
65
65
  31. После `return_to_plan` / `return_to_execute` Supervisor должен требовать один consolidated remediation pass. Запрещено запускать серию внешних Check/Verify для мелких последовательных правок, если их можно закрыть в одном artifact update.
66
+ 32. `--force-review-budget` запрещен как обычный retry flag. Он допустим только после human decision и должен быть виден в timeline вместе с approval artifact; без approval artifact команда должна остаться на Human Arbitration.
66
67
 
67
68
  ## Hard Gate: Material Scope Expansion -> Brief Reset
68
69
 
@@ -48,7 +48,8 @@
48
48
  24. Environment/tooling failures внешнего verifier/browser smoke не должны превращаться в бесконечный `return_to_execute` loop. Если implementation evidence достаточно, но внешний инструмент заблокирован окружением, используй `pass_with_notes` или `human_arbitration_required` согласно риску.
49
49
  25. Если `plan.md` содержит golden set/eval/regression fixture sections, verifier должен проверить `Golden Set / Regression Evidence`: label cards follow schema, coverage matrix is filled, negative/edge cases are selected or documented missing, expected outputs/non-goals are inspectable, source refs/snippets exist and manual-vs-automated harness boundary is explicit.
50
50
  26. External Verify должен укладываться в bounded review model: один внешний provider run по умолчанию. Если остаются несколько blockers, верни один consolidated `return_to_execute` finding с полным checklist. Minor documentation/status polish не должен запускать новый внешний цикл, если acceptance/evidence покрыты.
51
- 27. Если review budget gate уже вернул `human_arbitration_required`, не предлагай повторный external Verify как обычный следующий шаг. Следующий шаг: consolidated execution fix, internal evidence decision или явный human approval на `--force-review-budget`.
51
+ 27. Если review budget gate уже вернул `human_arbitration_required`, не предлагай повторный external Verify как обычный следующий шаг. Следующий шаг: consolidated execution fix, internal evidence decision или явный human approval, записанный в `review-budget-approval.json`, перед `--force-review-budget`.
52
+ 28. External provider timeout является hard gate. Если verifier не успевает уложиться в 3 минуты, верни `verifier_failed`/`timeout` или `human_arbitration_required`; не предлагай бесконечный retry с тем же input.
52
53
 
53
54
  ## Контракт выхода
54
55