@besales/ops-framework 0.1.23 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.25
|
|
4
|
+
|
|
5
|
+
- Stabilized Verify LLM `task-manifest.json` input by stripping volatile Check telemetry, timestamps and verbose loop history from verifier packs.
|
|
6
|
+
- Kept previous `verify.md` context compact in external Verify packs so verifier runs do not re-ingest large internal verify excerpts.
|
|
7
|
+
- Raised `standard_plus` context cap to keep ~27k compact Verify packs out of slow strict mode.
|
|
8
|
+
- Narrowed external Verify policy: local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when evidence covers required gates; external CLI remains for production/real-data/destructive/security/financial/broad-risk work or explicit human request.
|
|
9
|
+
|
|
10
|
+
## 0.1.24
|
|
11
|
+
|
|
12
|
+
- Raised `standard_plus` Check/Verify context cap so near-cap plans stay in compact mode instead of jumping to slow strict context.
|
|
13
|
+
- Stabilized Check LLM `taskManifest` input by removing volatile `llmInput`, `lastCheckResult`, timestamps and verbose loop history from the prompt payload.
|
|
14
|
+
- Reduced duplicate external Check reruns after `task-manifest.json` refreshes by keeping prompt input stable when plan, memory and risk inputs have not changed.
|
|
15
|
+
|
|
3
16
|
## 0.1.23
|
|
4
17
|
|
|
5
18
|
- Added deterministic import/ingestion planning gates for representative real fixtures, raw downstream metadata extraction and explicit duplicate-import behavior.
|
|
@@ -12,8 +12,8 @@ export const LLM_CONTEXT_MODES = ['fast', 'standard', 'standard_plus', 'strict']
|
|
|
12
12
|
export const LLM_CONTEXT_CAPS = {
|
|
13
13
|
fast: 8000,
|
|
14
14
|
standard: 20000,
|
|
15
|
-
standard_plus:
|
|
16
|
-
strict:
|
|
15
|
+
standard_plus: 34000,
|
|
16
|
+
strict: 50000,
|
|
17
17
|
};
|
|
18
18
|
|
|
19
19
|
const TOKEN_ESTIMATE_CHARS_PER_TOKEN = 1.8;
|
|
@@ -225,7 +225,7 @@ export function buildCheckerLlmInputPack({
|
|
|
225
225
|
relevantPlaybooks: selectedMode === 'strict'
|
|
226
226
|
? renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'strict' })
|
|
227
227
|
: renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'compact' }),
|
|
228
|
-
taskManifest,
|
|
228
|
+
taskManifest: stableTaskManifestForCheck(taskManifest),
|
|
229
229
|
projectMemory: compactProjectMemory(projectMemory, selectedMode),
|
|
230
230
|
taskArtifacts: artifacts,
|
|
231
231
|
outputContract: {
|
|
@@ -249,6 +249,61 @@ export function buildCheckerLlmInputPack({
|
|
|
249
249
|
return withPackMetadata(input, selectedMode);
|
|
250
250
|
}
|
|
251
251
|
|
|
252
|
+
/**
 * Produces a prompt-stable rendering of a task manifest.
 *
 * Only an explicit whitelist of manifest fields is copied into the result;
 * every other field (for example `llmInput`, `lastCheckResult`, `timestamps`
 * or extra `loopDetector` entries) is left out, so edits to those fields do
 * not change the returned text.
 *
 * @param {string|object} taskManifest - Manifest as JSON text or as an
 *   already-parsed object.
 * @returns {string|*} Pretty-printed JSON (2-space indent) of the whitelisted
 *   fields. Text that fails to parse is returned unchanged; any other input
 *   that does not resolve to a non-array object is returned as-is, or as `''`
 *   when it is falsy.
 */
export function stableTaskManifestForCheck(taskManifest) {
  const isRecord = (candidate) =>
    Boolean(candidate) && typeof candidate === 'object' && !Array.isArray(candidate);
  // Copies the named keys in the given order; missing keys become `undefined`
  // and are removed again by pruneUndefined below.
  const pick = (source, keys) => Object.fromEntries(keys.map((key) => [key, source?.[key]]));

  let manifest = null;
  if (typeof taskManifest === 'string') {
    try {
      manifest = JSON.parse(taskManifest);
    } catch {
      // Not JSON: hand the raw text back untouched.
      return taskManifest;
    }
  } else if (isRecord(taskManifest)) {
    manifest = taskManifest;
  }

  if (!isRecord(manifest)) {
    return taskManifest || '';
  }

  // Key order here is the key order of the emitted JSON.
  const whitelisted = {
    ...pick(manifest, ['schemaVersion', 'taskId', 'mode', 'phase', 'gates']),
    context: pick(manifest.context, [
      'planSha',
      'planFingerprintVersion',
      'memorySha',
      'riskProfile',
      'riskTriggers',
      'riskWarnings',
      'checkContextCurrent',
    ]),
    ...pick(manifest, ['requiredEvidenceIssues', 'qualitySignals']),
    loopDetector: pick(manifest.loopDetector, [
      'threshold',
      'requiresConsolidatedRemediation',
      'repeatedReasons',
    ]),
    ...pick(manifest, ['consolidatedRemediationAccepted', 'consolidatedRemediationArtifact']),
  };

  return JSON.stringify(pruneUndefined(whitelisted), null, 2);
}
|
|
294
|
+
|
|
295
|
+
/**
 * Recursively removes properties whose value is `undefined`.
 *
 * Arrays are mapped element by element (their length is kept, so an
 * `undefined` element stays in place). Only plain objects, meaning those
 * whose prototype is `Object.prototype` or `null`, are rebuilt. Every other
 * value, including primitives, `null`, `Date`, `Map` and class instances, is
 * returned untouched so it is not flattened into an empty object and keeps
 * its own `toJSON` behaviour.
 *
 * @param {*} value - Any value.
 * @returns {*} A pruned copy for arrays and plain objects, otherwise `value`.
 */
function pruneUndefined(value) {
  if (Array.isArray(value)) {
    return value.map(pruneUndefined);
  }
  if (!value || typeof value !== 'object') {
    return value;
  }
  // Rebuilding a Date/Map/class instance from its own enumerable entries
  // would silently turn it into `{}`; leave such objects alone.
  const proto = Object.getPrototypeOf(value);
  if (proto !== Object.prototype && proto !== null) {
    return value;
  }
  return Object.fromEntries(
    Object.entries(value)
      .filter(([, item]) => item !== undefined)
      .map(([key, item]) => [key, pruneUndefined(item)]),
  );
}
|
|
306
|
+
|
|
252
307
|
export function buildVerifierLlmInputPack({
|
|
253
308
|
taskDir,
|
|
254
309
|
taskId,
|
|
@@ -263,7 +318,7 @@ export function buildVerifierLlmInputPack({
|
|
|
263
318
|
'brief.md': readTaskFile(taskDir, 'brief.md'),
|
|
264
319
|
'research.md': readTaskFile(taskDir, 'research.md'),
|
|
265
320
|
'plan.md': readTaskFile(taskDir, 'plan.md'),
|
|
266
|
-
'task-manifest.json': readTaskFile(taskDir, 'task-manifest.json'),
|
|
321
|
+
'task-manifest.json': stableTaskManifestForCheck(readTaskFile(taskDir, 'task-manifest.json')),
|
|
267
322
|
'check.result.json': readTaskFile(taskDir, 'check.result.json'),
|
|
268
323
|
'check.md': compactCheckMarkdown({
|
|
269
324
|
checkMarkdown: readTaskFile(taskDir, 'check.md'),
|
|
@@ -274,7 +329,7 @@ export function buildVerifierLlmInputPack({
|
|
|
274
329
|
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), 3500),
|
|
275
330
|
'execution.md': readTaskFile(taskDir, 'execution.md'),
|
|
276
331
|
'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), 'strict'),
|
|
277
|
-
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), '
|
|
332
|
+
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'fast'),
|
|
278
333
|
'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
|
|
279
334
|
'feedback.md': compactArtifact(taskDir, 'feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
|
|
280
335
|
'execution-feedback.md': compactArtifact(taskDir, 'execution-feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
|
|
@@ -284,7 +339,7 @@ export function buildVerifierLlmInputPack({
|
|
|
284
339
|
'brief.md': compactArtifact(taskDir, 'brief.md', selectedMode, ['goal', 'scope', 'success criteria']),
|
|
285
340
|
'research.md': compactArtifact(taskDir, 'research.md', selectedMode, ['findings', 'evidence', 'repo']),
|
|
286
341
|
'plan.md': compactArtifact(taskDir, 'plan.md', selectedMode, VERIFY_PLAN_SECTIONS),
|
|
287
|
-
'task-manifest.json': readTaskFile(taskDir, 'task-manifest.json'),
|
|
342
|
+
'task-manifest.json': stableTaskManifestForCheck(readTaskFile(taskDir, 'task-manifest.json')),
|
|
288
343
|
'check.result.json': readTaskFile(taskDir, 'check.result.json'),
|
|
289
344
|
'check.md': compactCheckMarkdown({
|
|
290
345
|
checkMarkdown: readTaskFile(taskDir, 'check.md'),
|
|
@@ -295,7 +350,7 @@ export function buildVerifierLlmInputPack({
|
|
|
295
350
|
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), charLimitForMode(selectedMode, 1200, 2500)),
|
|
296
351
|
'execution.md': compactArtifact(taskDir, 'execution.md', selectedMode, VERIFY_EXECUTION_SECTIONS),
|
|
297
352
|
'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), selectedMode),
|
|
298
|
-
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'),
|
|
353
|
+
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'fast'),
|
|
299
354
|
'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
|
|
300
355
|
'feedback.md': compactArtifact(taskDir, 'feedback.md', selectedMode, ['feedback event', 'classification', 'supervisor decision']),
|
|
301
356
|
'orchestration-log.md': compactOrchestrationLog(readTaskFile(taskDir, 'orchestration-log.md'), selectedMode),
|
|
@@ -112,12 +112,61 @@ describe('llm input pack utilities', () => {
|
|
|
112
112
|
});
|
|
113
113
|
|
|
114
114
|
expect(pack.meta.mode).toBe('standard_plus');
|
|
115
|
-
expect(pack.meta.capTokens).toBe(
|
|
115
|
+
expect(pack.meta.capTokens).toBe(34000);
|
|
116
116
|
expect(pack.input.llmInputPolicy.contextInsufficientFallback).toBe('rerun_strict');
|
|
117
117
|
expect(pack.input.taskArtifacts['plan.md']).toContain('<!-- compacted:plan.md');
|
|
118
118
|
expect(pack.meta.compactedArtifacts).toContain('plan.md');
|
|
119
119
|
});
|
|
120
120
|
|
|
121
|
+
it('stabilizes checker task manifest by excluding volatile check telemetry', () => {
|
|
122
|
+
const taskDir = createTask();
|
|
123
|
+
const pack = buildCheckerLlmInputPack({
|
|
124
|
+
taskDir,
|
|
125
|
+
taskId: 'TASK-999-token-pack',
|
|
126
|
+
checkerPromptSha: 'sha256:test',
|
|
127
|
+
cacheKey: { test: true },
|
|
128
|
+
checkContext: {
|
|
129
|
+
planSha: 'sha256:plan',
|
|
130
|
+
memorySha: 'sha256:memory',
|
|
131
|
+
riskProfile: 'high',
|
|
132
|
+
riskTriggers: ['source-sync-provider'],
|
|
133
|
+
},
|
|
134
|
+
checkEvidence: '# Evidence\n\nok',
|
|
135
|
+
checkerContextPack: '# Checker Context Pack\n\nok',
|
|
136
|
+
taskManifest: JSON.stringify({
|
|
137
|
+
schemaVersion: 1,
|
|
138
|
+
taskId: 'TASK-999-token-pack',
|
|
139
|
+
mode: 'standard',
|
|
140
|
+
phase: 'check',
|
|
141
|
+
gates: { sourceSyncProvider: { required: true, planComplete: true } },
|
|
142
|
+
context: {
|
|
143
|
+
planSha: 'sha256:plan',
|
|
144
|
+
memorySha: 'sha256:memory',
|
|
145
|
+
riskProfile: 'high',
|
|
146
|
+
riskTriggers: ['source-sync-provider'],
|
|
147
|
+
checkContextCurrent: true,
|
|
148
|
+
},
|
|
149
|
+
llmInput: { check: { updatedAt: 'volatile', attempts: [{ mode: 'strict' }] } },
|
|
150
|
+
lastCheckResult: { verdict: 'ready_for_human_gate', createdAt: 'volatile' },
|
|
151
|
+
timestamps: { updatedAt: 'volatile' },
|
|
152
|
+
loopDetector: {
|
|
153
|
+
threshold: 2,
|
|
154
|
+
requiresConsolidatedRemediation: true,
|
|
155
|
+
repeatedReasons: [{ normalizedReason: 'context_overflow', count: 3 }],
|
|
156
|
+
reasons: { noisy: { normalizedReason: 'verbose history' } },
|
|
157
|
+
},
|
|
158
|
+
}, null, 2),
|
|
159
|
+
projectMemory: [],
|
|
160
|
+
mode: 'standard',
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
expect(pack.input.taskManifest).toContain('"requiresConsolidatedRemediation": true');
|
|
164
|
+
expect(pack.input.taskManifest).not.toContain('lastCheckResult');
|
|
165
|
+
expect(pack.input.taskManifest).not.toContain('llmInput');
|
|
166
|
+
expect(pack.input.taskManifest).not.toContain('timestamps');
|
|
167
|
+
expect(pack.input.taskManifest).not.toContain('verbose history');
|
|
168
|
+
});
|
|
169
|
+
|
|
121
170
|
it('preserves protected verification sections when compacting long plans', () => {
|
|
122
171
|
const taskDir = createTask();
|
|
123
172
|
const longPlan = [
|
package/bin/run-verify.mjs
CHANGED
|
@@ -503,7 +503,8 @@ function writeInternalSupervisorVerify({
|
|
|
503
503
|
'## residual risks',
|
|
504
504
|
'',
|
|
505
505
|
'- This is not an independent fresh-context verifier run.',
|
|
506
|
-
'- Use `--verify-mode external_cli` for production-readiness, R4/R5,
|
|
506
|
+
'- Use `--verify-mode external_cli` for production-readiness, R4/R5, production or real-user-data migrations/backfills, destructive/security/financial work, broad ambiguous refactors or explicit human request.',
|
|
507
|
+
'- Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when evidence covers the plan gates.',
|
|
507
508
|
'',
|
|
508
509
|
'## latest status excerpt',
|
|
509
510
|
'',
|
package/package.json
CHANGED
package/prompts/supervisor.md
CHANGED
|
@@ -57,7 +57,7 @@ Supervisor является code-level orchestrator по контракту: rou
|
|
|
57
57
|
23. Любой user feedback, вопрос, correction, review note или новое наблюдение на любом этапе сначала записывается в `feedback.md` через `ops-agent intake-feedback` и классифицируется. Feedback не является инструкцией к изменению implementation, пока не классифицирован.
|
|
58
58
|
24. После `Execute` задача не может перейти в `Retrospective`, `Human Closeout Gate`, `Closed`, `Accepted` или task switch без `Verify` и structured `verify.result.json`.
|
|
59
59
|
25. `verify.result.json` должен сверять `plan.md` с фактическим `execution.md`, diff/files/tests и явным execution evidence. Self-reported executor checks без verifier verdict не являются достаточным Verify.
|
|
60
|
-
26. `verify.result.json.verdict = pass | pass_with_notes` допустим при `verificationMode = internal_supervisor` для обычных `R0-R3` local engineering slices. Это cost-saving режим без независимого CLI/model verifier и он является default, если shared defaults или project agents override задают `verifier.mode = internal_supervisor`. `external_cli` обязателен только для R4/R5, production-readiness, destructive/security/financial/broad operational actions,
|
|
60
|
+
26. `verify.result.json.verdict = pass | pass_with_notes` допустим при `verificationMode = internal_supervisor` для обычных `R0-R3` local engineering slices. Это cost-saving режим без независимого CLI/model verifier и он является default, если shared defaults или project agents override задают `verifier.mode = internal_supervisor`. `external_cli` обязателен только для R4/R5, production-readiness, destructive/security/financial/broad operational actions, production or real-user-data Prisma/data migrations/backfills, broad ambiguous refactors или explicit human request. Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when execution evidence covers the plan gates.
|
|
61
61
|
27. Если external verifier/checker/browser tooling начинает тратить непропорционально много времени или блокируется окружением, Supervisor обязан остановить loop и вынести human decision: принять internal verify/evidence, запустить external escalation вручную или изменить scope.
|
|
62
62
|
|
|
63
63
|
## Hard Gate: Material Scope Expansion -> Brief Reset
|
package/prompts/verifier.md
CHANGED
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
|
|
70
70
|
Для обычных `R0-R3` local engineering slices verdict `pass` или `pass_with_notes` может быть выдан в режиме `internal_supervisor`, если verifier сверил `plan.md`, `execution.md`, diff/files/tests and explicit evidence.
|
|
71
71
|
|
|
72
|
-
External CLI verifier обязателен только для escalation triggers: `R4/R5`, production-readiness, destructive/security/financial/broad operational actions,
|
|
72
|
+
External CLI verifier обязателен только для escalation triggers: `R4/R5`, production-readiness, destructive/security/financial/broad operational actions, production or real-user-data Prisma/data migrations/backfills, broad ambiguous refactors или explicit human request. Local scratch DB migrations, fixture imports, docs-only changes and bounded R0-R3 local engineering slices can close with `internal_supervisor` when execution evidence includes the required tests/build/lint/smoke/apply proof.
|
|
73
73
|
|
|
74
74
|
Минимальный shape `verify.result.json`:
|
|
75
75
|
|
package/templates/verify.md
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
|
|
19
19
|
`internal_supervisor | external_cli`
|
|
20
20
|
|
|
21
|
-
Use `internal_supervisor` for ordinary local R0-R3 slices when focused tests/build/lint/smoke evidence is sufficient. Use `external_cli` for R4/R5, production-readiness, destructive/security/financial/broad operational work,
|
|
21
|
+
Use `internal_supervisor` for ordinary local R0-R3 slices when focused tests/build/lint/smoke evidence is sufficient. Use `external_cli` for R4/R5, production-readiness, destructive/security/financial/broad operational work, production or real-user-data migrations/backfills, broad ambiguous refactors or explicit human request. Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when evidence covers the plan gates.
|
|
22
22
|
|
|
23
23
|
## Verification ladder coverage
|
|
24
24
|
|