@besales/ops-framework 0.1.24 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.26
|
|
4
|
+
|
|
5
|
+
- Raised `standard_plus` context cap to 36k so that compact Verify packs of roughly 29-31k estimated tokens stay out of strict mode once the safety margin is applied.
|
|
6
|
+
|
|
7
|
+
## 0.1.25
|
|
8
|
+
|
|
9
|
+
- Stabilized Verify LLM `task-manifest.json` input by stripping volatile Check telemetry, timestamps and verbose loop history from verifier packs.
|
|
10
|
+
- Kept previous `verify.md` context compact in external Verify packs so verifier runs do not re-ingest large internal verify excerpts.
|
|
11
|
+
- Raised `standard_plus` context cap to keep ~27k compact Verify packs out of slow strict mode.
|
|
12
|
+
- Narrowed external Verify policy: local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when evidence covers the required gates; external CLI remains required for production/real-data/destructive/security/financial/broad-risk work or on explicit human request.
|
|
13
|
+
|
|
3
14
|
## 0.1.24
|
|
4
15
|
|
|
5
16
|
- Raised `standard_plus` Check/Verify context cap so near-cap plans stay in compact mode instead of jumping to slow strict context.
|
|
@@ -12,7 +12,7 @@ export const LLM_CONTEXT_MODES = ['fast', 'standard', 'standard_plus', 'strict']
|
|
|
12
12
|
export const LLM_CONTEXT_CAPS = {
|
|
13
13
|
fast: 8000,
|
|
14
14
|
standard: 20000,
|
|
15
|
-
standard_plus:
|
|
15
|
+
standard_plus: 36000,
|
|
16
16
|
strict: 50000,
|
|
17
17
|
};
|
|
18
18
|
|
|
@@ -318,7 +318,7 @@ export function buildVerifierLlmInputPack({
|
|
|
318
318
|
'brief.md': readTaskFile(taskDir, 'brief.md'),
|
|
319
319
|
'research.md': readTaskFile(taskDir, 'research.md'),
|
|
320
320
|
'plan.md': readTaskFile(taskDir, 'plan.md'),
|
|
321
|
-
'task-manifest.json': readTaskFile(taskDir, 'task-manifest.json'),
|
|
321
|
+
'task-manifest.json': stableTaskManifestForCheck(readTaskFile(taskDir, 'task-manifest.json')),
|
|
322
322
|
'check.result.json': readTaskFile(taskDir, 'check.result.json'),
|
|
323
323
|
'check.md': compactCheckMarkdown({
|
|
324
324
|
checkMarkdown: readTaskFile(taskDir, 'check.md'),
|
|
@@ -329,7 +329,7 @@ export function buildVerifierLlmInputPack({
|
|
|
329
329
|
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), 3500),
|
|
330
330
|
'execution.md': readTaskFile(taskDir, 'execution.md'),
|
|
331
331
|
'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), 'strict'),
|
|
332
|
-
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), '
|
|
332
|
+
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'fast'),
|
|
333
333
|
'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
|
|
334
334
|
'feedback.md': compactArtifact(taskDir, 'feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
|
|
335
335
|
'execution-feedback.md': compactArtifact(taskDir, 'execution-feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
|
|
@@ -339,7 +339,7 @@ export function buildVerifierLlmInputPack({
|
|
|
339
339
|
'brief.md': compactArtifact(taskDir, 'brief.md', selectedMode, ['goal', 'scope', 'success criteria']),
|
|
340
340
|
'research.md': compactArtifact(taskDir, 'research.md', selectedMode, ['findings', 'evidence', 'repo']),
|
|
341
341
|
'plan.md': compactArtifact(taskDir, 'plan.md', selectedMode, VERIFY_PLAN_SECTIONS),
|
|
342
|
-
'task-manifest.json': readTaskFile(taskDir, 'task-manifest.json'),
|
|
342
|
+
'task-manifest.json': stableTaskManifestForCheck(readTaskFile(taskDir, 'task-manifest.json')),
|
|
343
343
|
'check.result.json': readTaskFile(taskDir, 'check.result.json'),
|
|
344
344
|
'check.md': compactCheckMarkdown({
|
|
345
345
|
checkMarkdown: readTaskFile(taskDir, 'check.md'),
|
|
@@ -350,7 +350,7 @@ export function buildVerifierLlmInputPack({
|
|
|
350
350
|
'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), charLimitForMode(selectedMode, 1200, 2500)),
|
|
351
351
|
'execution.md': compactArtifact(taskDir, 'execution.md', selectedMode, VERIFY_EXECUTION_SECTIONS),
|
|
352
352
|
'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), selectedMode),
|
|
353
|
-
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'),
|
|
353
|
+
'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'fast'),
|
|
354
354
|
'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
|
|
355
355
|
'feedback.md': compactArtifact(taskDir, 'feedback.md', selectedMode, ['feedback event', 'classification', 'supervisor decision']),
|
|
356
356
|
'orchestration-log.md': compactOrchestrationLog(readTaskFile(taskDir, 'orchestration-log.md'), selectedMode),
|
|
@@ -112,7 +112,7 @@ describe('llm input pack utilities', () => {
|
|
|
112
112
|
});
|
|
113
113
|
|
|
114
114
|
expect(pack.meta.mode).toBe('standard_plus');
|
|
115
|
-
expect(pack.meta.capTokens).toBe(
|
|
115
|
+
expect(pack.meta.capTokens).toBe(36000);
|
|
116
116
|
expect(pack.input.llmInputPolicy.contextInsufficientFallback).toBe('rerun_strict');
|
|
117
117
|
expect(pack.input.taskArtifacts['plan.md']).toContain('<!-- compacted:plan.md');
|
|
118
118
|
expect(pack.meta.compactedArtifacts).toContain('plan.md');
|
package/bin/run-verify.mjs
CHANGED
|
@@ -503,7 +503,8 @@ function writeInternalSupervisorVerify({
|
|
|
503
503
|
'## residual risks',
|
|
504
504
|
'',
|
|
505
505
|
'- This is not an independent fresh-context verifier run.',
|
|
506
|
-
'- Use `--verify-mode external_cli` for production-readiness, R4/R5,
|
|
506
|
+
'- Use `--verify-mode external_cli` for production-readiness, R4/R5, production or real-user-data migrations/backfills, destructive/security/financial work, broad ambiguous refactors or explicit human request.',
|
|
507
|
+
'- Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when evidence covers the plan gates.',
|
|
507
508
|
'',
|
|
508
509
|
'## latest status excerpt',
|
|
509
510
|
'',
|
package/package.json
CHANGED
package/prompts/supervisor.md
CHANGED
|
@@ -57,7 +57,7 @@ Supervisor является code-level orchestrator по контракту: rou
|
|
|
57
57
|
23. Любой user feedback, вопрос, correction, review note или новое наблюдение на любом этапе сначала записывается в `feedback.md` через `ops-agent intake-feedback` и классифицируется. Feedback не является инструкцией к изменению implementation, пока не классифицирован.
|
|
58
58
|
24. После `Execute` задача не может перейти в `Retrospective`, `Human Closeout Gate`, `Closed`, `Accepted` или task switch без `Verify` и structured `verify.result.json`.
|
|
59
59
|
25. `verify.result.json` должен сверять `plan.md` с фактическим `execution.md`, diff/files/tests и явным execution evidence. Self-reported executor checks без verifier verdict не являются достаточным Verify.
|
|
60
|
-
26. `verify.result.json.verdict = pass | pass_with_notes` допустим при `verificationMode = internal_supervisor` для обычных `R0-R3` local engineering slices. Это cost-saving режим без независимого CLI/model verifier и он является default, если shared defaults или project agents override задают `verifier.mode = internal_supervisor`. `external_cli` обязателен только для R4/R5, production-readiness, destructive/security/financial/broad operational actions,
|
|
60
|
+
26. `verify.result.json.verdict = pass | pass_with_notes` допустим при `verificationMode = internal_supervisor` для обычных `R0-R3` local engineering slices. Это cost-saving режим без независимого CLI/model verifier и он является default, если shared defaults или project agents override задают `verifier.mode = internal_supervisor`. `external_cli` обязателен только для R4/R5, production-readiness, destructive/security/financial/broad operational actions, production or real-user-data Prisma/data migrations/backfills, broad ambiguous refactors или explicit human request. Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when execution evidence covers the plan gates.
|
|
61
61
|
27. Если external verifier/checker/browser tooling начинает тратить непропорционально много времени или блокируется окружением, Supervisor обязан остановить loop и вынести human decision: принять internal verify/evidence, запустить external escalation вручную или изменить scope.
|
|
62
62
|
|
|
63
63
|
## Hard Gate: Material Scope Expansion -> Brief Reset
|
package/prompts/verifier.md
CHANGED
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
|
|
70
70
|
Для обычных `R0-R3` local engineering slices verdict `pass` или `pass_with_notes` может быть выдан в режиме `internal_supervisor`, если verifier сверил `plan.md`, `execution.md`, diff/files/tests и explicit evidence.
|
|
71
71
|
|
|
72
|
-
External CLI verifier обязателен только для escalation triggers: `R4/R5`, production-readiness, destructive/security/financial/broad operational actions,
|
|
72
|
+
External CLI verifier обязателен только для escalation triggers: `R4/R5`, production-readiness, destructive/security/financial/broad operational actions, production or real-user-data Prisma/data migrations/backfills, broad ambiguous refactors или explicit human request. Local scratch DB migrations, fixture imports, docs-only changes and bounded R0-R3 local engineering slices can close with `internal_supervisor` when execution evidence includes the required tests/build/lint/smoke/apply proof.
|
|
73
73
|
|
|
74
74
|
Минимальный shape `verify.result.json`:
|
|
75
75
|
|
package/templates/verify.md
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
|
|
19
19
|
`internal_supervisor | external_cli`
|
|
20
20
|
|
|
21
|
-
Use `internal_supervisor` for ordinary local R0-R3 slices when focused tests/build/lint/smoke evidence is sufficient. Use `external_cli` for R4/R5, production-readiness, destructive/security/financial/broad operational work,
|
|
21
|
+
Use `internal_supervisor` for ordinary local R0-R3 slices when focused tests/build/lint/smoke evidence is sufficient. Use `external_cli` for R4/R5, production-readiness, destructive/security/financial/broad operational work, production or real-user-data migrations/backfills, broad ambiguous refactors or explicit human request. Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when evidence covers the plan gates.
|
|
22
22
|
|
|
23
23
|
## Verification ladder coverage
|
|
24
24
|
|