npm - @besales/ops-framework - Versions diffs - 0.1.23 → 0.1.25 - Mend

@besales/ops-framework 0.1.23 → 0.1.25

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (8)

package/CHANGELOG.md +13 -0
package/bin/lib/llm-input-pack-utils.mjs +62 -7
package/bin/lib/llm-input-pack-utils.test.mjs +50 -1
package/bin/run-verify.mjs +2 -1
package/package.json +1 -1
package/prompts/supervisor.md +1 -1
package/prompts/verifier.md +1 -1
package/templates/verify.md +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,18 @@
 # Changelog
+## 0.1.25
+- Stabilized Verify LLM `task-manifest.json` input by stripping volatile Check telemetry, timestamps and verbose loop history from verifier packs.
+- Kept previous `verify.md` context compact in external Verify packs so verifier runs do not re-ingest large internal verify excerpts.
+- Raised `standard_plus` context cap to keep ~27k compact Verify packs out of slow strict mode.
+- Narrowed external Verify policy: local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when evidence covers required gates; external CLI remains for production/real-data/destructive/security/financial/broad-risk work or explicit human request.
+## 0.1.24
+- Raised `standard_plus` Check/Verify context cap so near-cap plans stay in compact mode instead of jumping to slow strict context.
+- Stabilized Check LLM `taskManifest` input by removing volatile `llmInput`, `lastCheckResult`, timestamps and verbose loop history from the prompt payload.
+- Reduced duplicate external Check reruns after `task-manifest.json` refreshes by keeping prompt input stable when plan, memory and risk inputs have not changed.
 ## 0.1.23
 - Added deterministic import/ingestion planning gates for representative real fixtures, raw downstream metadata extraction and explicit duplicate-import behavior.

package/bin/lib/llm-input-pack-utils.mjs CHANGED Viewed

@@ -12,8 +12,8 @@ export const LLM_CONTEXT_MODES = ['fast', 'standard', 'standard_plus', 'strict']
 export const LLM_CONTEXT_CAPS = {
   fast: 8000,
   standard: 20000,
-  standard_plus: 26000,
-  strict: 45000,
+  standard_plus: 34000,
+  strict: 50000,
 };
 const TOKEN_ESTIMATE_CHARS_PER_TOKEN = 1.8;
@@ -225,7 +225,7 @@ export function buildCheckerLlmInputPack({
     relevantPlaybooks: selectedMode === 'strict'
       ? renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'strict' })
       : renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'compact' }),
-    taskManifest,
+    taskManifest: stableTaskManifestForCheck(taskManifest),
     projectMemory: compactProjectMemory(projectMemory, selectedMode),
     taskArtifacts: artifacts,
     outputContract: {
@@ -249,6 +249,61 @@ export function buildCheckerLlmInputPack({
   return withPackMetadata(input, selectedMode);
 }
+export function stableTaskManifestForCheck(taskManifest) {
+  let parsed = null;
+  if (typeof taskManifest === 'string') {
+    try {
+      parsed = JSON.parse(taskManifest);
+    } catch {
+      return taskManifest;
+    }
+  } else if (taskManifest && typeof taskManifest === 'object' && !Array.isArray(taskManifest)) {
+    parsed = taskManifest;
+  }
+  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
+    return taskManifest || '';
+  }
+  const stable = {
+    schemaVersion: parsed.schemaVersion,
+    taskId: parsed.taskId,
+    mode: parsed.mode,
+    phase: parsed.phase,
+    gates: parsed.gates,
+    context: {
+      planSha: parsed.context?.planSha,
+      planFingerprintVersion: parsed.context?.planFingerprintVersion,
+      memorySha: parsed.context?.memorySha,
+      riskProfile: parsed.context?.riskProfile,
+      riskTriggers: parsed.context?.riskTriggers,
+      riskWarnings: parsed.context?.riskWarnings,
+      checkContextCurrent: parsed.context?.checkContextCurrent,
+    },
+    requiredEvidenceIssues: parsed.requiredEvidenceIssues,
+    qualitySignals: parsed.qualitySignals,
+    loopDetector: {
+      threshold: parsed.loopDetector?.threshold,
+      requiresConsolidatedRemediation: parsed.loopDetector?.requiresConsolidatedRemediation,
+      repeatedReasons: parsed.loopDetector?.repeatedReasons,
+    },
+    consolidatedRemediationAccepted: parsed.consolidatedRemediationAccepted,
+    consolidatedRemediationArtifact: parsed.consolidatedRemediationArtifact,
+  };
+  return JSON.stringify(pruneUndefined(stable), null, 2);
+}
+function pruneUndefined(value) {
+  if (Array.isArray(value)) {
+    return value.map(pruneUndefined);
+  }
+  if (!value || typeof value !== 'object') {
+    return value;
+  }
+  return Object.fromEntries(Object.entries(value)
+    .filter(([, item]) => item !== undefined)
+    .map(([key, item]) => [key, pruneUndefined(item)]));
+}
 export function buildVerifierLlmInputPack({
   taskDir,
   taskId,
@@ -263,7 +318,7 @@ export function buildVerifierLlmInputPack({
         'brief.md': readTaskFile(taskDir, 'brief.md'),
         'research.md': readTaskFile(taskDir, 'research.md'),
         'plan.md': readTaskFile(taskDir, 'plan.md'),
-        'task-manifest.json': readTaskFile(taskDir, 'task-manifest.json'),
+        'task-manifest.json': stableTaskManifestForCheck(readTaskFile(taskDir, 'task-manifest.json')),
         'check.result.json': readTaskFile(taskDir, 'check.result.json'),
         'check.md': compactCheckMarkdown({
           checkMarkdown: readTaskFile(taskDir, 'check.md'),
@@ -274,7 +329,7 @@ export function buildVerifierLlmInputPack({
         'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), 3500),
         'execution.md': readTaskFile(taskDir, 'execution.md'),
         'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), 'strict'),
-        'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'standard'),
+        'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'fast'),
         'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
         'feedback.md': compactArtifact(taskDir, 'feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
         'execution-feedback.md': compactArtifact(taskDir, 'execution-feedback.md', 'standard', ['feedback event', 'classification', 'supervisor decision']),
@@ -284,7 +339,7 @@ export function buildVerifierLlmInputPack({
         'brief.md': compactArtifact(taskDir, 'brief.md', selectedMode, ['goal', 'scope', 'success criteria']),
         'research.md': compactArtifact(taskDir, 'research.md', selectedMode, ['findings', 'evidence', 'repo']),
         'plan.md': compactArtifact(taskDir, 'plan.md', selectedMode, VERIFY_PLAN_SECTIONS),
-        'task-manifest.json': readTaskFile(taskDir, 'task-manifest.json'),
+        'task-manifest.json': stableTaskManifestForCheck(readTaskFile(taskDir, 'task-manifest.json')),
         'check.result.json': readTaskFile(taskDir, 'check.result.json'),
         'check.md': compactCheckMarkdown({
           checkMarkdown: readTaskFile(taskDir, 'check.md'),
@@ -295,7 +350,7 @@ export function buildVerifierLlmInputPack({
         'human-gate-summary.md': truncateMiddle(readTaskFile(taskDir, 'human-gate-summary.md'), charLimitForMode(selectedMode, 1200, 2500)),
         'execution.md': compactArtifact(taskDir, 'execution.md', selectedMode, VERIFY_EXECUTION_SECTIONS),
         'execution-ledger.json': compactExecutionLedger(readOptionalJson(taskDir, 'execution-ledger.json'), selectedMode),
-        'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), selectedMode),
+        'verify.md': compactVerifierMarkdown(readTaskFile(taskDir, 'verify.md'), readOptionalJson(taskDir, 'verify.result.json'), 'fast'),
         'status.md': compactStatus(readTaskFile(taskDir, 'status.md')),
         'feedback.md': compactArtifact(taskDir, 'feedback.md', selectedMode, ['feedback event', 'classification', 'supervisor decision']),
         'orchestration-log.md': compactOrchestrationLog(readTaskFile(taskDir, 'orchestration-log.md'), selectedMode),

package/bin/lib/llm-input-pack-utils.test.mjs CHANGED Viewed

@@ -112,12 +112,61 @@ describe('llm input pack utilities', () => {
     });
     expect(pack.meta.mode).toBe('standard_plus');
-    expect(pack.meta.capTokens).toBe(26000);
+    expect(pack.meta.capTokens).toBe(34000);
     expect(pack.input.llmInputPolicy.contextInsufficientFallback).toBe('rerun_strict');
     expect(pack.input.taskArtifacts['plan.md']).toContain('<!-- compacted:plan.md');
     expect(pack.meta.compactedArtifacts).toContain('plan.md');
   });
+  it('stabilizes checker task manifest by excluding volatile check telemetry', () => {
+    const taskDir = createTask();
+    const pack = buildCheckerLlmInputPack({
+      taskDir,
+      taskId: 'TASK-999-token-pack',
+      checkerPromptSha: 'sha256:test',
+      cacheKey: { test: true },
+      checkContext: {
+        planSha: 'sha256:plan',
+        memorySha: 'sha256:memory',
+        riskProfile: 'high',
+        riskTriggers: ['source-sync-provider'],
+      },
+      checkEvidence: '# Evidence\n\nok',
+      checkerContextPack: '# Checker Context Pack\n\nok',
+      taskManifest: JSON.stringify({
+        schemaVersion: 1,
+        taskId: 'TASK-999-token-pack',
+        mode: 'standard',
+        phase: 'check',
+        gates: { sourceSyncProvider: { required: true, planComplete: true } },
+        context: {
+          planSha: 'sha256:plan',
+          memorySha: 'sha256:memory',
+          riskProfile: 'high',
+          riskTriggers: ['source-sync-provider'],
+          checkContextCurrent: true,
+        },
+        llmInput: { check: { updatedAt: 'volatile', attempts: [{ mode: 'strict' }] } },
+        lastCheckResult: { verdict: 'ready_for_human_gate', createdAt: 'volatile' },
+        timestamps: { updatedAt: 'volatile' },
+        loopDetector: {
+          threshold: 2,
+          requiresConsolidatedRemediation: true,
+          repeatedReasons: [{ normalizedReason: 'context_overflow', count: 3 }],
+          reasons: { noisy: { normalizedReason: 'verbose history' } },
+        },
+      }, null, 2),
+      projectMemory: [],
+      mode: 'standard',
+    });
+    expect(pack.input.taskManifest).toContain('"requiresConsolidatedRemediation": true');
+    expect(pack.input.taskManifest).not.toContain('lastCheckResult');
+    expect(pack.input.taskManifest).not.toContain('llmInput');
+    expect(pack.input.taskManifest).not.toContain('timestamps');
+    expect(pack.input.taskManifest).not.toContain('verbose history');
+  });
   it('preserves protected verification sections when compacting long plans', () => {
     const taskDir = createTask();
     const longPlan = [

package/bin/run-verify.mjs CHANGED Viewed

@@ -503,7 +503,8 @@ function writeInternalSupervisorVerify({
     '## residual risks',
     '',
     '- This is not an independent fresh-context verifier run.',
-    '- Use `--verify-mode external_cli` for production-readiness, R4/R5, material migrations/backfills, destructive/security/financial work, broad ambiguous refactors or explicit human request.',
+    '- Use `--verify-mode external_cli` for production-readiness, R4/R5, production or real-user-data migrations/backfills, destructive/security/financial work, broad ambiguous refactors or explicit human request.',
+    '- Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when evidence covers the plan gates.',
     '',
     '## latest status excerpt',
     '',

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@besales/ops-framework",
-  "version": "0.1.23",
+  "version": "0.1.25",
   "type": "module",
   "bin": {
     "ops-agent": "bin/ops-agent.mjs"

package/prompts/supervisor.md CHANGED Viewed

@@ -57,7 +57,7 @@ Supervisor является code-level orchestrator по контракту: rou
 23. Любой user feedback, вопрос, correction, review note или новое наблюдение на любом этапе сначала записывается в `feedback.md` через `ops-agent intake-feedback` и классифицируется. Feedback не является инструкцией к изменению implementation, пока не классифицирован.
 24. После `Execute` задача не может перейти в `Retrospective`, `Human Closeout Gate`, `Closed`, `Accepted` или task switch без `Verify` и structured `verify.result.json`.
 25. `verify.result.json` должен сверять `plan.md` с фактическим `execution.md`, diff/files/tests и явным execution evidence. Self-reported executor checks без verifier verdict не являются достаточным Verify.
-26. `verify.result.json.verdict = pass | pass_with_notes` допустим при `verificationMode = internal_supervisor` для обычных `R0-R3` local engineering slices. Это cost-saving режим без независимого CLI/model verifier и он является default, если shared defaults или project agents override задают `verifier.mode = internal_supervisor`. `external_cli` обязателен только для R4/R5, production-readiness, destructive/security/financial/broad operational actions, material Prisma/data migrations/backfills, broad ambiguous refactors или explicit human request.
+26. `verify.result.json.verdict = pass | pass_with_notes` допустим при `verificationMode = internal_supervisor` для обычных `R0-R3` local engineering slices. Это cost-saving режим без независимого CLI/model verifier и он является default, если shared defaults или project agents override задают `verifier.mode = internal_supervisor`. `external_cli` обязателен только для R4/R5, production-readiness, destructive/security/financial/broad operational actions, production or real-user-data Prisma/data migrations/backfills, broad ambiguous refactors или explicit human request. Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when execution evidence covers the plan gates.
 27. Если external verifier/checker/browser tooling начинает тратить непропорционально много времени или блокируется окружением, Supervisor обязан остановить loop и вынести human decision: принять internal verify/evidence, запустить external escalation вручную или изменить scope.
 ## Hard Gate: Material Scope Expansion -> Brief Reset

package/prompts/verifier.md CHANGED Viewed

@@ -69,7 +69,7 @@
 Для обычных `R0-R3` local engineering slices verdict `pass` или `pass_with_notes` может быть выдан в режиме `internal_supervisor`, если verifier сверил `plan.md`, `execution.md`, diff/files/tests and explicit evidence.
-External CLI verifier обязателен только для escalation triggers: `R4/R5`, production-readiness, destructive/security/financial/broad operational actions, material Prisma/data migrations/backfills, broad ambiguous refactors или explicit human request.
+External CLI verifier обязателен только для escalation triggers: `R4/R5`, production-readiness, destructive/security/financial/broad operational actions, production or real-user-data Prisma/data migrations/backfills, broad ambiguous refactors или explicit human request. Local scratch DB migrations, fixture imports, docs-only changes and bounded R0-R3 local engineering slices can close with `internal_supervisor` when execution evidence includes the required tests/build/lint/smoke/apply proof.
 Минимальный shape `verify.result.json`:

package/templates/verify.md CHANGED Viewed

@@ -18,7 +18,7 @@
 `internal_supervisor | external_cli`
-Use `internal_supervisor` for ordinary local R0-R3 slices when focused tests/build/lint/smoke evidence is sufficient. Use `external_cli` for R4/R5, production-readiness, destructive/security/financial/broad operational work, material migrations/backfills, broad ambiguous refactors or explicit human request.
+Use `internal_supervisor` for ordinary local R0-R3 slices when focused tests/build/lint/smoke evidence is sufficient. Use `external_cli` for R4/R5, production-readiness, destructive/security/financial/broad operational work, production or real-user-data migrations/backfills, broad ambiguous refactors or explicit human request. Local scratch DB migrations, fixture imports and bounded local-only backfills can close with internal Verify when evidence covers the plan gates.
 ## Verification ladder coverage