@besales/ops-framework 0.1.22 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/bin/lib/check-context-utils.mjs +72 -5
- package/bin/lib/check-context-utils.test.mjs +80 -1
- package/bin/lib/llm-input-pack-utils.mjs +58 -3
- package/bin/lib/llm-input-pack-utils.test.mjs +50 -1
- package/bin/lib/task-manifest-utils.mjs +87 -0
- package/bin/lib/task-manifest-utils.test.mjs +68 -0
- package/package.json +1 -1
- package/playbooks/source-sync-provider.md +4 -1
- package/prompts/checker.md +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.24
|
|
4
|
+
|
|
5
|
+
- Raised `standard_plus` Check/Verify context cap so near-cap plans stay in compact mode instead of jumping to slow strict context.
|
|
6
|
+
- Stabilized Check LLM `taskManifest` input by removing volatile `llmInput`, `lastCheckResult`, timestamps and verbose loop history from the prompt payload.
|
|
7
|
+
- Reduced duplicate external Check reruns after `task-manifest.json` refreshes by keeping prompt input stable when plan, memory and risk inputs have not changed.
|
|
8
|
+
|
|
9
|
+
## 0.1.23
|
|
10
|
+
|
|
11
|
+
- Added deterministic import/ingestion planning gates for representative real fixtures, raw downstream metadata extraction and explicit duplicate-import behavior.
|
|
12
|
+
- Tightened Source Sync / Provider gates so idempotency must name a dedupe key and concrete duplicate action, not only "detected/reported".
|
|
13
|
+
- Added work-package dependency preflight for Execute/Verify/Human Gate so materialized WP tasks cannot proceed while declared dependencies remain in progress.
|
|
14
|
+
- Expanded Source Sync playbook and checker prompt with import/manual-upload fixture, metadata and dedupe expectations.
|
|
15
|
+
|
|
3
16
|
## 0.1.22
|
|
4
17
|
|
|
5
18
|
- Added a compact `standard_plus` LLM context mode between `standard` and `strict` so near-cap Check/Verify runs avoid full strict context when compact evidence is sufficient.
|
|
@@ -636,10 +636,10 @@ export function classifyRisk({ structuralLines, referencedFiles, planSections, r
|
|
|
636
636
|
if (hasRef((ref) => matchesAnyRoot(ref, workerRoots)) || hasText(/\b(worker|scheduler|queue|bullmq|backfill|pacing)\b/)) {
|
|
637
637
|
triggers.add('worker-queue');
|
|
638
638
|
}
|
|
639
|
-
if (hasText(/\b(source ingestion|source-ingestion|connector|oauth|callback|provider api|sync plan)\b/)) {
|
|
639
|
+
if (hasText(/\b(source ingestion|source-ingestion|connector|oauth|callback|provider api|sync plan|manual upload|upload path|import smoke|transcript import|transcripts?|vtt|evidence capture)\b/)) {
|
|
640
640
|
triggers.add('ingestion-provider');
|
|
641
641
|
}
|
|
642
|
-
if (hasText(/\b(source sync|source-sync|raw record|raw records|pagination|rate limit|retry|retries|idempotency|partial failure|provider stream|sync worker|sync scheduler)\b/)) {
|
|
642
|
+
if (hasText(/\b(source sync|source-sync|raw record|raw records|pagination|rate limit|retry|retries|idempotency|partial failure|provider stream|sync worker|sync scheduler|content hash|content_hash|dedupe|duplicate import|backfill|replay)\b/)) {
|
|
643
643
|
triggers.add('source-sync-provider');
|
|
644
644
|
}
|
|
645
645
|
if (hasText(/\b(production|railway|deploy|runtime env|environment variable)\b/)) {
|
|
@@ -776,6 +776,7 @@ export function analyzePlanQualityGates({ planContent, risk, referencedFiles = [
|
|
|
776
776
|
const optimizationStrategy = inspectOptimizationStrategy(sections);
|
|
777
777
|
const productionRollout = inspectProductionRolloutGate(sections);
|
|
778
778
|
const sourceSyncProvider = inspectSourceSyncProviderGate(sections);
|
|
779
|
+
const importIngestion = inspectImportIngestionGate(sections, planContent);
|
|
779
780
|
const missingSignals = [];
|
|
780
781
|
|
|
781
782
|
if (!executionMetadata.present) {
|
|
@@ -814,6 +815,9 @@ export function analyzePlanQualityGates({ planContent, risk, referencedFiles = [
|
|
|
814
815
|
if (sourceSyncProviderRequired && !sourceSyncProvider.present) {
|
|
815
816
|
missingSignals.push('Source sync/provider risk detected but `## Source Sync / Provider Gate` is missing or incomplete.');
|
|
816
817
|
}
|
|
818
|
+
if (sourceSyncProviderRequired && importIngestion.required && !importIngestion.present) {
|
|
819
|
+
missingSignals.push('Import/ingestion plan must include an Import / Ingestion Contract naming representative real fixtures or an explicit no-real-fixtures reason, raw metadata/speaker-label extraction, and duplicate-import policy.');
|
|
820
|
+
}
|
|
817
821
|
|
|
818
822
|
return {
|
|
819
823
|
executionMetadata,
|
|
@@ -833,6 +837,7 @@ export function analyzePlanQualityGates({ planContent, risk, referencedFiles = [
|
|
|
833
837
|
productionRollout,
|
|
834
838
|
sourceSyncProviderRequired,
|
|
835
839
|
sourceSyncProvider,
|
|
840
|
+
importIngestion,
|
|
836
841
|
missingSignals,
|
|
837
842
|
};
|
|
838
843
|
}
|
|
@@ -1218,10 +1223,53 @@ export function inspectSourceSyncProviderGate(sections) {
|
|
|
1218
1223
|
present: true,
|
|
1219
1224
|
hasScope: /scope|window|provider|stream|pagination|rate limit|raw record|объем|окно|провайдер/.test(normalized),
|
|
1220
1225
|
hasIdempotency: /idempot|dedupe|duplicate|retry|replay|повтор|дубликат/.test(normalized),
|
|
1226
|
+
hasDedupeKey: /content[_ -]?hash|idempotency key|dedupe key|unique key|natural key|source[_ -]?id|fingerprint|checksum|хеш|ключ/.test(normalized),
|
|
1227
|
+
hasDuplicateAction: /skip|link|update|upsert|reject|error|report|mark|reuse|ignore|пропуск|связ|обнов|ошиб|отчет|помет/.test(normalized),
|
|
1221
1228
|
hasFailureHandling: /partial failure|failure|timeout|backoff|resume|dead letter|ошиб|частич/.test(normalized),
|
|
1222
1229
|
hasCoverageEvidence: /coverage|parity|count|sample|audit|evidence|metric|сверк|покрыт/.test(normalized),
|
|
1223
1230
|
};
|
|
1224
|
-
result.complete = result.hasScope && result.hasIdempotency && result.hasFailureHandling && result.hasCoverageEvidence;
|
|
1231
|
+
result.complete = result.hasScope && result.hasIdempotency && result.hasDedupeKey && result.hasDuplicateAction && result.hasFailureHandling && result.hasCoverageEvidence;
|
|
1232
|
+
result.present = result.complete;
|
|
1233
|
+
return result;
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
/**
 * Checks whether a plan that talks about import/ingestion work documents its
 * import contract.
 *
 * The gate is `required` when the plan text mentions an import-related term
 * (import, ingest, manual upload, transcript, vtt, fixture, backfill, ...).
 * The text that gets evaluated is the Import / Ingestion Contract section
 * joined with the Source Sync / Provider Gate section, both looked up through
 * `readCanonicalSection`.
 *
 * @param {*} sections - Parsed plan sections, passed straight to `readCanonicalSection`.
 * @param {string} [planContent=''] - Raw plan text used to decide whether the gate applies.
 * @returns {object} `required`, `present` and the four `has*` signals; `complete`
 *   is included only when section text was actually evaluated.
 */
export function inspectImportIngestionGate(sections, planContent = '') {
  const importSignal = /\b(import|ingest|ingestion|manual upload|upload path|transcripts?|vtt|fixture|fixtures|evidence capture|backfill)\b/i;
  const contractHeadings = ['import / ingestion contract', 'import ingestion contract', 'ingestion contract', 'import contract'];
  const providerGateHeadings = ['source sync / provider gate', 'source sync provider gate', 'source sync gate', 'provider gate'];

  // Shape returned whenever no section text is inspected; only the two flags vary.
  const withoutSignals = (isRequired, isPresent) => ({
    required: isRequired,
    present: isPresent,
    hasRealFixtureDecision: false,
    hasMetadataExtraction: false,
    hasDedupePolicy: false,
    hasDownstreamFields: false,
  });

  const required = importSignal.test(planContent || '');
  const contractText = [contractHeadings, providerGateHeadings]
    .map((headings) => readCanonicalSection(sections, headings))
    .filter(Boolean)
    .join('\n');

  if (!required) {
    // Nothing import-related in the plan: the gate is trivially satisfied.
    return withoutSignals(false, true);
  }
  if (!contractText) {
    return withoutSignals(required, false);
  }

  const lowered = contractText.toLowerCase();
  const hasRealFixtureDecision = /(real|representative|production-like|existing|fixture|fixtures|sample|golden|transcript|vtt|md|no real|unavailable|human-owned|реальн|репрезент|транскрипт)/.test(lowered);
  const hasMetadataExtraction = /(raw_speaker_labels|speaker labels?|speaker[_ -]?label|single-label|single label|is_single_label_export|speaker_resolution_status|metadata|метаданн|лейбл)/.test(lowered);
  const hasDedupePolicy = /(content[_ -]?hash|idempotency key|dedupe key|duplicate|skip|link|update|upsert|reject|report|повтор|дубликат|хеш)/.test(lowered);
  const hasDownstreamFields = /(downstream|wp-\d{3}|process-meeting|golden set|schema field|raw_speaker_labels|speaker_resolution_status|is_single_label_export|meeting_track|потребител|следующ)/.test(lowered);

  // The contract only counts as present when every signal was found.
  const complete = hasRealFixtureDecision && hasMetadataExtraction && hasDedupePolicy && hasDownstreamFields;
  return {
    required,
    present: complete,
    hasRealFixtureDecision,
    hasMetadataExtraction,
    hasDedupePolicy,
    hasDownstreamFields,
    complete,
  };
}
|
|
@@ -1351,7 +1399,8 @@ export function buildCheckerContextPack({
|
|
|
1351
1399
|
? [
|
|
1352
1400
|
'- Source sync/provider risk detected.',
|
|
1353
1401
|
`- Source Sync / Provider Gate complete: \`${qualityGates.sourceSyncProvider.present ? 'yes' : 'no'}\`.`,
|
|
1354
|
-
|
|
1402
|
+
`- Import / Ingestion Contract complete: \`${qualityGates.importIngestion?.present ? 'yes' : 'no'}\`.`,
|
|
1403
|
+
'- Checker must return `return_to_plan` if scope/window, dedupe key, duplicate action, failure handling, representative fixtures and coverage evidence are not named.',
|
|
1355
1404
|
].join('\n')
|
|
1356
1405
|
: '- Source sync/provider gate is not required by detected triggers.',
|
|
1357
1406
|
'',
|
|
@@ -1559,7 +1608,7 @@ export function validateExecutionEvidenceForPlan({ planContent, executionContent
|
|
|
1559
1608
|
category: 'missing_evidence',
|
|
1560
1609
|
message: 'Plan contains Source Sync / Provider Gate but execution.md is missing Source Sync / Provider Evidence.',
|
|
1561
1610
|
});
|
|
1562
|
-
} else if (!/(idempot|retry|pagination|rate limit|raw record|coverage|parity|count|sample|partial failure|replay|audit)/i.test(evidence)) {
|
|
1611
|
+
} else if (!/(idempot|retry|pagination|rate limit|raw record|coverage|parity|count|sample|partial failure|replay|audit|content[_ -]?hash|dedupe|duplicate)/i.test(evidence)) {
|
|
1563
1612
|
errors.push({
|
|
1564
1613
|
category: 'insufficient_evidence',
|
|
1565
1614
|
message: 'Source Sync / Provider Evidence must include idempotency, retry/pagination/rate-limit, raw-record, coverage/parity, sample, replay, or failure-handling evidence.',
|
|
@@ -1567,6 +1616,21 @@ export function validateExecutionEvidenceForPlan({ planContent, executionContent
|
|
|
1567
1616
|
}
|
|
1568
1617
|
}
|
|
1569
1618
|
|
|
1619
|
+
if (hasAnySection(planSections, ['import / ingestion contract', 'import ingestion contract', 'ingestion contract', 'import contract'])) {
|
|
1620
|
+
const evidence = readAnySection(executionSections, ['import / ingestion evidence', 'import ingestion evidence', 'ingestion evidence', 'import evidence', 'source sync / provider evidence']);
|
|
1621
|
+
if (!evidence) {
|
|
1622
|
+
errors.push({
|
|
1623
|
+
category: 'missing_evidence',
|
|
1624
|
+
message: 'Plan contains Import / Ingestion Contract but execution.md is missing Import / Ingestion Evidence.',
|
|
1625
|
+
});
|
|
1626
|
+
} else if (!/(fixture|real|sample|transcript|vtt|raw_speaker_labels|speaker labels?|single-label|content[_ -]?hash|dedupe|duplicate|skip|link|update|upsert|report|count|audit)/i.test(evidence)) {
|
|
1627
|
+
errors.push({
|
|
1628
|
+
category: 'insufficient_evidence',
|
|
1629
|
+
message: 'Import / Ingestion Evidence must include fixture choice, raw metadata/speaker labels, duplicate policy result, count or audit evidence.',
|
|
1630
|
+
});
|
|
1631
|
+
}
|
|
1632
|
+
}
|
|
1633
|
+
|
|
1570
1634
|
return errors;
|
|
1571
1635
|
}
|
|
1572
1636
|
|
|
@@ -1615,6 +1679,9 @@ function buildCheckerQuestions({ risk, qualityGates }) {
|
|
|
1615
1679
|
if (qualityGates.sourceSyncProviderRequired || risk.riskTriggers.includes('worker-queue')) {
|
|
1616
1680
|
questions.push('Does the plan bound provider/worker execution by scope, window, credentials, queue ownership and coverage evidence?');
|
|
1617
1681
|
}
|
|
1682
|
+
if (qualityGates.importIngestion?.required) {
|
|
1683
|
+
questions.push('Does the import plan use representative real fixtures when available, extract raw metadata needed downstream, and define exact duplicate-import behavior?');
|
|
1684
|
+
}
|
|
1618
1685
|
return questions;
|
|
1619
1686
|
}
|
|
1620
1687
|
|
|
@@ -744,7 +744,7 @@ describe('agent pipeline quality gates', () => {
|
|
|
744
744
|
'## Source Sync / Provider Gate',
|
|
745
745
|
'',
|
|
746
746
|
'- Scope / provider window: GetCourse stream for May, paginated by window.',
|
|
747
|
-
'- Idempotency / duplicate handling: idempotency key and
|
|
747
|
+
'- Idempotency / duplicate handling: content_hash idempotency key; duplicate action is skip and link to existing record.',
|
|
748
748
|
'- Failure handling / retry boundaries: backoff, timeout and partial failure resume.',
|
|
749
749
|
'- Coverage / parity evidence: count audit and raw record samples.',
|
|
750
750
|
].join('\n'));
|
|
@@ -754,4 +754,83 @@ describe('agent pipeline quality gates', () => {
|
|
|
754
754
|
expect(result.present).toBe(true);
|
|
755
755
|
expect(result.hasCoverageEvidence).toBe(true);
|
|
756
756
|
});
|
|
757
|
+
|
|
758
|
+
it('requires import ingestion contract for transcript import plans', () => {
|
|
759
|
+
const result = analyzePlanQualityGates({
|
|
760
|
+
planContent: [
|
|
761
|
+
'# Plan',
|
|
762
|
+
'',
|
|
763
|
+
'## Risk tier and execution budget',
|
|
764
|
+
'',
|
|
765
|
+
'- Risk tier: `R2`',
|
|
766
|
+
'- Speed mode: `Standard`',
|
|
767
|
+
'- Approved execution target: manual upload transcript import.',
|
|
768
|
+
'- Requires return to Plan/Check if: ingestion scope changes.',
|
|
769
|
+
'',
|
|
770
|
+
'## Verification ladder',
|
|
771
|
+
'',
|
|
772
|
+
'- Micro-verify during Execute: import smoke.',
|
|
773
|
+
'- Slice-verify before completion: count audit.',
|
|
774
|
+
'- External Verify required before closeout: yes.',
|
|
775
|
+
'',
|
|
776
|
+
'## Source Sync / Provider Gate',
|
|
777
|
+
'',
|
|
778
|
+
'- Scope / provider window: manual transcript upload fixtures.',
|
|
779
|
+
'- Idempotency / duplicate handling: content_hash exists and duplicates are detected.',
|
|
780
|
+
'- Failure handling / retry boundaries: parse errors report partial failure.',
|
|
781
|
+
'- Coverage / parity evidence: count audit.',
|
|
782
|
+
].join('\n'),
|
|
783
|
+
risk: {
|
|
784
|
+
riskProfile: 'high',
|
|
785
|
+
riskTriggers: ['source-sync-provider', 'ingestion-provider'],
|
|
786
|
+
},
|
|
787
|
+
});
|
|
788
|
+
|
|
789
|
+
expect(result.importIngestion.required).toBe(true);
|
|
790
|
+
expect(result.importIngestion.present).toBe(false);
|
|
791
|
+
expect(result.missingSignals.some((signal) => signal.includes('Import/ingestion plan must include'))).toBe(true);
|
|
792
|
+
});
|
|
793
|
+
|
|
794
|
+
it('accepts import ingestion contract with real fixtures metadata and dedupe behavior', () => {
|
|
795
|
+
const result = analyzePlanQualityGates({
|
|
796
|
+
planContent: [
|
|
797
|
+
'# Plan',
|
|
798
|
+
'',
|
|
799
|
+
'## Risk tier and execution budget',
|
|
800
|
+
'',
|
|
801
|
+
'- Risk tier: `R2`',
|
|
802
|
+
'- Speed mode: `Standard`',
|
|
803
|
+
'- Approved execution target: manual upload transcript import.',
|
|
804
|
+
'- Requires return to Plan/Check if: ingestion scope changes.',
|
|
805
|
+
'',
|
|
806
|
+
'## Verification ladder',
|
|
807
|
+
'',
|
|
808
|
+
'- Micro-verify during Execute: import smoke.',
|
|
809
|
+
'- Slice-verify before completion: count audit.',
|
|
810
|
+
'- External Verify required before closeout: yes.',
|
|
811
|
+
'',
|
|
812
|
+
'## Source Sync / Provider Gate',
|
|
813
|
+
'',
|
|
814
|
+
'- Scope / provider window: manual transcript upload from real VTT fixtures.',
|
|
815
|
+
'- Idempotency / duplicate handling: content_hash idempotency key; duplicate action is skip and link to existing evidence.',
|
|
816
|
+
'- Failure handling / retry boundaries: parse errors report partial failure.',
|
|
817
|
+
'- Coverage / parity evidence: count audit and raw record samples.',
|
|
818
|
+
'',
|
|
819
|
+
'## Import / Ingestion Contract',
|
|
820
|
+
'',
|
|
821
|
+
'- Representative fixtures: real May transcript VTT/MD fixtures; no synthetic fixtures unless real files are unavailable.',
|
|
822
|
+
'- Raw metadata extraction: raw_speaker_labels, speaker_resolution_status and is_single_label_export are populated at import time.',
|
|
823
|
+
'- Downstream fields: WP-003 process-meeting consumes raw_speaker_labels and WP-005 golden set consumes the same fixtures.',
|
|
824
|
+
'- Duplicate policy: content_hash duplicate imports skip creation and link/reuse existing evidence.',
|
|
825
|
+
].join('\n'),
|
|
826
|
+
risk: {
|
|
827
|
+
riskProfile: 'high',
|
|
828
|
+
riskTriggers: ['source-sync-provider', 'ingestion-provider'],
|
|
829
|
+
},
|
|
830
|
+
});
|
|
831
|
+
|
|
832
|
+
expect(result.sourceSyncProvider.present).toBe(true);
|
|
833
|
+
expect(result.importIngestion.present).toBe(true);
|
|
834
|
+
expect(result.missingSignals.some((signal) => signal.includes('Import/ingestion plan must include'))).toBe(false);
|
|
835
|
+
});
|
|
757
836
|
});
|
|
@@ -12,8 +12,8 @@ export const LLM_CONTEXT_MODES = ['fast', 'standard', 'standard_plus', 'strict']
|
|
|
12
12
|
export const LLM_CONTEXT_CAPS = {
|
|
13
13
|
fast: 8000,
|
|
14
14
|
standard: 20000,
|
|
15
|
-
standard_plus:
|
|
16
|
-
strict:
|
|
15
|
+
standard_plus: 30000,
|
|
16
|
+
strict: 50000,
|
|
17
17
|
};
|
|
18
18
|
|
|
19
19
|
const TOKEN_ESTIMATE_CHARS_PER_TOKEN = 1.8;
|
|
@@ -225,7 +225,7 @@ export function buildCheckerLlmInputPack({
|
|
|
225
225
|
relevantPlaybooks: selectedMode === 'strict'
|
|
226
226
|
? renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'strict' })
|
|
227
227
|
: renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'compact' }),
|
|
228
|
-
taskManifest,
|
|
228
|
+
taskManifest: stableTaskManifestForCheck(taskManifest),
|
|
229
229
|
projectMemory: compactProjectMemory(projectMemory, selectedMode),
|
|
230
230
|
taskArtifacts: artifacts,
|
|
231
231
|
outputContract: {
|
|
@@ -249,6 +249,61 @@ export function buildCheckerLlmInputPack({
|
|
|
249
249
|
return withPackMetadata(input, selectedMode);
|
|
250
250
|
}
|
|
251
251
|
|
|
252
|
+
/**
 * Builds the task-manifest text that goes into the Check prompt, keeping only
 * the fields listed below so volatile data (`llmInput`, `lastCheckResult`,
 * `timestamps`, per-reason loop history) never reaches the payload.
 *
 * @param {string|object} taskManifest - Manifest JSON text or an already parsed manifest object.
 * @returns {*} Pretty-printed JSON of the retained fields. Text that is not
 *   valid JSON, or anything that is not a plain object after parsing, is
 *   returned as received (falsy input becomes `''`).
 */
export function stableTaskManifestForCheck(taskManifest) {
  let manifest = taskManifest;
  if (typeof taskManifest === 'string') {
    try {
      manifest = JSON.parse(taskManifest);
    } catch {
      // Not JSON: hand the raw text through untouched.
      return taskManifest;
    }
  }

  const isRecord = manifest !== null && typeof manifest === 'object' && !Array.isArray(manifest);
  if (!isRecord) {
    return taskManifest || '';
  }

  // Copies the named keys in order; absent keys come back as undefined and are pruned below.
  const pick = (source, keys) => Object.fromEntries(keys.map((key) => [key, source?.[key]]));

  const stable = {
    ...pick(manifest, ['schemaVersion', 'taskId', 'mode', 'phase', 'gates']),
    context: pick(manifest.context, [
      'planSha',
      'planFingerprintVersion',
      'memorySha',
      'riskProfile',
      'riskTriggers',
      'riskWarnings',
      'checkContextCurrent',
    ]),
    ...pick(manifest, ['requiredEvidenceIssues', 'qualitySignals']),
    loopDetector: pick(manifest.loopDetector, [
      'threshold',
      'requiresConsolidatedRemediation',
      'repeatedReasons',
    ]),
    ...pick(manifest, ['consolidatedRemediationAccepted', 'consolidatedRemediationArtifact']),
  };
  return JSON.stringify(pruneUndefined(stable), null, 2);
}
|
|
294
|
+
|
|
295
|
+
/**
 * Recursively rebuilds a value without object properties whose value is
 * `undefined`.
 *
 * Arrays keep their length (their elements are pruned in place, `undefined`
 * elements stay), and primitives and `null` are returned unchanged.
 *
 * @param {*} value - Any value.
 * @returns {*} A pruned copy for arrays/objects, otherwise the input itself.
 */
function pruneUndefined(value) {
  if (Array.isArray(value)) {
    return value.map((entry) => pruneUndefined(entry));
  }

  const isObject = Boolean(value) && typeof value === 'object';
  if (!isObject) {
    return value;
  }

  // flatMap drops undefined-valued keys and recurses into the rest in one pass.
  const keptEntries = Object.entries(value).flatMap(([key, item]) => (
    item === undefined ? [] : [[key, pruneUndefined(item)]]
  ));
  return Object.fromEntries(keptEntries);
}
|
|
306
|
+
|
|
252
307
|
export function buildVerifierLlmInputPack({
|
|
253
308
|
taskDir,
|
|
254
309
|
taskId,
|
|
@@ -112,12 +112,61 @@ describe('llm input pack utilities', () => {
|
|
|
112
112
|
});
|
|
113
113
|
|
|
114
114
|
expect(pack.meta.mode).toBe('standard_plus');
|
|
115
|
-
expect(pack.meta.capTokens).toBe(
|
|
115
|
+
expect(pack.meta.capTokens).toBe(30000);
|
|
116
116
|
expect(pack.input.llmInputPolicy.contextInsufficientFallback).toBe('rerun_strict');
|
|
117
117
|
expect(pack.input.taskArtifacts['plan.md']).toContain('<!-- compacted:plan.md');
|
|
118
118
|
expect(pack.meta.compactedArtifacts).toContain('plan.md');
|
|
119
119
|
});
|
|
120
120
|
|
|
121
|
+
it('stabilizes checker task manifest by excluding volatile check telemetry', () => {
|
|
122
|
+
const taskDir = createTask();
|
|
123
|
+
const pack = buildCheckerLlmInputPack({
|
|
124
|
+
taskDir,
|
|
125
|
+
taskId: 'TASK-999-token-pack',
|
|
126
|
+
checkerPromptSha: 'sha256:test',
|
|
127
|
+
cacheKey: { test: true },
|
|
128
|
+
checkContext: {
|
|
129
|
+
planSha: 'sha256:plan',
|
|
130
|
+
memorySha: 'sha256:memory',
|
|
131
|
+
riskProfile: 'high',
|
|
132
|
+
riskTriggers: ['source-sync-provider'],
|
|
133
|
+
},
|
|
134
|
+
checkEvidence: '# Evidence\n\nok',
|
|
135
|
+
checkerContextPack: '# Checker Context Pack\n\nok',
|
|
136
|
+
taskManifest: JSON.stringify({
|
|
137
|
+
schemaVersion: 1,
|
|
138
|
+
taskId: 'TASK-999-token-pack',
|
|
139
|
+
mode: 'standard',
|
|
140
|
+
phase: 'check',
|
|
141
|
+
gates: { sourceSyncProvider: { required: true, planComplete: true } },
|
|
142
|
+
context: {
|
|
143
|
+
planSha: 'sha256:plan',
|
|
144
|
+
memorySha: 'sha256:memory',
|
|
145
|
+
riskProfile: 'high',
|
|
146
|
+
riskTriggers: ['source-sync-provider'],
|
|
147
|
+
checkContextCurrent: true,
|
|
148
|
+
},
|
|
149
|
+
llmInput: { check: { updatedAt: 'volatile', attempts: [{ mode: 'strict' }] } },
|
|
150
|
+
lastCheckResult: { verdict: 'ready_for_human_gate', createdAt: 'volatile' },
|
|
151
|
+
timestamps: { updatedAt: 'volatile' },
|
|
152
|
+
loopDetector: {
|
|
153
|
+
threshold: 2,
|
|
154
|
+
requiresConsolidatedRemediation: true,
|
|
155
|
+
repeatedReasons: [{ normalizedReason: 'context_overflow', count: 3 }],
|
|
156
|
+
reasons: { noisy: { normalizedReason: 'verbose history' } },
|
|
157
|
+
},
|
|
158
|
+
}, null, 2),
|
|
159
|
+
projectMemory: [],
|
|
160
|
+
mode: 'standard',
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
expect(pack.input.taskManifest).toContain('"requiresConsolidatedRemediation": true');
|
|
164
|
+
expect(pack.input.taskManifest).not.toContain('lastCheckResult');
|
|
165
|
+
expect(pack.input.taskManifest).not.toContain('llmInput');
|
|
166
|
+
expect(pack.input.taskManifest).not.toContain('timestamps');
|
|
167
|
+
expect(pack.input.taskManifest).not.toContain('verbose history');
|
|
168
|
+
});
|
|
169
|
+
|
|
121
170
|
it('preserves protected verification sections when compacting long plans', () => {
|
|
122
171
|
const taskDir = createTask();
|
|
123
172
|
const longPlan = [
|
|
@@ -78,6 +78,12 @@ export function buildTaskManifest({ taskDir, now = new Date().toISOString(), exi
|
|
|
78
78
|
evidenceRequired: hasPlanGate(inputs.taskArtifacts.get('plan.md'), ['source sync / provider gate', 'source sync provider gate', 'source sync gate', 'provider gate']),
|
|
79
79
|
evidenceComplete: !evidenceIssues.some((issue) => issue.message.includes('Source Sync / Provider')),
|
|
80
80
|
},
|
|
81
|
+
importIngestion: {
|
|
82
|
+
required: Boolean(inputs.qualityGates.importIngestion?.required),
|
|
83
|
+
planComplete: Boolean(inputs.qualityGates.importIngestion?.present),
|
|
84
|
+
evidenceRequired: hasPlanGate(inputs.taskArtifacts.get('plan.md'), ['import / ingestion contract', 'import ingestion contract', 'ingestion contract', 'import contract']),
|
|
85
|
+
evidenceComplete: !evidenceIssues.some((issue) => issue.message.includes('Import / Ingestion')),
|
|
86
|
+
},
|
|
81
87
|
},
|
|
82
88
|
context: {
|
|
83
89
|
planSha: inputs.planFingerprint.planSha,
|
|
@@ -167,6 +173,9 @@ export function preflightTask({ taskDir, targetPhase }) {
|
|
|
167
173
|
message: 'Repeated checker/verifier return reason detected. Add consolidated remediation before continuing.',
|
|
168
174
|
});
|
|
169
175
|
}
|
|
176
|
+
if (targetPhase === 'execute' || targetPhase === 'verify' || targetPhase === 'human_gate') {
|
|
177
|
+
issues.push(...inspectWorkPackageDependencyPreflight(taskDir));
|
|
178
|
+
}
|
|
170
179
|
|
|
171
180
|
return {
|
|
172
181
|
ok: issues.length === 0,
|
|
@@ -177,6 +186,84 @@ export function preflightTask({ taskDir, targetPhase }) {
|
|
|
177
186
|
};
|
|
178
187
|
}
|
|
179
188
|
|
|
189
|
+
/**
 * Preflight check for tasks that were materialized from an initiative work
 * package: every dependency declared in the work package must already be in a
 * completed state.
 *
 * The task's `brief.md` is scanned for `- Initiative: ...` and
 * `- Work package: ...` bullets. When either is missing, blank, or still a
 * template placeholder (`(none)` / `[fill in]`), the task is treated as
 * standalone and no issues are reported.
 *
 * @param {string} taskDir - Task directory containing `brief.md`.
 * @returns {Array<{category: string, message: string}>} One issue per missing
 *   work-package file or per dependency whose `Status` is not
 *   done/complete/completed/verified/closed; empty when nothing blocks.
 */
function inspectWorkPackageDependencyPreflight(taskDir) {
  const brief = readTaskFile(taskDir, 'brief.md');

  // Same placeholder spellings that the "Depends on" parser in this file ignores.
  const isPlaceholder = (value) => /^\(?none\)?$/i.test(value) || /^\[?fill in\]?$/i.test(value);
  const cleanBriefValue = (match) => {
    const value = match?.[1]?.trim() ?? '';
    return isPlaceholder(value) ? '' : value;
  };

  // `[ \t]` (not `\s`) keeps each match on its own line, so a blank
  // "- Initiative:" bullet can no longer swallow the bullet that follows it.
  const initiativeId = cleanBriefValue(/-[ \t]+Initiative:[ \t]*`?([^`\n]+)`?/i.exec(brief));
  const workPackageId = cleanBriefValue(/-[ \t]+Work package:[ \t]*`?([^`\n]+)`?/i.exec(brief));
  if (!initiativeId || !workPackageId) {
    return [];
  }

  const initiativesRoot = resolveInitiativesRootForTask(taskDir);
  const initiativeDir = path.join(initiativesRoot, initiativeId);
  const workPackagePath = path.join(initiativeDir, 'work-packages', workPackageId, 'work-package.md');
  if (!fs.existsSync(workPackagePath)) {
    return [{
      category: 'work_package_dependency',
      message: `Task declares initiative work package ${initiativeId}/${workPackageId}, but its work-package.md was not found.`,
    }];
  }

  const workPackage = fs.readFileSync(workPackagePath, 'utf8');
  const dependencies = readWorkPackageDependsOn(workPackage);
  const issues = [];
  for (const dependencyId of dependencies) {
    const dependencyPath = path.join(initiativeDir, 'work-packages', dependencyId, 'work-package.md');
    if (!fs.existsSync(dependencyPath)) {
      issues.push({
        category: 'work_package_dependency',
        message: `Work package ${workPackageId} depends on ${dependencyId}, but dependency work-package.md was not found.`,
      });
      continue;
    }
    const dependency = fs.readFileSync(dependencyPath, 'utf8');
    // A dependency without a Status line is reported as "unknown" and blocks.
    const status = readInlineField(dependency, 'Status') || 'unknown';
    if (!/^(done|complete|completed|verified|closed)$/i.test(status)) {
      issues.push({
        category: 'work_package_dependency',
        message: `Work package ${workPackageId} depends on ${dependencyId}, but dependency status is ${status}; complete/verify it before Execute.`,
      });
    }
  }
  return issues;
}
|
|
232
|
+
|
|
233
|
+
/**
 * Locates the `initiatives` directory that belongs to a task directory.
 *
 * When the resolved task path contains `ops/agent-pipeline/tasks` (judged on
 * the last `tasks` segment), the sibling `initiatives` folder next to that
 * `tasks` folder is returned. Otherwise the location comes from
 * `projectContext`.
 *
 * @param {string} taskDir - Task directory, absolute or relative.
 * @returns {string} Path of the initiatives root.
 */
function resolveInitiativesRootForTask(taskDir) {
  const segments = path.resolve(taskDir).split(path.sep);
  const tasksAt = segments.lastIndexOf('tasks');
  const insidePipelineLayout = tasksAt >= 2
    && segments[tasksAt - 1] === 'agent-pipeline'
    && segments[tasksAt - 2] === 'ops';

  if (!insidePipelineLayout) {
    return projectContext.initiativesRoot || path.join(projectContext.pipelineRoot, 'initiatives');
  }

  // Everything up to (not including) `tasks` is the pipeline folder.
  const pipelineDir = segments.slice(0, tasksAt).join(path.sep) || path.sep;
  return path.join(pipelineDir, 'initiatives');
}
|
|
246
|
+
|
|
247
|
+
/**
 * Extracts the dependency ids from the first `- Depends on: ...` bullet of a
 * work-package document.
 *
 * Ids may be separated by commas or semicolons and may be wrapped in
 * backticks. Blank entries and the placeholders `(none)` / `[fill in]` are
 * dropped.
 *
 * @param {string} content - Work-package markdown; falsy input is treated as empty.
 * @returns {string[]} Dependency ids in declaration order (empty when none).
 */
function readWorkPackageDependsOn(content) {
  // `[ \t]*` rather than `\s*`: `\s` also matches newlines, which let an empty
  // "Depends on:" bullet capture the next non-blank line as a dependency id.
  const match = /^-[ \t]+Depends on:[ \t]*(.*)$/im.exec(content || '');
  if (!match) {
    return [];
  }
  return match[1]
    .split(/[,;]/)
    .map((item) => {
      const trimmed = item.trim();
      // Unwrap `WP-001` so the id matches the work-package directory name.
      const wrapped = /^`([^`]*)`$/.exec(trimmed);
      return wrapped ? wrapped[1].trim() : trimmed;
    })
    .filter((item) => item && !/^\(?none\)?$/i.test(item) && !/^\[?fill in\]?$/i.test(item));
}
|
|
257
|
+
|
|
258
|
+
/**
 * Reads the value of a `Field: value` line that starts at the beginning of a
 * line in a markdown document.
 *
 * @param {string} content - Document text; falsy input is treated as empty.
 * @param {string} field - Field label, matched literally and case-sensitively.
 * @returns {string} The trimmed value of the first matching line, with one
 *   pair of wrapping backticks removed; `''` when the field is absent or empty.
 */
function readInlineField(content, field) {
  // `[ \t]*` rather than `\s*`: `\s` matches newlines, so an empty field used
  // to return the text of the next non-blank line instead of ''.
  const match = new RegExp(`^${escapeRegExp(field)}:[ \\t]*(.*)$`, 'm').exec(content || '');
  if (!match) {
    return '';
  }
  const value = match[1].trim();
  // Values are often written as `done`; unwrap so status checks can match.
  const wrapped = /^`([^`]*)`$/.exec(value);
  return wrapped ? wrapped[1].trim() : value;
}
|
|
262
|
+
|
|
263
|
+
/**
 * Escapes regular-expression metacharacters so the text can be embedded in a
 * `RegExp` source and matched literally.
 *
 * @param {*} value - Value to escape; converted with `String()` first.
 * @returns {string} The text with each of `. * + ? ^ $ { } ( ) | [ ] \`
 *   prefixed by a backslash.
 */
function escapeRegExp(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return String(value).replace(metacharacters, (char) => `\\${char}`);
}
|
|
266
|
+
|
|
180
267
|
export function transitionTaskManifest({ taskDir, targetPhase, now = new Date().toISOString() }) {
|
|
181
268
|
const normalizedPhase = normalizePhase(targetPhase);
|
|
182
269
|
const preflight = preflightTask({ taskDir, targetPhase: normalizedPhase });
|
|
@@ -130,6 +130,74 @@ describe('task manifest utilities', () => {
|
|
|
130
130
|
expect(result.manifest.mode).toBe('fast');
|
|
131
131
|
});
|
|
132
132
|
|
|
133
|
+
it('blocks execute preflight when work-package dependencies are not completed', () => {
|
|
134
|
+
const root = fs.mkdtempSync(path.join(os.tmpdir(), 'ops-wp-deps-'));
|
|
135
|
+
tempDirs.push(root);
|
|
136
|
+
const taskDir = path.join(root, 'ops', 'agent-pipeline', 'tasks', 'TASK-002-import');
|
|
137
|
+
const initiativeDir = path.join(root, 'ops', 'agent-pipeline', 'initiatives', 'delivery-os-mvp');
|
|
138
|
+
fs.mkdirSync(path.join(initiativeDir, 'work-packages', 'WP-001-foundation'), { recursive: true });
|
|
139
|
+
fs.mkdirSync(path.join(initiativeDir, 'work-packages', 'WP-002-import'), { recursive: true });
|
|
140
|
+
fs.mkdirSync(taskDir, { recursive: true });
|
|
141
|
+
fs.writeFileSync(path.join(initiativeDir, 'work-packages', 'WP-001-foundation', 'work-package.md'), [
|
|
142
|
+
'# Work Package',
|
|
143
|
+
'',
|
|
144
|
+
'ID: WP-001-foundation',
|
|
145
|
+
'Status: in_progress',
|
|
146
|
+
'',
|
|
147
|
+
].join('\n'));
|
|
148
|
+
fs.writeFileSync(path.join(initiativeDir, 'work-packages', 'WP-002-import', 'work-package.md'), [
|
|
149
|
+
'# Work Package',
|
|
150
|
+
'',
|
|
151
|
+
'ID: WP-002-import',
|
|
152
|
+
'Status: in_progress',
|
|
153
|
+
'',
|
|
154
|
+
'## Dependencies',
|
|
155
|
+
'',
|
|
156
|
+
'- Depends on: WP-001-foundation',
|
|
157
|
+
'',
|
|
158
|
+
].join('\n'));
|
|
159
|
+
fs.writeFileSync(path.join(taskDir, 'brief.md'), [
|
|
160
|
+
'# Brief',
|
|
161
|
+
'',
|
|
162
|
+
'## Initiative Context',
|
|
163
|
+
'',
|
|
164
|
+
'- Initiative: `delivery-os-mvp`',
|
|
165
|
+
'- Work package: `WP-002-import`',
|
|
166
|
+
'',
|
|
167
|
+
].join('\n'));
|
|
168
|
+
fs.writeFileSync(path.join(taskDir, 'research.md'), '# Research\n\n## Findings\n\n- `docs/example.md`\n');
|
|
169
|
+
fs.writeFileSync(path.join(taskDir, 'plan.md'), [
|
|
170
|
+
'# Plan',
|
|
171
|
+
'',
|
|
172
|
+
'## Risk tier and execution budget',
|
|
173
|
+
'',
|
|
174
|
+
'- Risk tier: `R1`',
|
|
175
|
+
'- Speed mode: `Fast`',
|
|
176
|
+
'- Approved execution target: docs/example.md only.',
|
|
177
|
+
'- Requires return to Plan/Check if: code changes are needed.',
|
|
178
|
+
'',
|
|
179
|
+
'## Verification ladder',
|
|
180
|
+
'',
|
|
181
|
+
'- Micro-verify during Execute: markdown review.',
|
|
182
|
+
'- Slice-verify before completion: self-test.',
|
|
183
|
+
'- External Verify required before closeout: no.',
|
|
184
|
+
'',
|
|
185
|
+
'## Затронутые модули и файлы',
|
|
186
|
+
'',
|
|
187
|
+
'- `docs/example.md`',
|
|
188
|
+
'',
|
|
189
|
+
].join('\n'));
|
|
190
|
+
fs.writeFileSync(path.join(taskDir, 'status.md'), '# Status\n\n## Текущий этап\n\nplan\n');
|
|
191
|
+
|
|
192
|
+
const result = preflightTask({ taskDir, targetPhase: 'execute' });
|
|
193
|
+
|
|
194
|
+
expect(result.ok).toBe(false);
|
|
195
|
+
expect(result.issues).toContainEqual({
|
|
196
|
+
category: 'work_package_dependency',
|
|
197
|
+
message: 'Work package WP-002-import depends on WP-001-foundation, but dependency status is in_progress; complete/verify it before Execute.',
|
|
198
|
+
});
|
|
199
|
+
});
|
|
200
|
+
|
|
133
201
|
it('blocks verify preflight when required execution evidence is missing', () => {
|
|
134
202
|
const taskDir = createTask({
|
|
135
203
|
plan: [
|
package/package.json
CHANGED
package/playbooks/source-sync-provider.md
CHANGED
|
@@ -4,6 +4,9 @@ Use this for sync/import/provider pipelines, raw records, retries, pagination, r
|
|
|
4
4
|
|
|
5
5
|
- Plan must include `## Source Sync / Provider Gate`.
|
|
6
6
|
- Name scope/provider window, idempotency or duplicate handling, retry/failure boundaries and coverage/parity evidence.
|
|
7
|
+
- For import/manual-upload work, add `## Import / Ingestion Contract`: representative real fixtures when available (or an explicit no-real-fixtures reason), raw metadata extraction required by downstream work, and exact duplicate-import behavior.
|
|
8
|
+
- Duplicate handling must name the dedupe key and action on repeat import: skip, link/reuse, update/upsert, reject/error, or report-only.
|
|
9
|
+
- If raw source data contains speaker labels or similar downstream metadata, extraction should happen at import time unless explicitly deferred with a downstream reason.
|
|
7
10
|
- Keep provider credentials, queue ownership and affected windows explicit.
|
|
8
11
|
- Execution must include `## Source Sync / Provider Evidence`.
|
|
9
|
-
- Acceptable evidence includes raw-record samples, counts/parity, replay/audit output, retry/idempotency checks and partial-failure recovery proof.
|
|
12
|
+
- Acceptable evidence includes raw-record samples, representative fixture names, counts/parity, replay/audit output, retry/idempotency checks, duplicate-import results and partial-failure recovery proof.
|
package/prompts/checker.md
CHANGED
|
@@ -67,7 +67,7 @@ Project-specific context приходит только через task artifacts
|
|
|
67
67
|
19. Для O2/O3 hot-path work план обязан содержать `## Optimization Strategy`: tier, hot paths, expected size, chosen efficient approach, anti-patterns avoided and bounded optimizer budget/stop rule. Checker должен блокировать weak strategy before Execute, но не требовать endless optimization: O2 = one focused review on touched hot paths; O3 = one focused review plus one representative measurement.
|
|
68
68
|
20. Checker должен оценивать не только наличие UI/browser smoke path, а его результативность: смог бы этот сценарий поймать реальные ошибки пользователя, которые задача может породить?
|
|
69
69
|
21. Если `checker-context-pack.md`, `task-manifest.json` или risk triggers показывают migrations/env vars/cron/workers/billing/auth/external APIs/deployment/runtime behavior, план обязан содержать `## Production Rollout Gate`: impact/blast radius, environment/deploy variables, rollback/disable path and post-deploy evidence.
|
|
70
|
-
22. Если `checker-context-pack.md`, `task-manifest.json` или risk triggers показывают sync/import/provider/raw records/retries/pagination/rate limits/idempotency/replay/backfill/partial failure, план обязан содержать `## Source Sync / Provider Gate`: scope/provider window, idempotency, failure handling/retry boundaries and coverage/parity evidence.
|
|
70
|
+
22. Если `checker-context-pack.md`, `task-manifest.json` или risk triggers показывают sync/import/provider/raw records/retries/pagination/rate limits/idempotency/replay/backfill/partial failure, план обязан содержать `## Source Sync / Provider Gate`: scope/provider window, idempotency with dedupe key and duplicate action, failure handling/retry boundaries and coverage/parity evidence. Для import/manual-upload/transcript/evidence-capture задач план также обязан содержать `## Import / Ingestion Contract`: real representative fixtures when available or explicit no-real-fixtures reason, raw metadata/speaker-label extraction needed downstream, and repeat-import policy. Размытое "duplicates detected or reported" без skip/link/update/reject/report-only semantics недостаточно.
|
|
71
71
|
23. Если `task-manifest.json.loopDetector.requiresConsolidatedRemediation=true`, Checker должен блокировать повторный мелкий loop, пока plan/check-resolution не содержит consolidated remediation секцию, которая объединяет repeated reasons.
|
|
72
72
|
24. Если `llmInputPolicy.mode` не `strict` и отсутствующий full artifact реально нужен для честной оценки, verdict должен быть `context_insufficient`. Не используй `context_insufficient`, если deterministic gate уже явно показывает `return_to_plan`.
|
|
73
73
|
|