@besales/ops-framework 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/bin/lib/check-context-utils.mjs +72 -5
- package/bin/lib/check-context-utils.test.mjs +80 -1
- package/bin/lib/llm-input-pack-utils.mjs +21 -6
- package/bin/lib/llm-input-pack-utils.test.mjs +30 -2
- package/bin/lib/task-manifest-utils.mjs +87 -0
- package/bin/lib/task-manifest-utils.test.mjs +68 -0
- package/bin/run-check.mjs +11 -0
- package/package.json +1 -1
- package/playbooks/source-sync-provider.md +4 -1
- package/prompts/checker.md +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.23
|
|
4
|
+
|
|
5
|
+
- Added deterministic import/ingestion planning gates for representative real fixtures, raw downstream metadata extraction and explicit duplicate-import behavior.
|
|
6
|
+
- Tightened Source Sync / Provider gates so idempotency must name a dedupe key and concrete duplicate action, not only "detected/reported".
|
|
7
|
+
- Added work-package dependency preflight for Execute/Verify/Human Gate so materialized WP tasks cannot proceed while declared dependencies remain in progress.
|
|
8
|
+
- Expanded Source Sync playbook and checker prompt with import/manual-upload fixture, metadata and dedupe expectations.
|
|
9
|
+
|
|
10
|
+
## 0.1.22
|
|
11
|
+
|
|
12
|
+
- Added a compact `standard_plus` LLM context mode between `standard` and `strict` so near-cap Check/Verify runs avoid full strict context when compact evidence is sufficient.
|
|
13
|
+
- Refreshed `task-manifest.json` after successful Check and cache hits so `lastCheckResult` reflects the final current verdict instead of stale `return_to_plan` results.
|
|
14
|
+
|
|
3
15
|
## 0.1.21
|
|
4
16
|
|
|
5
17
|
- Added `verify-timeline.json` telemetry for Verify runs, including deterministic blocks, LLM input sizing, context-mode escalation, provider duration/failure and final verdict.
|
|
@@ -636,10 +636,10 @@ export function classifyRisk({ structuralLines, referencedFiles, planSections, r
|
|
|
636
636
|
if (hasRef((ref) => matchesAnyRoot(ref, workerRoots)) || hasText(/\b(worker|scheduler|queue|bullmq|backfill|pacing)\b/)) {
|
|
637
637
|
triggers.add('worker-queue');
|
|
638
638
|
}
|
|
639
|
-
if (hasText(/\b(source ingestion|source-ingestion|connector|oauth|callback|provider api|sync plan)\b/)) {
|
|
639
|
+
if (hasText(/\b(source ingestion|source-ingestion|connector|oauth|callback|provider api|sync plan|manual upload|upload path|import smoke|transcript import|transcripts?|vtt|evidence capture)\b/)) {
|
|
640
640
|
triggers.add('ingestion-provider');
|
|
641
641
|
}
|
|
642
|
-
if (hasText(/\b(source sync|source-sync|raw record|raw records|pagination|rate limit|retry|retries|idempotency|partial failure|provider stream|sync worker|sync scheduler)\b/)) {
|
|
642
|
+
if (hasText(/\b(source sync|source-sync|raw record|raw records|pagination|rate limit|retry|retries|idempotency|partial failure|provider stream|sync worker|sync scheduler|content hash|content_hash|dedupe|duplicate import|backfill|replay)\b/)) {
|
|
643
643
|
triggers.add('source-sync-provider');
|
|
644
644
|
}
|
|
645
645
|
if (hasText(/\b(production|railway|deploy|runtime env|environment variable)\b/)) {
|
|
@@ -776,6 +776,7 @@ export function analyzePlanQualityGates({ planContent, risk, referencedFiles = [
|
|
|
776
776
|
const optimizationStrategy = inspectOptimizationStrategy(sections);
|
|
777
777
|
const productionRollout = inspectProductionRolloutGate(sections);
|
|
778
778
|
const sourceSyncProvider = inspectSourceSyncProviderGate(sections);
|
|
779
|
+
const importIngestion = inspectImportIngestionGate(sections, planContent);
|
|
779
780
|
const missingSignals = [];
|
|
780
781
|
|
|
781
782
|
if (!executionMetadata.present) {
|
|
@@ -814,6 +815,9 @@ export function analyzePlanQualityGates({ planContent, risk, referencedFiles = [
|
|
|
814
815
|
if (sourceSyncProviderRequired && !sourceSyncProvider.present) {
|
|
815
816
|
missingSignals.push('Source sync/provider risk detected but `## Source Sync / Provider Gate` is missing or incomplete.');
|
|
816
817
|
}
|
|
818
|
+
if (sourceSyncProviderRequired && importIngestion.required && !importIngestion.present) {
|
|
819
|
+
missingSignals.push('Import/ingestion plan must include an Import / Ingestion Contract naming representative real fixtures or an explicit no-real-fixtures reason, raw metadata/speaker-label extraction, and duplicate-import policy.');
|
|
820
|
+
}
|
|
817
821
|
|
|
818
822
|
return {
|
|
819
823
|
executionMetadata,
|
|
@@ -833,6 +837,7 @@ export function analyzePlanQualityGates({ planContent, risk, referencedFiles = [
|
|
|
833
837
|
productionRollout,
|
|
834
838
|
sourceSyncProviderRequired,
|
|
835
839
|
sourceSyncProvider,
|
|
840
|
+
importIngestion,
|
|
836
841
|
missingSignals,
|
|
837
842
|
};
|
|
838
843
|
}
|
|
@@ -1218,10 +1223,53 @@ export function inspectSourceSyncProviderGate(sections) {
|
|
|
1218
1223
|
present: true,
|
|
1219
1224
|
hasScope: /scope|window|provider|stream|pagination|rate limit|raw record|объем|окно|провайдер/.test(normalized),
|
|
1220
1225
|
hasIdempotency: /idempot|dedupe|duplicate|retry|replay|повтор|дубликат/.test(normalized),
|
|
1226
|
+
hasDedupeKey: /content[_ -]?hash|idempotency key|dedupe key|unique key|natural key|source[_ -]?id|fingerprint|checksum|хеш|ключ/.test(normalized),
|
|
1227
|
+
hasDuplicateAction: /skip|link|update|upsert|reject|error|report|mark|reuse|ignore|пропуск|связ|обнов|ошиб|отчет|помет/.test(normalized),
|
|
1221
1228
|
hasFailureHandling: /partial failure|failure|timeout|backoff|resume|dead letter|ошиб|частич/.test(normalized),
|
|
1222
1229
|
hasCoverageEvidence: /coverage|parity|count|sample|audit|evidence|metric|сверк|покрыт/.test(normalized),
|
|
1223
1230
|
};
|
|
1224
|
-
result.complete = result.hasScope && result.hasIdempotency && result.hasFailureHandling && result.hasCoverageEvidence;
|
|
1231
|
+
result.complete = result.hasScope && result.hasIdempotency && result.hasDedupeKey && result.hasDuplicateAction && result.hasFailureHandling && result.hasCoverageEvidence;
|
|
1232
|
+
result.present = result.complete;
|
|
1233
|
+
return result;
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
/**
 * Inspects a plan for an Import / Ingestion Contract.
 *
 * The gate is considered required when the raw plan text mentions any
 * import/ingestion keyword. The contract text is taken from the Import /
 * Ingestion Contract section and the Source Sync / Provider Gate section
 * (whichever exist), joined with a newline and lower-cased before matching.
 * All signals are plain substring/regex heuristics over that text.
 *
 * @param {*} sections - Parsed plan sections, passed through to `readCanonicalSection`.
 * @param {string} [planContent=''] - Raw plan markdown used only to decide `required`.
 * @returns {{required: boolean, present: boolean, hasRealFixtureDecision: boolean,
 *   hasMetadataExtraction: boolean, hasDedupePolicy: boolean, hasDownstreamFields: boolean,
 *   complete?: boolean}} `complete` is only set when contract text was found;
 *   in that case `present` equals `complete`.
 */
export function inspectImportIngestionGate(sections, planContent = '') {
  const importKeywords = /\b(import|ingest|ingestion|manual upload|upload path|transcripts?|vtt|fixture|fixtures|evidence capture|backfill)\b/i;
  const isRequired = importKeywords.test(planContent || '');

  const candidateHeadings = [
    ['import / ingestion contract', 'import ingestion contract', 'ingestion contract', 'import contract'],
    ['source sync / provider gate', 'source sync provider gate', 'source sync gate', 'provider gate'],
  ];
  const contractText = candidateHeadings
    .map((headings) => readCanonicalSection(sections, headings))
    .filter(Boolean)
    .join('\n');

  // Shared shape for both early exits: no signal was evaluated.
  const withoutSignals = (present) => ({
    required: isRequired,
    present,
    hasRealFixtureDecision: false,
    hasMetadataExtraction: false,
    hasDedupePolicy: false,
    hasDownstreamFields: false,
  });

  if (!isRequired) {
    // Nothing import-related in the plan: the gate passes vacuously.
    return withoutSignals(true);
  }
  if (!contractText) {
    return withoutSignals(false);
  }

  const lowered = contractText.toLowerCase();
  const hasRealFixtureDecision = /(real|representative|production-like|existing|fixture|fixtures|sample|golden|transcript|vtt|md|no real|unavailable|human-owned|реальн|репрезент|транскрипт)/.test(lowered);
  const hasMetadataExtraction = /(raw_speaker_labels|speaker labels?|speaker[_ -]?label|single-label|single label|is_single_label_export|speaker_resolution_status|metadata|метаданн|лейбл)/.test(lowered);
  const hasDedupePolicy = /(content[_ -]?hash|idempotency key|dedupe key|duplicate|skip|link|update|upsert|reject|report|повтор|дубликат|хеш)/.test(lowered);
  const hasDownstreamFields = /(downstream|wp-\d{3}|process-meeting|golden set|schema field|raw_speaker_labels|speaker_resolution_status|is_single_label_export|meeting_track|потребител|следующ)/.test(lowered);

  // The contract only counts as present when every signal is satisfied.
  const complete = hasRealFixtureDecision && hasMetadataExtraction && hasDedupePolicy && hasDownstreamFields;

  return {
    required: isRequired,
    present: complete,
    hasRealFixtureDecision,
    hasMetadataExtraction,
    hasDedupePolicy,
    hasDownstreamFields,
    complete,
  };
}
|
|
@@ -1351,7 +1399,8 @@ export function buildCheckerContextPack({
|
|
|
1351
1399
|
? [
|
|
1352
1400
|
'- Source sync/provider risk detected.',
|
|
1353
1401
|
`- Source Sync / Provider Gate complete: \`${qualityGates.sourceSyncProvider.present ? 'yes' : 'no'}\`.`,
|
|
1354
|
-
|
|
1402
|
+
`- Import / Ingestion Contract complete: \`${qualityGates.importIngestion?.present ? 'yes' : 'no'}\`.`,
|
|
1403
|
+
'- Checker must return `return_to_plan` if scope/window, dedupe key, duplicate action, failure handling, representative fixtures and coverage evidence are not named.',
|
|
1355
1404
|
].join('\n')
|
|
1356
1405
|
: '- Source sync/provider gate is not required by detected triggers.',
|
|
1357
1406
|
'',
|
|
@@ -1559,7 +1608,7 @@ export function validateExecutionEvidenceForPlan({ planContent, executionContent
|
|
|
1559
1608
|
category: 'missing_evidence',
|
|
1560
1609
|
message: 'Plan contains Source Sync / Provider Gate but execution.md is missing Source Sync / Provider Evidence.',
|
|
1561
1610
|
});
|
|
1562
|
-
} else if (!/(idempot|retry|pagination|rate limit|raw record|coverage|parity|count|sample|partial failure|replay|audit)/i.test(evidence)) {
|
|
1611
|
+
} else if (!/(idempot|retry|pagination|rate limit|raw record|coverage|parity|count|sample|partial failure|replay|audit|content[_ -]?hash|dedupe|duplicate)/i.test(evidence)) {
|
|
1563
1612
|
errors.push({
|
|
1564
1613
|
category: 'insufficient_evidence',
|
|
1565
1614
|
message: 'Source Sync / Provider Evidence must include idempotency, retry/pagination/rate-limit, raw-record, coverage/parity, sample, replay, or failure-handling evidence.',
|
|
@@ -1567,6 +1616,21 @@ export function validateExecutionEvidenceForPlan({ planContent, executionContent
|
|
|
1567
1616
|
}
|
|
1568
1617
|
}
|
|
1569
1618
|
|
|
1619
|
+
if (hasAnySection(planSections, ['import / ingestion contract', 'import ingestion contract', 'ingestion contract', 'import contract'])) {
|
|
1620
|
+
const evidence = readAnySection(executionSections, ['import / ingestion evidence', 'import ingestion evidence', 'ingestion evidence', 'import evidence', 'source sync / provider evidence']);
|
|
1621
|
+
if (!evidence) {
|
|
1622
|
+
errors.push({
|
|
1623
|
+
category: 'missing_evidence',
|
|
1624
|
+
message: 'Plan contains Import / Ingestion Contract but execution.md is missing Import / Ingestion Evidence.',
|
|
1625
|
+
});
|
|
1626
|
+
} else if (!/(fixture|real|sample|transcript|vtt|raw_speaker_labels|speaker labels?|single-label|content[_ -]?hash|dedupe|duplicate|skip|link|update|upsert|report|count|audit)/i.test(evidence)) {
|
|
1627
|
+
errors.push({
|
|
1628
|
+
category: 'insufficient_evidence',
|
|
1629
|
+
message: 'Import / Ingestion Evidence must include fixture choice, raw metadata/speaker labels, duplicate policy result, count or audit evidence.',
|
|
1630
|
+
});
|
|
1631
|
+
}
|
|
1632
|
+
}
|
|
1633
|
+
|
|
1570
1634
|
return errors;
|
|
1571
1635
|
}
|
|
1572
1636
|
|
|
@@ -1615,6 +1679,9 @@ function buildCheckerQuestions({ risk, qualityGates }) {
|
|
|
1615
1679
|
if (qualityGates.sourceSyncProviderRequired || risk.riskTriggers.includes('worker-queue')) {
|
|
1616
1680
|
questions.push('Does the plan bound provider/worker execution by scope, window, credentials, queue ownership and coverage evidence?');
|
|
1617
1681
|
}
|
|
1682
|
+
if (qualityGates.importIngestion?.required) {
|
|
1683
|
+
questions.push('Does the import plan use representative real fixtures when available, extract raw metadata needed downstream, and define exact duplicate-import behavior?');
|
|
1684
|
+
}
|
|
1618
1685
|
return questions;
|
|
1619
1686
|
}
|
|
1620
1687
|
|
|
@@ -744,7 +744,7 @@ describe('agent pipeline quality gates', () => {
|
|
|
744
744
|
'## Source Sync / Provider Gate',
|
|
745
745
|
'',
|
|
746
746
|
'- Scope / provider window: GetCourse stream for May, paginated by window.',
|
|
747
|
-
'- Idempotency / duplicate handling: idempotency key and
|
|
747
|
+
'- Idempotency / duplicate handling: content_hash idempotency key; duplicate action is skip and link to existing record.',
|
|
748
748
|
'- Failure handling / retry boundaries: backoff, timeout and partial failure resume.',
|
|
749
749
|
'- Coverage / parity evidence: count audit and raw record samples.',
|
|
750
750
|
].join('\n'));
|
|
@@ -754,4 +754,83 @@ describe('agent pipeline quality gates', () => {
|
|
|
754
754
|
expect(result.present).toBe(true);
|
|
755
755
|
expect(result.hasCoverageEvidence).toBe(true);
|
|
756
756
|
});
|
|
757
|
+
|
|
758
|
+
it('requires import ingestion contract for transcript import plans', () => {
|
|
759
|
+
const result = analyzePlanQualityGates({
|
|
760
|
+
planContent: [
|
|
761
|
+
'# Plan',
|
|
762
|
+
'',
|
|
763
|
+
'## Risk tier and execution budget',
|
|
764
|
+
'',
|
|
765
|
+
'- Risk tier: `R2`',
|
|
766
|
+
'- Speed mode: `Standard`',
|
|
767
|
+
'- Approved execution target: manual upload transcript import.',
|
|
768
|
+
'- Requires return to Plan/Check if: ingestion scope changes.',
|
|
769
|
+
'',
|
|
770
|
+
'## Verification ladder',
|
|
771
|
+
'',
|
|
772
|
+
'- Micro-verify during Execute: import smoke.',
|
|
773
|
+
'- Slice-verify before completion: count audit.',
|
|
774
|
+
'- External Verify required before closeout: yes.',
|
|
775
|
+
'',
|
|
776
|
+
'## Source Sync / Provider Gate',
|
|
777
|
+
'',
|
|
778
|
+
'- Scope / provider window: manual transcript upload fixtures.',
|
|
779
|
+
'- Idempotency / duplicate handling: content_hash exists and duplicates are detected.',
|
|
780
|
+
'- Failure handling / retry boundaries: parse errors report partial failure.',
|
|
781
|
+
'- Coverage / parity evidence: count audit.',
|
|
782
|
+
].join('\n'),
|
|
783
|
+
risk: {
|
|
784
|
+
riskProfile: 'high',
|
|
785
|
+
riskTriggers: ['source-sync-provider', 'ingestion-provider'],
|
|
786
|
+
},
|
|
787
|
+
});
|
|
788
|
+
|
|
789
|
+
expect(result.importIngestion.required).toBe(true);
|
|
790
|
+
expect(result.importIngestion.present).toBe(false);
|
|
791
|
+
expect(result.missingSignals.some((signal) => signal.includes('Import/ingestion plan must include'))).toBe(true);
|
|
792
|
+
});
|
|
793
|
+
|
|
794
|
+
it('accepts import ingestion contract with real fixtures metadata and dedupe behavior', () => {
|
|
795
|
+
const result = analyzePlanQualityGates({
|
|
796
|
+
planContent: [
|
|
797
|
+
'# Plan',
|
|
798
|
+
'',
|
|
799
|
+
'## Risk tier and execution budget',
|
|
800
|
+
'',
|
|
801
|
+
'- Risk tier: `R2`',
|
|
802
|
+
'- Speed mode: `Standard`',
|
|
803
|
+
'- Approved execution target: manual upload transcript import.',
|
|
804
|
+
'- Requires return to Plan/Check if: ingestion scope changes.',
|
|
805
|
+
'',
|
|
806
|
+
'## Verification ladder',
|
|
807
|
+
'',
|
|
808
|
+
'- Micro-verify during Execute: import smoke.',
|
|
809
|
+
'- Slice-verify before completion: count audit.',
|
|
810
|
+
'- External Verify required before closeout: yes.',
|
|
811
|
+
'',
|
|
812
|
+
'## Source Sync / Provider Gate',
|
|
813
|
+
'',
|
|
814
|
+
'- Scope / provider window: manual transcript upload from real VTT fixtures.',
|
|
815
|
+
'- Idempotency / duplicate handling: content_hash idempotency key; duplicate action is skip and link to existing evidence.',
|
|
816
|
+
'- Failure handling / retry boundaries: parse errors report partial failure.',
|
|
817
|
+
'- Coverage / parity evidence: count audit and raw record samples.',
|
|
818
|
+
'',
|
|
819
|
+
'## Import / Ingestion Contract',
|
|
820
|
+
'',
|
|
821
|
+
'- Representative fixtures: real May transcript VTT/MD fixtures; no synthetic fixtures unless real files are unavailable.',
|
|
822
|
+
'- Raw metadata extraction: raw_speaker_labels, speaker_resolution_status and is_single_label_export are populated at import time.',
|
|
823
|
+
'- Downstream fields: WP-003 process-meeting consumes raw_speaker_labels and WP-005 golden set consumes the same fixtures.',
|
|
824
|
+
'- Duplicate policy: content_hash duplicate imports skip creation and link/reuse existing evidence.',
|
|
825
|
+
].join('\n'),
|
|
826
|
+
risk: {
|
|
827
|
+
riskProfile: 'high',
|
|
828
|
+
riskTriggers: ['source-sync-provider', 'ingestion-provider'],
|
|
829
|
+
},
|
|
830
|
+
});
|
|
831
|
+
|
|
832
|
+
expect(result.sourceSyncProvider.present).toBe(true);
|
|
833
|
+
expect(result.importIngestion.present).toBe(true);
|
|
834
|
+
expect(result.missingSignals.some((signal) => signal.includes('Import/ingestion plan must include'))).toBe(false);
|
|
835
|
+
});
|
|
757
836
|
});
|
|
@@ -8,10 +8,11 @@ import {
|
|
|
8
8
|
renderRelevantPlaybooks,
|
|
9
9
|
} from './check-context-utils.mjs';
|
|
10
10
|
|
|
11
|
-
export const LLM_CONTEXT_MODES = ['fast', 'standard', 'strict'];
|
|
11
|
+
export const LLM_CONTEXT_MODES = ['fast', 'standard', 'standard_plus', 'strict'];
|
|
12
12
|
export const LLM_CONTEXT_CAPS = {
|
|
13
13
|
fast: 8000,
|
|
14
14
|
standard: 20000,
|
|
15
|
+
standard_plus: 26000,
|
|
15
16
|
strict: 45000,
|
|
16
17
|
};
|
|
17
18
|
|
|
@@ -20,6 +21,7 @@ const PACK_CAP_SAFETY_MULTIPLIER = 1.15;
|
|
|
20
21
|
const MEMORY_MAX_CHARS = {
|
|
21
22
|
fast: 3000,
|
|
22
23
|
standard: 3500,
|
|
24
|
+
standard_plus: 4500,
|
|
23
25
|
strict: Infinity,
|
|
24
26
|
};
|
|
25
27
|
|
|
@@ -163,6 +165,9 @@ export function nextLlmContextMode(mode) {
|
|
|
163
165
|
return 'standard';
|
|
164
166
|
}
|
|
165
167
|
if (mode === 'standard') {
|
|
168
|
+
return 'standard_plus';
|
|
169
|
+
}
|
|
170
|
+
if (mode === 'standard_plus') {
|
|
166
171
|
return 'strict';
|
|
167
172
|
}
|
|
168
173
|
return null;
|
|
@@ -215,8 +220,8 @@ export function buildCheckerLlmInputPack({
|
|
|
215
220
|
fullContextAvailableViaStrict: selectedMode !== 'strict',
|
|
216
221
|
contextInsufficientFallback: selectedMode === 'strict' ? 'stop_and_report' : `rerun_${nextLlmContextMode(selectedMode)}`,
|
|
217
222
|
},
|
|
218
|
-
checkEvidence: compactGeneratedMarkdown('check-evidence.md', checkEvidence, selectedMode, { fast: 2800, standard: 4000 }),
|
|
219
|
-
checkerContextPack: compactGeneratedMarkdown('checker-context-pack.md', checkerContextPack, selectedMode, { fast: 3300, standard: 4600 }),
|
|
223
|
+
checkEvidence: compactGeneratedMarkdown('check-evidence.md', checkEvidence, selectedMode, { fast: 2800, standard: 4000, standard_plus: 5600 }),
|
|
224
|
+
checkerContextPack: compactGeneratedMarkdown('checker-context-pack.md', checkerContextPack, selectedMode, { fast: 3300, standard: 4600, standard_plus: 6200 }),
|
|
220
225
|
relevantPlaybooks: selectedMode === 'strict'
|
|
221
226
|
? renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'strict' })
|
|
222
227
|
: renderRelevantPlaybooks(readRelevantPlaybooks(checkContext.riskTriggers || []), { mode: 'compact' }),
|
|
@@ -521,7 +526,7 @@ function compactExecutionLedger(ledger, mode) {
|
|
|
521
526
|
}
|
|
522
527
|
const changedFiles = Array.isArray(ledger.git?.changedFiles) ? ledger.git.changedFiles : [];
|
|
523
528
|
const unrelatedDirtyFiles = Array.isArray(ledger.git?.unrelatedDirtyFiles) ? ledger.git.unrelatedDirtyFiles : [];
|
|
524
|
-
const limit = mode === 'fast' ? 40 : mode === 'standard' ? 90 : 160;
|
|
529
|
+
const limit = mode === 'fast' ? 40 : mode === 'standard' ? 90 : mode === 'standard_plus' ? 120 : 160;
|
|
525
530
|
const compact = {
|
|
526
531
|
schemaVersion: ledger.schemaVersion,
|
|
527
532
|
taskId: ledger.taskId,
|
|
@@ -580,7 +585,11 @@ function compactGeneratedMarkdown(fileName, content, mode, limits) {
|
|
|
580
585
|
if (mode === 'strict' || !content) {
|
|
581
586
|
return content;
|
|
582
587
|
}
|
|
583
|
-
const limit = mode === 'fast'
|
|
588
|
+
const limit = mode === 'fast'
|
|
589
|
+
? limits.fast
|
|
590
|
+
: mode === 'standard_plus'
|
|
591
|
+
? limits.standard_plus || Math.ceil(limits.standard * 1.35)
|
|
592
|
+
: limits.standard;
|
|
584
593
|
return markCompacted(fileName, content, truncateMiddle(content, limit));
|
|
585
594
|
}
|
|
586
595
|
|
|
@@ -707,7 +716,13 @@ function isProtectedSection(value) {
|
|
|
707
716
|
}
|
|
708
717
|
|
|
709
718
|
/**
 * Picks the character budget for an LLM context mode.
 *
 * @param {string} mode - Context mode name.
 * @param {number} fastChars - Budget returned for `'fast'`.
 * @param {number} standardChars - Budget returned for every other mode;
 *   `'standard_plus'` gets this value scaled by 1.25 and rounded up.
 * @returns {number} The character limit for the given mode.
 */
function charLimitForMode(mode, fastChars, standardChars) {
  switch (mode) {
    case 'fast':
      return fastChars;
    case 'standard_plus':
      return Math.ceil(standardChars * 1.25);
    default:
      // Any other mode (including 'standard') uses the standard budget as-is.
      return standardChars;
  }
}
|
|
712
727
|
|
|
713
728
|
function readOptionalJson(taskDir, fileName) {
|
|
@@ -84,12 +84,40 @@ describe('llm input pack utilities', () => {
|
|
|
84
84
|
});
|
|
85
85
|
|
|
86
86
|
it('builds bounded fallback mode sequence for context insufficient results', () => {
|
|
87
|
-
expect(buildContextModeSequence('fast')).toEqual(['fast', 'standard', 'strict']);
|
|
88
|
-
expect(buildContextModeSequence('standard')).toEqual(['standard', 'strict']);
|
|
87
|
+
expect(buildContextModeSequence('fast')).toEqual(['fast', 'standard', 'standard_plus', 'strict']);
|
|
88
|
+
expect(buildContextModeSequence('standard')).toEqual(['standard', 'standard_plus', 'strict']);
|
|
89
|
+
expect(buildContextModeSequence('standard_plus')).toEqual(['standard_plus', 'strict']);
|
|
89
90
|
expect(buildContextModeSequence('strict')).toEqual(['strict']);
|
|
90
91
|
expect(isContextInsufficientResult({ verdict: 'context_insufficient' })).toBe(true);
|
|
91
92
|
});
|
|
92
93
|
|
|
94
|
+
it('provides a compact standard_plus check mode before strict', () => {
|
|
95
|
+
const taskDir = createTask();
|
|
96
|
+
const pack = buildCheckerLlmInputPack({
|
|
97
|
+
taskDir,
|
|
98
|
+
taskId: 'TASK-999-token-pack',
|
|
99
|
+
checkerPromptSha: 'sha256:test',
|
|
100
|
+
cacheKey: { test: true },
|
|
101
|
+
checkContext: {
|
|
102
|
+
planSha: 'sha256:plan',
|
|
103
|
+
memorySha: 'sha256:memory',
|
|
104
|
+
riskProfile: 'high',
|
|
105
|
+
riskTriggers: ['source-sync-provider', 'prisma-schema'],
|
|
106
|
+
},
|
|
107
|
+
checkEvidence: '# Evidence\n\nok\n'.repeat(900),
|
|
108
|
+
checkerContextPack: '# Checker Context Pack\n\nok\n'.repeat(900),
|
|
109
|
+
taskManifest: '{}',
|
|
110
|
+
projectMemory: [],
|
|
111
|
+
mode: 'standard_plus',
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
expect(pack.meta.mode).toBe('standard_plus');
|
|
115
|
+
expect(pack.meta.capTokens).toBe(26000);
|
|
116
|
+
expect(pack.input.llmInputPolicy.contextInsufficientFallback).toBe('rerun_strict');
|
|
117
|
+
expect(pack.input.taskArtifacts['plan.md']).toContain('<!-- compacted:plan.md');
|
|
118
|
+
expect(pack.meta.compactedArtifacts).toContain('plan.md');
|
|
119
|
+
});
|
|
120
|
+
|
|
93
121
|
it('preserves protected verification sections when compacting long plans', () => {
|
|
94
122
|
const taskDir = createTask();
|
|
95
123
|
const longPlan = [
|
|
@@ -78,6 +78,12 @@ export function buildTaskManifest({ taskDir, now = new Date().toISOString(), exi
|
|
|
78
78
|
evidenceRequired: hasPlanGate(inputs.taskArtifacts.get('plan.md'), ['source sync / provider gate', 'source sync provider gate', 'source sync gate', 'provider gate']),
|
|
79
79
|
evidenceComplete: !evidenceIssues.some((issue) => issue.message.includes('Source Sync / Provider')),
|
|
80
80
|
},
|
|
81
|
+
importIngestion: {
|
|
82
|
+
required: Boolean(inputs.qualityGates.importIngestion?.required),
|
|
83
|
+
planComplete: Boolean(inputs.qualityGates.importIngestion?.present),
|
|
84
|
+
evidenceRequired: hasPlanGate(inputs.taskArtifacts.get('plan.md'), ['import / ingestion contract', 'import ingestion contract', 'ingestion contract', 'import contract']),
|
|
85
|
+
evidenceComplete: !evidenceIssues.some((issue) => issue.message.includes('Import / Ingestion')),
|
|
86
|
+
},
|
|
81
87
|
},
|
|
82
88
|
context: {
|
|
83
89
|
planSha: inputs.planFingerprint.planSha,
|
|
@@ -167,6 +173,9 @@ export function preflightTask({ taskDir, targetPhase }) {
|
|
|
167
173
|
message: 'Repeated checker/verifier return reason detected. Add consolidated remediation before continuing.',
|
|
168
174
|
});
|
|
169
175
|
}
|
|
176
|
+
if (targetPhase === 'execute' || targetPhase === 'verify' || targetPhase === 'human_gate') {
|
|
177
|
+
issues.push(...inspectWorkPackageDependencyPreflight(taskDir));
|
|
178
|
+
}
|
|
170
179
|
|
|
171
180
|
return {
|
|
172
181
|
ok: issues.length === 0,
|
|
@@ -177,6 +186,84 @@ export function preflightTask({ taskDir, targetPhase }) {
|
|
|
177
186
|
};
|
|
178
187
|
}
|
|
179
188
|
|
|
189
|
+
/**
 * Checks that the work package a task belongs to has no unfinished dependencies.
 *
 * Reads `- Initiative:` and `- Work package:` bullets from the task's
 * `brief.md`. When either is absent or empty the task is not treated as a
 * materialized work-package task and no issues are reported. Otherwise the
 * work package's `work-package.md` is read, its `Depends on` ids are resolved
 * inside the same initiative, and every dependency whose `Status` field is not
 * done/complete/completed/verified/closed produces an issue.
 *
 * @param {string} taskDir - Directory of the task being preflighted.
 * @returns {Array<{category: string, message: string}>} Blocking issues; empty when none.
 */
function inspectWorkPackageDependencyPreflight(taskDir) {
  const brief = readTaskFile(taskDir, 'brief.md');
  // Match only spaces/tabs around the field name: `\s` also matches newlines,
  // which let an empty "- Initiative:" bullet capture the following line as its value.
  const initiativeId = /-[ \t]+Initiative:[ \t]+`?([^`\n]+)`?/i.exec(brief)?.[1]?.trim();
  const workPackageId = /-[ \t]+Work package:[ \t]+`?([^`\n]+)`?/i.exec(brief)?.[1]?.trim();
  if (!initiativeId || !workPackageId) {
    return [];
  }

  const initiativeDir = path.join(resolveInitiativesRootForTask(taskDir), initiativeId);
  const workPackageFile = (id) => path.join(initiativeDir, 'work-packages', id, 'work-package.md');

  const ownPath = workPackageFile(workPackageId);
  if (!fs.existsSync(ownPath)) {
    return [{
      category: 'work_package_dependency',
      message: `Task declares initiative work package ${initiativeId}/${workPackageId}, but its work-package.md was not found.`,
    }];
  }

  const issues = [];
  for (const dependencyId of readWorkPackageDependsOn(fs.readFileSync(ownPath, 'utf8'))) {
    const dependencyPath = workPackageFile(dependencyId);
    if (!fs.existsSync(dependencyPath)) {
      issues.push({
        category: 'work_package_dependency',
        message: `Work package ${workPackageId} depends on ${dependencyId}, but dependency work-package.md was not found.`,
      });
      continue;
    }
    // A dependency without a Status field is reported as "unknown" and blocks.
    const status = readInlineField(fs.readFileSync(dependencyPath, 'utf8'), 'Status') || 'unknown';
    if (!/^(done|complete|completed|verified|closed)$/i.test(status)) {
      issues.push({
        category: 'work_package_dependency',
        message: `Work package ${workPackageId} depends on ${dependencyId}, but dependency status is ${status}; complete/verify it before Execute.`,
      });
    }
  }
  return issues;
}
|
|
232
|
+
|
|
233
|
+
/**
 * Locates the `initiatives` directory that belongs to a task directory.
 *
 * When the resolved task path contains `ops/agent-pipeline/tasks`, the
 * initiatives root is the sibling `initiatives` folder next to `tasks`.
 * Otherwise it falls back to `projectContext.initiativesRoot`, or to
 * `<projectContext.pipelineRoot>/initiatives` when that is not set.
 *
 * @param {string} taskDir - Task directory (absolute or relative).
 * @returns {string} Path of the initiatives root.
 */
function resolveInitiativesRootForTask(taskDir) {
  const segments = path.resolve(taskDir).split(path.sep);
  const tasksAt = segments.lastIndexOf('tasks');
  const followsPipelineLayout = tasksAt >= 2
    && segments[tasksAt - 1] === 'agent-pipeline'
    && segments[tasksAt - 2] === 'ops';

  if (!followsPipelineLayout) {
    return projectContext.initiativesRoot || path.join(projectContext.pipelineRoot, 'initiatives');
  }

  // Everything before the `tasks` segment is the pipeline root of this task.
  const pipelineRoot = segments.slice(0, tasksAt).join(path.sep) || path.sep;
  return path.join(pipelineRoot, 'initiatives');
}
|
|
246
|
+
|
|
247
|
+
/**
 * Extracts dependency ids from the first `- Depends on:` bullet of a work package.
 *
 * The value is split on commas and semicolons. Surrounding whitespace and
 * markdown backticks are removed, and placeholder entries (`none`, `(none)`,
 * `fill in`, `[fill in]`) are dropped.
 *
 * @param {string} [content] - Raw `work-package.md` text.
 * @returns {string[]} Dependency ids in declaration order; empty when none are declared.
 */
function readWorkPackageDependsOn(content) {
  // `[ \t]` instead of `\s`: `\s` matches newlines, so an empty "Depends on:"
  // bullet used to capture the next line of the file as a dependency id.
  const match = /^-[ \t]+Depends on:[ \t]*(.*)$/im.exec(content || '');
  if (!match) {
    return [];
  }
  return String(match[1] || '')
    .split(/[,;]/)
    // Ids may be written as inline code (`WP-001`); unwrap them so they map to directory names.
    .map((item) => item.trim().replace(/^`+|`+$/g, '').trim())
    .filter((item) => item && !/^\(?none\)?$/i.test(item) && !/^\[?fill in\]?$/i.test(item));
}
|
|
257
|
+
|
|
258
|
+
/**
 * Reads a `Field: value` line from markdown-like text.
 *
 * The field name must start a line and is matched literally and
 * case-sensitively. Only the remainder of that same line is returned.
 *
 * @param {string} [content] - Text to search.
 * @param {string} field - Field name, without the trailing colon.
 * @returns {string} Trimmed value of the first matching line, or `''` when the
 *   field is missing or has no value.
 */
function readInlineField(content, field) {
  // `[ \t]*` instead of `\s*`: `\s` matches newlines, so an empty "Field:"
  // used to return the next non-empty line as its value.
  const match = new RegExp(`^${escapeRegExp(field)}:[ \\t]*(.*)$`, 'm').exec(content || '');
  return match ? match[1].trim() : '';
}

/**
 * Escapes regular-expression metacharacters so a value matches literally.
 *
 * @param {*} value - Value to escape; coerced with `String()`.
 * @returns {string} The value with each of `. * + ? ^ $ { } ( ) | [ ] \` backslash-escaped.
 */
function escapeRegExp(value) {
  return String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
|
|
266
|
+
|
|
180
267
|
export function transitionTaskManifest({ taskDir, targetPhase, now = new Date().toISOString() }) {
|
|
181
268
|
const normalizedPhase = normalizePhase(targetPhase);
|
|
182
269
|
const preflight = preflightTask({ taskDir, targetPhase: normalizedPhase });
|
|
@@ -130,6 +130,74 @@ describe('task manifest utilities', () => {
|
|
|
130
130
|
expect(result.manifest.mode).toBe('fast');
|
|
131
131
|
});
|
|
132
132
|
|
|
133
|
+
it('blocks execute preflight when work-package dependencies are not completed', () => {
|
|
134
|
+
const root = fs.mkdtempSync(path.join(os.tmpdir(), 'ops-wp-deps-'));
|
|
135
|
+
tempDirs.push(root);
|
|
136
|
+
const taskDir = path.join(root, 'ops', 'agent-pipeline', 'tasks', 'TASK-002-import');
|
|
137
|
+
const initiativeDir = path.join(root, 'ops', 'agent-pipeline', 'initiatives', 'delivery-os-mvp');
|
|
138
|
+
fs.mkdirSync(path.join(initiativeDir, 'work-packages', 'WP-001-foundation'), { recursive: true });
|
|
139
|
+
fs.mkdirSync(path.join(initiativeDir, 'work-packages', 'WP-002-import'), { recursive: true });
|
|
140
|
+
fs.mkdirSync(taskDir, { recursive: true });
|
|
141
|
+
fs.writeFileSync(path.join(initiativeDir, 'work-packages', 'WP-001-foundation', 'work-package.md'), [
|
|
142
|
+
'# Work Package',
|
|
143
|
+
'',
|
|
144
|
+
'ID: WP-001-foundation',
|
|
145
|
+
'Status: in_progress',
|
|
146
|
+
'',
|
|
147
|
+
].join('\n'));
|
|
148
|
+
fs.writeFileSync(path.join(initiativeDir, 'work-packages', 'WP-002-import', 'work-package.md'), [
|
|
149
|
+
'# Work Package',
|
|
150
|
+
'',
|
|
151
|
+
'ID: WP-002-import',
|
|
152
|
+
'Status: in_progress',
|
|
153
|
+
'',
|
|
154
|
+
'## Dependencies',
|
|
155
|
+
'',
|
|
156
|
+
'- Depends on: WP-001-foundation',
|
|
157
|
+
'',
|
|
158
|
+
].join('\n'));
|
|
159
|
+
fs.writeFileSync(path.join(taskDir, 'brief.md'), [
|
|
160
|
+
'# Brief',
|
|
161
|
+
'',
|
|
162
|
+
'## Initiative Context',
|
|
163
|
+
'',
|
|
164
|
+
'- Initiative: `delivery-os-mvp`',
|
|
165
|
+
'- Work package: `WP-002-import`',
|
|
166
|
+
'',
|
|
167
|
+
].join('\n'));
|
|
168
|
+
fs.writeFileSync(path.join(taskDir, 'research.md'), '# Research\n\n## Findings\n\n- `docs/example.md`\n');
|
|
169
|
+
fs.writeFileSync(path.join(taskDir, 'plan.md'), [
|
|
170
|
+
'# Plan',
|
|
171
|
+
'',
|
|
172
|
+
'## Risk tier and execution budget',
|
|
173
|
+
'',
|
|
174
|
+
'- Risk tier: `R1`',
|
|
175
|
+
'- Speed mode: `Fast`',
|
|
176
|
+
'- Approved execution target: docs/example.md only.',
|
|
177
|
+
'- Requires return to Plan/Check if: code changes are needed.',
|
|
178
|
+
'',
|
|
179
|
+
'## Verification ladder',
|
|
180
|
+
'',
|
|
181
|
+
'- Micro-verify during Execute: markdown review.',
|
|
182
|
+
'- Slice-verify before completion: self-test.',
|
|
183
|
+
'- External Verify required before closeout: no.',
|
|
184
|
+
'',
|
|
185
|
+
'## Затронутые модули и файлы',
|
|
186
|
+
'',
|
|
187
|
+
'- `docs/example.md`',
|
|
188
|
+
'',
|
|
189
|
+
].join('\n'));
|
|
190
|
+
fs.writeFileSync(path.join(taskDir, 'status.md'), '# Status\n\n## Текущий этап\n\nplan\n');
|
|
191
|
+
|
|
192
|
+
const result = preflightTask({ taskDir, targetPhase: 'execute' });
|
|
193
|
+
|
|
194
|
+
expect(result.ok).toBe(false);
|
|
195
|
+
expect(result.issues).toContainEqual({
|
|
196
|
+
category: 'work_package_dependency',
|
|
197
|
+
message: 'Work package WP-002-import depends on WP-001-foundation, but dependency status is in_progress; complete/verify it before Execute.',
|
|
198
|
+
});
|
|
199
|
+
});
|
|
200
|
+
|
|
133
201
|
it('blocks verify preflight when required execution evidence is missing', () => {
|
|
134
202
|
const taskDir = createTask({
|
|
135
203
|
plan: [
|
package/bin/run-check.mjs
CHANGED
|
@@ -226,6 +226,7 @@ async function runMain() {
|
|
|
226
226
|
rerunCount,
|
|
227
227
|
timing: buildTiming(runStartedAt),
|
|
228
228
|
});
|
|
229
|
+
refreshTaskManifestAfterCheck(taskDir);
|
|
229
230
|
console.log(`Checker cache hit for ${taskId}: ${cacheKeySha}`);
|
|
230
231
|
return;
|
|
231
232
|
}
|
|
@@ -329,12 +330,22 @@ async function runMain() {
|
|
|
329
330
|
rerunCount,
|
|
330
331
|
timing: buildTiming(runStartedAt),
|
|
331
332
|
});
|
|
333
|
+
refreshTaskManifestAfterCheck(taskDir);
|
|
332
334
|
runValidator(taskArg);
|
|
333
335
|
console.log(`Checker run completed for ${taskId}: ${providerOutput.checkResultJson?.verdict}`);
|
|
334
336
|
console.log(`- finalLlmInputMode: ${promptPayload.pack.meta.mode}`);
|
|
335
337
|
console.log(`- finalEstimatedInputTokens: ${promptPayload.pack.meta.estimatedTokens}`);
|
|
336
338
|
}
|
|
337
339
|
|
|
340
|
+
/**
 * Rebuilds the task manifest for `taskDir`, writes it back, and records the
 * refresh in the check timeline.
 *
 * Called from `runMain()` on both the checker cache-hit path and the normal
 * completed-run path, after the run result has been recorded. Per the 0.1.22
 * changelog entry, the intent is that `task-manifest.json` `lastCheckResult`
 * reflects the final current verdict rather than a stale earlier result.
 *
 * Steps, in order:
 *   1. `buildTaskManifest({ taskDir })` produces the manifest object.
 *   2. `writeTaskManifest(taskDir, manifest)` persists it.
 *   3. `appendCheckTimeline(...)` appends a
 *      `task_manifest_refreshed_after_check` event carrying
 *      `manifest.lastCheckResult`.
 *
 * NOTE(review): no error handling is visible here, so an exception from any
 * of the three helpers would propagate to `runMain()` — confirm that is the
 * desired behavior on the cache-hit path.
 *
 * @param {string} taskDir - Task directory passed through to the manifest and
 *   timeline helpers (presumably a filesystem path; verify against callers).
 */
function refreshTaskManifestAfterCheck(taskDir) {
|
|
341
|
+
const manifest = buildTaskManifest({ taskDir });
|
|
342
|
+
writeTaskManifest(taskDir, manifest);
|
|
343
|
+
appendCheckTimeline(taskDir, {
|
|
344
|
+
event: 'task_manifest_refreshed_after_check',
|
|
345
|
+
lastCheckResult: manifest.lastCheckResult,
|
|
346
|
+
});
|
|
347
|
+
}
|
|
348
|
+
|
|
338
349
|
function appendCheckTimeline(taskDir, event) {
|
|
339
350
|
const timelinePath = path.join(taskDir, 'check-timeline.json');
|
|
340
351
|
let existing = [];
|
package/package.json
CHANGED
package/playbooks/source-sync-provider.md
CHANGED
|
@@ -4,6 +4,9 @@ Use this for sync/import/provider pipelines, raw records, retries, pagination, r
|
|
|
4
4
|
|
|
5
5
|
- Plan must include `## Source Sync / Provider Gate`.
|
|
6
6
|
- Name scope/provider window, idempotency or duplicate handling, retry/failure boundaries and coverage/parity evidence.
|
|
7
|
+
- For import/manual-upload work, add `## Import / Ingestion Contract`: representative real fixtures when available (or an explicit no-real-fixtures reason), raw metadata extraction required by downstream work, and exact duplicate-import behavior.
|
|
8
|
+
- Duplicate handling must name the dedupe key and action on repeat import: skip, link/reuse, update/upsert, reject/error, or report-only.
|
|
9
|
+
- If raw source data contains speaker labels or similar downstream metadata, extraction should happen at import time unless explicitly deferred with a downstream reason.
|
|
7
10
|
- Keep provider credentials, queue ownership and affected windows explicit.
|
|
8
11
|
- Execution must include `## Source Sync / Provider Evidence`.
|
|
9
|
-
- Acceptable evidence includes raw-record samples, counts/parity, replay/audit output, retry/idempotency checks and partial-failure recovery proof.
|
|
12
|
+
- Acceptable evidence includes raw-record samples, representative fixture names, counts/parity, replay/audit output, retry/idempotency checks, duplicate-import results and partial-failure recovery proof.
|
package/prompts/checker.md
CHANGED
|
@@ -67,7 +67,7 @@ Project-specific context приходит только через task artifacts
|
|
|
67
67
|
19. Для O2/O3 hot-path work план обязан содержать `## Optimization Strategy`: tier, hot paths, expected size, chosen efficient approach, anti-patterns avoided and bounded optimizer budget/stop rule. Checker должен блокировать weak strategy before Execute, но не требовать endless optimization: O2 = one focused review on touched hot paths; O3 = one focused review plus one representative measurement.
|
|
68
68
|
20. Checker должен оценивать не только наличие UI/browser smoke path, а его результативность: смог бы этот сценарий поймать реальные ошибки пользователя, которые задача может породить?
|
|
69
69
|
21. Если `checker-context-pack.md`, `task-manifest.json` или risk triggers показывают migrations/env vars/cron/workers/billing/auth/external APIs/deployment/runtime behavior, план обязан содержать `## Production Rollout Gate`: impact/blast radius, environment/deploy variables, rollback/disable path and post-deploy evidence.
|
|
70
|
-
22. Если `checker-context-pack.md`, `task-manifest.json` или risk triggers показывают sync/import/provider/raw records/retries/pagination/rate limits/idempotency/replay/backfill/partial failure, план обязан содержать `## Source Sync / Provider Gate`: scope/provider window, idempotency, failure handling/retry boundaries and coverage/parity evidence.
|
|
70
|
+
22. Если `checker-context-pack.md`, `task-manifest.json` или risk triggers показывают sync/import/provider/raw records/retries/pagination/rate limits/idempotency/replay/backfill/partial failure, план обязан содержать `## Source Sync / Provider Gate`: scope/provider window, idempotency with dedupe key and duplicate action, failure handling/retry boundaries and coverage/parity evidence. Для import/manual-upload/transcript/evidence-capture задач план также обязан содержать `## Import / Ingestion Contract`: real representative fixtures when available or explicit no-real-fixtures reason, raw metadata/speaker-label extraction needed downstream, and repeat-import policy. Размытое "duplicates detected or reported" без skip/link/update/reject/report-only semantics недостаточно.
|
|
71
71
|
23. Если `task-manifest.json.loopDetector.requiresConsolidatedRemediation=true`, Checker должен блокировать повторный мелкий loop, пока plan/check-resolution не содержит consolidated remediation секцию, которая объединяет repeated reasons.
|
|
72
72
|
24. Если `llmInputPolicy.mode` не `strict` и отсутствующий full artifact реально нужен для честной оценки, verdict должен быть `context_insufficient`. Не используй `context_insufficient`, если deterministic gate уже явно показывает `return_to_plan`.
|
|
73
73
|
|