@hongmaple0820/scale-engine 0.29.0 → 0.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.en.md +86 -374
- package/README.md +89 -547
- package/dist/api/cli.js +189 -12
- package/dist/api/cli.js.map +1 -1
- package/dist/api/doctor.d.ts +38 -3
- package/dist/api/doctor.js +269 -44
- package/dist/api/doctor.js.map +1 -1
- package/dist/api/mcp.js +2 -2
- package/dist/api/mcp.js.map +1 -1
- package/dist/api/quickstart.d.ts +34 -4
- package/dist/api/quickstart.js +90 -73
- package/dist/api/quickstart.js.map +1 -1
- package/dist/bootstrap/DependencyBootstrap.d.ts +89 -0
- package/dist/bootstrap/DependencyBootstrap.js +441 -0
- package/dist/bootstrap/DependencyBootstrap.js.map +1 -0
- package/dist/capabilities/InstalledSkillsIntegration.js +14 -6
- package/dist/capabilities/InstalledSkillsIntegration.js.map +1 -1
- package/dist/codegraph/CodeIntelligence.d.ts +12 -0
- package/dist/codegraph/CodeIntelligence.js +251 -30
- package/dist/codegraph/CodeIntelligence.js.map +1 -1
- package/dist/config/profiles.d.ts +12 -0
- package/dist/config/profiles.js +39 -4
- package/dist/config/profiles.js.map +1 -1
- package/dist/core/ExternalCommand.d.ts +9 -0
- package/dist/core/ExternalCommand.js +56 -0
- package/dist/core/ExternalCommand.js.map +1 -0
- package/dist/evolution/SessionLearnings.d.ts +70 -0
- package/dist/evolution/SessionLearnings.js +217 -0
- package/dist/evolution/SessionLearnings.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/knowledge/CerebrumManager.d.ts +2 -2
- package/dist/knowledge/CerebrumManager.js.map +1 -1
- package/dist/knowledge/GraphifyKnowledgeBase.d.ts +38 -0
- package/dist/knowledge/GraphifyKnowledgeBase.js +409 -0
- package/dist/knowledge/GraphifyKnowledgeBase.js.map +1 -0
- package/dist/memory/MemoryFabric.js +1 -0
- package/dist/memory/MemoryFabric.js.map +1 -1
- package/dist/memory/MemoryIntelligence.d.ts +42 -0
- package/dist/memory/MemoryIntelligence.js +215 -0
- package/dist/memory/MemoryIntelligence.js.map +1 -0
- package/dist/memory/MemoryProviders.d.ts +22 -0
- package/dist/memory/MemoryProviders.js +138 -5
- package/dist/memory/MemoryProviders.js.map +1 -1
- package/dist/memory/index.d.ts +1 -0
- package/dist/memory/index.js +1 -0
- package/dist/memory/index.js.map +1 -1
- package/dist/runtime/AiOsRuntime.d.ts +101 -1
- package/dist/runtime/AiOsRuntime.js +464 -14
- package/dist/runtime/AiOsRuntime.js.map +1 -1
- package/dist/runtime/ExecutionLedger.d.ts +46 -0
- package/dist/runtime/ExecutionLedger.js +71 -0
- package/dist/runtime/ExecutionLedger.js.map +1 -0
- package/dist/runtime/index.d.ts +1 -0
- package/dist/runtime/index.js +1 -0
- package/dist/runtime/index.js.map +1 -1
- package/dist/skills/RoleSkills.d.ts +20 -0
- package/dist/skills/RoleSkills.js +154 -0
- package/dist/skills/RoleSkills.js.map +1 -0
- package/dist/skills/SkillDiscovery.d.ts +5 -0
- package/dist/skills/SkillDiscovery.js +15 -0
- package/dist/skills/SkillDiscovery.js.map +1 -1
- package/dist/skills/SkillFrontmatter.d.ts +28 -0
- package/dist/skills/SkillFrontmatter.js +152 -0
- package/dist/skills/SkillFrontmatter.js.map +1 -0
- package/dist/skills/SkillRegistry.d.ts +11 -0
- package/dist/skills/SkillRegistry.js +12 -0
- package/dist/skills/SkillRegistry.js.map +1 -1
- package/dist/skills/SkillRepository.js +5 -5
- package/dist/skills/SkillRepository.js.map +1 -1
- package/dist/skills/index.d.ts +1 -0
- package/dist/skills/index.js +1 -0
- package/dist/skills/index.js.map +1 -1
- package/dist/skills/routing/SkillPolicy.js +2 -2
- package/dist/skills/routing/SkillPolicy.js.map +1 -1
- package/dist/testing/DiffTestSelector.d.ts +22 -0
- package/dist/testing/DiffTestSelector.js +114 -0
- package/dist/testing/DiffTestSelector.js.map +1 -0
- package/dist/testing/index.d.ts +1 -0
- package/dist/testing/index.js +3 -0
- package/dist/testing/index.js.map +1 -0
- package/dist/tools/RtkRuntime.d.ts +9 -0
- package/dist/tools/RtkRuntime.js +43 -0
- package/dist/tools/RtkRuntime.js.map +1 -0
- package/dist/tools/ToolCapabilityRegistry.d.ts +1 -0
- package/dist/tools/ToolCapabilityRegistry.js +68 -11
- package/dist/tools/ToolCapabilityRegistry.js.map +1 -1
- package/dist/tools/ToolOrchestrator.js +6 -4
- package/dist/tools/ToolOrchestrator.js.map +1 -1
- package/dist/tools/ToolPolicy.js +16 -1
- package/dist/tools/ToolPolicy.js.map +1 -1
- package/dist/workflow/AdaptiveWorkflowRouter.d.ts +38 -0
- package/dist/workflow/AdaptiveWorkflowRouter.js +214 -0
- package/dist/workflow/AdaptiveWorkflowRouter.js.map +1 -0
- package/dist/workflow/CommitDiscipline.d.ts +68 -0
- package/dist/workflow/CommitDiscipline.js +327 -0
- package/dist/workflow/CommitDiscipline.js.map +1 -0
- package/dist/workflow/CrossRepoOrchestrator.d.ts +92 -0
- package/dist/workflow/CrossRepoOrchestrator.js +400 -0
- package/dist/workflow/CrossRepoOrchestrator.js.map +1 -0
- package/dist/workflow/EvolutionShadowPromoter.d.ts +46 -0
- package/dist/workflow/EvolutionShadowPromoter.js +73 -0
- package/dist/workflow/EvolutionShadowPromoter.js.map +1 -0
- package/dist/workflow/GovernanceRoi.d.ts +52 -0
- package/dist/workflow/GovernanceRoi.js +204 -0
- package/dist/workflow/GovernanceRoi.js.map +1 -0
- package/dist/workflow/GovernanceTemplates.js +2 -2
- package/dist/workflow/McpGovernance.d.ts +63 -0
- package/dist/workflow/McpGovernance.js +198 -0
- package/dist/workflow/McpGovernance.js.map +1 -0
- package/dist/workflow/ReviewAnalyzer.d.ts +15 -0
- package/dist/workflow/ReviewAnalyzer.js +82 -0
- package/dist/workflow/ReviewAnalyzer.js.map +1 -1
- package/dist/workflow/SecurityAudit.d.ts +27 -0
- package/dist/workflow/SecurityAudit.js +294 -0
- package/dist/workflow/SecurityAudit.js.map +1 -0
- package/dist/workflow/SessionCoordinator.d.ts +103 -0
- package/dist/workflow/SessionCoordinator.js +401 -0
- package/dist/workflow/SessionCoordinator.js.map +1 -0
- package/dist/workflow/SessionPreamble.d.ts +19 -0
- package/dist/workflow/SessionPreamble.js +125 -0
- package/dist/workflow/SessionPreamble.js.map +1 -0
- package/dist/workflow/ShipPipeline.d.ts +30 -0
- package/dist/workflow/ShipPipeline.js +366 -0
- package/dist/workflow/ShipPipeline.js.map +1 -0
- package/dist/workflow/TaskDependencyGraph.d.ts +73 -0
- package/dist/workflow/TaskDependencyGraph.js +245 -0
- package/dist/workflow/TaskDependencyGraph.js.map +1 -0
- package/dist/workflow/WorkflowGuidance.d.ts +5 -1
- package/dist/workflow/WorkflowGuidance.js +31 -0
- package/dist/workflow/WorkflowGuidance.js.map +1 -1
- package/dist/workflow/WorkflowTemplates.d.ts +38 -0
- package/dist/workflow/WorkflowTemplates.js +371 -0
- package/dist/workflow/WorkflowTemplates.js.map +1 -0
- package/dist/workflow/WorkspacePolicy.d.ts +46 -0
- package/dist/workflow/WorkspacePolicy.js +141 -0
- package/dist/workflow/WorkspacePolicy.js.map +1 -0
- package/dist/workflow/gates/GateSystem.js +12 -9
- package/dist/workflow/gates/GateSystem.js.map +1 -1
- package/dist/workflow/index.d.ts +12 -0
- package/dist/workflow/index.js +12 -0
- package/dist/workflow/index.js.map +1 -1
- package/docs/AI_ENGINEERING_OS_POSITIONING.md +9 -0
- package/docs/CODE_INTELLIGENCE.md +22 -5
- package/docs/CONTEXT_BUDGET.md +1 -1
- package/docs/EXTERNAL_REFERENCES.md +5 -2
- package/docs/MEMORY_FABRIC.md +7 -3
- package/docs/SKILL-REPOSITORY.md +3 -3
- package/docs/start/quickstart.md +11 -0
- package/docs/workflow/templates/skill-plan.md +1 -1
- package/package.json +3 -2
|
@@ -5,10 +5,15 @@ import { createGovernanceRoiReport, } from '../governance/GovernanceRoi.js';
|
|
|
5
5
|
import { evaluateProgressiveGovernance, } from '../governance/ProgressiveGovernance.js';
|
|
6
6
|
import { MemoryFabric, recallMemoryProviders, } from '../memory/index.js';
|
|
7
7
|
import { createSkillPlan, loadSkillRoutingPolicy, } from '../skills/routing/index.js';
|
|
8
|
+
import { routeAdaptiveWorkflow } from '../workflow/AdaptiveWorkflowRouter.js';
|
|
9
|
+
import { collectGovernanceRoi } from '../workflow/GovernanceRoi.js';
|
|
10
|
+
import { proposeShadowRule, buildEvolutionShadowReport, } from '../workflow/EvolutionShadowPromoter.js';
|
|
8
11
|
import { runSafeCommand } from '../tools/SafeCommandRunner.js';
|
|
9
12
|
import { SCALE_ENGINE_VERSION } from '../version.js';
|
|
10
13
|
import { resolveVerificationTargets, } from '../workflow/VerificationProfile.js';
|
|
11
14
|
import { RuntimeEvidenceLedger } from './RuntimeEvidenceLedger.js';
|
|
15
|
+
import { loadRelevantLearnings } from '../evolution/SessionLearnings.js';
|
|
16
|
+
import { collectSessionPreamble } from '../workflow/SessionPreamble.js';
|
|
12
17
|
export async function createAiOsPlan(input) {
|
|
13
18
|
const projectDir = resolve(input.projectDir ?? process.cwd());
|
|
14
19
|
const scaleDir = input.scaleDir ?? '.scale';
|
|
@@ -17,6 +22,8 @@ export async function createAiOsPlan(input) {
|
|
|
17
22
|
const services = input.services ?? [];
|
|
18
23
|
const taskId = input.taskId;
|
|
19
24
|
const budget = input.budget ?? 8_000;
|
|
25
|
+
const preamble = collectSessionPreamble({ projectDir, scaleDir });
|
|
26
|
+
const sessionLearnings = loadRelevantLearnings({ projectDir, scaleDir, task: input.task, limit: 5 });
|
|
20
27
|
const governance = evaluateProgressiveGovernance({
|
|
21
28
|
task: input.task,
|
|
22
29
|
changedFiles: files,
|
|
@@ -62,7 +69,15 @@ export async function createAiOsPlan(input) {
|
|
|
62
69
|
services,
|
|
63
70
|
policy: skillPolicy,
|
|
64
71
|
});
|
|
65
|
-
const
|
|
72
|
+
const evaluator = createEvaluatorIntelligence({
|
|
73
|
+
task: input.task,
|
|
74
|
+
files,
|
|
75
|
+
governance,
|
|
76
|
+
skillPlan,
|
|
77
|
+
});
|
|
78
|
+
const toolStrategy = createToolStrategyPlan(skillPlan);
|
|
79
|
+
const adaptiveWorkflow = createAdaptiveWorkflow(governance, skillPlan, evaluator, toolStrategy);
|
|
80
|
+
const evolutionShadow = createEvolutionShadowProposals(governance, evaluator);
|
|
66
81
|
const roi = createGovernanceRoiReport({
|
|
67
82
|
taskId,
|
|
68
83
|
contextBudget,
|
|
@@ -81,8 +96,12 @@ export async function createAiOsPlan(input) {
|
|
|
81
96
|
files,
|
|
82
97
|
services,
|
|
83
98
|
},
|
|
99
|
+
preamble,
|
|
84
100
|
governance,
|
|
85
101
|
adaptiveWorkflow,
|
|
102
|
+
evaluator,
|
|
103
|
+
toolStrategy,
|
|
104
|
+
evolutionShadow,
|
|
86
105
|
context,
|
|
87
106
|
memory: {
|
|
88
107
|
providerOrder: memoryRecall.providerOrder,
|
|
@@ -93,8 +112,9 @@ export async function createAiOsPlan(input) {
|
|
|
93
112
|
contextPack: memoryPack,
|
|
94
113
|
},
|
|
95
114
|
skillPlan,
|
|
115
|
+
sessionLearnings,
|
|
96
116
|
roi,
|
|
97
|
-
recommendations: recommendations({ governance, context, memoryRecall, skillPlan }),
|
|
117
|
+
recommendations: recommendations({ governance, context, memoryRecall, skillPlan, evaluator, toolStrategy }),
|
|
98
118
|
};
|
|
99
119
|
}
|
|
100
120
|
export async function createAiOsRun(input) {
|
|
@@ -135,6 +155,7 @@ export async function createAiOsRun(input) {
|
|
|
135
155
|
artifacts: {
|
|
136
156
|
runReport: runReportPath,
|
|
137
157
|
},
|
|
158
|
+
governanceRoi: collectGovernanceRoi({ projectDir, scaleDir }),
|
|
138
159
|
nextActions: buildRunNextActions(steps, mode),
|
|
139
160
|
};
|
|
140
161
|
writeAiOsRunReport(runReportPath, report);
|
|
@@ -194,6 +215,7 @@ export async function createAiOsBenchmark(input = {}) {
|
|
|
194
215
|
task: scenario.task,
|
|
195
216
|
level: scenario.level,
|
|
196
217
|
governanceMode: plan.governance.effectiveMode,
|
|
218
|
+
workflowProfile: plan.adaptiveWorkflow.profile,
|
|
197
219
|
metrics: {
|
|
198
220
|
estimatedTokens: plan.context.totalEstimatedTokens,
|
|
199
221
|
budget: plan.context.task.budget,
|
|
@@ -202,6 +224,10 @@ export async function createAiOsBenchmark(input = {}) {
|
|
|
202
224
|
selectedProviders: plan.memory.selectedProviders,
|
|
203
225
|
skillSteps: plan.skillPlan.executionPlan.steps.length,
|
|
204
226
|
requiredSkillSteps: plan.skillPlan.executionPlan.steps.filter(step => step.required).length,
|
|
227
|
+
evaluatorGates: plan.evaluator.gates.length,
|
|
228
|
+
toolStrategySteps: plan.toolStrategy.summary.totalSteps,
|
|
229
|
+
toolStrategyCostUnits: plan.toolStrategy.summary.estimatedCostUnits,
|
|
230
|
+
evolutionProposals: plan.evolutionShadow.summary.totalProposals,
|
|
205
231
|
gates: plan.adaptiveWorkflow.gates.length,
|
|
206
232
|
roiModules: plan.roi.modules.length,
|
|
207
233
|
},
|
|
@@ -556,6 +582,9 @@ function buildAiOsIntelligenceReport(input) {
|
|
|
556
582
|
const totalMemoryItems = runMemoryItems.length + benchmarkMemoryItems;
|
|
557
583
|
const memoryQuality = summarizeMemoryQuality(runMemoryItems);
|
|
558
584
|
const contextQuality = summarizeContextQuality(input.runReports);
|
|
585
|
+
const evaluatorQuality = summarizeEvaluatorQuality(input.runReports, input.benchmark);
|
|
586
|
+
const toolStrategyQuality = summarizeToolStrategyQuality(input.runReports, input.benchmark);
|
|
587
|
+
const evolutionQuality = summarizeEvolutionQuality(input.runReports, input.benchmark);
|
|
559
588
|
const contextSignalStatus = contextQuality.compressionRisk === 'high'
|
|
560
589
|
? 'warning'
|
|
561
590
|
: estimatedTokenSavings > 0 ? 'ready' : input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked';
|
|
@@ -571,10 +600,24 @@ function buildAiOsIntelligenceReport(input) {
|
|
|
571
600
|
...input.runReports.flatMap(report => report.plan.skillPlan.executionPlan.steps.map(step => `${report.artifacts.runReport}:${step.id}`)),
|
|
572
601
|
...(input.benchmark ? [`${input.benchmarkReport}:steps=${input.benchmark.summary.totalSkillSteps}`] : []),
|
|
573
602
|
];
|
|
603
|
+
const evaluatorEvidence = [
|
|
604
|
+
...input.runReports.flatMap(report => resolveRunEvaluator(report).gates.map(gate => `${report.artifacts.runReport}:${gate.id}`)),
|
|
605
|
+
...(input.benchmark ? [`${input.benchmarkReport}:evaluator-gates=${input.benchmark.summary.totalEvaluatorGates}`] : []),
|
|
606
|
+
];
|
|
607
|
+
const toolStrategyEvidence = [
|
|
608
|
+
...input.runReports.flatMap(report => resolveRunToolStrategy(report).nodes.map(node => `${report.artifacts.runReport}:${node.id}`)),
|
|
609
|
+
...(input.benchmark ? [`${input.benchmarkReport}:tool-strategy=${input.benchmark.summary.totalToolStrategySteps}`] : []),
|
|
610
|
+
];
|
|
611
|
+
const evolutionEvidence = [
|
|
612
|
+
...input.runReports.flatMap(report => (report.plan.evolutionShadow?.proposals ?? []).map(p => `${report.artifacts.runReport}:${p.id}:${p.maturity.stage}`)),
|
|
613
|
+
...(input.benchmark ? [`${input.benchmarkReport}:evolution-proposals=${input.benchmark.summary.totalEvolutionProposals}`] : []),
|
|
614
|
+
];
|
|
574
615
|
const benchmarkEvidence = input.benchmark ? [
|
|
575
616
|
`${input.benchmarkReport}:scenarios=${input.benchmark.summary.scenarios}`,
|
|
576
617
|
`${input.benchmarkReport}:memory=${input.benchmark.summary.totalMemoryItems}`,
|
|
577
618
|
`${input.benchmarkReport}:skills=${input.benchmark.summary.totalSkillSteps}`,
|
|
619
|
+
`${input.benchmarkReport}:evaluator-gates=${input.benchmark.summary.totalEvaluatorGates}`,
|
|
620
|
+
`${input.benchmarkReport}:tool-strategy=${input.benchmark.summary.totalToolStrategySteps}`,
|
|
578
621
|
] : [input.benchmarkReport];
|
|
579
622
|
const signals = [
|
|
580
623
|
{
|
|
@@ -614,6 +657,57 @@ function buildAiOsIntelligenceReport(input) {
|
|
|
614
657
|
? ['Use skill routing evidence in reviews to check why a skill, MCP, or CLI path was selected.']
|
|
615
658
|
: ['Create a task with files or services that should trigger required skill routing.'],
|
|
616
659
|
},
|
|
660
|
+
{
|
|
661
|
+
id: 'evaluator-intelligence',
|
|
662
|
+
status: evaluatorQuality.requiredGates > 0
|
|
663
|
+
? evaluatorQuality.averageUncertainty >= 0.7 ? 'warning' : 'ready'
|
|
664
|
+
: input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked',
|
|
665
|
+
summary: evaluatorQuality.requiredGates > 0
|
|
666
|
+
? `${evaluatorQuality.requiredGates} evaluator gate(s) required; average uncertainty ${evaluatorQuality.averageUncertainty}.`
|
|
667
|
+
: 'No evaluator gate evidence found for architecture, root-cause, security, or release reasoning.',
|
|
668
|
+
evidence: evaluatorEvidence,
|
|
669
|
+
recommendations: evaluatorQuality.requiredGates > 0
|
|
670
|
+
? ['Use evaluator gates to force critique, uncertainty logging, and review evidence before promoting reasoning-heavy work.']
|
|
671
|
+
: ['Run a reasoning-heavy AI OS task so evaluator intelligence can prove critique coverage.'],
|
|
672
|
+
},
|
|
673
|
+
{
|
|
674
|
+
id: 'tool-strategy',
|
|
675
|
+
status: toolStrategyQuality.totalSteps > 0
|
|
676
|
+
? toolStrategyQuality.fallbackCoverage < 1 ? 'warning' : 'ready'
|
|
677
|
+
: input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked',
|
|
678
|
+
summary: toolStrategyQuality.totalSteps > 0
|
|
679
|
+
? `${toolStrategyQuality.totalSteps} tool strategy step(s); ${toolStrategyQuality.highRiskSteps} high-risk; fallback coverage ${toolStrategyQuality.fallbackCoverage}.`
|
|
680
|
+
: 'No tool strategy graph found for skills, artifacts, CLI, MCP, or verification steps.',
|
|
681
|
+
evidence: toolStrategyEvidence,
|
|
682
|
+
recommendations: toolStrategyQuality.totalSteps > 0
|
|
683
|
+
? ['Use tool strategy evidence to review cost, retry, fallback, and side-effect risk before execution.']
|
|
684
|
+
: ['Create a task that triggers skill routing so the AI OS can build a tool strategy graph.'],
|
|
685
|
+
},
|
|
686
|
+
{
|
|
687
|
+
id: 'adaptive-workflow',
|
|
688
|
+
status: input.runReports.some(r => r.plan.adaptiveWorkflow.profile) ? 'ready' : input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked',
|
|
689
|
+
summary: summarizeAdaptiveWorkflowSignal(input.runReports, input.benchmark),
|
|
690
|
+
evidence: [
|
|
691
|
+
...input.runReports.map(r => `${r.artifacts.runReport}:profile=${r.plan.adaptiveWorkflow.profile}`),
|
|
692
|
+
...(input.benchmark ? [`${input.benchmarkReport}:profiles=${input.benchmark.summary.workflowProfiles.join(',')}`] : []),
|
|
693
|
+
],
|
|
694
|
+
recommendations: input.runReports.some(r => r.plan.adaptiveWorkflow.profile)
|
|
695
|
+
? ['Use workflow profile distribution to verify that risk signals correctly escalate governance.']
|
|
696
|
+
: ['Run an AI OS task with mixed risk levels to prove adaptive workflow routing.'],
|
|
697
|
+
},
|
|
698
|
+
{
|
|
699
|
+
id: 'evolution-shadow',
|
|
700
|
+
status: evolutionQuality.proposals > 0
|
|
701
|
+
? evolutionQuality.pendingValidation > 0 ? 'warning' : 'ready'
|
|
702
|
+
: input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked',
|
|
703
|
+
summary: evolutionQuality.proposals > 0
|
|
704
|
+
? `${evolutionQuality.proposals} shadow proposal(s); ${evolutionQuality.shadowRules} shadow, ${evolutionQuality.candidateHooks} candidate-hook, ${evolutionQuality.approvedBlocking} approved-blocking.`
|
|
705
|
+
: 'No evolution shadow proposals found. Run tasks with high-risk governance signals or evaluator gates to generate shadow rule candidates.',
|
|
706
|
+
evidence: evolutionEvidence,
|
|
707
|
+
recommendations: evolutionQuality.proposals > 0
|
|
708
|
+
? ['Review shadow rule proposals and validate before promotion to candidate-hook or approved-blocking.']
|
|
709
|
+
: ['Run a high-risk AI OS task so evolution shadow promotion can propose rules from governance and evaluator signals.'],
|
|
710
|
+
},
|
|
617
711
|
{
|
|
618
712
|
id: 'benchmark-intelligence',
|
|
619
713
|
status: input.benchmark && input.benchmarkStatus === 'fresh'
|
|
@@ -636,6 +730,9 @@ function buildAiOsIntelligenceReport(input) {
|
|
|
636
730
|
selectedProviders,
|
|
637
731
|
memoryQuality,
|
|
638
732
|
contextQuality,
|
|
733
|
+
evaluatorQuality,
|
|
734
|
+
toolStrategyQuality,
|
|
735
|
+
evolutionQuality,
|
|
639
736
|
estimatedTokenSavings,
|
|
640
737
|
skillSteps,
|
|
641
738
|
};
|
|
@@ -668,6 +765,91 @@ function summarizeContextQuality(runReports) {
|
|
|
668
765
|
compressionRisk,
|
|
669
766
|
};
|
|
670
767
|
}
|
|
768
|
+
/**
 * Aggregate evaluator-gate metrics across run reports and an optional benchmark.
 *
 * @param {Array<object>} runReports - Run reports; each is resolved to an
 *   evaluator through `resolveRunEvaluator`.
 * @param {object} [benchmark] - Optional benchmark report; only
 *   `summary.totalEvaluatorGates` is read.
 * @returns {{requiredGates: number, highRiskPlans: number, averageUncertainty: number, gateIds: string[]}}
 *   `requiredGates` is the number of required run gates plus the benchmark
 *   gate total; `gateIds` is the sorted set of gate ids seen in runs.
 */
function summarizeEvaluatorQuality(runReports, benchmark) {
    const evaluators = runReports.map(resolveRunEvaluator);
    const benchmarkGates = benchmark?.summary.totalEvaluatorGates ?? 0;
    const seenGateIds = new Set();
    const scores = [];
    let requiredRunGates = 0;
    let highRiskPlans = 0;
    for (const evaluator of evaluators) {
        for (const gate of evaluator.gates) {
            seenGateIds.add(gate.id);
            if (gate.required) {
                requiredRunGates += 1;
            }
        }
        scores.push(evaluator.uncertainty.score);
        if (evaluator.riskLevel === 'high') {
            highRiskPlans += 1;
        }
    }
    // The benchmark exposes only a gate count, not ids; any non-zero count is
    // recorded under the 'uncertainty-decision-log' id.
    if (benchmarkGates > 0) {
        seenGateIds.add('uncertainty-decision-log');
    }
    return {
        requiredGates: requiredRunGates + benchmarkGates,
        highRiskPlans,
        averageUncertainty: roundMetric(average(scores)),
        gateIds: Array.from(seenGateIds).sort(),
    };
}
|
|
783
|
+
/**
 * Return the evaluator stored on a run report's plan, or derive one when the
 * plan has none (`null`/`undefined`).
 *
 * @param {object} report - Run report whose `plan` is inspected.
 * @returns {object} `plan.evaluator` when present; otherwise the result of
 *   `createEvaluatorIntelligence` built from the plan's task, files,
 *   governance and skill plan.
 */
function resolveRunEvaluator(report) {
    const { plan } = report;
    if (plan.evaluator != null) {
        return plan.evaluator;
    }
    return createEvaluatorIntelligence({
        task: plan.task.task,
        files: plan.task.files,
        governance: plan.governance,
        skillPlan: plan.skillPlan,
    });
}
|
|
792
|
+
/**
 * Aggregate tool-strategy metrics across run reports and an optional benchmark.
 *
 * Run totals come from each resolved strategy's `summary`. The benchmark
 * contributes only a step count and a cost total; its steps are added to both
 * the total and the fallback-covered count.
 *
 * @param {Array<object>} runReports - Run reports; each is resolved through
 *   `resolveRunToolStrategy`.
 * @param {object} [benchmark] - Optional benchmark report; reads
 *   `summary.totalToolStrategySteps` and `summary.totalToolStrategyCostUnits`.
 * @returns {{totalSteps: number, requiredSteps: number, highRiskSteps: number, estimatedCostUnits: number, fallbackCoverage: number}}
 *   `fallbackCoverage` is 0 when there are no steps at all.
 */
function summarizeToolStrategyQuality(runReports, benchmark) {
    const strategies = runReports.map(resolveRunToolStrategy);
    let runSteps = 0;
    let requiredSteps = 0;
    let highRiskSteps = 0;
    let runCostUnits = 0;
    let runFallbackCovered = 0;
    for (const { summary } of strategies) {
        runSteps += summary.totalSteps;
        requiredSteps += summary.requiredSteps;
        highRiskSteps += summary.highRiskSteps;
        runCostUnits += summary.estimatedCostUnits;
        runFallbackCovered += summary.fallbackCoveredSteps;
    }
    const benchmarkSteps = benchmark?.summary.totalToolStrategySteps ?? 0;
    const benchmarkCost = benchmark?.summary.totalToolStrategyCostUnits ?? 0;
    const totalSteps = runSteps + benchmarkSteps;
    const coveredSteps = runFallbackCovered + benchmarkSteps;
    let fallbackCoverage = 0;
    if (totalSteps > 0) {
        fallbackCoverage = roundMetric(coveredSteps / totalSteps);
    }
    return {
        totalSteps,
        requiredSteps,
        highRiskSteps,
        estimatedCostUnits: runCostUnits + benchmarkCost,
        fallbackCoverage,
    };
}
|
|
819
|
+
/**
 * Return the tool strategy stored on a run report's plan, or build one from
 * the plan's skill plan when none is stored (`null`/`undefined`).
 *
 * @param {object} report - Run report whose `plan` is inspected.
 * @returns {object} `plan.toolStrategy` when present; otherwise the result of
 *   `createToolStrategyPlan(plan.skillPlan)`.
 */
function resolveRunToolStrategy(report) {
    const { plan } = report;
    if (plan.toolStrategy != null) {
        return plan.toolStrategy;
    }
    return createToolStrategyPlan(plan.skillPlan);
}
|
|
823
|
+
/**
 * Count evolution-shadow proposals by maturity stage.
 *
 * Stage counts and `pendingValidation` come from run reports only. The
 * benchmark contributes just its proposal total to `proposals`.
 *
 * @param {Array<object>} runReports - Run reports; reads
 *   `plan.evolutionShadow.proposals`, treating a missing value as empty.
 * @param {object} [benchmark] - Optional benchmark report; reads
 *   `summary.totalEvolutionProposals`.
 * @returns {{proposals: number, shadowRules: number, candidateHooks: number, approvedBlocking: number, pendingValidation: number}}
 *   `pendingValidation` counts 'shadow' proposals with fewer than 10 shadow hits.
 */
function summarizeEvolutionQuality(runReports, benchmark) {
    let runProposalCount = 0;
    let shadowRules = 0;
    let candidateHooks = 0;
    let approvedBlocking = 0;
    let pendingValidation = 0;
    for (const report of runReports) {
        const proposals = report.plan.evolutionShadow?.proposals ?? [];
        for (const proposal of proposals) {
            runProposalCount += 1;
            const { stage } = proposal.maturity;
            if (stage === 'shadow') {
                shadowRules += 1;
                if (proposal.maturity.shadowHits < 10) {
                    pendingValidation += 1;
                }
            }
            else if (stage === 'candidate-hook') {
                candidateHooks += 1;
            }
            else if (stage === 'approved-blocking') {
                approvedBlocking += 1;
            }
        }
    }
    const benchmarkProposals = benchmark?.summary.totalEvolutionProposals ?? 0;
    return {
        proposals: runProposalCount + benchmarkProposals,
        shadowRules,
        candidateHooks,
        approvedBlocking,
        pendingValidation,
    };
}
|
|
836
|
+
/**
 * Return the evolution-shadow report stored on a run report's plan, or build
 * one from an empty proposal list when none is stored (`null`/`undefined`).
 *
 * @param {object} report - Run report whose `plan` is inspected.
 * @returns {object} `plan.evolutionShadow` when present; otherwise the result
 *   of `buildEvolutionShadowReport([])`.
 */
function resolveRunEvolutionShadow(report) {
    const stored = report.plan.evolutionShadow;
    if (stored != null) {
        return stored;
    }
    return buildEvolutionShadowReport([]);
}
|
|
840
|
+
/**
 * Build the one-line summary for the adaptive-workflow signal.
 *
 * Run reports whose `plan.adaptiveWorkflow` has no `profile` or no
 * `escalationReasons` are tolerated: a missing profile is left out of the
 * distribution instead of being tallied as the text "undefined", and missing
 * escalation reasons count as none instead of throwing.
 *
 * @param {Array<object>} runReports - Run reports; reads
 *   `plan.adaptiveWorkflow.profile` and `plan.adaptiveWorkflow.escalationReasons`.
 * @param {object} [benchmark] - Optional benchmark report; reads
 *   `summary.workflowProfiles`, treating a missing value as empty.
 * @returns {string} The profile distribution in first-seen order plus the
 *   number of runs with escalation reasons, or a fixed "no evidence" message
 *   when no profile is available from either source.
 */
function summarizeAdaptiveWorkflowSignal(runReports, benchmark) {
    const runProfiles = runReports
        .map(report => report.plan.adaptiveWorkflow.profile)
        .filter(profile => profile != null);
    const benchmarkProfiles = benchmark?.summary.workflowProfiles ?? [];
    const allProfiles = [...runProfiles, ...benchmarkProfiles];
    if (allProfiles.length === 0) {
        return 'No adaptive workflow profile evidence found.';
    }
    // A Map keeps first-seen order, so the rendered distribution is stable.
    const distribution = new Map();
    for (const profile of allProfiles) {
        distribution.set(profile, (distribution.get(profile) ?? 0) + 1);
    }
    const parts = [...distribution.entries()].map(([profile, count]) => `${profile}=${count}`).join(', ');
    const escalated = runReports
        .filter(report => (report.plan.adaptiveWorkflow.escalationReasons ?? []).length > 0)
        .length;
    return `${allProfiles.length} run(s) with profile distribution: ${parts}. ${escalated} run(s) had escalation reasons.`;
}
|
|
671
853
|
function summarizeMemoryQuality(items) {
|
|
672
854
|
if (items.length === 0) {
|
|
673
855
|
return {
|
|
@@ -756,16 +938,20 @@ function buildRunSteps(plan) {
|
|
|
756
938
|
evidence: ['memory.providerOrder', 'memory.selectedProviders', 'memory.items'],
|
|
757
939
|
dependsOn: ['runtime-plan'],
|
|
758
940
|
});
|
|
941
|
+
const profile = plan.adaptiveWorkflow.profile;
|
|
759
942
|
for (const gate of plan.adaptiveWorkflow.gates) {
|
|
760
943
|
if (steps.has(gate))
|
|
761
944
|
continue;
|
|
945
|
+
const gateRequired = profile !== 'light';
|
|
762
946
|
upsert({
|
|
763
947
|
id: gate,
|
|
764
948
|
kind: gate === 'runtime-evidence' ? 'evidence' : 'gate',
|
|
765
949
|
title: `Satisfy ${gate} gate`,
|
|
766
950
|
status: 'planned',
|
|
767
|
-
required:
|
|
768
|
-
summary:
|
|
951
|
+
required: gateRequired,
|
|
952
|
+
summary: gateRequired
|
|
953
|
+
? `Required by ${plan.adaptiveWorkflow.strategy} in ${profile} profile (${plan.adaptiveWorkflow.mode} mode).`
|
|
954
|
+
: `Advisory in ${profile} profile; not blocking completion.`,
|
|
769
955
|
evidence: [`gate.${gate}`],
|
|
770
956
|
dependsOn: ['runtime-plan'],
|
|
771
957
|
});
|
|
@@ -1317,7 +1503,12 @@ function summarizeBenchmark(results) {
|
|
|
1317
1503
|
totalMemoryItems: results.reduce((sum, result) => sum + result.metrics.memoryItems, 0),
|
|
1318
1504
|
totalSkillSteps: results.reduce((sum, result) => sum + result.metrics.skillSteps, 0),
|
|
1319
1505
|
requiredSkillSteps: results.reduce((sum, result) => sum + result.metrics.requiredSkillSteps, 0),
|
|
1506
|
+
totalEvaluatorGates: results.reduce((sum, result) => sum + result.metrics.evaluatorGates, 0),
|
|
1507
|
+
totalToolStrategySteps: results.reduce((sum, result) => sum + result.metrics.toolStrategySteps, 0),
|
|
1508
|
+
totalToolStrategyCostUnits: results.reduce((sum, result) => sum + result.metrics.toolStrategyCostUnits, 0),
|
|
1509
|
+
totalEvolutionProposals: results.reduce((sum, result) => sum + result.metrics.evolutionProposals, 0),
|
|
1320
1510
|
governanceModes: [...new Set(results.map(result => result.governanceMode))],
|
|
1511
|
+
workflowProfiles: [...new Set(results.map(result => result.workflowProfile))],
|
|
1321
1512
|
averageTokenUtilization: totalBudget > 0 ? Number((totalEstimatedTokens / totalBudget).toFixed(4)) : 0,
|
|
1322
1513
|
};
|
|
1323
1514
|
}
|
|
@@ -1325,6 +1516,10 @@ function benchmarkRecommendations(summary) {
|
|
|
1325
1516
|
const recommendations = ['Use benchmark deltas in release notes only after comparing the same scenario set across versions.'];
|
|
1326
1517
|
if (summary.totalSkillSteps === 0)
|
|
1327
1518
|
recommendations.push('Skill routing did not produce steps; inspect skill policy detection.');
|
|
1519
|
+
if (summary.totalEvaluatorGates === 0)
|
|
1520
|
+
recommendations.push('Evaluator intelligence did not require any critique gate; add reasoning-heavy benchmark scenarios before claiming evaluator coverage.');
|
|
1521
|
+
if (summary.totalToolStrategySteps === 0)
|
|
1522
|
+
recommendations.push('Tool strategy did not build a cost/retry/fallback graph; inspect skill execution plan coverage.');
|
|
1328
1523
|
if (summary.averageTokenUtilization > 0.9)
|
|
1329
1524
|
recommendations.push('Context utilization is high; lower budgets or improve relevance filtering before scaling.');
|
|
1330
1525
|
if (!summary.governanceModes.includes('critical') && !summary.governanceModes.includes('expanded')) {
|
|
@@ -1332,29 +1527,278 @@ function benchmarkRecommendations(summary) {
|
|
|
1332
1527
|
}
|
|
1333
1528
|
return recommendations;
|
|
1334
1529
|
}
|
|
1335
|
-
/**
 * Assemble the adaptive workflow descriptor for a plan.
 *
 * The profile, escalation reasons, gate overrides, behavioral constraints and
 * exit criteria come from `routeAdaptiveWorkflow`. Gates are collected in a
 * fixed order and de-duplicated.
 *
 * @param {object} governance - Governance evaluation; reads `effectiveMode`
 *   and `requiredBehaviors`.
 * @param {object} skillPlan - Skill plan; reads `required` and
 *   `executionPlan.steps`.
 * @param {object} evaluator - Evaluator intelligence; every `gates[].id`
 *   becomes a gate.
 * @param {object} toolStrategy - Tool strategy plan, forwarded to the router.
 * @returns {{strategy: string, profile: *, escalationReasons: *, mode: *, requiredBehaviors: Array, gates: Array, exitCriteria: *}}
 */
function createAdaptiveWorkflow(governance, skillPlan, evaluator, toolStrategy) {
    const routed = routeAdaptiveWorkflow({ governance, evaluator, toolStrategy });
    const { profile } = routed;
    const isCritical = profile === 'critical';
    const needsSkillEvidence = skillPlan.required || skillPlan.executionPlan.steps.length > 0;

    // Insertion order into the Set determines the order of the returned array.
    const gateSet = new Set(['context-compiler', 'memory-provider-recall']);
    if (needsSkillEvidence) {
        gateSet.add('skill-evidence');
    }
    gateSet.add('runtime-evidence');
    if (profile === 'strict' || isCritical) {
        gateSet.add('impact-analysis');
    }
    if (isCritical) {
        gateSet.add('security-review');
    }
    for (const { id } of evaluator.gates) {
        gateSet.add(id);
    }
    for (const { gateId } of routed.gateOverrides) {
        gateSet.add(gateId);
    }

    // Only constraints flagged as required extend the governance behaviors.
    const behaviorSet = new Set(governance.requiredBehaviors);
    for (const constraint of routed.behavioralConstraints) {
        if (!constraint.required) {
            continue;
        }
        behaviorSet.add(constraint.description);
    }

    return {
        strategy: 'risk-adaptive-runtime-v1',
        profile,
        escalationReasons: routed.escalationReasons,
        mode: governance.effectiveMode,
        requiredBehaviors: [...behaviorSet],
        gates: [...gateSet],
        exitCriteria: routed.exitCriteria,
    };
}
|
|
1561
|
+
/**
 * Builds the evaluator-intelligence report for a task: the critique gates that
 * apply, an uncertainty score with its drivers, a coarse risk level, and
 * follow-up recommendations.
 *
 * Gates are chosen by keyword-matching a lowercased haystack made of the task
 * text, the file paths, and the governance signal ids, plus a direct check of
 * the signal ids for critical risk.
 *
 * @param {object} input - Reads `task`, `files`, and `governance` (`signals`,
 *   `effectiveMode`); the whole object is also passed to the uncertainty helpers.
 * @returns {object} Report with `strategy`, `required`, `riskLevel`,
 *   `uncertainty` (`score`, `threshold`, `drivers`), `gates`, and `recommendations`.
 */
function createEvaluatorIntelligence(input) {
    // One definition for the value that both triggers the decision-log gate and is reported back.
    const uncertaintyThreshold = 0.45;
    const haystack = `${input.task} ${input.files.join(' ')} ${input.governance.signals.map(signal => signal.id).join(' ')}`.toLowerCase();
    const gates = [];
    // Gate ids are unique within a report; a second add with the same id is ignored.
    const addGate = (gate) => {
        if (gates.some(existing => existing.id === gate.id))
            return;
        gates.push(gate);
    };
    if (/architecture|architectural|design|strategy|boundary|refactor|runtime|platform|framework|架构|方案|设计|边界|平台/.test(haystack)) {
        addGate({
            id: 'architecture-critique',
            required: input.governance.effectiveMode !== 'minimal',
            reason: 'Architecture, runtime, platform, or design decisions need an explicit critique before implementation claims.',
            evidence: matchingEvidence(input.files, /architecture|runtime|framework|docs|readme|src/i),
        });
    }
    if (/root cause|diagnose|debug|failure|incident|postmortem|regression|blocked|根因|排查|故障|事故|回归/.test(haystack)) {
        addGate({
            id: 'root-cause-review',
            required: true,
            reason: 'Failure diagnosis or root-cause work needs an alternate hypothesis check before closing.',
            evidence: matchingEvidence(input.files, /test|runtime|debug|log|src|docs/i),
        });
    }
    if (input.governance.signals.some(signal => signal.id === 'critical-risk-domain' || signal.id === 'critical-file-path')) {
        addGate({
            id: 'security-threat-model',
            required: true,
            reason: 'Critical auth, data, production, or destructive risk requires threat-model review evidence.',
            evidence: input.governance.signals.flatMap(signal => signal.evidence).slice(0, 12),
        });
    }
    // "ci" is matched only as a standalone token (e.g. "ci", "ci.yml", "ci/cd", "ci_build").
    // As a bare substring it also hit ordinary words such as "specific" or "decision",
    // which forced a required release gate onto unrelated tasks.
    if (/release|publish|deploy|migration|rollback|version|changelog|npm|(?:^|[^a-z0-9])ci(?![a-z0-9])|发版|发布|部署|迁移|回滚/.test(haystack)) {
        addGate({
            id: 'release-readiness-review',
            required: true,
            reason: 'Release, deployment, migration, or rollback work needs readiness and rollback evidence.',
            evidence: matchingEvidence(input.files, /package|changelog|release|deploy|migration|workflow|github/i),
        });
    }
    // Drivers and score are computed from the gates selected so far, before the
    // decision-log gate (which depends on the score) is appended.
    const drivers = evaluatorUncertaintyDrivers(input, gates);
    const uncertaintyScore = evaluatorUncertaintyScore(input, gates, drivers);
    if (gates.length > 0 || uncertaintyScore >= uncertaintyThreshold) {
        addGate({
            id: 'uncertainty-decision-log',
            required: uncertaintyScore >= uncertaintyThreshold || input.governance.effectiveMode === 'critical',
            reason: 'The agent must record uncertainty, rejected alternatives, and evidence gaps before completion.',
            evidence: drivers,
        });
    }
    const hasRequiredGate = gates.some(gate => gate.required);
    const riskLevel = uncertaintyScore >= 0.7
        ? 'high'
        : uncertaintyScore >= 0.4 || hasRequiredGate ? 'medium' : 'low';
    return {
        strategy: 'evaluator-intelligence-v1',
        required: hasRequiredGate,
        riskLevel,
        uncertainty: {
            score: uncertaintyScore,
            threshold: uncertaintyThreshold,
            drivers,
        },
        gates,
        recommendations: evaluatorRecommendations(gates, riskLevel),
    };
}
|
|
1627
|
+
/**
 * Turns the steps of a skill execution plan into a tool-strategy graph.
 *
 * Each step becomes a node carrying cost, retry, fallback, and evidence data;
 * edges come from `buildToolStrategyEdges`, and a summary of counts and total
 * cost feeds `toolStrategyRecommendations`.
 *
 * @param {object} skillPlan - Reads `executionPlan.steps`; each step supplies
 *   `id`, `kind`, `required`, `fallback`, and `evidenceRequired`.
 * @returns {object} Plan with `strategy`, `nodes`, `edges`, `summary`, and `recommendations`.
 */
function createToolStrategyPlan(skillPlan) {
    const toNode = ({ id, kind, required, fallback, evidenceRequired }) => {
        const risks = toolStepRisks(id, kind);
        const units = toolStepCostUnits(id, kind, required, risks);
        const retry = toolStepRetry(id, kind, risks);
        return {
            id: `${kind}:${id}`,
            kind,
            required,
            cost: {
                units,
                timeRisk: risks.timeRisk,
                sideEffectRisk: risks.sideEffectRisk,
            },
            retry,
            fallback,
            evidence: [evidenceRequired],
        };
    };
    const nodes = skillPlan.executionPlan.steps.map(step => toNode(step));
    const edges = buildToolStrategyEdges(nodes);
    // Single pass over the nodes to collect every summary counter.
    let requiredSteps = 0;
    let highRiskSteps = 0;
    let estimatedCostUnits = 0;
    let fallbackCoveredSteps = 0;
    for (const node of nodes) {
        if (node.required) {
            requiredSteps += 1;
        }
        if (node.cost.timeRisk === 'high' || node.cost.sideEffectRisk === 'high') {
            highRiskSteps += 1;
        }
        estimatedCostUnits = estimatedCostUnits + node.cost.units;
        // A fallback only counts as coverage when it has non-whitespace text.
        if (node.fallback.trim().length > 0) {
            fallbackCoveredSteps += 1;
        }
    }
    const summary = {
        totalSteps: nodes.length,
        requiredSteps,
        highRiskSteps,
        estimatedCostUnits,
        fallbackCoveredSteps,
    };
    return {
        strategy: 'tool-strategy-v1',
        nodes,
        edges,
        summary,
        recommendations: toolStrategyRecommendations(summary),
    };
}
|
|
1660
|
+
/**
 * Collects shadow-rule proposals and wraps them in an evolution shadow report.
 *
 * Proposals come from two places, in this order: governance signals whose mode
 * is 'expanded' or 'critical', then required evaluator gates with id
 * 'security-threat-model' or 'root-cause-review'.
 *
 * @param {object} governance - Reads `signals` (`id`, `mode`, `reason`, `evidence`).
 * @param {object} evaluator - Reads `gates` (`id`, `required`, `reason`).
 * @returns {*} Whatever `buildEvolutionShadowReport` returns for the proposals.
 */
function createEvolutionShadowProposals(governance, evaluator) {
    const escalatedModes = ['expanded', 'critical'];
    const shadowGateIds = ['security-threat-model', 'root-cause-review'];
    // Signals in an escalated mode each yield one proposal.
    const fromSignals = governance.signals
        .filter(signal => escalatedModes.includes(signal.mode))
        .map(signal => proposeShadowRule({
            title: `Governance signal: ${signal.id}`,
            description: `Shadow rule from governance signal "${signal.id}" (mode=${signal.mode}). ${signal.reason}`,
            source: 'failure-learning',
            // Fall back to the signal id when the signal carries no evidence of its own.
            sourceEvidenceIds: signal.evidence.length > 0 ? signal.evidence : [signal.id],
            pattern: signal.id,
            enforcement: signal.mode === 'critical' ? 'hook' : 'prompt',
            rollback: `Remove shadow rule for governance signal "${signal.id}" if false positive rate exceeds threshold.`,
        }));
    // Only the listed gate ids, and only when the gate is required, yield a proposal.
    const fromGates = evaluator.gates
        .filter(gate => gate.required && shadowGateIds.includes(gate.id))
        .map(gate => proposeShadowRule({
            title: `Evaluator gate: ${gate.id}`,
            description: `Shadow rule from required evaluator gate "${gate.id}". ${gate.reason}`,
            source: 'lesson-extraction',
            sourceEvidenceIds: [gate.id],
            pattern: gate.id,
            enforcement: 'prompt',
            rollback: `Remove shadow rule for evaluator gate "${gate.id}" if it does not reduce defect recurrence.`,
        }));
    return buildEvolutionShadowReport([...fromSignals, ...fromGates]);
}
|
|
1692
|
+
/**
 * Classifies a tool step's time risk and side-effect risk.
 *
 * The step id is matched case-insensitively (it is lowercased first) against
 * two keyword lists; when neither matches, the step kind decides.
 *
 * @param {string} id - Step identifier.
 * @param {string} kind - Step kind; 'artifact' and 'verification' are handled specially.
 * @returns {{timeRisk: string, sideEffectRisk: string}} Each is 'low', 'medium', or 'high'.
 */
function toolStepRisks(id, kind) {
    const stepId = id.toLowerCase();
    const highRiskWords = /desktop|cua|deploy|publish|release|migration|rollback|delete|drop|external|cli/;
    if (highRiskWords.test(stepId)) {
        return { timeRisk: 'high', sideEffectRisk: 'high' };
    }
    // Outside the high-risk list, verification steps are the only ones with medium side-effect risk.
    const sideEffectByKind = kind === 'verification' ? 'medium' : 'low';
    const mediumRiskWords = /browser|e2e|playwright|screenshot|visual|security|threat|audit/;
    if (mediumRiskWords.test(stepId)) {
        return { timeRisk: 'medium', sideEffectRisk: sideEffectByKind };
    }
    if (kind === 'artifact') {
        return { timeRisk: 'low', sideEffectRisk: 'low' };
    }
    return { timeRisk: 'medium', sideEffectRisk: sideEffectByKind };
}
|
|
1706
|
+
/**
 * Computes the cost units of a tool step as a base value for its kind plus
 * surcharges for being required, for time risk, for high side-effect risk,
 * and for ids containing certain keywords.
 *
 * @param {string} id - Step identifier, matched case-insensitively against the keyword list.
 * @param {string} kind - 'artifact' costs 1, 'verification' costs 2, anything else 3.
 * @param {boolean} required - Adds 1 when truthy.
 * @param {{timeRisk: string, sideEffectRisk: string}} risks - Risk levels for the step.
 * @returns {number} Total cost units.
 */
function toolStepCostUnits(id, kind, required, risks) {
    let base;
    switch (kind) {
        case 'artifact':
            base = 1;
            break;
        case 'verification':
            base = 2;
            break;
        default:
            base = 3;
    }
    const requiredSurcharge = required ? 1 : 0;
    const timeSurcharge = risks.timeRisk === 'high' ? 2 : risks.timeRisk === 'medium' ? 1 : 0;
    const sideEffectSurcharge = risks.sideEffectRisk === 'high' ? 2 : 0;
    const keywordSurcharge = /browser|e2e|desktop|external|cli|security|audit/i.test(id) ? 1 : 0;
    return base + requiredSurcharge + timeSurcharge + sideEffectSurcharge + keywordSurcharge;
}
|
|
1720
|
+
/**
 * Picks the retry policy for a tool step.
 *
 * High side-effect risk always means a single attempt with manual review.
 * Otherwise the kind decides; verification steps whose id mentions browser,
 * e2e, playwright, or network get a second attempt.
 *
 * @param {string} id - Step identifier, matched case-insensitively.
 * @param {string} kind - Step kind.
 * @param {{sideEffectRisk: string}} risks - Risk levels for the step.
 * @returns {{maxAttempts: number, backoff: string}} Retry policy.
 */
function toolStepRetry(id, kind, risks) {
    if (risks.sideEffectRisk === 'high') {
        return { maxAttempts: 1, backoff: 'manual-review' };
    }
    switch (kind) {
        case 'verification': {
            const retryable = /browser|e2e|playwright|network/i.test(id);
            return { maxAttempts: retryable ? 2 : 1, backoff: 'linear' };
        }
        case 'skill':
            return { maxAttempts: 1, backoff: 'manual-review' };
        default:
            return { maxAttempts: 1, backoff: 'none' };
    }
}
|
|
1729
|
+
/**
 * Derives ordering edges between tool-strategy nodes.
 *
 * First, for every artifact, an edge from each skill to that artifact when
 * either the skill or the artifact is required. Then, for every verification,
 * an edge from each required artifact to that verification.
 *
 * @param {Array<object>} nodes - Nodes with `id`, `kind`, and `required`.
 * @returns {Array<{from: string, to: string, reason: string}>} Edges in the order described.
 */
function buildToolStrategyEdges(nodes) {
    const ofKind = (kind) => nodes.filter(node => node.kind === kind);
    const skills = ofKind('skill');
    const artifacts = ofKind('artifact');
    const verifications = ofKind('verification');
    const requiredArtifacts = artifacts.filter(node => node.required);
    const skillToArtifact = artifacts.flatMap(artifact => skills
        .filter(skill => skill.required || artifact.required)
        .map(skill => ({
            from: skill.id,
            to: artifact.id,
            reason: 'Skill execution must leave artifact evidence when both are required or review-relevant.',
        })));
    const artifactToVerification = verifications.flatMap(verification => requiredArtifacts
        .map(artifact => ({
            from: artifact.id,
            to: verification.id,
            reason: 'Required artifacts should exist before verification evidence is accepted.',
        })));
    return [...skillToArtifact, ...artifactToVerification];
}
|
|
1746
|
+
/**
 * Produces human-readable recommendations from a tool-strategy summary.
 *
 * @param {{totalSteps: number, highRiskSteps: number, fallbackCoveredSteps: number}} summary
 *   Counts taken from the tool-strategy plan.
 * @returns {string[]} A single "not required" message when there are no steps;
 *   otherwise the base recommendation followed by any that apply.
 */
function toolStrategyRecommendations(summary) {
    const { totalSteps, highRiskSteps, fallbackCoveredSteps } = summary;
    if (totalSteps === 0) {
        return ['No tool strategy required; standard verification is enough for this task.'];
    }
    // Each optional recommendation is paired with the condition that enables it.
    const conditional = [
        [highRiskSteps > 0, 'High-risk tool steps require manual review or explicit safe-mode evidence before retry.'],
        [fallbackCoveredSteps < totalSteps, 'Fill fallback policy gaps before autonomous execution.'],
    ];
    return [
        'Execute required tool strategy nodes before claiming task completion.',
        ...conditional.filter(([applies]) => applies).map(([, text]) => text),
    ];
}
|
|
1756
|
+
/**
 * Returns the files whose path matches `pattern`, capped at the first 12 matches.
 *
 * @param {string[]} files - Candidate file paths.
 * @param {RegExp} pattern - Pattern each path is tested against.
 * @returns {string[]} Up to 12 matching paths, in their original order.
 */
function matchingEvidence(files, pattern) {
    return files
        .filter(file => {
            // A regex with the `g` or `y` flag keeps `lastIndex` between `test` calls,
            // which would make a match on one file skip or shift the search on the next.
            // Resetting it makes every file start from position 0; it has no effect on
            // regexes without those flags.
            pattern.lastIndex = 0;
            return pattern.test(file);
        })
        .slice(0, 12);
}
|
|
1759
|
+
/**
 * Lists the distinct factors that contribute to evaluator uncertainty.
 *
 * Drivers appear in a fixed order: governance mode, wide file scope (six or
 * more files), required skill evidence, the id of every gate passed in, and
 * finally hedging language found in the lowercased task text.
 *
 * @param {object} input - Reads `governance.effectiveMode`, `files`,
 *   `skillPlan.executionPlan.steps`, and `task`.
 * @param {Array<{id: string}>} gates - Gates selected so far.
 * @returns {string[]} Unique driver names in first-seen order.
 */
function evaluatorUncertaintyDrivers(input, gates) {
    const mode = input.governance.effectiveMode;
    const found = [];
    if (mode === 'critical') {
        found.push('critical-governance-mode');
    }
    if (mode === 'expanded') {
        found.push('expanded-governance-mode');
    }
    if (input.files.length >= 6) {
        found.push('wide-file-scope');
    }
    const hasRequiredStep = input.skillPlan.executionPlan.steps.some(step => step.required);
    if (hasRequiredStep) {
        found.push('required-skill-evidence');
    }
    for (const { id } of gates) {
        found.push(id);
    }
    const hedgingWords = /unknown|uncertain|maybe|assume|guess|可能|不确定|假设/;
    if (hedgingWords.test(input.task.toLowerCase())) {
        found.push('explicit-uncertainty-language');
    }
    // The Set removes repeats while keeping the first occurrence's position.
    return Array.from(new Set(found));
}
|
|
1775
|
+
/**
 * Scores evaluator uncertainty by adding fixed increments to a 0.15 baseline.
 *
 * Increments: governance mode (standard 0.1, expanded 0.25, critical 0.4),
 * file count (0.025 each, capped at 0.2), required gates (0.08 each, capped
 * at 0.2), any required skill step (0.08), and explicit uncertainty language
 * among the drivers (0.12). The sum is passed through `clampUnit` and then
 * `roundMetric`.
 *
 * @param {object} input - Reads `governance.effectiveMode`, `files`, and
 *   `skillPlan.executionPlan.steps`.
 * @param {Array<{required: boolean}>} gates - Gates selected so far.
 * @param {string[]} drivers - Driver names for the same input.
 * @returns {number} Result of `roundMetric(clampUnit(sum))`.
 */
function evaluatorUncertaintyScore(input, gates, drivers) {
    let modeIncrement;
    switch (input.governance.effectiveMode) {
        case 'standard':
            modeIncrement = 0.1;
            break;
        case 'expanded':
            modeIncrement = 0.25;
            break;
        case 'critical':
            modeIncrement = 0.4;
            break;
        default:
            modeIncrement = 0;
    }
    const fileIncrement = Math.min(0.2, input.files.length * 0.025);
    const requiredGateCount = gates.filter(gate => gate.required).length;
    const gateIncrement = Math.min(0.2, requiredGateCount * 0.08);
    const skillIncrement = input.skillPlan.executionPlan.steps.some(step => step.required) ? 0.08 : 0;
    const languageIncrement = drivers.includes('explicit-uncertainty-language') ? 0.12 : 0;
    // Terms are added left to right in a fixed order so floating-point results stay stable.
    const total = 0.15 + modeIncrement + fileIncrement + gateIncrement + skillIncrement + languageIncrement;
    return roundMetric(clampUnit(total));
}
|
|
1791
|
+
/**
 * Produces human-readable recommendations for the selected evaluator gates.
 *
 * @param {Array<{id: string}>} gates - Gates selected for the task.
 * @param {string} riskLevel - Adds a reviewer sign-off recommendation when 'high'.
 * @returns {string[]} A single "no gate required" message when `gates` is empty;
 *   otherwise the base recommendation followed by any that apply.
 */
function evaluatorRecommendations(gates, riskLevel) {
    if (gates.length === 0) {
        return ['No evaluator gate required; keep lightweight verification evidence for low-risk work.'];
    }
    const hasGate = (gateId) => gates.some(gate => gate.id === gateId);
    const output = ['Record evaluator evidence before promoting reasoning-heavy implementation or release claims.'];
    if (riskLevel === 'high') {
        output.push('Require reviewer sign-off for uncertainty, rejected alternatives, and rollback or mitigation path.');
    }
    if (hasGate('root-cause-review')) {
        output.push('List competing root-cause hypotheses and why each was accepted or rejected.');
    }
    if (hasGate('security-threat-model')) {
        output.push('Attach threat model or security-review evidence before guarded completion.');
    }
    return output;
}
|
|
1359
1803
|
function recommendations(options) {
|
|
1360
1804
|
const output = [];
|
|
@@ -1370,6 +1814,12 @@ function recommendations(options) {
|
|
|
1370
1814
|
if (options.governance.effectiveMode === 'critical') {
|
|
1371
1815
|
output.push('Critical workflow mode requires security review and rollback or disable strategy.');
|
|
1372
1816
|
}
|
|
1817
|
+
if (options.evaluator.required) {
|
|
1818
|
+
output.push(`Evaluator intelligence requires ${options.evaluator.gates.length} critique gate(s); record uncertainty and review evidence before promotion.`);
|
|
1819
|
+
}
|
|
1820
|
+
if (options.toolStrategy.summary.totalSteps > 0) {
|
|
1821
|
+
output.push(`Tool strategy planner created ${options.toolStrategy.summary.totalSteps} cost/retry/fallback node(s); execute required nodes with evidence.`);
|
|
1822
|
+
}
|
|
1373
1823
|
return output;
|
|
1374
1824
|
}
|
|
1375
1825
|
function normalizeSkillTaskLevel(value) {
|