npm - @hongmaple0820/scale-engine - Versions diffs - 0.28.0 → 0.33.0 - Mend

@hongmaple0820/scale-engine 0.28.0 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58)

package/README.en.md +3 -0
package/README.md +2 -0
package/dist/api/cli.js +12 -0
package/dist/api/cli.js.map +1 -1
package/dist/evolution/SessionLearnings.d.ts +70 -0
package/dist/evolution/SessionLearnings.js +217 -0
package/dist/evolution/SessionLearnings.js.map +1 -0
package/dist/runtime/AiOsRuntime.d.ts +138 -0
package/dist/runtime/AiOsRuntime.js +671 -14
package/dist/runtime/AiOsRuntime.js.map +1 -1
package/dist/skills/RoleSkills.d.ts +20 -0
package/dist/skills/RoleSkills.js +154 -0
package/dist/skills/RoleSkills.js.map +1 -0
package/dist/skills/SkillDiscovery.d.ts +5 -0
package/dist/skills/SkillDiscovery.js +15 -0
package/dist/skills/SkillDiscovery.js.map +1 -1
package/dist/skills/SkillFrontmatter.d.ts +28 -0
package/dist/skills/SkillFrontmatter.js +152 -0
package/dist/skills/SkillFrontmatter.js.map +1 -0
package/dist/skills/SkillRegistry.d.ts +11 -0
package/dist/skills/SkillRegistry.js +12 -0
package/dist/skills/SkillRegistry.js.map +1 -1
package/dist/skills/index.d.ts +1 -0
package/dist/skills/index.js +1 -0
package/dist/skills/index.js.map +1 -1
package/dist/testing/DiffTestSelector.d.ts +22 -0
package/dist/testing/DiffTestSelector.js +114 -0
package/dist/testing/DiffTestSelector.js.map +1 -0
package/dist/testing/index.d.ts +1 -0
package/dist/testing/index.js +3 -0
package/dist/testing/index.js.map +1 -0
package/dist/workflow/AdaptiveWorkflowRouter.d.ts +37 -0
package/dist/workflow/AdaptiveWorkflowRouter.js +211 -0
package/dist/workflow/AdaptiveWorkflowRouter.js.map +1 -0
package/dist/workflow/EvolutionShadowPromoter.d.ts +46 -0
package/dist/workflow/EvolutionShadowPromoter.js +73 -0
package/dist/workflow/EvolutionShadowPromoter.js.map +1 -0
package/dist/workflow/ReviewAnalyzer.d.ts +15 -0
package/dist/workflow/ReviewAnalyzer.js +82 -0
package/dist/workflow/ReviewAnalyzer.js.map +1 -1
package/dist/workflow/SecurityAudit.d.ts +27 -0
package/dist/workflow/SecurityAudit.js +294 -0
package/dist/workflow/SecurityAudit.js.map +1 -0
package/dist/workflow/SessionPreamble.d.ts +19 -0
package/dist/workflow/SessionPreamble.js +125 -0
package/dist/workflow/SessionPreamble.js.map +1 -0
package/dist/workflow/ShipPipeline.d.ts +30 -0
package/dist/workflow/ShipPipeline.js +366 -0
package/dist/workflow/ShipPipeline.js.map +1 -0
package/dist/workflow/WorkflowGuidance.d.ts +5 -1
package/dist/workflow/WorkflowGuidance.js +31 -0
package/dist/workflow/WorkflowGuidance.js.map +1 -1
package/dist/workflow/index.d.ts +5 -0
package/dist/workflow/index.js +5 -0
package/dist/workflow/index.js.map +1 -1
package/docs/AI_ENGINEERING_OS_POSITIONING.md +15 -0
package/docs/CONTEXT_BUDGET.md +1 -1
package/package.json +2 -1

package/dist/runtime/AiOsRuntime.js CHANGED

@@ -5,10 +5,14 @@ import { createGovernanceRoiReport, } from '../governance/GovernanceRoi.js';
 import { evaluateProgressiveGovernance, } from '../governance/ProgressiveGovernance.js';
 import { MemoryFabric, recallMemoryProviders, } from '../memory/index.js';
 import { createSkillPlan, loadSkillRoutingPolicy, } from '../skills/routing/index.js';
+import { routeAdaptiveWorkflow } from '../workflow/AdaptiveWorkflowRouter.js';
+import { proposeShadowRule, buildEvolutionShadowReport, } from '../workflow/EvolutionShadowPromoter.js';
 import { runSafeCommand } from '../tools/SafeCommandRunner.js';
 import { SCALE_ENGINE_VERSION } from '../version.js';
 import { resolveVerificationTargets, } from '../workflow/VerificationProfile.js';
 import { RuntimeEvidenceLedger } from './RuntimeEvidenceLedger.js';
+import { loadRelevantLearnings } from '../evolution/SessionLearnings.js';
+import { collectSessionPreamble } from '../workflow/SessionPreamble.js';
 export async function createAiOsPlan(input) {
     const projectDir = resolve(input.projectDir ?? process.cwd());
     const scaleDir = input.scaleDir ?? '.scale';
@@ -17,6 +21,8 @@ export async function createAiOsPlan(input) {
     const services = input.services ?? [];
     const taskId = input.taskId;
     const budget = input.budget ?? 8_000;
+    const preamble = collectSessionPreamble({ projectDir, scaleDir });
+    const sessionLearnings = loadRelevantLearnings({ projectDir, scaleDir, task: input.task, limit: 5 });
     const governance = evaluateProgressiveGovernance({
         task: input.task,
         changedFiles: files,
@@ -62,7 +68,15 @@ export async function createAiOsPlan(input) {
         services,
         policy: skillPolicy,
     });
-    const adaptiveWorkflow = createAdaptiveWorkflow(governance, skillPlan);
+    const evaluator = createEvaluatorIntelligence({
+        task: input.task,
+        files,
+        governance,
+        skillPlan,
+    });
+    const toolStrategy = createToolStrategyPlan(skillPlan);
+    const adaptiveWorkflow = createAdaptiveWorkflow(governance, skillPlan, evaluator, toolStrategy);
+    const evolutionShadow = createEvolutionShadowProposals(governance, evaluator);
     const roi = createGovernanceRoiReport({
         taskId,
         contextBudget,
@@ -81,8 +95,12 @@ export async function createAiOsPlan(input) {
             files,
             services,
         },
+        preamble,
         governance,
         adaptiveWorkflow,
+        evaluator,
+        toolStrategy,
+        evolutionShadow,
         context,
         memory: {
             providerOrder: memoryRecall.providerOrder,
@@ -93,8 +111,9 @@ export async function createAiOsPlan(input) {
             contextPack: memoryPack,
         },
         skillPlan,
+        sessionLearnings,
         roi,
-        recommendations: recommendations({ governance, context, memoryRecall, skillPlan }),
+        recommendations: recommendations({ governance, context, memoryRecall, skillPlan, evaluator, toolStrategy }),
     };
 }
 export async function createAiOsRun(input) {
@@ -194,6 +213,7 @@ export async function createAiOsBenchmark(input = {}) {
             task: scenario.task,
             level: scenario.level,
             governanceMode: plan.governance.effectiveMode,
+            workflowProfile: plan.adaptiveWorkflow.profile,
             metrics: {
                 estimatedTokens: plan.context.totalEstimatedTokens,
                 budget: plan.context.task.budget,
@@ -202,6 +222,10 @@ export async function createAiOsBenchmark(input = {}) {
                 selectedProviders: plan.memory.selectedProviders,
                 skillSteps: plan.skillPlan.executionPlan.steps.length,
                 requiredSkillSteps: plan.skillPlan.executionPlan.steps.filter(step => step.required).length,
+                evaluatorGates: plan.evaluator.gates.length,
+                toolStrategySteps: plan.toolStrategy.summary.totalSteps,
+                toolStrategyCostUnits: plan.toolStrategy.summary.estimatedCostUnits,
+                evolutionProposals: plan.evolutionShadow.summary.totalProposals,
                 gates: plan.adaptiveWorkflow.gates.length,
                 roiModules: plan.roi.modules.length,
             },
@@ -446,6 +470,15 @@ export function createAiOsStatus(input = {}) {
     const verificationRecommendations = buildVerificationRecommendations(projectDir, scaleDir, lang);
     const benchmarkReport = resolveBenchmarkReportPath(projectDir, scaleDir);
     const adoptionReport = resolveAdoptionReportPath(projectDir, scaleDir);
+    const intelligence = buildAiOsIntelligenceReport({
+        projectDir,
+        scaleDir,
+        runReports,
+        benchmark: readAiOsBenchmarkReport(benchmarkReport, warnings),
+        benchmarkStatus: doctor.benchmark.status,
+        benchmarkReport,
+        lang,
+    });
     const checks = [
         {
             id: 'runtime-dirs',
@@ -526,11 +559,351 @@ export function createAiOsStatus(input = {}) {
         summary,
         dashboard,
         doctor,
+        intelligence,
         verificationRecommendations,
         nextActions: aiOsStatusNextActions(status, checks, lang, verificationRecommendations),
         warnings: [...warnings, ...doctor.warnings],
     };
 }
+function buildAiOsIntelligenceReport(input) { // Builds { status, summary, signals, nextActions } from run reports plus an optional parsed benchmark report.
+    const runMemoryItems = input.runReports.flatMap(report => report.plan.memory.items);
+    const benchmarkMemoryItems = input.benchmark?.summary.totalMemoryItems ?? 0; // the benchmark contributes a count only, not individual items
+    const runProviders = input.runReports.flatMap(report => report.plan.memory.selectedProviders);
+    const benchmarkProviders = input.benchmark?.scenarios.flatMap(scenario => scenario.metrics.selectedProviders) ?? [];
+    const selectedProviders = [...new Set([...runProviders, ...benchmarkProviders])].sort(); // de-duplicated and sorted
+    const runTokenSavings = input.runReports.reduce((sum, report) => sum + (report.plan.context.compiler?.estimatedTokenSavings ?? 0), 0);
+    const benchmarkTokenSavings = input.benchmark?.summary.totalEstimatedTokenSavings ?? 0;
+    const estimatedTokenSavings = runTokenSavings + benchmarkTokenSavings;
+    const runSkillSteps = input.runReports.reduce((sum, report) => sum + report.plan.skillPlan.executionPlan.steps.length, 0);
+    const benchmarkSkillSteps = input.benchmark?.summary.totalSkillSteps ?? 0;
+    const skillSteps = runSkillSteps + benchmarkSkillSteps;
+    const totalMemoryItems = runMemoryItems.length + benchmarkMemoryItems;
+    const memoryQuality = summarizeMemoryQuality(runMemoryItems); // quality is scored from run items only; benchmark items are counted but not scored
+    const contextQuality = summarizeContextQuality(input.runReports);
+    const evaluatorQuality = summarizeEvaluatorQuality(input.runReports, input.benchmark);
+    const toolStrategyQuality = summarizeToolStrategyQuality(input.runReports, input.benchmark);
+    const evolutionQuality = summarizeEvolutionQuality(input.runReports, input.benchmark);
+    const contextSignalStatus = contextQuality.compressionRisk === 'high' // high compression risk yields 'warning' even when token savings exist
+        ? 'warning'
+        : estimatedTokenSavings > 0 ? 'ready' : input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked';
+    const memoryEvidence = [
+        ...runMemoryItems.map(item => `${item.provider}:${item.id}`),
+        ...(benchmarkMemoryItems > 0 ? [`benchmark:${input.benchmarkReport}:${benchmarkMemoryItems}`] : []),
+    ];
+    const contextEvidence = [
+        ...input.runReports.map(report => `${report.artifacts.runReport}:saved=${report.plan.context.compiler?.estimatedTokenSavings ?? 0}`),
+        ...(input.benchmark ? [`${input.benchmarkReport}:saved=${input.benchmark.summary.totalEstimatedTokenSavings}`] : []),
+    ];
+    const skillEvidence = [
+        ...input.runReports.flatMap(report => report.plan.skillPlan.executionPlan.steps.map(step => `${report.artifacts.runReport}:${step.id}`)),
+        ...(input.benchmark ? [`${input.benchmarkReport}:steps=${input.benchmark.summary.totalSkillSteps}`] : []),
+    ];
+    const evaluatorEvidence = [
+        ...input.runReports.flatMap(report => resolveRunEvaluator(report).gates.map(gate => `${report.artifacts.runReport}:${gate.id}`)),
+        ...(input.benchmark ? [`${input.benchmarkReport}:evaluator-gates=${input.benchmark.summary.totalEvaluatorGates}`] : []),
+    ];
+    const toolStrategyEvidence = [
+        ...input.runReports.flatMap(report => resolveRunToolStrategy(report).nodes.map(node => `${report.artifacts.runReport}:${node.id}`)),
+        ...(input.benchmark ? [`${input.benchmarkReport}:tool-strategy=${input.benchmark.summary.totalToolStrategySteps}`] : []),
+    ];
+    const evolutionEvidence = [
+        ...input.runReports.flatMap(report => (report.plan.evolutionShadow?.proposals ?? []).map(p => `${report.artifacts.runReport}:${p.id}:${p.maturity.stage}`)),
+        ...(input.benchmark ? [`${input.benchmarkReport}:evolution-proposals=${input.benchmark.summary.totalEvolutionProposals}`] : []),
+    ];
+    const benchmarkEvidence = input.benchmark ? [
+        `${input.benchmarkReport}:scenarios=${input.benchmark.summary.scenarios}`,
+        `${input.benchmarkReport}:memory=${input.benchmark.summary.totalMemoryItems}`,
+        `${input.benchmarkReport}:skills=${input.benchmark.summary.totalSkillSteps}`,
+        `${input.benchmarkReport}:evaluator-gates=${input.benchmark.summary.totalEvaluatorGates}`,
+        `${input.benchmarkReport}:tool-strategy=${input.benchmark.summary.totalToolStrategySteps}`,
+    ] : [input.benchmarkReport]; // without a parsed benchmark, the report path alone is listed as evidence
+    const signals = [ // one entry per signal: { id, status, summary, evidence, recommendations }
+        {
+            id: 'memory-recall',
+            status: totalMemoryItems > 0 ? 'ready' : selectedProviders.length > 0 ? 'warning' : 'blocked',
+            summary: totalMemoryItems > 0
+                ? `${totalMemoryItems} memory item(s) recalled through ${selectedProviders.join(', ') || 'configured providers'}; quality ${memoryQuality.score}/100.`
+                : selectedProviders.length > 0
+                    ? `Memory providers were selected (${selectedProviders.join(', ')}) but no relevant item was recalled.`
+                    : 'No memory recall evidence found in AI OS runs or benchmarks.',
+            evidence: memoryEvidence,
+            recommendations: totalMemoryItems > 0
+                ? ['Keep recording memory item ids with every run so later context assembly can explain recall.']
+                : ['Run an AI OS task that should match durable project memory before claiming memory intelligence.'],
+        },
+        {
+            id: 'context-savings',
+            status: contextSignalStatus,
+            summary: estimatedTokenSavings > 0
+                ? `${estimatedTokenSavings} estimated token(s) saved by context compilation evidence; compression risk ${contextQuality.compressionRisk}.`
+                : 'Context compiler evidence exists but has not shown measurable token savings yet.',
+            evidence: contextEvidence,
+            recommendations: contextQuality.evidenceLossWarnings.length > 0
+                ? ['Review omitted evidence-bearing context before claiming the task has enough context.']
+                : estimatedTokenSavings > 0
+                    ? ['Track savings deltas across releases before publishing token reduction claims.']
+                    : ['Add larger representative tasks to benchmark context slicing and token savings.'],
+        },
+        {
+            id: 'skill-routing',
+            status: skillSteps > 0 ? 'ready' : input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked',
+            summary: skillSteps > 0
+                ? `${skillSteps} skill routing step(s) planned across runs and benchmark scenarios.`
+                : 'No skill routing step evidence found.',
+            evidence: skillEvidence,
+            recommendations: skillSteps > 0
+                ? ['Use skill routing evidence in reviews to check why a skill, MCP, or CLI path was selected.']
+                : ['Create a task with files or services that should trigger required skill routing.'],
+        },
+        {
+            id: 'evaluator-intelligence',
+            status: evaluatorQuality.requiredGates > 0
+                ? evaluatorQuality.averageUncertainty >= 0.7 ? 'warning' : 'ready'
+                : input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked',
+            summary: evaluatorQuality.requiredGates > 0
+                ? `${evaluatorQuality.requiredGates} evaluator gate(s) required; average uncertainty ${evaluatorQuality.averageUncertainty}.`
+                : 'No evaluator gate evidence found for architecture, root-cause, security, or release reasoning.',
+            evidence: evaluatorEvidence,
+            recommendations: evaluatorQuality.requiredGates > 0
+                ? ['Use evaluator gates to force critique, uncertainty logging, and review evidence before promoting reasoning-heavy work.']
+                : ['Run a reasoning-heavy AI OS task so evaluator intelligence can prove critique coverage.'],
+        },
+        {
+            id: 'tool-strategy',
+            status: toolStrategyQuality.totalSteps > 0
+                ? toolStrategyQuality.fallbackCoverage < 1 ? 'warning' : 'ready'
+                : input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked',
+            summary: toolStrategyQuality.totalSteps > 0
+                ? `${toolStrategyQuality.totalSteps} tool strategy step(s); ${toolStrategyQuality.highRiskSteps} high-risk; fallback coverage ${toolStrategyQuality.fallbackCoverage}.`
+                : 'No tool strategy graph found for skills, artifacts, CLI, MCP, or verification steps.',
+            evidence: toolStrategyEvidence,
+            recommendations: toolStrategyQuality.totalSteps > 0
+                ? ['Use tool strategy evidence to review cost, retry, fallback, and side-effect risk before execution.']
+                : ['Create a task that triggers skill routing so the AI OS can build a tool strategy graph.'],
+        },
+        {
+            id: 'adaptive-workflow',
+            status: input.runReports.some(r => r.plan.adaptiveWorkflow.profile) ? 'ready' : input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked', // NOTE(review): profile is read directly, with no fallback like resolveRunEvaluator/resolveRunToolStrategy provide — confirm every stored run report carries adaptiveWorkflow
+            summary: summarizeAdaptiveWorkflowSignal(input.runReports, input.benchmark),
+            evidence: [
+                ...input.runReports.map(r => `${r.artifacts.runReport}:profile=${r.plan.adaptiveWorkflow.profile}`),
+                ...(input.benchmark ? [`${input.benchmarkReport}:profiles=${input.benchmark.summary.workflowProfiles.join(',')}`] : []), // NOTE(review): no '?? []' fallback here, unlike summarizeAdaptiveWorkflowSignal; readAiOsBenchmarkReport only checks summary/scenarios, so a report without workflowProfiles would throw on .join
+            ],
+            recommendations: input.runReports.some(r => r.plan.adaptiveWorkflow.profile)
+                ? ['Use workflow profile distribution to verify that risk signals correctly escalate governance.']
+                : ['Run an AI OS task with mixed risk levels to prove adaptive workflow routing.'],
+        },
+        {
+            id: 'evolution-shadow',
+            status: evolutionQuality.proposals > 0
+                ? evolutionQuality.pendingValidation > 0 ? 'warning' : 'ready'
+                : input.runReports.length > 0 || input.benchmark ? 'warning' : 'blocked',
+            summary: evolutionQuality.proposals > 0
+                ? `${evolutionQuality.proposals} shadow proposal(s); ${evolutionQuality.shadowRules} shadow, ${evolutionQuality.candidateHooks} candidate-hook, ${evolutionQuality.approvedBlocking} approved-blocking.`
+                : 'No evolution shadow proposals found. Run tasks with high-risk governance signals or evaluator gates to generate shadow rule candidates.',
+            evidence: evolutionEvidence,
+            recommendations: evolutionQuality.proposals > 0
+                ? ['Review shadow rule proposals and validate before promotion to candidate-hook or approved-blocking.']
+                : ['Run a high-risk AI OS task so evolution shadow promotion can propose rules from governance and evaluator signals.'],
+        },
+        {
+            id: 'benchmark-intelligence',
+            status: input.benchmark && input.benchmarkStatus === 'fresh' // any benchmarkStatus other than 'fresh' or 'stale' falls through to 'blocked'
+                ? 'ready'
+                : input.benchmark && input.benchmarkStatus === 'stale' ? 'warning' : 'blocked',
+            summary: input.benchmark
+                ? `${input.benchmark.summary.scenarios} benchmark scenario(s); benchmark status ${input.benchmarkStatus}.`
+                : 'No AI OS benchmark report available for intelligence metrics.',
+            evidence: benchmarkEvidence,
+            recommendations: input.benchmark && input.benchmarkStatus === 'fresh'
+                ? ['Use intelligence signals alongside benchmark deltas for release readiness reviews.']
+                : ['Run `scale ai-os benchmark --json` to refresh memory/context/skill intelligence metrics.'],
+        },
+    ];
+    const summary = {
+        ready: signals.filter(signal => signal.status === 'ready').length,
+        warning: signals.filter(signal => signal.status === 'warning').length,
+        blocked: signals.filter(signal => signal.status === 'blocked').length,
+        totalMemoryItems,
+        selectedProviders,
+        memoryQuality,
+        contextQuality,
+        evaluatorQuality,
+        toolStrategyQuality,
+        evolutionQuality,
+        estimatedTokenSavings,
+        skillSteps,
+    };
+    const status = summary.blocked > 0 ? 'blocked' : summary.warning > 0 ? 'warning' : 'ready'; // worst signal wins: blocked > warning > ready
+    const nextActions = aiOsIntelligenceNextActions(status, signals, input.lang);
+    return { status, summary, signals, nextActions };
+}
+function summarizeContextQuality(runReports) { // Summarizes context items omitted across run reports and derives a compression-risk level from them.
+    const omitted = runReports.flatMap(report => report.plan.context.omitted.map(item => {
+        const section = report.plan.context.sections.find(candidate => candidate.id === item.id); // match the omitted item to its section by id; may be undefined
+        return {
+            ...item,
+            category: section?.category,
+            runReport: report.artifacts.runReport,
+        };
+    }));
+    const totalOmittedTokens = omitted.reduce((sum, item) => sum + item.estimatedTokens, 0);
+    const highestOmittedTokens = omitted.reduce((max, item) => Math.max(max, item.estimatedTokens), 0);
+    const evidenceLossWarnings = omitted
+        .filter(item => item.category === 'evidence' || item.id.includes('evidence')) // evidence loss is detected by section category or by an 'evidence' substring in the id
+        .map(item => `${item.id} omitted from ${item.runReport} (${item.estimatedTokens} tokens; ${item.reason}).`);
+    const compressionRisk = evidenceLossWarnings.length > 0 // 'high' if any evidence was omitted, 'medium' if anything was omitted, otherwise 'low'
+        ? 'high'
+        : omitted.length > 0 ? 'medium' : 'low';
+    return {
+        omittedSections: omitted.length,
+        totalOmittedTokens,
+        evidenceLossWarnings,
+        highestOmittedTokens,
+        compressionRisk,
+    };
+}
+function summarizeEvaluatorQuality(runReports, benchmark) { // Aggregates evaluator gate and uncertainty metrics from run reports plus the benchmark's gate count.
+    const runEvaluators = runReports.map(resolveRunEvaluator);
+    const runGates = runEvaluators.flatMap(evaluator => evaluator.gates);
+    const benchmarkGateCount = benchmark?.summary.totalEvaluatorGates ?? 0;
+    const uncertaintyScores = runEvaluators.map(evaluator => evaluator.uncertainty.score); // run reports only; the benchmark summary is not consulted for uncertainty
+    const gateIds = new Set(runGates.map(gate => gate.id));
+    if (benchmarkGateCount > 0) // the benchmark supplies a count only, so one fixed gate id is recorded — NOTE(review): confirm this id is the intended one
+        gateIds.add('uncertainty-decision-log');
+    return {
+        requiredGates: runGates.filter(gate => gate.required).length + benchmarkGateCount, // run gates are filtered by gate.required; the benchmark count is added unfiltered
+        highRiskPlans: runEvaluators.filter(evaluator => evaluator.riskLevel === 'high').length,
+        averageUncertainty: roundMetric(average(uncertaintyScores)), // 0 when there are no run reports (average of an empty list)
+        gateIds: [...gateIds].sort(),
+    };
+}
+function resolveRunEvaluator(report) { // Returns the evaluator stored on the run report's plan, or computes one from the plan when none is stored.
+    const plan = report.plan;
+    return plan.evaluator ?? createEvaluatorIntelligence({ // fallback when plan.evaluator is null/undefined — presumably reports written before this field existed; confirm
+        task: report.plan.task.task,
+        files: report.plan.task.files,
+        governance: report.plan.governance,
+        skillPlan: report.plan.skillPlan,
+    });
+}
+function summarizeToolStrategyQuality(runReports, benchmark) { // Sums tool-strategy step and cost metrics over run reports and merges in the benchmark totals.
+    const runStrategies = runReports.map(resolveRunToolStrategy);
+    const runSummary = runStrategies.reduce((summary, strategy) => ({ // field-by-field sum of each strategy.summary
+        totalSteps: summary.totalSteps + strategy.summary.totalSteps,
+        requiredSteps: summary.requiredSteps + strategy.summary.requiredSteps,
+        highRiskSteps: summary.highRiskSteps + strategy.summary.highRiskSteps,
+        estimatedCostUnits: summary.estimatedCostUnits + strategy.summary.estimatedCostUnits,
+        fallbackCoveredSteps: summary.fallbackCoveredSteps + strategy.summary.fallbackCoveredSteps,
+    }), {
+        totalSteps: 0,
+        requiredSteps: 0,
+        highRiskSteps: 0,
+        estimatedCostUnits: 0,
+        fallbackCoveredSteps: 0,
+    });
+    const benchmarkSteps = benchmark?.summary.totalToolStrategySteps ?? 0;
+    const benchmarkCost = benchmark?.summary.totalToolStrategyCostUnits ?? 0;
+    const totalSteps = runSummary.totalSteps + benchmarkSteps;
+    const fallbackCoveredSteps = runSummary.fallbackCoveredSteps + benchmarkSteps; // NOTE(review): every benchmark step is counted as fallback-covered; no benchmark fallback figure is read here — confirm this is intended
+    return {
+        totalSteps,
+        requiredSteps: runSummary.requiredSteps, // run reports only
+        highRiskSteps: runSummary.highRiskSteps, // run reports only
+        estimatedCostUnits: runSummary.estimatedCostUnits + benchmarkCost,
+        fallbackCoverage: totalSteps > 0 ? roundMetric(fallbackCoveredSteps / totalSteps) : 0, // covered/total rounded to 3 decimals; 0 when there are no steps
+    };
+}
+function resolveRunToolStrategy(report) { // Returns the tool strategy stored on the run report's plan, or builds one from its skill plan when none is stored.
+    const plan = report.plan;
+    return plan.toolStrategy ?? createToolStrategyPlan(report.plan.skillPlan); // fallback only when plan.toolStrategy is null/undefined
+}
+function summarizeEvolutionQuality(runReports, benchmark) { // Counts evolution shadow proposals overall and per maturity stage.
+    const runProposals = runReports.flatMap(r => r.plan.evolutionShadow?.proposals ?? []); // reports without evolutionShadow contribute no proposals
+    const benchmarkProposals = benchmark?.summary.totalEvolutionProposals ?? 0;
+    const allProposals = runProposals; // alias only: the benchmark provides a bare count, so per-stage figures below cover run reports only
+    const stageCount = (stage) => allProposals.filter(p => p.maturity.stage === stage).length;
+    return {
+        proposals: allProposals.length + benchmarkProposals, // the only field that includes the benchmark count
+        shadowRules: stageCount('shadow'),
+        candidateHooks: stageCount('candidate-hook'),
+        approvedBlocking: stageCount('approved-blocking'),
+        pendingValidation: allProposals.filter(p => p.maturity.stage === 'shadow' && p.maturity.shadowHits < 10).length, // shadow-stage proposals with fewer than 10 shadow hits
+    };
+}
+function resolveRunEvolutionShadow(report) { // Returns the plan's evolutionShadow report, or one built from an empty proposal list. NOTE(review): no caller is visible in this diff; nearby code reads plan.evolutionShadow?.proposals directly
+    const plan = report.plan;
+    return plan.evolutionShadow ?? buildEvolutionShadowReport([]);
+}
+function summarizeAdaptiveWorkflowSignal(runReports, benchmark) { // Builds the one-line summary text describing workflow profile distribution and escalations.
+    const profiles = runReports.map(r => r.plan.adaptiveWorkflow.profile); // NOTE(review): no fallback — a report without a profile adds an undefined entry, rendered below as "undefined=N"
+    const benchmarkProfiles = benchmark?.summary.workflowProfiles ?? []; // tolerates benchmark summaries that lack workflowProfiles
+    const allProfiles = [...profiles, ...benchmarkProfiles];
+    if (allProfiles.length === 0)
+        return 'No adaptive workflow profile evidence found.';
+    const distribution = new Map(); // profile -> number of occurrences
+    for (const p of allProfiles)
+        distribution.set(p, (distribution.get(p) ?? 0) + 1);
+    const parts = [...distribution.entries()].map(([p, n]) => `${p}=${n}`).join(', ');
+    const escalated = runReports.filter(r => r.plan.adaptiveWorkflow.escalationReasons.length > 0).length; // run reports only; assumes escalationReasons is always an array — TODO confirm for older reports
+    return `${allProfiles.length} run(s) with profile distribution: ${parts}. ${escalated} run(s) had escalation reasons.`; // NOTE(review): the first "run(s)" figure also counts benchmark profile entries
+}
+function summarizeMemoryQuality(items) { // Scores recalled memory items on a 0..100 scale from confidence, relevance and evidence coverage.
+    if (items.length === 0) { // nothing recalled: return an all-zero summary and avoid dividing by zero below
+        return {
+            score: 0,
+            evidenceBackedItems: 0,
+            missingEvidenceItems: 0,
+            lowConfidenceItems: 0,
+            averageConfidence: 0,
+            averageRelevance: 0,
+        };
+    }
+    const evidenceBackedItems = items.filter(item => item.evidencePaths.length > 0).length; // assumes every item has an evidencePaths array — TODO confirm
+    const missingEvidenceItems = items.length - evidenceBackedItems;
+    const lowConfidenceItems = items.filter(item => item.confidence < 0.7).length; // NOTE(review): raw comparison — a NaN confidence is not counted as low here, although clampUnit treats it as 0 in the average
+    const averageConfidence = average(items.map(item => clampUnit(item.confidence)));
+    const averageRelevance = average(items.map(item => clampUnit(item.score))); // item.score is used as the relevance value
+    const evidenceRatio = evidenceBackedItems / items.length;
+    const lowConfidenceRatio = lowConfidenceItems / items.length;
+    const score = Math.max(0, Math.round((averageConfidence * 40) + (averageRelevance * 30) + (evidenceRatio * 30) - (lowConfidenceRatio * 10))); // weights 40/30/30 minus up to 10 for the low-confidence share; floored at 0
+    return {
+        score,
+        evidenceBackedItems,
+        missingEvidenceItems,
+        lowConfidenceItems,
+        averageConfidence: roundMetric(averageConfidence),
+        averageRelevance: roundMetric(averageRelevance),
+    };
+}
+function average(values) { // Arithmetic mean of a numeric array.
+    if (values.length === 0) // an empty array yields 0 rather than NaN from 0/0
+        return 0;
+    return values.reduce((sum, value) => sum + value, 0) / values.length;
+}
+function clampUnit(value) { // Clamps a number into the range [0, 1].
+    if (!Number.isFinite(value)) // NaN, +/-Infinity and non-number values all map to 0 (Number.isFinite does not coerce)
+        return 0;
+    return Math.max(0, Math.min(1, value));
+}
+function roundMetric(value) { // Rounds a number to at most 3 decimal places and returns it as a number.
+    return Number(value.toFixed(3)); // toFixed returns a string; Number() converts it back and drops trailing zeros
+}
+function aiOsIntelligenceNextActions(status, signals, lang) { // Builds the list of follow-up action strings for the intelligence report.
+    const actions = [];
+    if (signals.some(signal => signal.status === 'ready')) {
+        actions.push('Use intelligence signals during release review to prove memory, context, and skill routing gains.');
+    }
+    if (status === 'ready') // overall status is ready: no refresh action is added
+        return actions;
+    const blocked = signals.filter(signal => signal.status === 'blocked').map(signal => signal.id);
+    if (lang === 'zh') { // NOTE(review): this branch pushes the same English text as the default path below, so lang currently has no effect
+        actions.push(`Refresh AI OS intelligence evidence for: ${blocked.join(', ') || 'warning signals'}.`);
+        return actions;
+    }
+    actions.push(`Refresh AI OS intelligence evidence for: ${blocked.join(', ') || 'warning signals'}.`); // falls back to the literal 'warning signals' when no signal is blocked
+    return actions;
+}
 function buildRunSteps(plan) {
     const steps = new Map();
     const upsert = (step) => steps.set(step.id, step);
@@ -563,16 +936,20 @@ function buildRunSteps(plan) {
         evidence: ['memory.providerOrder', 'memory.selectedProviders', 'memory.items'],
         dependsOn: ['runtime-plan'],
     });
+    const profile = plan.adaptiveWorkflow.profile;
     for (const gate of plan.adaptiveWorkflow.gates) {
         if (steps.has(gate))
             continue;
+        const gateRequired = profile !== 'light';
         upsert({
             id: gate,
             kind: gate === 'runtime-evidence' ? 'evidence' : 'gate',
             title: `Satisfy ${gate} gate`,
             status: 'planned',
-            required: true,
-            summary: `Required by ${plan.adaptiveWorkflow.strategy} in ${plan.adaptiveWorkflow.mode} mode.`,
+            required: gateRequired,
+            summary: gateRequired
+                ? `Required by ${plan.adaptiveWorkflow.strategy} in ${profile} profile (${plan.adaptiveWorkflow.mode} mode).`
+                : `Advisory in ${profile} profile; not blocking completion.`,
             evidence: [`gate.${gate}`],
             dependsOn: ['runtime-plan'],
         });
@@ -945,6 +1322,22 @@ function inspectBenchmarkReport(projectDir, scaleDir, maxAgeHours, warnings) {
         return { status: 'invalid', reportPath };
     }
 }
+function readAiOsBenchmarkReport(reportPath, warnings) { // Reads and parses the benchmark report JSON; returns the parsed object, or undefined when it is missing, invalid or unreadable.
+    if (!existsSync(reportPath)) // a missing file is not treated as an error: no warning is recorded
+        return undefined;
+    try {
+        const parsed = JSON.parse(readFileSync(reportPath, 'utf-8'));
+        if (!parsed || !parsed.summary || !Array.isArray(parsed.scenarios)) { // shallow shape check: only a truthy summary and a scenarios array are verified
+            warnings.push(`Ignored invalid AI OS benchmark report: ${reportPath}`);
+            return undefined;
+        }
+        return parsed;
+    }
+    catch (error) { // read or JSON parse failure: append a message to the caller's warnings array instead of throwing
+        warnings.push(`Ignored unreadable AI OS benchmark report: ${reportPath} (${error instanceof Error ? error.message : String(error)})`);
+        return undefined;
+    }
+}
 function summarizeBenchmarkDoctor(benchmark) {
     if (benchmark.status === 'missing')
         return 'No AI OS benchmark report found.';
@@ -1108,7 +1501,12 @@ function summarizeBenchmark(results) {
         totalMemoryItems: results.reduce((sum, result) => sum + result.metrics.memoryItems, 0),
         totalSkillSteps: results.reduce((sum, result) => sum + result.metrics.skillSteps, 0),
         requiredSkillSteps: results.reduce((sum, result) => sum + result.metrics.requiredSkillSteps, 0),
+        totalEvaluatorGates: results.reduce((sum, result) => sum + result.metrics.evaluatorGates, 0),
+        totalToolStrategySteps: results.reduce((sum, result) => sum + result.metrics.toolStrategySteps, 0),
+        totalToolStrategyCostUnits: results.reduce((sum, result) => sum + result.metrics.toolStrategyCostUnits, 0),
+        totalEvolutionProposals: results.reduce((sum, result) => sum + result.metrics.evolutionProposals, 0),
         governanceModes: [...new Set(results.map(result => result.governanceMode))],
+        workflowProfiles: [...new Set(results.map(result => result.workflowProfile))],
         averageTokenUtilization: totalBudget > 0 ? Number((totalEstimatedTokens / totalBudget).toFixed(4)) : 0,
     };
 }
@@ -1116,6 +1514,10 @@ function benchmarkRecommendations(summary) {
     const recommendations = ['Use benchmark deltas in release notes only after comparing the same scenario set across versions.'];
     if (summary.totalSkillSteps === 0)
         recommendations.push('Skill routing did not produce steps; inspect skill policy detection.');
+    if (summary.totalEvaluatorGates === 0)
+        recommendations.push('Evaluator intelligence did not require any critique gate; add reasoning-heavy benchmark scenarios before claiming evaluator coverage.');
+    if (summary.totalToolStrategySteps === 0)
+        recommendations.push('Tool strategy did not build a cost/retry/fallback graph; inspect skill execution plan coverage.');
     if (summary.averageTokenUtilization > 0.9)
         recommendations.push('Context utilization is high; lower budgets or improve relevance filtering before scaling.');
     if (!summary.governanceModes.includes('critical') && !summary.governanceModes.includes('expanded')) {
@@ -1123,30 +1525,279 @@ function benchmarkRecommendations(summary) {
     }
     return recommendations;
 }
-function createAdaptiveWorkflow(governance, skillPlan) {
+function createAdaptiveWorkflow(governance, skillPlan, evaluator, toolStrategy) { /*
+     * Assembles the adaptive workflow descriptor for one task.
+     * - governance: read for effectiveMode and requiredBehaviors.
+     * - skillPlan: read for `required` and executionPlan.steps.
+     * - evaluator: its gates contribute their ids to the gate list.
+     * - toolStrategy: only forwarded to routeAdaptiveWorkflow in this function.
+     * Gate ids and required behaviors are collected in Sets, so duplicates
+     * collapse and the returned arrays keep first-insertion order.
+     * Returns { strategy, profile, escalationReasons, mode, requiredBehaviors,
+     * gates, exitCriteria }.
+     */
+    const routerResult = routeAdaptiveWorkflow({ governance, evaluator, toolStrategy });
     const gates = new Set();
     gates.add('context-compiler');
     gates.add('memory-provider-recall');
     if (skillPlan.required || skillPlan.executionPlan.steps.length > 0)
         gates.add('skill-evidence');
     gates.add('runtime-evidence');
-    if (governance.effectiveMode === 'expanded' || governance.effectiveMode === 'critical')
+    if (routerResult.profile === 'strict' || routerResult.profile === 'critical') // gating now keys off the router profile instead of governance.effectiveMode
         gates.add('impact-analysis');
-    if (governance.effectiveMode === 'critical')
+    if (routerResult.profile === 'critical')
         gates.add('security-review');
+    for (const gate of evaluator.gates) // every evaluator gate is added, whether or not it is marked required
+        gates.add(gate.id);
+    for (const override of routerResult.gateOverrides)
+        gates.add(override.gateId);
+    const requiredBehaviors = new Set(governance.requiredBehaviors);
+    for (const constraint of routerResult.behavioralConstraints) {
+        if (constraint.required) // optional constraints are not surfaced as required behaviors
+            requiredBehaviors.add(constraint.description);
+    }
     return {
         strategy: 'risk-adaptive-runtime-v1',
+        profile: routerResult.profile,
+        escalationReasons: routerResult.escalationReasons,
         mode: governance.effectiveMode,
-        requiredBehaviors: governance.requiredBehaviors,
+        requiredBehaviors: Array.from(requiredBehaviors),
         gates: Array.from(gates),
-        exitCriteria: [
-            'Context compiler explains included and omitted sections.',
-            'Memory recall records provider, score, and evidence paths.',
-            'Skill plan lists required proof and fallback policy.',
-            'Governance ROI states benefit and overhead before completion.',
-        ],
+        exitCriteria: routerResult.exitCriteria,
+    };
+}
+function createEvaluatorIntelligence(input) { /*
+     * Derives evaluator critique gates, an uncertainty score, and a risk level.
+     * `input` is read here for: task (string), files (array of strings),
+     * governance.signals (each with id and evidence) and
+     * governance.effectiveMode; it is also passed whole to the uncertainty
+     * helpers.
+     * Keyword gates test a lowercased haystack built from the task text, the
+     * file paths, and the governance signal ids; the security gate checks the
+     * signal ids directly instead.
+     * NOTE(review): the keyword alternatives are unanchored, so short ones such
+     * as `ci` or `npm` also match inside longer words (for example "decision")
+     * and then add a required release-readiness gate - confirm this is intended.
+     * Returns { strategy, required, riskLevel, uncertainty, gates,
+     * recommendations }.
+     */
+    const haystack = `${input.task} ${input.files.join(' ')} ${input.governance.signals.map(signal => signal.id).join(' ')}`.toLowerCase();
+    const gates = [];
+    const addGate = (gate) => {
+        if (gates.some(existing => existing.id === gate.id)) // the first gate registered under an id wins
+            return;
+        gates.push(gate);
+    };
+    if (/architecture|architectural|design|strategy|boundary|refactor|runtime|platform|framework|架构|方案|设计|边界|平台/.test(haystack)) {
+        addGate({
+            id: 'architecture-critique',
+            required: input.governance.effectiveMode !== 'minimal', // not required when governance mode is minimal
+            reason: 'Architecture, runtime, platform, or design decisions need an explicit critique before implementation claims.',
+            evidence: matchingEvidence(input.files, /architecture|runtime|framework|docs|readme|src/i),
+        });
+    }
+    if (/root cause|diagnose|debug|failure|incident|postmortem|regression|blocked|根因|排查|故障|事故|回归/.test(haystack)) {
+        addGate({
+            id: 'root-cause-review',
+            required: true,
+            reason: 'Failure diagnosis or root-cause work needs an alternate hypothesis check before closing.',
+            evidence: matchingEvidence(input.files, /test|runtime|debug|log|src|docs/i),
+        });
+    }
+    if (input.governance.signals.some(signal => signal.id === 'critical-risk-domain' || signal.id === 'critical-file-path')) {
+        addGate({
+            id: 'security-threat-model',
+            required: true,
+            reason: 'Critical auth, data, production, or destructive risk requires threat-model review evidence.',
+            evidence: input.governance.signals.flatMap(signal => signal.evidence).slice(0, 12), // evidence from all signals (not only the critical ones), capped at 12
+        });
+    }
+    if (/release|publish|deploy|migration|rollback|version|changelog|npm|ci|发版|发布|部署|迁移|回滚/.test(haystack)) {
+        addGate({
+            id: 'release-readiness-review',
+            required: true,
+            reason: 'Release, deployment, migration, or rollback work needs readiness and rollback evidence.',
+            evidence: matchingEvidence(input.files, /package|changelog|release|deploy|migration|workflow|github/i),
+        });
+    }
+    const drivers = evaluatorUncertaintyDrivers(input, gates); // computed before the uncertainty gate below is appended
+    const uncertaintyScore = evaluatorUncertaintyScore(input, gates, drivers);
+    if (gates.length > 0 || uncertaintyScore >= 0.45) { // 0.45 is the same value reported as uncertainty.threshold below
+        addGate({
+            id: 'uncertainty-decision-log',
+            required: uncertaintyScore >= 0.45 || input.governance.effectiveMode === 'critical',
+            reason: 'The agent must record uncertainty, rejected alternatives, and evidence gaps before completion.',
+            evidence: drivers,
+        });
+    }
+    const riskLevel = uncertaintyScore >= 0.7
+        ? 'high'
+        : uncertaintyScore >= 0.4 || gates.some(gate => gate.required) ? 'medium' : 'low'; // medium starts at 0.4, below the 0.45 gate threshold
+    return {
+        strategy: 'evaluator-intelligence-v1',
+        required: gates.some(gate => gate.required),
+        riskLevel,
+        uncertainty: {
+            score: uncertaintyScore,
+            threshold: 0.45,
+            drivers,
+        },
+        gates,
+        recommendations: evaluatorRecommendations(gates, riskLevel),
     };
 }
+function createToolStrategyPlan(skillPlan) { /*
+     * Turns each step of skillPlan.executionPlan.steps into a strategy node
+     * carrying cost, retry, fallback and evidence data, links the nodes with
+     * buildToolStrategyEdges, and computes summary counts.
+     * Each step is read for id, kind, required, fallback and evidenceRequired.
+     * Returns { strategy, nodes, edges, summary, recommendations }.
+     */
+    const nodes = skillPlan.executionPlan.steps.map(step => {
+        const risks = toolStepRisks(step.id, step.kind);
+        return {
+            id: `${step.kind}:${step.id}`,
+            kind: step.kind,
+            required: step.required,
+            cost: {
+                units: toolStepCostUnits(step.id, step.kind, step.required, risks),
+                timeRisk: risks.timeRisk,
+                sideEffectRisk: risks.sideEffectRisk,
+            },
+            retry: toolStepRetry(step.id, step.kind, risks),
+            fallback: step.fallback,
+            evidence: [step.evidenceRequired], // always a one-element array
+        };
+    });
+    const edges = buildToolStrategyEdges(nodes);
+    const summary = {
+        totalSteps: nodes.length,
+        requiredSteps: nodes.filter(node => node.required).length,
+        highRiskSteps: nodes.filter(node => node.cost.timeRisk === 'high' || node.cost.sideEffectRisk === 'high').length,
+        estimatedCostUnits: nodes.reduce((sum, node) => sum + node.cost.units, 0),
+        fallbackCoveredSteps: nodes.filter(node => node.fallback.trim().length > 0).length, // assumes fallback is always a string; a missing one would throw here - TODO confirm
+    };
+    return {
+        strategy: 'tool-strategy-v1',
+        nodes,
+        edges,
+        summary,
+        recommendations: toolStrategyRecommendations(summary),
+    };
+}
+function createEvolutionShadowProposals(governance, evaluator) { /*
+     * Builds shadow-rule proposals from two sources and wraps them with
+     * buildEvolutionShadowReport (defined outside this view):
+     * - governance.signals whose mode is 'expanded' or 'critical';
+     * - evaluator.gates that are required and have id 'security-threat-model'
+     *   or 'root-cause-review'.
+     * Returns whatever buildEvolutionShadowReport returns for the proposals.
+     */
+    const proposals = [];
+    // Propose shadow rules from governance risk signals (escalated modes)
+    for (const signal of governance.signals) {
+        if (signal.mode === 'expanded' || signal.mode === 'critical') {
+            proposals.push(proposeShadowRule({
+                title: `Governance signal: ${signal.id}`,
+                description: `Shadow rule from governance signal "${signal.id}" (mode=${signal.mode}). ${signal.reason}`,
+                source: 'failure-learning',
+                sourceEvidenceIds: signal.evidence.length > 0 ? signal.evidence : [signal.id], // falls back to the signal id when no evidence is attached
+                pattern: signal.id,
+                enforcement: signal.mode === 'critical' ? 'hook' : 'prompt', // critical signals get 'hook', expanded ones 'prompt'
+                rollback: `Remove shadow rule for governance signal "${signal.id}" if false positive rate exceeds threshold.`,
+            }));
+        }
+    }
+    // Propose shadow rules from high-risk evaluator gates
+    for (const gate of evaluator.gates) {
+        if (gate.required && (gate.id === 'security-threat-model' || gate.id === 'root-cause-review')) {
+            proposals.push(proposeShadowRule({
+                title: `Evaluator gate: ${gate.id}`,
+                description: `Shadow rule from required evaluator gate "${gate.id}". ${gate.reason}`,
+                source: 'lesson-extraction',
+                sourceEvidenceIds: [gate.id],
+                pattern: gate.id,
+                enforcement: 'prompt',
+                rollback: `Remove shadow rule for evaluator gate "${gate.id}" if it does not reduce defect recurrence.`,
+            }));
+        }
+    }
+    return buildEvolutionShadowReport(proposals);
+}
+function toolStepRisks(id, kind) { /*
+     * Classifies a tool step into { timeRisk, sideEffectRisk }, each one of
+     * 'low' | 'medium' | 'high'. Keyword matches on the lowercased step id take
+     * precedence over the step kind; the first matching rule returns.
+     * NOTE(review): the keywords are unanchored substrings, so `cli` also
+     * matches ids such as "client" - confirm this is intended.
+     */
+    const normalized = id.toLowerCase();
+    if (/desktop|cua|deploy|publish|release|migration|rollback|delete|drop|external|cli/.test(normalized)) {
+        return { timeRisk: 'high', sideEffectRisk: 'high' };
+    }
+    if (/browser|e2e|playwright|screenshot|visual|security|threat|audit/.test(normalized)) {
+        return { timeRisk: 'medium', sideEffectRisk: kind === 'verification' ? 'medium' : 'low' };
+    }
+    if (kind === 'artifact') // no keyword matched: fall back to the step kind
+        return { timeRisk: 'low', sideEffectRisk: 'low' };
+    if (kind === 'verification')
+        return { timeRisk: 'medium', sideEffectRisk: 'medium' };
+    return { timeRisk: 'medium', sideEffectRisk: 'low' };
+}
+function toolStepCostUnits(id, kind, required, risks) { /*
+     * Computes an integer cost for a tool step.
+     * Base: 1 for 'artifact', 2 for 'verification', 3 for any other kind.
+     * Surcharges: +1 if required, +1 for medium or +2 for high time risk,
+     * +2 for high side-effect risk, +1 if the id matches the keyword list
+     * (case-insensitive substring match).
+     * `risks` is the { timeRisk, sideEffectRisk } object for the same step.
+     */
+    let units = kind === 'artifact' ? 1 : kind === 'verification' ? 2 : 3;
+    if (required)
+        units += 1;
+    if (risks.timeRisk === 'medium')
+        units += 1;
+    if (risks.timeRisk === 'high')
+        units += 2;
+    if (risks.sideEffectRisk === 'high')
+        units += 2;
+    if (/browser|e2e|desktop|external|cli|security|audit/i.test(id)) // applied in addition to the risk surcharges above
+        units += 1;
+    return units;
+}
+function toolStepRetry(id, kind, risks) { /*
+     * Picks the retry policy { maxAttempts, backoff } for a tool step.
+     * Checks run in order and the first match returns, so a high side-effect
+     * risk overrides the per-kind rules.
+     */
+    if (risks.sideEffectRisk === 'high')
+        return { maxAttempts: 1, backoff: 'manual-review' };
+    if (kind === 'verification')
+        return { maxAttempts: /browser|e2e|playwright|network/i.test(id) ? 2 : 1, backoff: 'linear' }; // only ids matching these keywords get a second attempt
+    if (kind === 'skill')
+        return { maxAttempts: 1, backoff: 'manual-review' };
+    return { maxAttempts: 1, backoff: 'none' };
+}
+function buildToolStrategyEdges(nodes) { /*
+     * Builds { from, to, reason } edges between strategy nodes.
+     * - skill -> artifact: for every artifact/skill pair where the skill or the
+     *   artifact is required.
+     * - artifact -> verification: from every required artifact to every
+     *   verification node.
+     * Nodes of any other kind produce no edges. Returns the edge array.
+     */
+    const edges = [];
+    const skillNodes = nodes.filter(node => node.kind === 'skill');
+    const artifactNodes = nodes.filter(node => node.kind === 'artifact');
+    const verificationNodes = nodes.filter(node => node.kind === 'verification');
+    for (const artifact of artifactNodes) {
+        for (const skill of skillNodes.filter(node => node.required || artifact.required)) { // `node` here is the skill; a required artifact links to every skill
+            edges.push({ from: skill.id, to: artifact.id, reason: 'Skill execution must leave artifact evidence when both are required or review-relevant.' });
+        }
+    }
+    for (const verification of verificationNodes) {
+        for (const artifact of artifactNodes.filter(node => node.required)) {
+            edges.push({ from: artifact.id, to: verification.id, reason: 'Required artifacts should exist before verification evidence is accepted.' });
+        }
+    }
+    return edges;
+}
+function toolStrategyRecommendations(summary) { /*
+     * Produces recommendation strings from a tool strategy summary.
+     * Reads summary.totalSteps, summary.highRiskSteps and
+     * summary.fallbackCoveredSteps. With zero steps a single "not required"
+     * message is returned; otherwise a base message plus one message per
+     * detected concern.
+     */
+    if (summary.totalSteps === 0)
+        return ['No tool strategy required; standard verification is enough for this task.'];
+    const recommendations = ['Execute required tool strategy nodes before claiming task completion.'];
+    if (summary.highRiskSteps > 0)
+        recommendations.push('High-risk tool steps require manual review or explicit safe-mode evidence before retry.');
+    if (summary.fallbackCoveredSteps < summary.totalSteps) // at least one step has an empty fallback
+        recommendations.push('Fill fallback policy gaps before autonomous execution.');
+    return recommendations;
+}
+function matchingEvidence(files, pattern) { /*
+     * Returns the entries of `files` that `pattern` (a RegExp) matches, in
+     * their original order, keeping at most the first 12.
+     */
+    return files.filter(file => pattern.test(file)).slice(0, 12);
+}
+function evaluatorUncertaintyDrivers(input, gates) { /*
+     * Collects the labels that explain why a task is considered uncertain.
+     * Reads input.governance.effectiveMode, input.files.length,
+     * input.skillPlan.executionPlan.steps and input.task; every id in `gates`
+     * is added as a driver as well.
+     * Returns the de-duplicated labels as an array in insertion order.
+     */
+    const drivers = new Set();
+    if (input.governance.effectiveMode === 'critical')
+        drivers.add('critical-governance-mode');
+    if (input.governance.effectiveMode === 'expanded')
+        drivers.add('expanded-governance-mode');
+    if (input.files.length >= 6) // six or more files counts as a wide scope
+        drivers.add('wide-file-scope');
+    if (input.skillPlan.executionPlan.steps.some(step => step.required))
+        drivers.add('required-skill-evidence');
+    for (const gate of gates)
+        drivers.add(gate.id);
+    if (/unknown|uncertain|maybe|assume|guess|可能|不确定|假设/.test(input.task.toLowerCase()))
+        drivers.add('explicit-uncertainty-language');
+    return [...drivers];
+}
+function evaluatorUncertaintyScore(input, gates, drivers) { /*
+     * Computes the additive uncertainty score for a task.
+     * Starts at 0.15, then adds: 0.1 / 0.25 / 0.4 for standard / expanded /
+     * critical governance mode, 0.025 per file (capped at 0.2), 0.08 per
+     * required gate (capped at 0.2), 0.08 when any skill step is required, and
+     * 0.12 when `drivers` contains 'explicit-uncertainty-language'.
+     * The sum is passed through clampUnit and roundMetric, both defined outside
+     * this view - presumably clamping to [0, 1] and rounding; verify there.
+     */
+    let score = 0.15;
+    if (input.governance.effectiveMode === 'standard')
+        score += 0.1;
+    if (input.governance.effectiveMode === 'expanded')
+        score += 0.25;
+    if (input.governance.effectiveMode === 'critical')
+        score += 0.4;
+    score += Math.min(0.2, input.files.length * 0.025);
+    score += Math.min(0.2, gates.filter(gate => gate.required).length * 0.08);
+    if (input.skillPlan.executionPlan.steps.some(step => step.required))
+        score += 0.08;
+    if (drivers.includes('explicit-uncertainty-language'))
+        score += 0.12;
+    return roundMetric(clampUnit(score));
+}
+function evaluatorRecommendations(gates, riskLevel) { /*
+     * Produces recommendation strings for the evaluator result.
+     * With no gates a single lightweight-verification message is returned.
+     * Otherwise a base message is followed by extras for a 'high' riskLevel and
+     * for the presence of the 'root-cause-review' and 'security-threat-model'
+     * gates (matched by id, regardless of their `required` flag).
+     */
+    if (gates.length === 0)
+        return ['No evaluator gate required; keep lightweight verification evidence for low-risk work.'];
+    const recommendations = ['Record evaluator evidence before promoting reasoning-heavy implementation or release claims.'];
+    if (riskLevel === 'high')
+        recommendations.push('Require reviewer sign-off for uncertainty, rejected alternatives, and rollback or mitigation path.');
+    if (gates.some(gate => gate.id === 'root-cause-review'))
+        recommendations.push('List competing root-cause hypotheses and why each was accepted or rejected.');
+    if (gates.some(gate => gate.id === 'security-threat-model'))
+        recommendations.push('Attach threat model or security-review evidence before guarded completion.');
+    return recommendations;
+}
 function recommendations(options) {
     const output = [];
     if (options.context.compiler?.estimatedTokenSavings) {
@@ -1161,6 +1812,12 @@ function recommendations(options) {
     if (options.governance.effectiveMode === 'critical') {
         output.push('Critical workflow mode requires security review and rollback or disable strategy.');
     }
+    if (options.evaluator.required) {
+        output.push(`Evaluator intelligence requires ${options.evaluator.gates.length} critique gate(s); record uncertainty and review evidence before promotion.`);
+    }
+    if (options.toolStrategy.summary.totalSteps > 0) {
+        output.push(`Tool strategy planner created ${options.toolStrategy.summary.totalSteps} cost/retry/fallback node(s); execute required nodes with evidence.`);
+    }
     return output;
 }
 function normalizeSkillTaskLevel(value) {