@hongmaple0820/scale-engine 0.26.0 → 0.27.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.en.md +71 -3
- package/README.md +71 -3
- package/dist/api/cli.js +269 -12
- package/dist/api/cli.js.map +1 -1
- package/dist/cli/phaseCommands.js +8 -8
- package/dist/cli/phaseCommands.js.map +1 -1
- package/dist/context/ContextBudget.d.ts +14 -0
- package/dist/context/ContextBudget.js +50 -14
- package/dist/context/ContextBudget.js.map +1 -1
- package/dist/context/ContextCompiler.d.ts +34 -0
- package/dist/context/ContextCompiler.js +120 -0
- package/dist/context/ContextCompiler.js.map +1 -0
- package/dist/eval/WorkflowEval.js +4 -6
- package/dist/eval/WorkflowEval.js.map +1 -1
- package/dist/governance/GovernanceRoi.d.ts +6 -1
- package/dist/governance/GovernanceRoi.js +32 -0
- package/dist/governance/GovernanceRoi.js.map +1 -1
- package/dist/guardrails/DependencyAuditor.js +38 -0
- package/dist/guardrails/DependencyAuditor.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/runtime/AiOsRuntime.d.ts +269 -0
- package/dist/runtime/AiOsRuntime.js +840 -0
- package/dist/runtime/AiOsRuntime.js.map +1 -0
- package/dist/runtime/index.d.ts +1 -0
- package/dist/runtime/index.js +1 -0
- package/dist/runtime/index.js.map +1 -1
- package/dist/skills/routing/SkillPlanner.js +91 -3
- package/dist/skills/routing/SkillPlanner.js.map +1 -1
- package/dist/skills/routing/SkillRoutingTypes.d.ts +17 -0
- package/dist/tools/SafeCommandRunner.d.ts +16 -0
- package/dist/tools/SafeCommandRunner.js +83 -0
- package/dist/tools/SafeCommandRunner.js.map +1 -0
- package/dist/workflow/UpgradeManager.d.ts +4 -1
- package/dist/workflow/UpgradeManager.js +26 -0
- package/dist/workflow/UpgradeManager.js.map +1 -1
- package/dist/workflow/gates/GateSystem.js +3 -9
- package/dist/workflow/gates/GateSystem.js.map +1 -1
- package/docs/AI_ENGINEERING_OS_POSITIONING.md +560 -0
- package/docs/CONTEXT_BUDGET.md +43 -1
- package/docs/DEPENDENCY_AUDIT.md +29 -0
- package/docs/MEMORY_FABRIC.md +2 -0
- package/docs/README.md +1 -0
- package/docs/SKILL_RADAR.md +13 -0
- package/package.json +9 -2
|
@@ -0,0 +1,840 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from 'node:fs';
|
|
2
|
+
import { dirname, isAbsolute, join, resolve } from 'node:path';
|
|
3
|
+
import { buildContextPack, scanContextBudget, } from '../context/ContextBudget.js';
|
|
4
|
+
import { createGovernanceRoiReport, } from '../governance/GovernanceRoi.js';
|
|
5
|
+
import { evaluateProgressiveGovernance, } from '../governance/ProgressiveGovernance.js';
|
|
6
|
+
import { MemoryFabric, recallMemoryProviders, } from '../memory/index.js';
|
|
7
|
+
import { createSkillPlan, loadSkillRoutingPolicy, } from '../skills/routing/index.js';
|
|
8
|
+
import { runSafeCommand } from '../tools/SafeCommandRunner.js';
|
|
9
|
+
import { SCALE_ENGINE_VERSION } from '../version.js';
|
|
10
|
+
import { RuntimeEvidenceLedger } from './RuntimeEvidenceLedger.js';
|
|
11
|
+
/**
 * Build the unified AI OS plan for one task.
 *
 * In order, this evaluates progressive governance, scans the context budget,
 * builds the context pack, recalls provider-backed memory, builds a memory
 * context pack, creates the skill plan, derives the adaptive workflow and the
 * governance ROI report, and finally bundles everything into one plan object.
 *
 * @param {object} input - Plan request. Reads `projectDir` (defaults to the
 *   current working directory), `scaleDir` (defaults to `.scale`), `task`,
 *   `taskId`, `level`, `files`, `services`, `budget` (defaults to 8000),
 *   `requestedMode`, `memoryTopK` and `knowledgeBase`.
 * @returns {Promise<object>} Plan with `version`, `generatedAt`, `task`,
 *   `governance`, `adaptiveWorkflow`, `context`, `memory`, `skillPlan`, `roi`
 *   and `recommendations`.
 */
export async function createAiOsPlan(input) {
    const projectDir = resolve(input.projectDir ?? process.cwd());
    const scaleDir = input.scaleDir ?? '.scale';
    const level = normalizeSkillTaskLevel(input.level);
    const files = input.files ?? [];
    const services = input.services ?? [];
    const { task, taskId } = input;
    const budget = input.budget ?? 8_000;
    const location = { projectDir, scaleDir };

    const governance = evaluateProgressiveGovernance({
        task,
        changedFiles: files,
        requestedMode: input.requestedMode,
    });
    const contextBudget = scanContextBudget({ ...location, maxTaskTokens: budget });
    const context = buildContextPack({ ...location, task, taskId, level, files, budget });

    // The recall query is the task text plus the space-joined file list; empty parts are dropped.
    const recallQuery = [task, files.join(' ')].filter(Boolean).join('\n');
    const memoryRecall = await recallMemoryProviders({
        ...location,
        query: recallQuery,
        task,
        files,
        limit: input.memoryTopK ?? 5,
    });

    // The memory context pack gets half of the task budget, but never less than one token.
    const fabric = new MemoryFabric({ ...location, knowledgeBase: input.knowledgeBase });
    const memoryPack = await fabric.createContextPack({
        task,
        taskId,
        level,
        files,
        budgetTokens: Math.max(1, Math.floor(budget / 2)),
        knowledgeTopK: input.memoryTopK,
    });

    const skillPolicy = loadSkillRoutingPolicy(projectDir, scaleDir);
    const skillPlan = createSkillPlan({
        // Skill planning always receives an id; a timestamp-based one is synthesized when none was given.
        taskId: taskId ?? `AIOS-${Date.now()}`,
        taskName: task,
        description: task,
        level,
        files,
        services,
        policy: skillPolicy,
    });
    const adaptiveWorkflow = createAdaptiveWorkflow(governance, skillPlan);
    const roi = createGovernanceRoiReport({
        taskId,
        contextBudget,
        contextPack: context,
        governance,
        memoryRecall,
        skillPlan,
    });

    const { providerOrder, selectedProviders, fallbackUsed, items, warnings } = memoryRecall;
    return {
        version: SCALE_ENGINE_VERSION,
        generatedAt: new Date().toISOString(),
        task: { taskId, task, level, files, services },
        governance,
        adaptiveWorkflow,
        context,
        memory: {
            providerOrder,
            selectedProviders,
            fallbackUsed,
            items,
            warnings,
            contextPack: memoryPack,
        },
        skillPlan,
        roi,
        recommendations: recommendations({ governance, context, memoryRecall, skillPlan }),
    };
}
|
|
99
|
+
/**
 * Create an AI OS run: build the plan, derive run steps, optionally execute
 * guarded verification commands, and write the run report to disk.
 *
 * Verification commands are only executed when `input.mode` is `'guarded'`;
 * the default mode is `'dry-run'`. The run is `'blocked'` as soon as any step
 * is blocked, otherwise `'ready'`.
 *
 * @param {object} input - Same fields as `createAiOsPlan`, plus `mode`,
 *   `verificationCommands`, `commandTimeoutMs` and `allowShell`.
 * @returns {Promise<object>} The run report that was written to
 *   `artifacts.runReport`.
 */
export async function createAiOsRun(input) {
    const projectDir = resolve(input.projectDir ?? process.cwd());
    const scaleDir = input.scaleDir ?? '.scale';
    const mode = input.mode ?? 'dry-run';
    const isGuarded = mode === 'guarded';

    const plan = await createAiOsPlan({ ...input, projectDir, scaleDir });
    const generatedAt = new Date().toISOString();
    // Runs without a task id get a timestamp-based report name.
    const reportTaskId = plan.task.taskId ?? `AIOS-RUN-${Date.now()}`;
    const runReportPath = resolveRunReportPath(projectDir, scaleDir, reportTaskId);

    const steps = buildRunSteps(plan);
    // runGuardedVerification appends verification steps to `steps` and may update the runtime-evidence step.
    const verification = await runGuardedVerification({
        projectDir,
        scaleDir,
        plan,
        steps,
        commands: input.verificationCommands ?? [],
        timeout: input.commandTimeoutMs,
        allowShell: input.allowShell,
        enabled: isGuarded,
    });

    const failureCandidates = buildFailureLearningCandidates(plan, steps);
    const evidence = summarizeRunEvidence(steps);
    const hasBlockedStep = steps.some(step => step.status === 'blocked');
    const status = hasBlockedStep ? 'blocked' : 'ready';
    const learningStatus = failureCandidates.length > 0 ? 'candidate-created' : 'idle';

    const report = {
        version: SCALE_ENGINE_VERSION,
        generatedAt,
        mode,
        dryRun: mode === 'dry-run',
        status,
        plan,
        steps,
        evidence,
        verification,
        failureLearning: {
            status: learningStatus,
            candidates: failureCandidates,
        },
        artifacts: {
            runReport: runReportPath,
        },
        nextActions: buildRunNextActions(steps, mode),
    };
    writeAiOsRunReport(runReportPath, report);
    return report;
}
|
|
142
|
+
/**
 * Aggregate the stored AI OS run reports into a dashboard.
 *
 * Reads every run report from the runs directory, summarizes counts across
 * all of them, and lists the most recent runs (newest first).
 *
 * Aggregation is tolerant of partially populated reports: a report that lacks
 * `failureLearning.candidates`, `verification.commands` or `evidence.pending`
 * contributes zero to the matching counter instead of throwing, and a report
 * with an unparsable `generatedAt` sorts as the oldest.
 * NOTE(review): `toDashboardRunSummary` is defined outside this block and may
 * have its own expectations about report shape — confirm.
 *
 * @param {object} [input] - Reads `projectDir` (defaults to the current working
 *   directory), `scaleDir` (defaults to `.scale`) and `limit` (defaults to 10).
 * @returns {object} Dashboard with `version`, `generatedAt`, `runsDir`,
 *   `summary`, `health`, `latestRuns`, `recommendations` and `warnings`.
 */
export function createAiOsDashboard(input = {}) {
    const projectDir = resolve(input.projectDir ?? process.cwd());
    const scaleDir = input.scaleDir ?? '.scale';
    const runsDir = resolveRunsDir(projectDir, scaleDir);
    const warnings = [];
    const reports = readAiOsRunReports(runsDir, warnings);

    // Treat a missing or non-array field as an empty list so one odd report cannot break the dashboard.
    const listOf = (value) => (Array.isArray(value) ? value : []);
    // Unparsable timestamps become 0 so the comparator never returns NaN.
    const timestampOf = (report) => {
        const parsed = Date.parse(report.generatedAt);
        return Number.isNaN(parsed) ? 0 : parsed;
    };
    const countWhere = (predicate) => reports.filter(predicate).length;
    const sumOf = (measure) => reports.reduce((sum, report) => sum + measure(report), 0);

    // Sort a copy: `reports` is still used for the summary below.
    const latestRuns = [...reports]
        .sort((a, b) => timestampOf(b) - timestampOf(a))
        .slice(0, input.limit ?? 10)
        .map(toDashboardRunSummary);

    const summary = {
        totalRuns: reports.length,
        readyRuns: countWhere(report => report.status === 'ready'),
        blockedRuns: countWhere(report => report.status === 'blocked'),
        dryRunRuns: countWhere(report => report.mode === 'dry-run'),
        guardedRuns: countWhere(report => report.mode === 'guarded'),
        verificationCommands: sumOf(report => listOf(report.verification?.commands).length),
        failedVerificationCommands: sumOf(report => listOf(report.verification?.commands)
            .filter(command => command.status === 'failed').length),
        pendingEvidence: sumOf(report => listOf(report.evidence?.pending).length),
        failureLearningCandidates: sumOf(report => listOf(report.failureLearning?.candidates).length),
    };
    const health = summarizeDashboardHealth(summary);

    return {
        version: SCALE_ENGINE_VERSION,
        generatedAt: new Date().toISOString(),
        runsDir,
        summary,
        health,
        latestRuns,
        recommendations: dashboardRecommendations(summary),
        warnings,
    };
}
|
|
175
|
+
/**
 * Run the default benchmark scenarios through the planner and persist the
 * resulting benchmark report.
 *
 * Scenarios are planned one after another; each plan is reduced to a small
 * metrics record. The report also embeds a fresh dashboard snapshot.
 *
 * @param {object} [input] - Reads `projectDir` (defaults to the current working
 *   directory), `scaleDir` (defaults to `.scale`) and `budget` (forwarded to
 *   `defaultBenchmarkScenarios`).
 * @returns {Promise<object>} The benchmark report that was written to
 *   `artifacts.benchmarkReport`.
 */
export async function createAiOsBenchmark(input = {}) {
    const projectDir = resolve(input.projectDir ?? process.cwd());
    const scaleDir = input.scaleDir ?? '.scale';
    const scenarios = defaultBenchmarkScenarios(input.budget);

    // Reduce one scenario's plan to the metrics recorded in the report.
    const toResult = (scenario, plan) => {
        const skillSteps = plan.skillPlan.executionPlan.steps;
        return {
            id: scenario.id,
            task: scenario.task,
            level: scenario.level,
            governanceMode: plan.governance.effectiveMode,
            metrics: {
                estimatedTokens: plan.context.totalEstimatedTokens,
                budget: plan.context.task.budget,
                estimatedTokenSavings: plan.context.compiler?.estimatedTokenSavings ?? 0,
                memoryItems: plan.memory.items.length,
                selectedProviders: plan.memory.selectedProviders,
                skillSteps: skillSteps.length,
                requiredSkillSteps: skillSteps.filter(step => step.required).length,
                gates: plan.adaptiveWorkflow.gates.length,
                roiModules: plan.roi.modules.length,
            },
        };
    };

    const results = [];
    for (const scenario of scenarios) {
        const { id, task, level, files, services, budget } = scenario;
        const plan = await createAiOsPlan({
            projectDir,
            scaleDir,
            taskId: `BENCH-${id}`,
            task,
            level,
            files,
            services,
            budget,
        });
        results.push(toResult(scenario, plan));
    }

    const summary = summarizeBenchmark(results);
    const generatedAt = new Date().toISOString();
    const benchmarkReport = resolveBenchmarkReportPath(projectDir, scaleDir);
    const report = {
        version: SCALE_ENGINE_VERSION,
        generatedAt,
        scenarios: results,
        summary,
        dashboard: createAiOsDashboard({ projectDir, scaleDir }),
        artifacts: {
            benchmarkReport,
        },
        recommendations: benchmarkRecommendations(summary),
    };
    writeAiOsBenchmarkReport(benchmarkReport, report);
    return report;
}
|
|
226
|
+
/**
 * Ensure the AI OS runtime directory layout exists under the scale root and
 * write a migration report describing what was created.
 *
 * The scale root is resolved through `resolveScaleRoot`, the same helper
 * `createAiOsDoctor` uses, so both commands agree on where the runtime lives.
 *
 * @param {object} [input] - Reads `projectDir` (defaults to the current working
 *   directory) and `scaleDir` (defaults to `.scale`; may be absolute).
 * @returns {object} Migration report. `status` is `'migrated'` when at least
 *   one directory had to be created and `'compatible'` otherwise; `created`
 *   and `existing` hold the directories as normalized project paths.
 */
export function createAiOsMigration(input = {}) {
    const projectDir = resolve(input.projectDir ?? process.cwd());
    const scaleDir = input.scaleDir ?? '.scale';
    const scaleRoot = resolveScaleRoot(projectDir, scaleDir);
    const requiredDirs = [
        join(scaleRoot, 'ai-os'),
        join(scaleRoot, 'ai-os', 'runs'),
        join(scaleRoot, 'ai-os', 'benchmarks'),
        join(scaleRoot, 'ai-os', 'migrations'),
    ];
    const created = [];
    const existing = [];
    for (const dir of requiredDirs) {
        // The existence check decides which bucket the directory is reported in.
        if (existsSync(dir)) {
            existing.push(normalizeProjectPath(projectDir, dir));
            continue;
        }
        mkdirSync(dir, { recursive: true });
        created.push(normalizeProjectPath(projectDir, dir));
    }
    const migrationReport = join(scaleRoot, 'ai-os', 'migrations', 'migration.json');
    const report = {
        version: SCALE_ENGINE_VERSION,
        generatedAt: new Date().toISOString(),
        status: created.length > 0 ? 'migrated' : 'compatible',
        scaleRoot,
        created,
        existing,
        files: {
            migrationReport,
        },
        warnings: [],
        nextActions: created.length > 0
            ? ['Run `scale ai-os run --dry-run --json` to create the first AI OS runtime report.']
            : ['AI OS runtime directories are compatible; continue with run, dashboard, or benchmark commands.'],
    };
    writeFileSync(migrationReport, JSON.stringify(report, null, 2), 'utf-8');
    return report;
}
|
|
265
|
+
/**
 * Diagnose the AI OS runtime for a project.
 *
 * Runs four checks — runtime directories, run history, dashboard health and
 * benchmark evidence — and rolls them up into an overall status: `'blocked'`
 * if any check is blocked, else `'warning'` if any check warns, else `'ready'`.
 *
 * @param {object} [input] - Reads `projectDir` (defaults to the current working
 *   directory), `scaleDir` (defaults to `.scale`), `benchmarkMaxAgeHours`
 *   (defaults to 24) and `lang` (defaults to `'en'`).
 * @returns {object} Doctor report with `status`, `dashboard`, `benchmark`,
 *   `checks`, `summary`, `warnings` and `nextActions`.
 */
export function createAiOsDoctor(input = {}) {
    const projectDir = resolve(input.projectDir ?? process.cwd());
    const scaleDir = input.scaleDir ?? '.scale';
    const scaleRoot = resolveScaleRoot(projectDir, scaleDir);
    const benchmarkMaxAgeHours = input.benchmarkMaxAgeHours ?? 24;
    const lang = input.lang ?? 'en';
    const warnings = [];
    const dashboard = createAiOsDashboard({ projectDir, scaleDir });
    const benchmark = inspectBenchmarkReport(projectDir, scaleDir, benchmarkMaxAgeHours, warnings);

    const requiredDirs = [
        join(scaleRoot, 'ai-os'),
        join(scaleRoot, 'ai-os', 'runs'),
        join(scaleRoot, 'ai-os', 'benchmarks'),
        join(scaleRoot, 'ai-os', 'migrations'),
    ];
    const toProjectPath = (dir) => normalizeProjectPath(projectDir, dir);
    const missingDirs = requiredDirs.filter(dir => !existsSync(dir)).map(dir => toProjectPath(dir));
    const dirsComplete = missingDirs.length === 0;
    const { totalRuns, guardedRuns } = dashboard.summary;
    const hasRuns = totalRuns > 0;
    const { health } = dashboard;

    let healthCheckStatus = 'warning';
    if (health.status === 'blocked') {
        healthCheckStatus = 'blocked';
    }
    else if (health.status === 'healthy') {
        healthCheckStatus = 'passed';
    }

    let benchmarkCheckStatus = 'warning';
    if (benchmark.status === 'fresh') {
        benchmarkCheckStatus = 'passed';
    }
    else if (benchmark.status === 'invalid') {
        benchmarkCheckStatus = 'blocked';
    }

    const checks = [
        {
            id: 'ai-os-runtime-dirs',
            title: 'AI OS runtime directories',
            status: dirsComplete ? 'passed' : 'blocked',
            summary: dirsComplete
                ? 'Required AI OS runtime directories exist.'
                : `Missing AI OS runtime directories: ${missingDirs.join(', ')}.`,
            evidence: dirsComplete ? requiredDirs.map(dir => toProjectPath(dir)) : missingDirs,
        },
        {
            id: 'ai-os-run-history',
            title: 'AI OS run history',
            status: hasRuns ? 'passed' : 'warning',
            summary: hasRuns
                ? `${totalRuns} run report(s), ${guardedRuns} guarded.`
                : 'No AI OS run reports found yet.',
            evidence: dashboard.latestRuns.map(run => run.runReport),
        },
        {
            id: 'ai-os-dashboard-health',
            title: 'AI OS dashboard health',
            status: healthCheckStatus,
            summary: `${health.status} (${health.score}): ${health.reasons.join('; ')}`,
            evidence: health.reasons,
        },
        {
            id: 'ai-os-benchmark',
            title: 'AI OS benchmark evidence',
            status: benchmarkCheckStatus,
            summary: summarizeBenchmarkDoctor(benchmark),
            evidence: [benchmark.reportPath],
        },
    ];

    const tally = (wanted) => checks.filter(check => check.status === wanted).length;
    const summary = {
        totalChecks: checks.length,
        passedChecks: tally('passed'),
        warningChecks: tally('warning'),
        blockedChecks: tally('blocked'),
    };

    let status = 'ready';
    if (summary.blockedChecks > 0) {
        status = 'blocked';
    }
    else if (summary.warningChecks > 0) {
        status = 'warning';
    }

    return {
        version: SCALE_ENGINE_VERSION,
        generatedAt: new Date().toISOString(),
        status,
        projectDir,
        scaleRoot,
        dashboard,
        benchmark,
        checks,
        summary,
        // Benchmark inspection warnings come first, followed by the dashboard's own.
        warnings: [...warnings, ...dashboard.warnings],
        nextActions: aiOsDoctorNextActions({ status, checks, dashboard, benchmark, lang }),
    };
}
|
|
340
|
+
/**
 * Turn a plan into the ordered list of run steps.
 *
 * Steps are keyed by id: three fixed steps that are already `'passed'`
 * (runtime plan, context compiler, memory recall), one `'planned'` step per
 * adaptive-workflow gate whose id is not taken yet, one `'planned'` step per
 * skill step (id prefixed with `skill:`), and a final optional
 * failure-learning step.
 *
 * @param {object} plan - Plan as produced by `createAiOsPlan`.
 * @returns {object[]} Steps in insertion order.
 */
function buildRunSteps(plan) {
    const registry = new Map();
    // Re-registering an id replaces the step but keeps its original position.
    const register = (entry) => {
        registry.set(entry.id, entry);
    };
    const skillSteps = plan.skillPlan.executionPlan.steps;
    const tokenSavings = plan.context.compiler?.estimatedTokenSavings ?? 0;

    register({
        id: 'runtime-plan',
        kind: 'plan',
        title: 'Create unified AI OS runtime plan',
        status: 'passed',
        required: true,
        summary: `Governance mode ${plan.governance.effectiveMode}; ${skillSteps.length} skill step(s).`,
        evidence: ['governance', 'context', 'memory', 'skillPlan', 'roi'],
    });
    register({
        id: 'context-compiler',
        kind: 'context',
        title: 'Compile task context',
        status: 'passed',
        required: true,
        summary: `${plan.context.totalEstimatedTokens}/${plan.context.task.budget} estimated tokens; saved ${tokenSavings}.`,
        evidence: ['context.compiler', 'context.includedSections', 'context.omittedSections'],
        dependsOn: ['runtime-plan'],
    });
    register({
        id: 'memory-provider-recall',
        kind: 'memory',
        title: 'Recall provider-backed memory',
        status: 'passed',
        required: true,
        summary: `${plan.memory.items.length} recalled item(s); providers ${plan.memory.providerOrder.join(' -> ')}.`,
        evidence: ['memory.providerOrder', 'memory.selectedProviders', 'memory.items'],
        dependsOn: ['runtime-plan'],
    });

    const { gates, strategy, mode } = plan.adaptiveWorkflow;
    for (const gate of gates) {
        // A gate never overrides a step that already owns the same id.
        if (!registry.has(gate)) {
            register({
                id: gate,
                kind: gate === 'runtime-evidence' ? 'evidence' : 'gate',
                title: `Satisfy ${gate} gate`,
                status: 'planned',
                required: true,
                summary: `Required by ${strategy} in ${mode} mode.`,
                evidence: [`gate.${gate}`],
                dependsOn: ['runtime-plan'],
            });
        }
    }

    for (const { id, kind, required, reason, fallback, evidenceRequired } of skillSteps) {
        register({
            id: `skill:${id}`,
            kind: 'skill',
            title: `${kind}: ${id}`,
            status: 'planned',
            required,
            summary: `${reason} Fallback: ${fallback}.`,
            evidence: [evidenceRequired],
            dependsOn: ['skill-evidence'],
        });
    }

    register({
        id: 'failure-learning',
        kind: 'learning',
        title: 'Prepare failure learning settlement',
        status: 'planned',
        required: false,
        summary: 'Create lesson or rule candidates only when a gate, verification step, or evidence requirement fails.',
        evidence: ['failureLearning.candidates'],
        dependsOn: ['runtime-evidence'],
    });

    return Array.from(registry.values());
}
|
|
410
|
+
/**
 * Execute the configured verification commands and record each outcome as
 * runtime evidence.
 *
 * Nothing is executed when verification is disabled or no commands were
 * given; in that case `allPassed` is true only if the command list is empty.
 * Otherwise each command runs in turn through `runSafeCommand`; a thrown error
 * is treated as exit code 1 with the error message as stderr. For every
 * command one evidence record is written to the ledger, one entry is added to
 * the result, and one `verify-command:N` step is appended to `options.steps`.
 * If a `runtime-evidence` step exists it is updated in place with the overall
 * outcome.
 *
 * @param {object} options - Reads `enabled`, `commands`, `projectDir`,
 *   `scaleDir`, `plan`, `steps` (mutated), `timeout` (defaults to 120000 ms)
 *   and `allowShell`.
 * @returns {Promise<{commands: object[], allPassed: boolean}>}
 */
async function runGuardedVerification(options) {
    const { commands } = options;
    const nothingToRun = !options.enabled || commands.length === 0;
    if (nothingToRun) {
        return { commands: [], allPassed: commands.length === 0 };
    }

    const ledger = new RuntimeEvidenceLedger({
        projectDir: options.projectDir,
        scaleDir: options.scaleDir,
    });
    const outcomes = [];

    for (const [position, command] of commands.entries()) {
        const ordinal = position + 1;
        const stepId = `verify-command:${ordinal}`;

        let result;
        try {
            result = await runSafeCommand(command, {
                cwd: options.projectDir,
                timeout: options.timeout ?? 120_000,
                allowShell: options.allowShell,
            });
        }
        catch (error) {
            // A runner failure is reported like a failed command rather than aborting the run.
            const message = error instanceof Error ? error.message : String(error);
            result = { exitCode: 1, stdout: '', stderr: message };
        }

        const passed = result.exitCode === 0;
        const outcome = passed ? 'passed' : 'failed';
        const evidence = ledger.record({
            taskId: options.plan.task.taskId,
            kind: 'command',
            title: `AI OS verification command ${ordinal}`,
            status: outcome,
            command,
            exitCode: result.exitCode,
            summary: passed
                ? `Guarded verification command passed: ${command}`
                : `Guarded verification command failed with exit code ${result.exitCode}: ${command}`,
            metadata: {
                aiOsRun: true,
                stepId,
                stdoutPreview: truncate(result.stdout),
                stderrPreview: truncate(result.stderr),
            },
        });

        outcomes.push({
            command,
            status: outcome,
            exitCode: result.exitCode,
            stdout: result.stdout,
            stderr: result.stderr,
            evidenceId: evidence.id,
        });
        options.steps.push({
            id: stepId,
            kind: 'evidence',
            title: `Run verification command ${ordinal}`,
            status: passed ? 'passed' : 'blocked',
            required: true,
            summary: passed
                ? `Command passed and runtime evidence was recorded as ${evidence.id}.`
                : `Command failed and runtime evidence was recorded as ${evidence.id}.`,
            evidence: [evidence.id],
            dependsOn: ['runtime-evidence'],
        });
    }

    const allPassed = outcomes.every(entry => entry.status === 'passed');
    const runtimeEvidenceStep = options.steps.find(step => step.id === 'runtime-evidence');
    if (runtimeEvidenceStep) {
        const failedCount = outcomes.filter(entry => entry.status === 'failed').length;
        runtimeEvidenceStep.status = allPassed ? 'passed' : 'blocked';
        runtimeEvidenceStep.summary = allPassed
            ? `${outcomes.length} guarded verification command(s) passed and were recorded as runtime evidence.`
            : `${failedCount}/${outcomes.length} guarded verification command(s) failed.`;
        runtimeEvidenceStep.evidence = outcomes.map(entry => entry.evidenceId);
    }

    return { commands: outcomes, allPassed };
}
|
|
489
|
+
/**
 * Group the evidence categories of all steps into required, produced and
 * pending lists (each de-duplicated, in first-seen order).
 *
 * - required: categories of every required step
 * - produced: categories of every step whose status is `'passed'`
 * - pending:  categories of required steps that are still `'planned'`
 *
 * @param {object[]} steps - Run steps.
 * @returns {{required: string[], produced: string[], pending: string[]}}
 */
function summarizeRunEvidence(steps) {
    const buckets = {
        required: new Set(),
        produced: new Set(),
        pending: new Set(),
    };
    const addAll = (bucket, items) => {
        items.forEach(item => bucket.add(item));
    };

    steps.forEach((step) => {
        const categories = evidenceCategory(step);
        const isRequired = Boolean(step.required);
        if (isRequired) {
            addAll(buckets.required, categories);
        }
        if (step.status === 'passed') {
            addAll(buckets.produced, categories);
            return;
        }
        if (isRequired && step.status === 'planned') {
            addAll(buckets.pending, categories);
        }
    });

    return {
        required: Array.from(buckets.required),
        produced: Array.from(buckets.produced),
        pending: Array.from(buckets.pending),
    };
}
|
|
513
|
+
/**
 * Map a run step to the evidence category (or categories) it contributes to.
 *
 * The three fixed step ids are matched first, then skill steps, then evidence
 * steps, then gates; anything else is categorized by its own id.
 *
 * @param {{id: string, kind: string}} step - Run step.
 * @returns {string[]} Single-element list with the category name.
 */
function evidenceCategory(step) {
    const { id, kind } = step;
    switch (id) {
        case 'runtime-plan':
            return ['ai-os-plan'];
        case 'context-compiler':
            return ['context-compiler'];
        case 'memory-provider-recall':
            return ['memory-provider-recall'];
        default:
            break;
    }
    // Skill classification wins over evidence classification when both would match.
    const isSkill = id === 'skill-evidence' || kind === 'skill';
    if (isSkill) {
        return ['skill-routing-engine'];
    }
    const isRuntimeEvidence = id === 'runtime-evidence' || kind === 'evidence';
    if (isRuntimeEvidence) {
        return ['runtime-evidence'];
    }
    return kind === 'gate' ? [`gate:${id}`] : [id];
}
|
|
528
|
+
/**
 * Create one failure-learning candidate per blocked step.
 *
 * When at least one `verify-command:` step is blocked, the aggregate
 * `runtime-evidence` step is left out so the same failure is not reported
 * twice.
 *
 * @param {object} plan - Plan; `plan.task.taskId` is used in the candidate id
 *   (the step id is used instead when no task id is set).
 * @param {object[]} steps - Run steps.
 * @returns {object[]} Non-promotable candidates referencing each blocked step.
 */
function buildFailureLearningCandidates(plan, steps) {
    const blockedSteps = steps.filter(step => step.status === 'blocked');
    const verificationBlocked = blockedSteps.some(step => step.id.startsWith('verify-command:'));
    const candidates = [];
    for (const step of blockedSteps) {
        const coveredByVerification = verificationBlocked && step.id === 'runtime-evidence';
        if (coveredByVerification) {
            continue;
        }
        const taskSegment = safePathSegment(plan.task.taskId ?? step.id);
        const stepSegment = safePathSegment(step.id);
        candidates.push({
            id: `AIO-FLC-${taskSegment}-${stepSegment}`,
            source: 'failed-step',
            title: `Failure learning candidate: ${step.title}`,
            summary: step.summary,
            recommendedAction: 'resolve-before-promotion',
            evidenceRefs: step.evidence,
            promotable: false,
        });
    }
    return candidates;
}
|
|
541
|
+
/**
 * List the follow-up actions for a run.
 *
 * Every required step that is still `'planned'` yields one action when it is
 * a skill, evidence or gate step; other kinds yield nothing. Dry runs get a
 * closing reminder to review the report before a guarded re-run.
 *
 * @param {object[]} steps - Run steps.
 * @param {string} mode - Run mode; only `'dry-run'` adds the reminder.
 * @returns {string[]} Action messages in step order.
 */
function buildRunNextActions(steps, mode) {
    const describe = (step) => {
        switch (step.kind) {
            case 'skill':
                return [`Execute required skill step "${step.title}" and attach evidence: ${step.evidence.join(', ')}.`];
            case 'evidence':
                return [`Record runtime evidence for "${step.id}" before claiming completion.`];
            case 'gate':
                return [`Satisfy gate "${step.id}" before ship.`];
            default:
                return [];
        }
    };
    const actions = steps
        .filter(step => step.status === 'planned' && step.required)
        .flatMap(step => describe(step));
    if (mode === 'dry-run') {
        actions.push('Re-run with guarded execution only after reviewing the dry-run report.');
    }
    return actions;
}
|
|
557
|
+
/**
 * Compute the path of the run report for a task: `<runs dir>/<sanitized task id>.json`.
 *
 * @param {string} projectDir - Project directory.
 * @param {string} scaleDir - Scale directory (relative to the project, or absolute).
 * @param {string} taskId - Task id; sanitized through `safePathSegment`.
 * @returns {string} Run report path.
 */
function resolveRunReportPath(projectDir, scaleDir, taskId) {
    const runsDir = resolveRunsDir(projectDir, scaleDir);
    const fileName = `${safePathSegment(taskId)}.json`;
    return join(runsDir, fileName);
}
|
|
560
|
+
/**
 * Write a run report as pretty-printed JSON, creating the parent directory
 * chain first.
 *
 * `mkdirSync` with `recursive: true` succeeds when the directory already
 * exists, so no prior existence check is needed (and none is made, which
 * avoids a check-then-create race).
 *
 * @param {string} path - Destination file path.
 * @param {object} report - JSON-serializable report.
 * @returns {void}
 */
function writeAiOsRunReport(path, report) {
    mkdirSync(dirname(path), { recursive: true });
    writeFileSync(path, JSON.stringify(report, null, 2), 'utf-8');
}
|
|
566
|
+
/**
 * Make a string usable as a single file-name segment.
 *
 * Every character other than ASCII letters, digits, `.`, `_` and `-` becomes
 * `-`, the result is cut to 120 characters, and an empty result falls back to
 * `'ai-os-run'`.
 *
 * @param {string} value - Raw identifier.
 * @returns {string} Sanitized segment, never empty.
 */
function safePathSegment(value) {
    const sanitized = value.replace(/[^a-zA-Z0-9._-]/g, '-');
    const clipped = sanitized.slice(0, 120);
    if (clipped === '') {
        return 'ai-os-run';
    }
    return clipped;
}
|
|
569
|
+
/**
 * Shorten a string to at most `max` characters, appending `...` when it was cut.
 *
 * @param {string} value - Text to shorten.
 * @param {number} [max=1000] - Maximum number of characters kept.
 * @returns {string} The original text, or its first `max` characters plus `...`.
 */
function truncate(value, max = 1000) {
    const exceedsLimit = value.length > max;
    if (!exceedsLimit) {
        return value;
    }
    const head = value.slice(0, max);
    return `${head}...`;
}
|
|
572
|
+
/**
 * Express a path relative to the project directory, using forward slashes.
 *
 * A path counts as inside the project only when the project directory is
 * followed by a path separator (or the two are equal). A plain prefix match is
 * not enough: `/proj-other/x` is not inside `/proj`. Paths outside the project
 * are returned absolute.
 *
 * @param {string} projectDir - Project directory.
 * @param {string} path - Path to normalize.
 * @returns {string} Project-relative path (`''` for the project directory
 *   itself), or the absolute path when it lies outside the project.
 */
function normalizeProjectPath(projectDir, path) {
    const toForwardSlashes = (value) => value.replace(/\\/g, '/');
    const root = resolve(projectDir);
    const target = resolve(path);
    if (target === root) {
        return '';
    }
    // Strip a trailing separator so a filesystem-root project (`/`, `C:\`) is handled like any other.
    const base = root.replace(/[\\/]+$/, '');
    const boundary = target.charAt(base.length);
    const isInside = target.startsWith(base) && (boundary === '/' || boundary === '\\');
    if (isInside) {
        return toForwardSlashes(target.slice(base.length + 1));
    }
    return toForwardSlashes(target);
}
|
|
580
|
+
/**
 * Resolve the scale root directory.
 *
 * @param {string} projectDir - Project directory.
 * @param {string} scaleDir - Scale directory; used as-is when absolute,
 *   otherwise joined onto the project directory.
 * @returns {string} Scale root path.
 */
function resolveScaleRoot(projectDir, scaleDir) {
    if (isAbsolute(scaleDir)) {
        return scaleDir;
    }
    return join(projectDir, scaleDir);
}
|
|
583
|
+
/**
 * Resolve the directory that holds AI OS run reports: `<scale root>/ai-os/runs`.
 *
 * @param {string} projectDir - Project directory.
 * @param {string} scaleDir - Scale directory (relative to the project, or absolute).
 * @returns {string} Runs directory path.
 */
function resolveRunsDir(projectDir, scaleDir) {
    const scaleRoot = resolveScaleRoot(projectDir, scaleDir);
    return join(scaleRoot, 'ai-os', 'runs');
}
|
|
586
|
+
/**
 * Resolve the path of the latest benchmark report:
 * `<scale root>/ai-os/benchmarks/latest.json`.
 *
 * @param {string} projectDir - Project directory.
 * @param {string} scaleDir - Scale directory (relative to the project, or absolute).
 * @returns {string} Benchmark report path.
 */
function resolveBenchmarkReportPath(projectDir, scaleDir) {
    const scaleRoot = resolveScaleRoot(projectDir, scaleDir);
    return join(scaleRoot, 'ai-os', 'benchmarks', 'latest.json');
}
|
|
589
|
+
/**
 * Inspect the stored benchmark report and classify it as missing, invalid,
 * fresh or stale.
 *
 * A report is invalid when it cannot be read or parsed, or when it lacks
 * `generatedAt` or a numeric `summary.scenarios`; a warning is pushed in
 * those cases. Age is taken from `generatedAt`; when that does not parse to a
 * finite number, the file's modification time is used instead.
 *
 * @param {string} projectDir - Project directory.
 * @param {string} scaleDir - Scale directory.
 * @param {number} maxAgeHours - Maximum age (inclusive) that still counts as fresh.
 * @param {string[]} warnings - Receives a message for invalid or unreadable reports.
 * @returns {object} `{ status, reportPath }`, plus `generatedAt`, `ageHours`
 *   and `scenarios` for fresh and stale reports.
 */
function inspectBenchmarkReport(projectDir, scaleDir, maxAgeHours, warnings) {
    const reportPath = resolveBenchmarkReportPath(projectDir, scaleDir);
    if (!existsSync(reportPath)) {
        return { status: 'missing', reportPath };
    }
    // Hours elapsed since a millisecond timestamp, rounded to two decimals.
    const hoursSince = (timestampMs) => Number(((Date.now() - timestampMs) / 3_600_000).toFixed(2));
    try {
        const parsed = JSON.parse(readFileSync(reportPath, 'utf-8'));
        const hasRequiredShape = Boolean(parsed.generatedAt)
            && Boolean(parsed.summary)
            && typeof parsed.summary.scenarios === 'number';
        if (!hasRequiredShape) {
            warnings.push(`Invalid AI OS benchmark report: ${reportPath}`);
            return { status: 'invalid', reportPath };
        }
        const reportedAgeHours = hoursSince(Date.parse(parsed.generatedAt));
        const fileAgeHours = hoursSince(statSync(reportPath).mtimeMs);
        const effectiveAgeHours = Number.isFinite(reportedAgeHours) ? reportedAgeHours : fileAgeHours;
        const isFresh = effectiveAgeHours <= maxAgeHours;
        return {
            status: isFresh ? 'fresh' : 'stale',
            reportPath,
            generatedAt: parsed.generatedAt,
            ageHours: effectiveAgeHours,
            scenarios: parsed.summary.scenarios,
        };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        warnings.push(`Unreadable AI OS benchmark report: ${reportPath} (${reason})`);
        return { status: 'invalid', reportPath };
    }
}
|
|
616
|
+
/**
 * Produce the one-line doctor summary for a benchmark inspection result.
 *
 * @param {object} benchmark - Reads `status`, `ageHours` and `scenarios`.
 * @returns {string} Fixed message for missing or invalid reports; otherwise
 *   the scenario count, age and status.
 */
function summarizeBenchmarkDoctor(benchmark) {
    const { status, ageHours, scenarios } = benchmark;
    switch (status) {
        case 'missing':
            return 'No AI OS benchmark report found.';
        case 'invalid':
            return 'AI OS benchmark report is invalid or unreadable.';
        default:
            break;
    }
    let age = 'unknown age';
    if (ageHours !== undefined) {
        age = `${ageHours}h old`;
    }
    return `${scenarios ?? 0} benchmark scenario(s); ${age}; status ${status}.`;
}
|
|
624
|
+
/**
 * Pick the localized next-action builder for the doctor report.
 *
 * @param {object} input - Doctor state; `input.lang === 'zh'` selects the
 *   Chinese messages, anything else the English ones.
 * @returns {string[]} Next-action messages.
 */
function aiOsDoctorNextActions(input) {
    const useChinese = input.lang === 'zh';
    return useChinese ? aiOsDoctorNextActionsZh(input) : aiOsDoctorNextActionsEn(input);
}
|
|
629
|
+
/**
 * Build the English next-action list for the doctor report.
 *
 * Each rule contributes its message when its condition holds; messages keep
 * the rule order.
 *
 * @param {object} input - Reads `checks`, `dashboard.summary`, `benchmark.status` and `status`.
 * @returns {string[]} Next-action messages.
 */
function aiOsDoctorNextActionsEn(input) {
    const { checks, dashboard, benchmark, status } = input;
    const runtimeDirsBlocked = checks.some(check => check.id === 'ai-os-runtime-dirs' && check.status === 'blocked');
    const benchmarkNeedsRun = benchmark.status === 'missing' || benchmark.status === 'stale';
    const rules = [
        [runtimeDirsBlocked, 'Run `scale ai-os migrate --json` before using the AI OS beta runtime.'],
        [dashboard.summary.totalRuns === 0, 'Run `scale ai-os run --dry-run --json` to create the first AI OS run report.'],
        [dashboard.summary.blockedRuns > 0, 'Resolve blocked AI OS runs before claiming the project is ready.'],
        [benchmarkNeedsRun, 'Run `scale ai-os benchmark --json` before release or milestone review.'],
        [status === 'ready', 'AI OS beta runtime is ready for guarded project tasks.'],
    ];
    return rules
        .filter(([applies]) => applies)
        .map(([, message]) => message);
}
|
|
647
|
+
/**
 * Build the Chinese list of follow-up actions for the AI OS doctor.
 *
 * @param {object} input - Doctor state; reads `checks`, `dashboard.summary`,
 *   `benchmark.status` and `status`.
 * @returns {string[]} Applicable action messages, in a fixed order.
 */
function aiOsDoctorNextActionsZh(input) {
    const collected = [];
    // Append a message only when its condition holds.
    const addWhen = (applies, message) => {
        if (applies) {
            collected.push(message);
        }
    };
    const dirsBlocked = input.checks.some(entry => entry.id === 'ai-os-runtime-dirs' && entry.status === 'blocked');
    addWhen(dirsBlocked, '先运行 `scale ai-os migrate --json`,再接入 AI OS beta runtime。');
    addWhen(input.dashboard.summary.totalRuns === 0, '运行 `scale ai-os run --dry-run --json` 生成第一份 AI OS 运行报告。');
    addWhen(input.dashboard.summary.blockedRuns > 0, '先处理 blocked 的 AI OS run,再声明项目运行态就绪。');
    addWhen(input.benchmark.status === 'missing' || input.benchmark.status === 'stale', '发版或阶段验收前运行 `scale ai-os benchmark --json`。');
    addWhen(input.status === 'ready', 'AI OS beta runtime 已可用于 guarded 项目任务。');
    return collected;
}
|
|
665
|
+
/**
 * Write a benchmark report to disk as two-space-indented JSON.
 *
 * @param {string} path - Destination file path.
 * @param {*} report - Value serialized with `JSON.stringify`.
 * @returns {void}
 */
function writeAiOsBenchmarkReport(path, report) {
    const parentDir = dirname(path);
    const parentMissing = Boolean(parentDir) && !existsSync(parentDir);
    if (parentMissing) {
        // Create the whole directory chain before the file is written.
        mkdirSync(parentDir, { recursive: true });
    }
    const serialized = JSON.stringify(report, null, 2);
    writeFileSync(path, serialized, 'utf-8');
}
|
|
671
|
+
/**
 * Load every `.json` run report found directly inside a runs directory.
 *
 * Files that cannot be read or parsed, and parsed values lacking `plan`,
 * `evidence` or `verification`, are left out and reported through `warnings`.
 *
 * @param {string} runsDir - Directory to scan; a non-existent directory yields `[]`.
 * @param {string[]} warnings - Array that receives one message per ignored file.
 * @returns {object[]} Parsed reports that passed the presence checks.
 */
function readAiOsRunReports(runsDir, warnings) {
    const accepted = [];
    if (!existsSync(runsDir)) {
        return accepted;
    }
    for (const entry of readdirSync(runsDir)) {
        if (!entry.endsWith('.json')) {
            continue;
        }
        const reportPath = join(runsDir, entry);
        let candidate;
        try {
            candidate = JSON.parse(readFileSync(reportPath, 'utf-8'));
        }
        catch (error) {
            const detail = error instanceof Error ? error.message : String(error);
            warnings.push(`Ignored unreadable AI OS run report: ${reportPath} (${detail})`);
            continue;
        }
        // A usable report must carry all three top-level sections.
        const hasSections = Boolean(candidate && candidate.plan && candidate.evidence && candidate.verification);
        if (hasSections) {
            accepted.push(candidate);
        }
        else {
            warnings.push(`Ignored invalid AI OS run report: ${reportPath}`);
        }
    }
    return accepted;
}
|
|
692
|
+
/**
 * Flatten a run report into the compact row used by the dashboard.
 *
 * Nested sections are read defensively: a report that lacks `plan.task`,
 * `artifacts`, `failureLearning`, `verification.commands` or
 * `evidence.pending` produces `undefined` fields and zero counts rather
 * than throwing. Output for complete reports is unchanged.
 *
 * @param {object} report - Parsed AI OS run report.
 * @returns {object} Summary with `taskId`, `task`, `mode`, `status`,
 *   `generatedAt`, `runReport`, `verificationCommands`,
 *   `failedVerificationCommands`, `pendingEvidence` and
 *   `failureLearningCandidates`.
 */
function toDashboardRunSummary(report) {
    // Treat anything that is not an array as "no entries" so counts stay numeric.
    const listOf = value => (Array.isArray(value) ? value : []);
    const task = report.plan?.task;
    const commands = listOf(report.verification?.commands);
    return {
        taskId: task?.taskId,
        task: task?.task,
        mode: report.mode,
        status: report.status,
        generatedAt: report.generatedAt,
        runReport: report.artifacts?.runReport,
        verificationCommands: commands.length,
        failedVerificationCommands: commands.filter(command => command?.status === 'failed').length,
        pendingEvidence: listOf(report.evidence?.pending).length,
        failureLearningCandidates: listOf(report.failureLearning?.candidates).length,
    };
}
|
|
706
|
+
/**
 * Derive a health status, a 0-100 score and explanatory reasons from run totals.
 *
 * @param {object} summary - Totals; reads `totalRuns`, `readyRuns`, `blockedRuns`,
 *   `failedVerificationCommands` and `failureLearningCandidates`.
 * @returns {{status: string, score: number, reasons: string[]}} Status is one of
 *   `empty`, `blocked`, `attention` or `healthy`.
 */
function summarizeDashboardHealth(summary) {
    const { totalRuns, readyRuns, blockedRuns, failedVerificationCommands, failureLearningCandidates } = summary;
    if (totalRuns === 0) {
        return { status: 'empty', score: 0, reasons: ['No AI OS run reports found.'] };
    }
    // Each finding is reported only when its counter is positive.
    const findings = [
        [blockedRuns, `${blockedRuns} blocked AI OS run(s).`],
        [failedVerificationCommands, `${failedVerificationCommands} failed guarded verification command(s).`],
        [failureLearningCandidates, `${failureLearningCandidates} failure learning candidate(s) need review.`],
    ]
        .filter(([count]) => count > 0)
        .map(([, text]) => text);
    // Ready percentage, minus 10 per failed command and 5 per learning candidate, floored at 0.
    const readyPercent = (readyRuns / totalRuns) * 100;
    const rawScore = readyPercent - (failedVerificationCommands * 10) - (failureLearningCandidates * 5);
    const score = Math.max(0, Math.round(rawScore));
    if (blockedRuns === totalRuns) {
        return { status: 'blocked', score, reasons: findings };
    }
    if (findings.length > 0) {
        return { status: 'attention', score, reasons: findings };
    }
    return { status: 'healthy', score: 100, reasons: ['All AI OS runs are ready.'] };
}
|
|
724
|
+
/**
 * Turn dashboard totals into a list of recommended follow-ups.
 *
 * @param {object} summary - Totals; reads `totalRuns`, `blockedRuns`,
 *   `failedVerificationCommands`, `failureLearningCandidates` and `guardedRuns`.
 * @returns {string[]} Recommendations in a fixed order; with no runs, only the
 *   "create the first report" hint is returned.
 */
function dashboardRecommendations(summary) {
    if (summary.totalRuns === 0) {
        return ['Run `scale ai-os run --dry-run` to create the first AI OS execution report.'];
    }
    // Condition/advice pairs, listed in output order.
    const rules = [
        [summary.blockedRuns > 0, 'Resolve blocked AI OS run reports before promoting lessons or shipping.'],
        [summary.failedVerificationCommands > 0, 'Inspect failed guarded verification runtime evidence and fix the underlying command or code issue.'],
        [summary.failureLearningCandidates > 0, 'Review failure learning candidates before turning them into durable rules.'],
        [summary.guardedRuns === 0, 'Add guarded verification runs for at least one representative task to validate evidence flow.'],
    ];
    return rules.filter(([applies]) => applies).map(([, advice]) => advice);
}
|
|
740
|
+
/**
 * Return the built-in benchmark scenario set.
 *
 * @param {number} [budget=8_000] - Value copied into every scenario's `budget` field.
 * @returns {object[]} Three scenarios, each with `id`, `task`, `level`, `files`,
 *   `services` and `budget`.
 */
function defaultBenchmarkScenarios(budget = 8_000) {
    // Columns: id, task, level, files, services. Rebuilt on each call so callers get fresh arrays.
    const catalog = [
        [
            'docs-governance',
            'Update bilingual governance documentation and keep README, docs map, and strategy aligned',
            'M',
            ['README.md', 'README.en.md', 'docs/README.md', 'docs/AI_ENGINEERING_OS_POSITIONING.md'],
            ['docs'],
        ],
        [
            'security-code-change',
            'Harden auth token handling and verify runtime evidence for a security-sensitive code change',
            'L',
            ['src/auth/token.ts', 'src/runtime/AiOsRuntime.ts', 'tests/runtime/aiOsRuntime.test.ts'],
            ['runtime', 'security'],
        ],
        [
            'browser-ui-flow',
            'Verify a browser callback UI flow with screenshots, runtime evidence, and guarded workflow gates',
            'L',
            ['src/ui/callback.tsx', 'tests/api/aiOsCli.test.ts'],
            ['ui', 'browser'],
        ],
    ];
    return catalog.map(([id, task, level, files, services]) => ({ id, task, level, files, services, budget }));
}
|
|
768
|
+
/**
 * Aggregate per-scenario benchmark metrics into one summary object.
 *
 * @param {object[]} results - Scenario results; each contributes `metrics.budget`,
 *   `metrics.estimatedTokens`, `metrics.estimatedTokenSavings`, `metrics.memoryItems`,
 *   `metrics.skillSteps`, `metrics.requiredSkillSteps` and `governanceMode`.
 * @returns {object} Totals, the distinct governance modes in first-seen order, and
 *   `averageTokenUtilization` (estimated tokens / budget, 4 decimals, 0 when budget is 0).
 */
function summarizeBenchmark(results) {
    let totalBudget = 0;
    let totalEstimatedTokens = 0;
    let totalEstimatedTokenSavings = 0;
    let totalMemoryItems = 0;
    let totalSkillSteps = 0;
    let requiredSkillSteps = 0;
    const seenModes = new Set();
    for (const entry of results) {
        const metrics = entry.metrics;
        totalBudget += metrics.budget;
        totalEstimatedTokens += metrics.estimatedTokens;
        totalEstimatedTokenSavings += metrics.estimatedTokenSavings;
        totalMemoryItems += metrics.memoryItems;
        totalSkillSteps += metrics.skillSteps;
        requiredSkillSteps += metrics.requiredSkillSteps;
        seenModes.add(entry.governanceMode);
    }
    let averageTokenUtilization = 0;
    if (totalBudget > 0) {
        averageTokenUtilization = Number((totalEstimatedTokens / totalBudget).toFixed(4));
    }
    return {
        scenarios: results.length,
        totalEstimatedTokens,
        totalBudget,
        totalEstimatedTokenSavings,
        totalMemoryItems,
        totalSkillSteps,
        requiredSkillSteps,
        governanceModes: Array.from(seenModes),
        averageTokenUtilization,
    };
}
|
|
783
|
+
/**
 * Suggest follow-ups based on an aggregated benchmark summary.
 *
 * @param {object} summary - Reads `totalSkillSteps`, `averageTokenUtilization`
 *   and `governanceModes`.
 * @returns {string[]} Always starts with the release-notes reminder, followed by
 *   any conditional recommendations that apply.
 */
function benchmarkRecommendations(summary) {
    const advice = ['Use benchmark deltas in release notes only after comparing the same scenario set across versions.'];
    const noSkillSteps = summary.totalSkillSteps === 0;
    if (noSkillSteps) {
        advice.push('Skill routing did not produce steps; inspect skill policy detection.');
    }
    const utilizationHigh = summary.averageTokenUtilization > 0.9;
    if (utilizationHigh) {
        advice.push('Context utilization is high; lower budgets or improve relevance filtering before scaling.');
    }
    const modes = summary.governanceModes;
    const coversHighRisk = modes.includes('critical') || modes.includes('expanded');
    if (!coversHighRisk) {
        advice.push('Add at least one high-risk benchmark scenario before claiming adaptive governance coverage.');
    }
    return advice;
}
|
|
794
|
+
/**
 * Assemble the workflow descriptor (gates and exit criteria) for a task.
 *
 * @param {object} governance - Reads `effectiveMode` and `requiredBehaviors`.
 * @param {object} skillPlan - Reads `required` and `executionPlan.steps`.
 * @returns {object} Descriptor with `strategy`, `mode`, `requiredBehaviors`,
 *   ordered `gates` and fixed `exitCriteria`.
 */
function createAdaptiveWorkflow(governance, skillPlan) {
    // Skill evidence is gated when the plan is required or has any step.
    const wantsSkillEvidence = Boolean(skillPlan.required || skillPlan.executionPlan.steps.length > 0);
    const mode = governance.effectiveMode;
    const isCritical = mode === 'critical';
    const wantsImpactAnalysis = mode === 'expanded' || isCritical;
    const orderedGates = [
        'context-compiler',
        'memory-provider-recall',
        ...(wantsSkillEvidence ? ['skill-evidence'] : []),
        'runtime-evidence',
        ...(wantsImpactAnalysis ? ['impact-analysis'] : []),
        ...(isCritical ? ['security-review'] : []),
    ];
    const exitCriteria = [
        'Context compiler explains included and omitted sections.',
        'Memory recall records provider, score, and evidence paths.',
        'Skill plan lists required proof and fallback policy.',
        'Governance ROI states benefit and overhead before completion.',
    ];
    return {
        strategy: 'risk-adaptive-runtime-v1',
        mode,
        requiredBehaviors: governance.requiredBehaviors,
        gates: orderedGates,
        exitCriteria,
    };
}
|
|
818
|
+
/**
 * Collect task-level recommendations from the assembled plan inputs.
 *
 * @param {object} options - Reads `context.compiler?.estimatedTokenSavings`,
 *   `memoryRecall.items`, `skillPlan.executionPlan.steps` and
 *   `governance.effectiveMode`.
 * @returns {string[]} Recommendation sentences in a fixed order.
 */
function recommendations(options) {
    const notes = [];
    // A missing compiler section or a zero/absent savings figure adds nothing.
    const savedTokens = options.context.compiler?.estimatedTokenSavings;
    if (savedTokens) {
        notes.push(`Keep context compiler active; estimated savings ${savedTokens} tokens for this task pack.`);
    }
    const recalledCount = options.memoryRecall.items.length;
    if (recalledCount === 0) {
        notes.push('No memory recall result found; continue with local evidence and settle reusable knowledge after verification.');
    }
    const stepCount = options.skillPlan.executionPlan.steps.length;
    if (stepCount > 0) {
        notes.push(`Follow ${stepCount} skill routing step(s) and record evidence before ship.`);
    }
    const isCritical = options.governance.effectiveMode === 'critical';
    if (isCritical) {
        notes.push('Critical workflow mode requires security review and rollback or disable strategy.');
    }
    return notes;
}
|
|
834
|
+
/**
 * Canonicalize a task level to one of `S`, `M`, `L` or `CRITICAL`.
 *
 * The input is stringified, trimmed and upper-cased; `null`/`undefined` fall
 * back to `M`.
 *
 * @param {*} value - Raw task level.
 * @returns {string} The canonical level.
 * @throws {Error} When the normalized value is not one of the four levels.
 */
function normalizeSkillTaskLevel(value) {
    const candidate = String(value ?? 'M').trim().toUpperCase();
    switch (candidate) {
        case 'S':
        case 'M':
        case 'L':
        case 'CRITICAL':
            return candidate;
        default:
            throw new Error(`Invalid task level "${String(value)}"; expected S, M, L, or CRITICAL.`);
    }
}
|
|
840
|
+
//# sourceMappingURL=AiOsRuntime.js.map
|