@hongmaple0820/scale-engine 0.48.0 → 0.50.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. package/README.en.md +2 -2
  2. package/README.md +2 -2
  3. package/dist/agents/evidenceDiscipline.d.ts +7 -0
  4. package/dist/agents/evidenceDiscipline.js +21 -0
  5. package/dist/agents/evidenceDiscipline.js.map +1 -0
  6. package/dist/agents/profiles.js +8 -1
  7. package/dist/agents/profiles.js.map +1 -1
  8. package/dist/agents/types.d.ts +1 -0
  9. package/dist/api/DashboardHttpConfig.d.ts +28 -0
  10. package/dist/api/DashboardHttpConfig.js +110 -0
  11. package/dist/api/DashboardHttpConfig.js.map +1 -0
  12. package/dist/api/cli.js +102 -11
  13. package/dist/api/cli.js.map +1 -1
  14. package/dist/api/http.d.ts +1 -0
  15. package/dist/api/http.js +50 -0
  16. package/dist/api/http.js.map +1 -0
  17. package/dist/artifact/types.d.ts +64 -0
  18. package/dist/artifact/types.js.map +1 -1
  19. package/dist/bootstrap/DependencyBootstrap.d.ts +1 -0
  20. package/dist/bootstrap/DependencyBootstrap.js +14 -3
  21. package/dist/bootstrap/DependencyBootstrap.js.map +1 -1
  22. package/dist/cli/cortexApplyCommand.d.ts +26 -0
  23. package/dist/cli/cortexApplyCommand.js +74 -0
  24. package/dist/cli/cortexApplyCommand.js.map +1 -0
  25. package/dist/cli/cortexCandidateCommands.d.ts +42 -0
  26. package/dist/cli/cortexCandidateCommands.js +119 -0
  27. package/dist/cli/cortexCandidateCommands.js.map +1 -0
  28. package/dist/cli/cortexCommands.d.ts +51 -0
  29. package/dist/cli/cortexCommands.js +127 -13
  30. package/dist/cli/cortexCommands.js.map +1 -1
  31. package/dist/cli/engineBootstrap.d.ts +1 -1
  32. package/dist/cli/engineBootstrap.js +2 -0
  33. package/dist/cli/engineBootstrap.js.map +1 -1
  34. package/dist/cli/evalCommands.js +13 -1
  35. package/dist/cli/evalCommands.js.map +1 -1
  36. package/dist/cli/phaseCommands.d.ts +81 -1
  37. package/dist/cli/phaseCommands.js +465 -31
  38. package/dist/cli/phaseCommands.js.map +1 -1
  39. package/dist/cli/runtimeSkillCommands.js +12 -2
  40. package/dist/cli/runtimeSkillCommands.js.map +1 -1
  41. package/dist/cli/shieldCommands.d.ts +1 -0
  42. package/dist/cli/shieldCommands.js +20 -7
  43. package/dist/cli/shieldCommands.js.map +1 -1
  44. package/dist/cli/workflowEvidenceCommands.d.ts +120 -0
  45. package/dist/cli/workflowEvidenceCommands.js +228 -2
  46. package/dist/cli/workflowEvidenceCommands.js.map +1 -1
  47. package/dist/cortex/AutoFixEventObservations.d.ts +11 -0
  48. package/dist/cortex/AutoFixEventObservations.js +72 -0
  49. package/dist/cortex/AutoFixEventObservations.js.map +1 -0
  50. package/dist/cortex/GateEvidenceObservations.d.ts +22 -0
  51. package/dist/cortex/GateEvidenceObservations.js +179 -0
  52. package/dist/cortex/GateEvidenceObservations.js.map +1 -0
  53. package/dist/cortex/GovernanceMetrics.d.ts +2 -0
  54. package/dist/cortex/GovernanceMetrics.js +112 -22
  55. package/dist/cortex/GovernanceMetrics.js.map +1 -1
  56. package/dist/cortex/InstinctApplicationRecorder.d.ts +28 -0
  57. package/dist/cortex/InstinctApplicationRecorder.js +145 -0
  58. package/dist/cortex/InstinctApplicationRecorder.js.map +1 -0
  59. package/dist/cortex/InstinctCandidateAudit.d.ts +3 -0
  60. package/dist/cortex/InstinctCandidateAudit.js +39 -0
  61. package/dist/cortex/InstinctCandidateAudit.js.map +1 -0
  62. package/dist/cortex/InstinctCandidateReview.d.ts +32 -0
  63. package/dist/cortex/InstinctCandidateReview.js +125 -0
  64. package/dist/cortex/InstinctCandidateReview.js.map +1 -0
  65. package/dist/cortex/InstinctExtractor.d.ts +1 -0
  66. package/dist/cortex/InstinctExtractor.js +24 -17
  67. package/dist/cortex/InstinctExtractor.js.map +1 -1
  68. package/dist/cortex/InstinctRuntimeEvidence.d.ts +14 -0
  69. package/dist/cortex/InstinctRuntimeEvidence.js +120 -0
  70. package/dist/cortex/InstinctRuntimeEvidence.js.map +1 -0
  71. package/dist/cortex/InstinctStore.d.ts +50 -4
  72. package/dist/cortex/InstinctStore.js +262 -48
  73. package/dist/cortex/InstinctStore.js.map +1 -1
  74. package/dist/cortex/InstinctValidation.d.ts +9 -0
  75. package/dist/cortex/InstinctValidation.js +55 -0
  76. package/dist/cortex/InstinctValidation.js.map +1 -0
  77. package/dist/cortex/SessionInjector.d.ts +1 -0
  78. package/dist/cortex/SessionInjector.js +28 -8
  79. package/dist/cortex/SessionInjector.js.map +1 -1
  80. package/dist/dashboard/DashboardServer.d.ts +79 -0
  81. package/dist/dashboard/DashboardServer.js +330 -6
  82. package/dist/dashboard/DashboardServer.js.map +1 -1
  83. package/dist/dashboard/spa/app.js +515 -0
  84. package/dist/dashboard/spa/components/DataTable.js +53 -0
  85. package/dist/dashboard/spa/components/EventStream.js +66 -0
  86. package/dist/dashboard/spa/components/LoadingState.js +39 -0
  87. package/dist/dashboard/spa/components/MetricCard.js +30 -0
  88. package/dist/dashboard/spa/components/Panel.js +27 -0
  89. package/dist/dashboard/spa/components/StatusBadge.js +51 -0
  90. package/dist/dashboard/spa/i18n.js +767 -0
  91. package/dist/dashboard/spa/index.html +463 -0
  92. package/dist/dashboard/spa/pages/costs.js +522 -0
  93. package/dist/dashboard/spa/pages/documents.js +540 -0
  94. package/dist/dashboard/spa/pages/knowledge.js +457 -0
  95. package/dist/dashboard/spa/pages/monitoring.js +361 -0
  96. package/dist/dashboard/spa/pages/overview.js +301 -0
  97. package/dist/dashboard/spa/pages/topology-renderers.js +251 -0
  98. package/dist/dashboard/spa/pages/topology.js +370 -0
  99. package/dist/dashboard/spa/pages/workflow-renderers.js +239 -0
  100. package/dist/dashboard/spa/pages/workflow.js +217 -0
  101. package/dist/env/EnvironmentDoctor.js +12 -7
  102. package/dist/env/EnvironmentDoctor.js.map +1 -1
  103. package/dist/eval/BenchmarkPublisher.d.ts +2 -0
  104. package/dist/eval/BenchmarkPublisher.js +43 -0
  105. package/dist/eval/BenchmarkPublisher.js.map +1 -1
  106. package/dist/eval/WorkflowEval.d.ts +9 -0
  107. package/dist/eval/WorkflowEval.js +348 -2
  108. package/dist/eval/WorkflowEval.js.map +1 -1
  109. package/dist/guardrails/ast/confirmers.d.ts +18 -0
  110. package/dist/guardrails/ast/confirmers.js +69 -0
  111. package/dist/guardrails/ast/confirmers.js.map +1 -0
  112. package/dist/guardrails/ast/parse.d.ts +20 -0
  113. package/dist/guardrails/ast/parse.js +51 -0
  114. package/dist/guardrails/ast/parse.js.map +1 -0
  115. package/dist/memory/MemoryBrain.d.ts +13 -0
  116. package/dist/memory/MemoryBrain.js +47 -0
  117. package/dist/memory/MemoryBrain.js.map +1 -1
  118. package/dist/memory/MemoryFabric.d.ts +1 -0
  119. package/dist/memory/MemoryFabric.js +12 -8
  120. package/dist/memory/MemoryFabric.js.map +1 -1
  121. package/dist/memory/MemoryLearning.d.ts +1 -0
  122. package/dist/memory/MemoryLearning.js +6 -3
  123. package/dist/memory/MemoryLearning.js.map +1 -1
  124. package/dist/memory/MemoryProviders.d.ts +8 -1
  125. package/dist/memory/MemoryProviders.js +143 -29
  126. package/dist/memory/MemoryProviders.js.map +1 -1
  127. package/dist/output/HTMLDocumentRenderer.d.ts +9 -0
  128. package/dist/output/HTMLDocumentRenderer.js +19 -0
  129. package/dist/output/HTMLDocumentRenderer.js.map +1 -1
  130. package/dist/review/FreshContextVerifier.d.ts +35 -0
  131. package/dist/review/FreshContextVerifier.js +120 -0
  132. package/dist/review/FreshContextVerifier.js.map +1 -0
  133. package/dist/review/JsonLlmClient.d.ts +37 -0
  134. package/dist/review/JsonLlmClient.js +94 -0
  135. package/dist/review/JsonLlmClient.js.map +1 -0
  136. package/dist/review/LlmJudge.d.ts +61 -0
  137. package/dist/review/LlmJudge.js +167 -0
  138. package/dist/review/LlmJudge.js.map +1 -0
  139. package/dist/runtime/AiOsRuntime.d.ts +14 -1
  140. package/dist/runtime/AiOsRuntime.js +59 -3
  141. package/dist/runtime/AiOsRuntime.js.map +1 -1
  142. package/dist/runtime/RuntimeDoctor.js +3 -1
  143. package/dist/runtime/RuntimeDoctor.js.map +1 -1
  144. package/dist/runtime/RuntimeEvidenceLedger.d.ts +6 -0
  145. package/dist/runtime/RuntimeEvidenceLedger.js +52 -1
  146. package/dist/runtime/RuntimeEvidenceLedger.js.map +1 -1
  147. package/dist/runtime/SessionLedger.d.ts +2 -0
  148. package/dist/runtime/SessionLedger.js +4 -0
  149. package/dist/runtime/SessionLedger.js.map +1 -1
  150. package/dist/setup/SetupVerification.js +53 -5
  151. package/dist/setup/SetupVerification.js.map +1 -1
  152. package/dist/shield/PolicyCompiler.js +73 -12
  153. package/dist/shield/PolicyCompiler.js.map +1 -1
  154. package/dist/shield/ProtectedPaths.js +4 -2
  155. package/dist/shield/ProtectedPaths.js.map +1 -1
  156. package/dist/skills/SkillCatalog.d.ts +2 -0
  157. package/dist/skills/SkillCatalog.js +8 -0
  158. package/dist/skills/SkillCatalog.js.map +1 -1
  159. package/dist/skills/SkillDoctor.d.ts +19 -2
  160. package/dist/skills/SkillDoctor.js +163 -13
  161. package/dist/skills/SkillDoctor.js.map +1 -1
  162. package/dist/tools/SafeCommandRunner.d.ts +1 -0
  163. package/dist/tools/SafeCommandRunner.js +1 -0
  164. package/dist/tools/SafeCommandRunner.js.map +1 -1
  165. package/dist/tools/ToolCapabilityRegistry.js +25 -3
  166. package/dist/tools/ToolCapabilityRegistry.js.map +1 -1
  167. package/dist/tools/ToolOrchestrator.js +21 -0
  168. package/dist/tools/ToolOrchestrator.js.map +1 -1
  169. package/dist/version.d.ts +1 -1
  170. package/dist/version.js +1 -1
  171. package/dist/workflow/AgentLoopReadiness.d.ts +103 -0
  172. package/dist/workflow/AgentLoopReadiness.js +371 -0
  173. package/dist/workflow/AgentLoopReadiness.js.map +1 -0
  174. package/dist/workflow/BoundaryEnforcement.d.ts +60 -0
  175. package/dist/workflow/BoundaryEnforcement.js +182 -0
  176. package/dist/workflow/BoundaryEnforcement.js.map +1 -0
  177. package/dist/workflow/EcosystemReadinessGate.d.ts +46 -0
  178. package/dist/workflow/EcosystemReadinessGate.js +126 -0
  179. package/dist/workflow/EcosystemReadinessGate.js.map +1 -0
  180. package/dist/workflow/EngineeringStandards.js +67 -12
  181. package/dist/workflow/EngineeringStandards.js.map +1 -1
  182. package/dist/workflow/GateCatalog.js +21 -2
  183. package/dist/workflow/GateCatalog.js.map +1 -1
  184. package/dist/workflow/GovernanceTemplatePacks.js +2 -26
  185. package/dist/workflow/GovernanceTemplatePacks.js.map +1 -1
  186. package/dist/workflow/GovernanceTemplates.js +8 -1
  187. package/dist/workflow/GovernanceTemplates.js.map +1 -1
  188. package/dist/workflow/ProfileEnforcement.d.ts +7 -0
  189. package/dist/workflow/ProfileEnforcement.js +12 -0
  190. package/dist/workflow/ProfileEnforcement.js.map +1 -0
  191. package/dist/workflow/ReleaseDeploymentLedger.d.ts +63 -0
  192. package/dist/workflow/ReleaseDeploymentLedger.js +154 -0
  193. package/dist/workflow/ReleaseDeploymentLedger.js.map +1 -0
  194. package/dist/workflow/ReviewAnalyzer.js +50 -3
  195. package/dist/workflow/ReviewAnalyzer.js.map +1 -1
  196. package/dist/workflow/ReviewStore.d.ts +10 -0
  197. package/dist/workflow/ReviewStore.js.map +1 -1
  198. package/dist/workflow/SessionPreamble.d.ts +7 -0
  199. package/dist/workflow/SessionPreamble.js +48 -9
  200. package/dist/workflow/SessionPreamble.js.map +1 -1
  201. package/dist/workflow/SurfaceCoverage.d.ts +19 -0
  202. package/dist/workflow/SurfaceCoverage.js +57 -0
  203. package/dist/workflow/SurfaceCoverage.js.map +1 -0
  204. package/dist/workflow/VerificationCommands.d.ts +1 -0
  205. package/dist/workflow/VerificationCommands.js.map +1 -1
  206. package/dist/workflow/VerificationProfile.d.ts +5 -0
  207. package/dist/workflow/VerificationProfile.js +26 -0
  208. package/dist/workflow/VerificationProfile.js.map +1 -1
  209. package/dist/workflow/VerificationSchema.d.ts +3 -0
  210. package/dist/workflow/VerificationSchema.js +6 -0
  211. package/dist/workflow/VerificationSchema.js.map +1 -1
  212. package/dist/workflow/WorkflowEffectiveness.d.ts +97 -0
  213. package/dist/workflow/WorkflowEffectiveness.js +302 -0
  214. package/dist/workflow/WorkflowEffectiveness.js.map +1 -0
  215. package/dist/workflow/WorkflowEffectivenessRenderer.d.ts +2 -0
  216. package/dist/workflow/WorkflowEffectivenessRenderer.js +67 -0
  217. package/dist/workflow/WorkflowEffectivenessRenderer.js.map +1 -0
  218. package/dist/workflow/WorkflowEffectivenessScoring.d.ts +6 -0
  219. package/dist/workflow/WorkflowEffectivenessScoring.js +243 -0
  220. package/dist/workflow/WorkflowEffectivenessScoring.js.map +1 -0
  221. package/dist/workflow/gates/EnhancedGates.js +2 -0
  222. package/dist/workflow/gates/EnhancedGates.js.map +1 -1
  223. package/dist/workflow/gates/GateSystem.d.ts +16 -0
  224. package/dist/workflow/gates/GateSystem.js +208 -41
  225. package/dist/workflow/gates/GateSystem.js.map +1 -1
  226. package/dist/workflow/gates/MetaGovernanceGates.js +269 -8
  227. package/dist/workflow/gates/MetaGovernanceGates.js.map +1 -1
  228. package/dist/workflow/gates/TestIntegrityGate.d.ts +51 -0
  229. package/dist/workflow/gates/TestIntegrityGate.js +175 -0
  230. package/dist/workflow/gates/TestIntegrityGate.js.map +1 -0
  231. package/dist/workflow/types.d.ts +1 -1
  232. package/docs/guides/DEVELOPMENT_WORKFLOW.md +28 -0
  233. package/docs/reference/cli.md +2 -1
  234. package/docs/start/agent-governance-demo.md +1 -1
  235. package/docs/workflow/E2E_EXAMPLE.md +133 -0
  236. package/docs/workflow/README.md +7 -1
  237. package/docs/workflow/TEMPLATE_GUIDE.md +162 -0
  238. package/docs/workflow/templates/github-actions-scale-preflight.yml +4 -1
  239. package/docs/workflow/templates/plan.md +26 -0
  240. package/docs/workflow/templates/spec.md +28 -0
  241. package/package.json +7 -3
  242. package/scripts/workflow/run-vitest.mjs +123 -0
@@ -0,0 +1,120 @@
1
+ // SCALE Engine — Fresh-context verifier sub-agent (P2.2)
2
+ // An independent verification pass that judges whether a diff actually
3
+ // satisfies the Spec, deliberately fed ONLY the declared verification surface,
4
+ // the diff and a gate summary — and *no* build-agent conversation/history
5
+ // (decision N1). The isolated input is what eliminates the build agent's
6
+ // self-rationalisation bias.
7
+ //
8
+ // Like LlmJudge / ReflexionEngine it is env-gated (SCALE_LOCAL_MODEL) with a
9
+ // deterministic heuristic fallback, and it is advisory only (decision O1): the
10
+ // verdict is recorded but never blocks ship in this PR.
11
+ import { logger } from '../core/logger.js';
12
+ import { JsonLlmClient } from './JsonLlmClient.js';
13
// System prompt sent to the model on the LLM path of FreshContextVerifier.verify().
// It tells the model that it is NOT the author and may rely only on the declared
// verification surface, the diff and the gate summary, and that it must reply
// with strict JSON.
const SYSTEM_PROMPT = [
    'You are an independent verification sub-agent. You did NOT write this code and have NO access to the author\'s reasoning. ',
    'Using only the declared verification surface, the diff and the gate summary, decide whether the outcome is independently verifiable from the artifacts alone. ',
    'Do not assume intent that is not evidenced by the diff. Output strict JSON only.',
].join('');
16
/**
 * Advisory verifier that judges a change using only the declared verification
 * surface, a diff summary and a gate summary.
 *
 * When the injected client reports it is enabled, the verdict comes from the
 * model; otherwise (or when the model call fails) a deterministic heuristic
 * is used. Every verdict carries `advisory: true`.
 */
export class FreshContextVerifier {
    /**
     * @param client JSON LLM client used for the model path. Defaults to a new
     *   `JsonLlmClient`; pass a stub to force or disable the model path.
     */
    constructor(client = new JsonLlmClient()) {
        this.client = client;
    }
    /**
     * Produce a verdict for `input`.
     *
     * @param input Object with `outcome` (optional), `verificationSurface`
     *   (array of strings), `gateSummary` and `diffSummary` (strings).
     * @returns A verdict `{ decision, confidence, rationale, unmetSurfaces,
     *   modelUsed, advisory, createdAt }`. Never rejects because of a model
     *   failure: any error on the LLM path is logged and the heuristic verdict
     *   is returned instead.
     */
    async verify(input) {
        if (!this.client.isEnabled()) {
            return this.heuristicVerdict(input);
        }
        try {
            const { data, modelUsed } = await this.client.completeJson({
                system: SYSTEM_PROMPT,
                user: this.buildUserPrompt(input),
            });
            // The reply is untrusted model output: only accept fields of the
            // expected type so one malformed field does not discard an
            // otherwise usable verdict (or leak non-strings to consumers).
            const rationale = typeof data.rationale === 'string' ? data.rationale.slice(0, 1000) : '';
            const unmetSurfaces = Array.isArray(data.unmetSurfaces)
                ? data.unmetSurfaces.filter(entry => typeof entry === 'string').slice(0, 50)
                : [];
            return {
                decision: normalizeDecision(data.decision),
                confidence: clampConfidence(data.confidence),
                rationale: rationale || 'No rationale provided.',
                unmetSurfaces,
                modelUsed,
                advisory: true,
                createdAt: Date.now(),
            };
        }
        catch (err) {
            logger.warn({ err }, 'FreshContextVerifier: LLM call failed, falling back to heuristic');
            return this.heuristicVerdict(input);
        }
    }
    /**
     * Render the user message: stated outcome, the list of verification
     * surfaces, the gate summary, the diff (truncated to 6000 characters) and
     * the expected JSON output shape, joined with newlines.
     */
    buildUserPrompt(input) {
        return [
            `Stated outcome: ${input.outcome ?? '(not declared)'}`,
            '',
            'Verification surfaces (the ONLY contract you may verify against):',
            ...(input.verificationSurface.length ? input.verificationSurface.map(s => `- ${s}`) : ['- (none declared)']),
            '',
            'Gate summary:',
            input.gateSummary || '(none)',
            '',
            'Diff (the only evidence available to you):',
            input.diffSummary.slice(0, 6000) || '(empty diff)',
            '',
            'Output JSON: { "decision": "verified|unverified|uncertain", "confidence": 0.0-1.0, "rationale": "...", "unmetSurfaces": ["..."] }',
        ].join('\n');
    }
    /**
     * Deterministic fallback that looks only at the diff text.
     *
     * A surface counts as evidenced when `surfaceMentioned` finds it in the
     * lower-cased diff summary. No declared surface yields "uncertain" (0.3);
     * any unevidenced surface yields "unverified" (0.5); otherwise "verified"
     * (0.5). `modelUsed` is always 'heuristic'.
     */
    heuristicVerdict(input) {
        const haystack = input.diffSummary.toLowerCase();
        const unmetSurfaces = input.verificationSurface.filter(surface => !surfaceMentioned(surface, haystack));
        let decision;
        let confidence;
        let rationale;
        if (input.verificationSurface.length === 0) {
            decision = 'uncertain';
            confidence = 0.3;
            rationale = 'No verification surface declared; cannot independently verify the outcome from the diff.';
        }
        else if (unmetSurfaces.length > 0) {
            decision = 'unverified';
            confidence = 0.5;
            rationale = `${unmetSurfaces.length}/${input.verificationSurface.length} verification surface(s) have no supporting evidence in the diff.`;
        }
        else {
            decision = 'verified';
            confidence = 0.5;
            rationale = 'Every declared verification surface has supporting evidence in the diff.';
        }
        return {
            decision,
            confidence,
            rationale,
            unmetSurfaces,
            modelUsed: 'heuristic',
            advisory: true,
            createdAt: Date.now(),
        };
    }
}
98
/**
 * Heuristic check for whether a verification surface is mentioned in a diff.
 *
 * The surface is lower-cased and split on non-alphanumeric (ASCII) runs. If it
 * has any token of four or more characters, ONE such token appearing in the
 * haystack is enough. If it has none (short or non-ASCII text), the whole
 * lower-cased surface must appear verbatim.
 *
 * @param surface Declared verification surface text.
 * @param haystackLower Diff text, already lower-cased by the caller.
 * @returns true when the surface has supporting text in the haystack; always
 *   false for a blank surface.
 */
function surfaceMentioned(surface, haystackLower) {
    const lowered = surface.toLowerCase();
    // A blank surface declares nothing. Without this guard the fallback below
    // would evaluate `includes('')`, which is true for every haystack, and the
    // empty entry would be reported as evidenced.
    if (lowered.trim() === '')
        return false;
    const tokens = lowered
        .split(/[^a-z0-9]+/i)
        .filter(token => token.length >= 4);
    if (tokens.length === 0)
        return haystackLower.includes(lowered);
    return tokens.some(token => haystackLower.includes(token));
}
107
/**
 * Map a model-supplied decision onto the three supported values.
 *
 * The value comes from parsed LLM output, so it may be missing, padded with
 * whitespace, differently cased, or not a string at all. Anything that is not
 * recognisably "verified" or "unverified" becomes "uncertain"; the function
 * never throws.
 *
 * @param value Raw `decision` field from the model reply.
 * @returns 'verified', 'unverified' or 'uncertain'.
 */
function normalizeDecision(value) {
    // Non-strings (null, numbers, booleans, objects) have no toLowerCase().
    if (typeof value !== 'string')
        return 'uncertain';
    const normalized = value.trim().toLowerCase();
    if (normalized === 'verified' || normalized === 'unverified')
        return normalized;
    return 'uncertain';
}
115
/**
 * Coerce a model-supplied confidence into the closed range [0, 1].
 *
 * @param value Raw `confidence` field from the model reply.
 * @returns `value` limited to 0..1, or 0.5 when it is not a number or is NaN.
 */
function clampConfidence(value) {
    const isUsableNumber = typeof value === 'number' && !Number.isNaN(value);
    return isUsableNumber ? Math.min(1, Math.max(0, value)) : 0.5;
}
120
+ //# sourceMappingURL=FreshContextVerifier.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"FreshContextVerifier.js","sourceRoot":"","sources":["../../src/review/FreshContextVerifier.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,uEAAuE;AACvE,+EAA+E;AAC/E,0EAA0E;AAC1E,yEAAyE;AACzE,6BAA6B;AAC7B,EAAE;AACF,6EAA6E;AAC7E,+EAA+E;AAC/E,wDAAwD;AAExD,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAA;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAA;AA0BlD,MAAM,aAAa,GACjB,4HAA4H;IAC5H,gKAAgK;IAChK,kFAAkF,CAAA;AAEpF,MAAM,OAAO,oBAAoB;IAC/B,YAA6B,SAAwB,IAAI,aAAa,EAAE;QAA3C,WAAM,GAAN,MAAM,CAAqC;IAAG,CAAC;IAE5E,KAAK,CAAC,MAAM,CAAC,KAAuB;QAClC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,CAAC;YAC7B,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAA;QACrC,CAAC;QAED,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,YAAY,CAKvD;gBACD,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC;aAClC,CAAC,CAAA;YACF,OAAO;gBACL,QAAQ,EAAE,iBAAiB,CAAC,IAAI,CAAC,QAAQ,CAAC;gBAC1C,UAAU,EAAE,eAAe,CAAC,IAAI,CAAC,UAAU,CAAC;gBAC5C,SAAS,EAAE,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,wBAAwB;gBAC5E,aAAa,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE;gBACvF,SAAS;gBACT,QAAQ,EAAE,IAAI;gBACd,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAA;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,EAAE,kEAAkE,CAAC,CAAA;YACxF,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAA;QACrC,CAAC;IACH,CAAC;IAEO,eAAe,CAAC,KAAuB;QAC7C,OAAO;YACL,mBAAmB,KAAK,CAAC,OAAO,IAAI,gBAAgB,EAAE;YACtD,EAAE;YACF,mEAAmE;YACnE,GAAG,CAAC,KAAK,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC;YAC5G,EAAE;YACF,eAAe;YACf,KAAK,CAAC,WAAW,IAAI,QAAQ;YAC7B,EAAE;YACF,4CAA4C;YAC5C,KAAK,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,cAAc;YAClD,EAAE;YACF,mIAAmI;SACpI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACd,CAAC;IAED;;;;;OAKG;IACK,gBAAgB,CAAC,KAAuB;QAC9C,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,CAAC,WAAW,EAAE,CAAA;QAChD,MAAM
,aAAa,GAAG,KAAK,CAAC,mBAAmB,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,gBAAgB,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAA;QAEvG,IAAI,QAA6B,CAAA;QACjC,IAAI,UAAkB,CAAA;QACtB,IAAI,SAAiB,CAAA;QACrB,IAAI,KAAK,CAAC,mBAAmB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3C,QAAQ,GAAG,WAAW,CAAA;YACtB,UAAU,GAAG,GAAG,CAAA;YAChB,SAAS,GAAG,0FAA0F,CAAA;QACxG,CAAC;aAAM,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpC,QAAQ,GAAG,YAAY,CAAA;YACvB,UAAU,GAAG,GAAG,CAAA;YAChB,SAAS,GAAG,GAAG,aAAa,CAAC,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,MAAM,mEAAmE,CAAA;QAC5I,CAAC;aAAM,CAAC;YACN,QAAQ,GAAG,UAAU,CAAA;YACrB,UAAU,GAAG,GAAG,CAAA;YAChB,SAAS,GAAG,0EAA0E,CAAA;QACxF,CAAC;QAED,OAAO;YACL,QAAQ;YACR,UAAU;YACV,SAAS;YACT,aAAa;YACb,SAAS,EAAE,WAAW;YACtB,QAAQ,EAAE,IAAI;YACd,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACtB,CAAA;IACH,CAAC;CACF;AAED,SAAS,gBAAgB,CAAC,OAAe,EAAE,aAAqB;IAC9D,MAAM,MAAM,GAAG,OAAO;SACnB,WAAW,EAAE;SACb,KAAK,CAAC,aAAa,CAAC;SACpB,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,CAAA;IACrC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAA;IAC7E,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;AAC5D,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAyB;IAClD,MAAM,UAAU,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAA;IAC9C,IAAI,UAAU,KAAK,UAAU;QAAE,OAAO,UAAU,CAAA;IAChD,IAAI,UAAU,KAAK,YAAY;QAAE,OAAO,YAAY,CAAA;IACpD,OAAO,WAAW,CAAA;AACpB,CAAC;AAED,SAAS,eAAe,CAAC,KAAyB;IAChD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC;QAAE,OAAO,GAAG,CAAA;IAChE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAA;AACxC,CAAC"}
@@ -0,0 +1,37 @@
1
+ export interface JsonLlmRequest { // Input to JsonLlmClient.completeJson: one system + one user message plus optional tuning knobs.
2
+ system: string; // Sent as the content of the `system`-role chat message.
3
+ user: string; // Sent as the content of the `user`-role chat message.
4
+ /** Soft cap for the completion, forwarded as `max_tokens`. Defaults to 600. */
5
+ maxTokens?: number;
6
+ /** Sampling temperature. Defaults to 0.2 (judges want low variance). */
7
+ temperature?: number;
8
+ /** Abort the request after this many ms (via AbortSignal.timeout). Defaults to 20000. */
9
+ timeoutMs?: number;
10
+ }
11
+ export interface JsonLlmResult<T> { // Successful outcome of JsonLlmClient.completeJson.
12
+ data: T; // The reply content parsed as JSON; typed as T by the caller, not validated against T at runtime.
13
+ modelUsed: string; // Model name taken from the resolved local-model config.
14
+ tokensUsed: number; // `usage.total_tokens` from the response, or 0 when the endpoint did not report it.
15
+ }
16
+ /**
17
+ * Enabled only when a local model is explicitly configured. We intentionally
18
+ * gate on SCALE_LOCAL_MODEL (and not the defaulted base URL/api key) so the
19
+ * default developer + CI flow never attempts a network call.
20
+ */
21
+ export declare function isLlmEnabled(): boolean; // true iff the SCALE_LOCAL_MODEL environment variable is set to a non-empty value
22
+ export declare class JsonLlmClient { // Env-gated client for an OpenAI-compatible /chat/completions endpoint that returns parsed JSON.
23
+ private readonly enabledOverride?; // When not null/undefined, isEnabled() returns this instead of consulting the environment.
24
+ constructor(enabledOverride?: boolean | undefined); // Pass true/false to force the enabled state (e.g. in tests); omit to follow isLlmEnabled().
25
+ isEnabled(): boolean; // enabledOverride if provided, otherwise isLlmEnabled().
26
+ /**
27
+ * Call the model and parse its reply as JSON. Throws on any failure
28
+ * (disabled, network error, non-2xx, non-JSON reply) so the caller can fall
29
+ * back to a deterministic heuristic — exactly like ReflexionEngine does.
30
+ */
31
+ completeJson<T>(request: JsonLlmRequest): Promise<JsonLlmResult<T>>; // Resolves with the parsed reply, the model name and the reported token usage.
32
+ }
33
+ /**
34
+ * Best-effort JSON extraction: accepts a bare JSON object or one wrapped in a
35
+ * ```json fenced block (a common local-model habit). Returns null on failure.
36
+ */
37
+ export declare function parseJsonReply<T>(content: string): T | null; // Tries the fenced block, then the whole trimmed reply, then the first '{' .. last '}' slice; the first candidate that parses wins and is returned as T without validation.
@@ -0,0 +1,94 @@
1
+ // SCALE Engine — JSON LLM client (P1.4 / P2.2)
2
+ // A thin, env-gated wrapper around an OpenAI-compatible /chat/completions
3
+ // endpoint that returns parsed JSON. Mirrors the proven pattern in
4
+ // src/cortex/ReflexionEngine.ts: callers stay deterministic and offline by
5
+ // default (isEnabled() === false ⇒ they use their own heuristic fallback), and
6
+ // only reach the network when a local model is explicitly configured.
7
+ import { logger } from '../core/logger.js';
8
+ import { resolveLocalModelConfig } from '../routing/LocalModelProvider.js';
9
/**
 * Report whether the LLM path is switched on.
 *
 * The only switch is the SCALE_LOCAL_MODEL environment variable. The base URL
 * and API key are deliberately ignored because they have defaults, and the
 * default developer + CI flow must never attempt a network call.
 *
 * @returns true when SCALE_LOCAL_MODEL is set to a non-empty value.
 */
export function isLlmEnabled() {
    const configuredModel = process.env.SCALE_LOCAL_MODEL;
    return !!configuredModel;
}
17
/**
 * Thin client for an OpenAI-compatible `/chat/completions` endpoint whose
 * reply is expected to be JSON.
 */
export class JsonLlmClient {
    /**
     * @param enabledOverride Optional forced enabled state. When left
     *   undefined (or null) the environment decides via `isLlmEnabled()`.
     */
    constructor(enabledOverride) {
        this.enabledOverride = enabledOverride;
    }
    /** @returns the forced state when one was given, else `isLlmEnabled()`. */
    isEnabled() {
        return this.enabledOverride ?? isLlmEnabled();
    }
    /**
     * Send one system + one user message and parse the first choice as JSON.
     *
     * @param request `{ system, user, maxTokens?, temperature?, timeoutMs? }`;
     *   the optional fields default to 600, 0.2 and 20000 ms respectively.
     * @returns `{ data, modelUsed, tokensUsed }`, where `tokensUsed` is 0 when
     *   the endpoint reports no usage.
     * @throws Error when the client is disabled, the endpoint answers with a
     *   non-2xx status, or the reply cannot be parsed as JSON. Network and
     *   timeout failures from `fetch` propagate unchanged. Callers are
     *   expected to catch and fall back to their own heuristic.
     */
    async completeJson(request) {
        if (!this.isEnabled()) {
            throw new Error('JsonLlmClient is disabled (set SCALE_LOCAL_MODEL to enable)');
        }
        const config = resolveLocalModelConfig();
        const endpoint = `${config.baseUrl}/chat/completions`;
        const headers = {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${config.apiKey}`,
        };
        const body = JSON.stringify({
            model: config.name,
            messages: [
                { role: 'system', content: request.system },
                { role: 'user', content: request.user },
            ],
            temperature: request.temperature ?? 0.2,
            max_tokens: request.maxTokens ?? 600,
        });
        // The timeout signal is created immediately before the request starts.
        const signal = AbortSignal.timeout(request.timeoutMs ?? 20000);
        const response = await fetch(endpoint, { method: 'POST', headers, body, signal });
        if (!response.ok) {
            throw new Error(`LLM endpoint returned ${response.status}`);
        }
        const payload = (await response.json());
        // A missing choice/message/content is treated as an empty reply,
        // which then fails JSON parsing below.
        const replyText = payload.choices?.[0]?.message?.content ?? '';
        const parsed = parseJsonReply(replyText);
        if (parsed === null) {
            throw new Error('LLM reply was not valid JSON');
        }
        const tokensUsed = payload.usage?.total_tokens ?? 0;
        return { data: parsed, modelUsed: config.name, tokensUsed };
    }
}
67
/**
 * Best-effort extraction of a JSON value from a model reply.
 *
 * Candidates are tried in this order and the first one that `JSON.parse`
 * accepts is returned:
 *   1. the body of the first ``` / ```json fenced block, if any;
 *   2. the whole trimmed reply;
 *   3. the slice from the first '{' to the last '}', if both exist in order.
 *
 * @param content Raw reply text from the model.
 * @returns The parsed value, or null when no candidate parses (a debug line
 *   with the first 200 characters of the reply is logged in that case).
 */
export function parseJsonReply(content) {
    const text = content.trim();
    const fenceMatch = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
    const openIdx = text.indexOf('{');
    const closeIdx = text.lastIndexOf('}');
    const hasBraceSpan = openIdx >= 0 && closeIdx > openIdx;
    const attempts = [
        ...(fenceMatch ? [fenceMatch[1].trim()] : []),
        text,
        ...(hasBraceSpan ? [text.slice(openIdx, closeIdx + 1)] : []),
    ];
    for (const attempt of attempts) {
        try {
            return JSON.parse(attempt);
        }
        catch {
            // Not valid JSON; fall through to the next candidate.
        }
    }
    logger.debug({ content: text.slice(0, 200) }, 'JsonLlmClient: failed to parse JSON reply');
    return null;
}
94
+ //# sourceMappingURL=JsonLlmClient.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"JsonLlmClient.js","sourceRoot":"","sources":["../../src/review/JsonLlmClient.ts"],"names":[],"mappings":"AAAA,+CAA+C;AAC/C,0EAA0E;AAC1E,mEAAmE;AACnE,2EAA2E;AAC3E,+EAA+E;AAC/E,sEAAsE;AAEtE,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAA;AAC1C,OAAO,EAAE,uBAAuB,EAAE,MAAM,kCAAkC,CAAA;AAmB1E;;;;GAIG;AACH,MAAM,UAAU,YAAY;IAC1B,OAAO,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;AAC/C,CAAC;AAED,MAAM,OAAO,aAAa;IACxB,YAA6B,eAAyB;QAAzB,oBAAe,GAAf,eAAe,CAAU;IAAG,CAAC;IAE1D,SAAS;QACP,OAAO,IAAI,CAAC,eAAe,IAAI,YAAY,EAAE,CAAA;IAC/C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,YAAY,CAAI,OAAuB;QAC3C,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,6DAA6D,CAAC,CAAA;QAChF,CAAC;QAED,MAAM,MAAM,GAAG,uBAAuB,EAAE,CAAA;QACxC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,MAAM,CAAC,OAAO,mBAAmB,EAAE;YACjE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,MAAM,CAAC,MAAM,EAAE;aACzC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK,EAAE,MAAM,CAAC,IAAI;gBAClB,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,OAAO,CAAC,MAAM,EAAE;oBAC3C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE;iBACxC;gBACD,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,GAAG;gBACvC,UAAU,EAAE,OAAO,CAAC,SAAS,IAAI,GAAG;aACrC,CAAC;YACF,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,IAAI,KAAK,CAAC;SACxD,CAAC,CAAA;QAEF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,yBAAyB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAA;QAC7D,CAAC;QAED,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAGrC,CAAA;QACD,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAA;QAC5D,MAAM,IAAI,GAAG,cAAc,CAAI,OAAO,CAAC,CAAA;QACvC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAA;QACjD,CAAC;QAED,OAAO;YACL,IAAI;YACJ,SAAS,EAAE,MAAM,CAAC,IAAI;YACtB,UAAU,EAAE,OAAO,CAAC,KAAK,EAAE,YAAY,IAAI,CAAC;SAC7C,CAAA;IACH,CAAC;CACF;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAI,OAAe;IAC/C,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IAC9B,MAAM,UAAU,GAAa,EAAE,CAAA;IAE/B,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAA;IAC7D,IAAI,
MAAM;QAAE,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;IAC7C,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAExB,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IACvC,MAAM,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAA;IAC1C,IAAI,UAAU,IAAI,CAAC,IAAI,SAAS,GAAG,UAAU,EAAE,CAAC;QAC9C,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,UAAU,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC,CAAA;IAC3D,CAAC;IAED,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,IAAI,CAAC;YACH,OAAO,IAAI,CAAC,KAAK,CAAC,SAAS,CAAM,CAAA;QACnC,CAAC;QAAC,MAAM,CAAC;YACP,SAAQ;QACV,CAAC;IACH,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,EAAE,2CAA2C,CAAC,CAAA;IAC7F,OAAO,IAAI,CAAA;AACb,CAAC"}
@@ -0,0 +1,61 @@
1
+ import { JsonLlmClient } from './JsonLlmClient.js';
2
+ export type JudgeDecision = 'pass' | 'fail' | 'uncertain'; // Advisory verdict values; 'uncertain' means the evidence was insufficient to decide.
3
+ export interface JudgeVerdict { // Result of LlmJudge.judge(); advisory evidence only.
4
+ /** Advisory call on whether the diff meets the Spec outcome. */
5
+ decision: JudgeDecision;
6
+ /** 0..1 self-reported confidence. */
7
+ confidence: number;
8
+ rationale: string; // Free-text explanation of the decision.
9
+ /** verificationSurface entries with no corresponding evidence in the diff. */
10
+ unmetSurfaces: string[];
11
+ /** Model name, or 'heuristic' when the LLM path was not taken. */
12
+ modelUsed: string;
13
+ /** Versioned prompt identifier, e.g. "spec-conformance.v1". */
14
+ promptVersion: string;
15
+ /** Always true — kept explicit so consumers never gate on this verdict. */
16
+ advisory: true;
17
+ createdAt: number; // NOTE(review): presumably an epoch-millisecond timestamp (Date.now()) — confirm against LlmJudge.js.
18
+ }
19
+ export interface JudgeInput { // Everything LlmJudge.judge() is given about a change.
20
+ outcome?: string; // The Spec's stated outcome, if one was declared.
21
+ verificationSurface: string[]; // Declared verification surfaces the diff is judged against.
22
+ /** Pre-trimmed diff summary (changed files + salient added lines). */
23
+ diffSummary: string;
24
+ reviewFindings: { // NOTE(review): presumably counts of review findings per severity — confirm against the caller.
25
+ critical: number;
26
+ high: number;
27
+ medium: number;
28
+ low: number;
29
+ };
30
+ }
31
+ interface JudgePromptRecord { // Shape of a judge prompt as stored under `.scale/judges/<id>.json`.
32
+ id: string; // Prompt identifier; also the JSON file's base name (built-in default: 'spec-conformance').
33
+ version: string; // Prompt version label (built-in default: 'v1').
34
+ system: string; // System-prompt text for the judge.
35
+ rubric: string; // Text describing what pass / fail / uncertain mean.
36
+ createdAt: number; // 0 in the built-in default; NOTE(review): presumably epoch milliseconds for persisted records — confirm.
37
+ }
38
+ /**
39
+ * Loads/persists the versioned judge prompt under `.scale/judges/<id>.json`
40
+ * (decision L1) so the rubric is auditable and can drift independently of code.
41
+ */
42
+ export declare class JudgePromptStore {
43
+ private readonly dir; // `<scaleDir>/judges`.
44
+ constructor(scaleDir?: string); // scaleDir defaults to the SCALE_DIR environment variable, or '.scale' when that is unset.
45
+ load(id?: string): JudgePromptRecord; // id defaults to the built-in prompt id; `<dir>/<id>.json` is read when it exists.
46
+ private write; // NOTE(review): implementation not visible in this chunk — presumably persists a record to `<dir>/<id>.json`; confirm.
47
+ }
48
+ export declare class LlmJudge { // Advisory judge of whether a diff satisfies the Spec's declared outcome / verificationSurface.
49
+ private readonly client; // JSON LLM client used for the model path.
50
+ private readonly promptStore; // Source of the versioned judge prompt.
51
+ constructor(client?: JsonLlmClient, promptStore?: JudgePromptStore); // Both arguments are optional; NOTE(review): defaults are not visible in this chunk — confirm in LlmJudge.js.
52
+ judge(input: JudgeInput): Promise<JudgeVerdict>; // Runs the LLM when SCALE_LOCAL_MODEL is configured, otherwise the deterministic heuristic; the verdict is advisory.
53
+ private buildUserPrompt;
54
+ /**
55
+ * Deterministic fallback: a surface is "unmet" when none of its significant
56
+ * tokens appear in the diff summary; any unmet surface or any critical/high
57
+ * review finding turns the advisory verdict negative.
58
+ */
59
+ private heuristicVerdict;
60
+ }
61
+ export {};
@@ -0,0 +1,167 @@
1
+ // SCALE Engine — LLM-as-Judge (P1.4)
2
+ // Independent, advisory check of whether a diff actually satisfies the Spec's
3
+ // declared outcome / verificationSurface. The verdict is written into the
4
+ // review record as *advisory* evidence (decision K1): it never participates in
5
+ // the pass/fail decision and never blocks ship.
6
+ //
7
+ // Like ReflexionEngine, the judge runs an env-gated LLM when SCALE_LOCAL_MODEL
8
+ // is configured and otherwise falls back to a deterministic heuristic, so the
9
+ // default developer + CI flow stays offline, free and reproducible.
10
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
11
+ import { join } from 'node:path';
12
+ import { logger } from '../core/logger.js';
13
+ import { JsonLlmClient } from './JsonLlmClient.js';
14
/**
 * Bundled spec-conformance prompt, used whenever no usable prompt file is
 * found on disk. `createdAt` is a 0 placeholder: the store substitutes the
 * current time whenever it persists or hands out a copy.
 */
const DEFAULT_PROMPT = {
    id: 'spec-conformance',
    version: 'v1',
    // Role and output contract for the model.
    system: 'You are an independent code-review judge. Decide only whether the diff actually achieves the stated outcome and exercises every declared verification surface. ' +
        'You are advisory: do not approve work that lacks evidence. Output strict JSON only.',
    // Meaning of each of the three allowed decisions.
    rubric: 'pass = every verification surface is plausibly addressed by the diff and no critical/high review finding contradicts the outcome. ' +
        'fail = the diff clearly does not achieve the outcome or leaves a declared surface unaddressed. ' +
        'uncertain = evidence is insufficient to decide.',
    createdAt: 0,
};
24
/**
 * Loads/persists the versioned judge prompt under `.scale/judges/<id>.json`
 * (decision L1) so the rubric is auditable and can drift independently of code.
 *
 * `load()` is best-effort by design: an unreadable, malformed or unwritable
 * prompt file is logged and the bundled default is returned instead, so a
 * broken or read-only `.scale` directory cannot break the advisory judge.
 */
export class JudgePromptStore {
    /**
     * @param {string} [scaleDir] Root SCALE directory; defaults to the
     *   `SCALE_DIR` environment variable, then to `.scale`.
     */
    constructor(scaleDir = process.env.SCALE_DIR ?? '.scale') {
        this.dir = join(scaleDir, 'judges');
    }
    /**
     * Returns the prompt stored for `id`, or a copy of the bundled default.
     *
     * The first time the default prompt is requested and no file exists yet,
     * the default is written to disk so it can be audited and edited.
     *
     * @param {string} [id] Prompt identifier; defaults to the bundled prompt's id.
     * @returns A prompt record with string `id`/`version`/`system`/`rubric`
     *   and a numeric `createdAt`.
     */
    load(id = DEFAULT_PROMPT.id) {
        const file = join(this.dir, `${id}.json`);
        if (existsSync(file)) {
            try {
                const record = toJudgePromptRecord(JSON.parse(readFileSync(file, 'utf-8')));
                if (record)
                    return record;
                // Valid JSON but not a prompt (e.g. `null`, `{}`, an array):
                // using it would produce "undefined" prompts downstream.
                logger.warn({ file }, 'JudgePromptStore: malformed prompt file, using bundled default');
            }
            catch {
                logger.warn({ file }, 'JudgePromptStore: corrupt prompt file, using bundled default');
            }
        }
        else if (id === DEFAULT_PROMPT.id) {
            try {
                this.write({ ...DEFAULT_PROMPT, createdAt: Date.now() });
            }
            catch (err) {
                // Persisting is a convenience only; a read-only or sandboxed
                // checkout must still get a usable prompt.
                logger.warn({ err, file }, 'JudgePromptStore: could not persist bundled default prompt');
            }
        }
        return { ...DEFAULT_PROMPT, createdAt: DEFAULT_PROMPT.createdAt || Date.now() };
    }
    /**
     * Writes `record` to `<dir>/<record.id>.json` as pretty-printed JSON,
     * creating the directory first. Throws on file-system errors.
     */
    write(record) {
        // A recursive mkdir is a no-op when the directory already exists, so no
        // separate existence check (and its race window) is needed.
        mkdirSync(this.dir, { recursive: true });
        writeFileSync(join(this.dir, `${record.id}.json`), JSON.stringify(record, null, 2), 'utf-8');
    }
}
/**
 * Validates parsed JSON as a prompt record. Returns the record (with
 * `createdAt` coerced to a number, 0 when absent or invalid) or `undefined`
 * when any of the required string fields is missing.
 */
function toJudgePromptRecord(value) {
    if (typeof value !== 'object' || value === null)
        return undefined;
    const { id, version, system, rubric, createdAt } = value;
    if ([id, version, system, rubric].some(field => typeof field !== 'string'))
        return undefined;
    return { ...value, createdAt: typeof createdAt === 'number' ? createdAt : 0 };
}
53
/**
 * Advisory judge that checks whether a diff satisfies the Spec's declared
 * outcome and verification surfaces. It asks the LLM client when that client
 * is enabled and otherwise (or on any LLM failure) uses a deterministic
 * heuristic. Every verdict carries `advisory: true`.
 */
export class LlmJudge {
    /**
     * @param client LLM client used for the model-backed path.
     * @param promptStore Source of the versioned judge prompt.
     */
    constructor(client = new JsonLlmClient(), promptStore = new JudgePromptStore()) {
        this.client = client;
        this.promptStore = promptStore;
    }
    /**
     * Judges one change.
     *
     * @param input Declared outcome, verification surfaces, diff summary and
     *   review finding counts.
     * @returns The LLM verdict when the client is enabled and answers; the
     *   heuristic verdict otherwise.
     */
    async judge(input) {
        let prompt;
        try {
            prompt = this.promptStore.load();
        }
        catch (err) {
            // The judge is advisory: a broken prompt store must degrade to the
            // bundled prompt instead of rejecting the whole call.
            logger.warn({ err }, 'LlmJudge: prompt load failed, using bundled default prompt');
            prompt = DEFAULT_PROMPT;
        }
        const promptVersion = `${prompt.id}.${prompt.version}`;
        if (!this.client.isEnabled()) {
            return this.heuristicVerdict(input, promptVersion);
        }
        try {
            const { data, modelUsed } = await this.client.completeJson({
                system: `${prompt.system}\n\nRubric: ${prompt.rubric}`,
                user: this.buildUserPrompt(input),
            });
            // Model output is untrusted JSON: keep only fields of the expected
            // type so one malformed field does not discard the whole verdict
            // or leak non-string values into the review record.
            const rationale = typeof data.rationale === 'string' ? data.rationale.slice(0, 1000) : '';
            const unmetSurfaces = Array.isArray(data.unmetSurfaces)
                ? data.unmetSurfaces.filter(surface => typeof surface === 'string').slice(0, 50)
                : [];
            return {
                decision: normalizeDecision(data.decision),
                confidence: clampConfidence(data.confidence),
                rationale: rationale || 'No rationale provided.',
                unmetSurfaces,
                modelUsed,
                promptVersion,
                advisory: true,
                createdAt: Date.now(),
            };
        }
        catch (err) {
            logger.warn({ err }, 'LlmJudge: LLM call failed, falling back to heuristic');
            return this.heuristicVerdict(input, promptVersion);
        }
    }
    /**
     * Renders the user message: outcome, verification surfaces, finding
     * counts, the diff summary (capped at 6000 characters) and the expected
     * JSON output shape.
     */
    buildUserPrompt(input) {
        return [
            `Outcome: ${input.outcome ?? '(not declared)'}`,
            '',
            'Verification surfaces (each must be addressed):',
            ...(input.verificationSurface.length ? input.verificationSurface.map(s => `- ${s}`) : ['- (none declared)']),
            '',
            'Review findings:',
            `critical=${input.reviewFindings.critical} high=${input.reviewFindings.high} medium=${input.reviewFindings.medium} low=${input.reviewFindings.low}`,
            '',
            'Diff summary:',
            input.diffSummary.slice(0, 6000) || '(empty diff)',
            '',
            'Output JSON: { "decision": "pass|fail|uncertain", "confidence": 0.0-1.0, "rationale": "...", "unmetSurfaces": ["..."] }',
        ].join('\n');
    }
    /**
     * Deterministic fallback. A surface is "unmet" when none of its
     * significant tokens appear in the diff summary. Precedence:
     *   1. any critical/high review finding -> `fail`
     *   2. no verification surface declared -> `uncertain`
     *   3. at least one unmet surface       -> `uncertain`
     *   4. otherwise                        -> `pass`
     * The unmet surfaces are reported in every case.
     */
    heuristicVerdict(input, promptVersion) {
        const haystack = input.diffSummary.toLowerCase();
        const unmetSurfaces = input.verificationSurface.filter(surface => !surfaceMentioned(surface, haystack));
        const blockingFindings = input.reviewFindings.critical + input.reviewFindings.high;
        let decision;
        let confidence;
        let rationale;
        if (blockingFindings > 0) {
            decision = 'fail';
            confidence = 0.6;
            rationale = `${blockingFindings} critical/high review finding(s) contradict a "done" claim.`;
        }
        else if (input.verificationSurface.length === 0) {
            decision = 'uncertain';
            confidence = 0.3;
            rationale = 'No verification surface declared; cannot judge conformance from the diff alone.';
        }
        else if (unmetSurfaces.length > 0) {
            decision = 'uncertain';
            confidence = 0.4;
            rationale = `${unmetSurfaces.length}/${input.verificationSurface.length} verification surface(s) have no matching evidence in the diff.`;
        }
        else {
            decision = 'pass';
            confidence = 0.5;
            rationale = 'All declared verification surfaces appear in the diff and no critical/high findings were raised.';
        }
        return {
            decision,
            confidence,
            rationale,
            unmetSurfaces,
            modelUsed: 'heuristic',
            promptVersion,
            advisory: true,
            createdAt: Date.now(),
        };
    }
}
145
/**
 * Reports whether a verification surface shows up in the diff summary.
 *
 * The surface is split into lowercase alphanumeric tokens and any token of
 * four or more characters found in the haystack counts as a mention. When the
 * surface has no such token (e.g. "CI", "e2e"), the whole trimmed surface is
 * matched as a substring instead; trimming keeps stray whitespace or a
 * trailing newline in the Spec from making a short surface unmatchable.
 * A blank surface is treated as mentioned.
 *
 * @param {string} surface Declared verification surface.
 * @param {string} haystackLower Diff summary, already lowercased by the caller.
 * @returns {boolean} True when the surface is considered mentioned.
 */
function surfaceMentioned(surface, haystackLower) {
    const needle = surface.trim().toLowerCase();
    const tokens = needle
        .split(/[^a-z0-9]+/i)
        .filter(token => token.length >= 4);
    if (tokens.length === 0)
        return haystackLower.includes(needle);
    return tokens.some(token => haystackLower.includes(token));
}
154
/**
 * Maps the model's raw `decision` field onto the three allowed values.
 *
 * Matching ignores case and surrounding whitespace. Anything else, including
 * non-string values (which would previously throw on `.toLowerCase()`),
 * becomes 'uncertain'.
 *
 * @param {unknown} value Raw value taken from the model's JSON output.
 * @returns {'pass' | 'fail' | 'uncertain'}
 */
function normalizeDecision(value) {
    if (typeof value !== 'string')
        return 'uncertain';
    const normalized = value.trim().toLowerCase();
    if (normalized === 'pass')
        return 'pass';
    if (normalized === 'fail')
        return 'fail';
    return 'uncertain';
}
162
/**
 * Normalizes the model's self-reported confidence into the range 0..1.
 *
 * Numbers are clamped. Non-blank numeric strings (models often emit "0.8"
 * instead of 0.8) are converted first. Anything that is not a usable number
 * falls back to the neutral 0.5.
 *
 * @param {unknown} value Raw value taken from the model's JSON output.
 * @returns {number} Confidence within [0, 1].
 */
function clampConfidence(value) {
    // Blank strings are excluded because Number('') is 0, not NaN.
    const numeric = typeof value === 'string' && value.trim() !== '' ? Number(value) : value;
    if (typeof numeric !== 'number' || Number.isNaN(numeric))
        return 0.5;
    return Math.max(0, Math.min(1, numeric));
}
167
+ //# sourceMappingURL=LlmJudge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"LlmJudge.js","sourceRoot":"","sources":["../../src/review/LlmJudge.ts"],"names":[],"mappings":"AAAA,qCAAqC;AACrC,8EAA8E;AAC9E,0EAA0E;AAC1E,+EAA+E;AAC/E,gDAAgD;AAChD,EAAE;AACF,+EAA+E;AAC/E,8EAA8E;AAC9E,oEAAoE;AAEpE,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAA;AAC5E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAChC,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAA;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAA;AAqClD,MAAM,cAAc,GAAsB;IACxC,EAAE,EAAE,kBAAkB;IACtB,OAAO,EAAE,IAAI;IACb,MAAM,EACJ,iKAAiK;QACjK,qFAAqF;IACvF,MAAM,EACJ,oIAAoI;QACpI,iGAAiG;QACjG,iDAAiD;IACnD,SAAS,EAAE,CAAC;CACb,CAAA;AAED;;;GAGG;AACH,MAAM,OAAO,gBAAgB;IAG3B,YAAY,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,QAAQ;QACtD,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;IACrC,CAAC;IAED,IAAI,CAAC,EAAE,GAAG,cAAc,CAAC,EAAE;QACzB,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,OAAO,CAAC,CAAA;QACzC,IAAI,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC;gBACH,OAAO,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAsB,CAAA;YACrE,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,8DAA8D,CAAC,CAAA;YACvF,CAAC;QACH,CAAC;aAAM,IAAI,EAAE,KAAK,cAAc,CAAC,EAAE,EAAE,CAAC;YACpC,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,cAAc,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;QAC1D,CAAC;QACD,OAAO,EAAE,GAAG,cAAc,EAAE,SAAS,EAAE,cAAc,CAAC,SAAS,IAAI,IAAI,CAAC,GAAG,EAAE,EAAE,CAAA;IACjF,CAAC;IAEO,KAAK,CAAC,MAAyB;QACrC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC;YAAE,SAAS,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;QACnE,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,EAAE,OAAO,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAA;IAC9F,CAAC;CACF;AAED,MAAM,OAAO,QAAQ;IACnB,YACmB,SAAwB,IAAI,aAAa,EAAE,EAC3C,cAAgC,IAAI,gBAAgB,EAAE;QADtD,WAAM,GAAN,MAAM,CAAqC;QAC3C,gBAAW,GAAX,WAAW,CAA2C;IACtE,CAAC;IAEJ,KAAK,CAAC,KAAK,CAAC,KAAiB;QAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,CAAA;QACtC,MAAM,aAAa,GAAG,GAAG,MAAM,CAAC,EAAE,IAAI,MAAM,CAAC,OAAO,EAAE,CAAA;QAEtD,IAAI,CAA
C,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,CAAC;YAC7B,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;QACpD,CAAC;QAED,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,YAAY,CAKvD;gBACD,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,eAAe,MAAM,CAAC,MAAM,EAAE;gBACtD,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC;aAClC,CAAC,CAAA;YACF,OAAO;gBACL,QAAQ,EAAE,iBAAiB,CAAC,IAAI,CAAC,QAAQ,CAAC;gBAC1C,UAAU,EAAE,eAAe,CAAC,IAAI,CAAC,UAAU,CAAC;gBAC5C,SAAS,EAAE,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,wBAAwB;gBAC5E,aAAa,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE;gBACvF,SAAS;gBACT,aAAa;gBACb,QAAQ,EAAE,IAAI;gBACd,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAA;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,EAAE,sDAAsD,CAAC,CAAA;YAC5E,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;QACpD,CAAC;IACH,CAAC;IAEO,eAAe,CAAC,KAAiB;QACvC,OAAO;YACL,YAAY,KAAK,CAAC,OAAO,IAAI,gBAAgB,EAAE;YAC/C,EAAE;YACF,iDAAiD;YACjD,GAAG,CAAC,KAAK,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC;YAC5G,EAAE;YACF,kBAAkB;YAClB,YAAY,KAAK,CAAC,cAAc,CAAC,QAAQ,SAAS,KAAK,CAAC,cAAc,CAAC,IAAI,WAAW,KAAK,CAAC,cAAc,CAAC,MAAM,QAAQ,KAAK,CAAC,cAAc,CAAC,GAAG,EAAE;YACnJ,EAAE;YACF,eAAe;YACf,KAAK,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,cAAc;YAClD,EAAE;YACF,yHAAyH;SAC1H,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACd,CAAC;IAED;;;;OAIG;IACK,gBAAgB,CAAC,KAAiB,EAAE,aAAqB;QAC/D,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,CAAC,WAAW,EAAE,CAAA;QAChD,MAAM,aAAa,GAAG,KAAK,CAAC,mBAAmB,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,gBAAgB,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAA;QACvG,MAAM,gBAAgB,GAAG,KAAK,CAAC,cAAc,CAAC,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,IAAI,CAAA;QAElF,IAAI,QAAuB,CAAA;QAC3B,IAAI,UAAkB,CAAA;QACtB,IAAI,SAAiB,CAAA;QACrB,IAAI,gBAAgB,GAAG,CAAC,EAAE,CAAC;YACzB,QAAQ,GAAG,MAAM,CAAA;YACjB,UAAU,GAAG,GAAG,CAAA;YAChB,SAAS,GAAG,GAAG,gBAAg
B,6DAA6D,CAAA;QAC9F,CAAC;aAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClD,QAAQ,GAAG,WAAW,CAAA;YACtB,UAAU,GAAG,GAAG,CAAA;YAChB,SAAS,GAAG,iFAAiF,CAAA;QAC/F,CAAC;aAAM,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpC,QAAQ,GAAG,WAAW,CAAA;YACtB,UAAU,GAAG,GAAG,CAAA;YAChB,SAAS,GAAG,GAAG,aAAa,CAAC,MAAM,IAAI,KAAK,CAAC,mBAAmB,CAAC,MAAM,iEAAiE,CAAA;QAC1I,CAAC;aAAM,CAAC;YACN,QAAQ,GAAG,MAAM,CAAA;YACjB,UAAU,GAAG,GAAG,CAAA;YAChB,SAAS,GAAG,kGAAkG,CAAA;QAChH,CAAC;QAED,OAAO;YACL,QAAQ;YACR,UAAU;YACV,SAAS;YACT,aAAa;YACb,SAAS,EAAE,WAAW;YACtB,aAAa;YACb,QAAQ,EAAE,IAAI;YACd,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACtB,CAAA;IACH,CAAC;CACF;AAED,SAAS,gBAAgB,CAAC,OAAe,EAAE,aAAqB;IAC9D,MAAM,MAAM,GAAG,OAAO;SACnB,WAAW,EAAE;SACb,KAAK,CAAC,aAAa,CAAC;SACpB,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,CAAA;IACrC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,aAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAA;IAC7E,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAA;AAC5D,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAyB;IAClD,MAAM,UAAU,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAA;IAC9C,IAAI,UAAU,KAAK,MAAM;QAAE,OAAO,MAAM,CAAA;IACxC,IAAI,UAAU,KAAK,MAAM;QAAE,OAAO,MAAM,CAAA;IACxC,OAAO,WAAW,CAAA;AACpB,CAAC;AAED,SAAS,eAAe,CAAC,KAAyB;IAChD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC;QAAE,OAAO,GAAG,CAAA;IAChE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAA;AACxC,CAAC"}
@@ -9,6 +9,7 @@ import { type GovernanceRoiSummary } from '../workflow/GovernanceRoi.js';
9
9
  import { type EvolutionShadowReport } from '../workflow/EvolutionShadowPromoter.js';
10
10
  import { type LearningEntry } from '../evolution/SessionLearnings.js';
11
11
  import { type SessionPreamble } from '../workflow/SessionPreamble.js';
12
+ import { type AgentLoopReadinessReport } from '../workflow/AgentLoopReadiness.js';
12
13
  export interface AiOsRuntimeInput {
13
14
  projectDir?: string;
14
15
  scaleDir?: string;
@@ -412,7 +413,7 @@ export interface AiOsStatusReport {
412
413
  nextActions: string[];
413
414
  warnings: string[];
414
415
  }
415
- export type AiOsIntelligenceSignalId = 'memory-recall' | 'context-savings' | 'skill-routing' | 'evaluator-intelligence' | 'tool-strategy' | 'adaptive-workflow' | 'evolution-shadow' | 'benchmark-intelligence';
416
+ export type AiOsIntelligenceSignalId = 'memory-recall' | 'context-savings' | 'skill-routing' | 'evaluator-intelligence' | 'tool-strategy' | 'adaptive-workflow' | 'evolution-shadow' | 'agent-loop-readiness' | 'benchmark-intelligence';
416
417
  export interface AiOsIntelligenceSignal {
417
418
  id: AiOsIntelligenceSignalId;
418
419
  status: AiOsClosedLoopStatus;
@@ -432,6 +433,7 @@ export interface AiOsIntelligenceReport {
432
433
  contextQuality: AiOsContextQualitySummary;
433
434
  evaluatorQuality: AiOsEvaluatorQualitySummary;
434
435
  toolStrategyQuality: AiOsToolStrategyQualitySummary;
436
+ agentLoopQuality: AiOsAgentLoopQualitySummary;
435
437
  evolutionQuality: AiOsEvolutionQualitySummary;
436
438
  estimatedTokenSavings: number;
437
439
  skillSteps: number;
@@ -467,6 +469,17 @@ export interface AiOsToolStrategyQualitySummary {
467
469
  estimatedCostUnits: number;
468
470
  fallbackCoverage: number;
469
471
  }
472
/**
 * Agent-loop readiness rollup exposed as `agentLoopQuality` on
 * AiOsIntelligenceReport.
 */
export interface AiOsAgentLoopQualitySummary {
    /** Same value type as the `status` field of AgentLoopReadinessReport. */
    status: AgentLoopReadinessReport['status'];
    score: number;
    // Signal counts. NOTE(review): presumably counts of readiness signals in
    // each state — confirm against AgentLoopReadiness.
    readySignals: number;
    warningSignals: number;
    missingSignals: number;
    // Nullable metrics. NOTE(review): null presumably means "no data
    // available" — confirm against the producer of this summary.
    loopRecoveryRate: number | null;
    guardrailCoverage: number | null;
    budgetControlled: boolean | null;
    terminationEvidence: boolean | null;
}
470
483
  export interface AiOsEvolutionQualitySummary {
471
484
  proposals: number;
472
485
  shadowRules: number;