tryassay 0.21.2 → 0.22.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/demo/.claude/.truth_last_prompt +1 -0
- package/demo/.claude/truth_status +1 -0
- package/demo/css/style.css +840 -0
- package/demo/index.html +78 -0
- package/demo/js/chat.js +535 -0
- package/demo/js/code-panel.js +206 -0
- package/demo/js/preview.js +456 -0
- package/demo/js/sse-client.js +600 -0
- package/demo/js/state.js +172 -0
- package/demo/js/timeline.js +80 -0
- package/dist/api/server.d.ts +3 -0
- package/dist/api/server.js +127 -20
- package/dist/api/server.js.map +1 -1
- package/dist/cli.js +11 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/assess.d.ts +2 -0
- package/dist/commands/assess.js +132 -164
- package/dist/commands/assess.js.map +1 -1
- package/dist/commands/demo.d.ts +5 -0
- package/dist/commands/demo.js +357 -0
- package/dist/commands/demo.js.map +1 -0
- package/dist/lib/__tests__/arithmetic-quick-test.d.ts +6 -0
- package/dist/lib/__tests__/arithmetic-quick-test.js +197 -0
- package/dist/lib/__tests__/arithmetic-quick-test.js.map +1 -0
- package/dist/lib/__tests__/arithmetic-real-llm-test.d.ts +13 -0
- package/dist/lib/__tests__/arithmetic-real-llm-test.js +284 -0
- package/dist/lib/__tests__/arithmetic-real-llm-test.js.map +1 -0
- package/dist/lib/__tests__/arithmetic-value-demo.d.ts +10 -0
- package/dist/lib/__tests__/arithmetic-value-demo.js +193 -0
- package/dist/lib/__tests__/arithmetic-value-demo.js.map +1 -0
- package/dist/lib/__tests__/flow-to-claims.test.d.ts +1 -0
- package/dist/lib/__tests__/flow-to-claims.test.js +91 -0
- package/dist/lib/__tests__/flow-to-claims.test.js.map +1 -0
- package/dist/lib/__tests__/formal-verifier-api-misuse.test.d.ts +9 -0
- package/dist/lib/__tests__/formal-verifier-api-misuse.test.js +391 -0
- package/dist/lib/__tests__/formal-verifier-api-misuse.test.js.map +1 -0
- package/dist/lib/__tests__/formal-verifier-arithmetic.test.d.ts +7 -0
- package/dist/lib/__tests__/formal-verifier-arithmetic.test.js +318 -0
- package/dist/lib/__tests__/formal-verifier-arithmetic.test.js.map +1 -0
- package/dist/lib/__tests__/intent-extractor.test.d.ts +1 -0
- package/dist/lib/__tests__/intent-extractor.test.js +97 -0
- package/dist/lib/__tests__/intent-extractor.test.js.map +1 -0
- package/dist/lib/__tests__/intent-reviewer.test.d.ts +1 -0
- package/dist/lib/__tests__/intent-reviewer.test.js +55 -0
- package/dist/lib/__tests__/intent-reviewer.test.js.map +1 -0
- package/dist/lib/__tests__/mr-gsm8k-benchmark.d.ts +11 -0
- package/dist/lib/__tests__/mr-gsm8k-benchmark.js +224 -0
- package/dist/lib/__tests__/mr-gsm8k-benchmark.js.map +1 -0
- package/dist/lib/anthropic.js +25 -33
- package/dist/lib/anthropic.js.map +1 -1
- package/dist/lib/assessment-reporter.js +9 -13
- package/dist/lib/assessment-reporter.js.map +1 -1
- package/dist/lib/claim-extractor.js +10 -19
- package/dist/lib/claim-extractor.js.map +1 -1
- package/dist/lib/code-verifier.js +16 -36
- package/dist/lib/code-verifier.js.map +1 -1
- package/dist/lib/constraint-engine.js +10 -19
- package/dist/lib/constraint-engine.js.map +1 -1
- package/dist/lib/formal-verifier.d.ts +1 -1
- package/dist/lib/formal-verifier.js +454 -0
- package/dist/lib/formal-verifier.js.map +1 -1
- package/dist/lib/guided-generator.js +19 -37
- package/dist/lib/guided-generator.js.map +1 -1
- package/dist/lib/intent-extractor.d.ts +47 -0
- package/dist/lib/intent-extractor.js +427 -0
- package/dist/lib/intent-extractor.js.map +1 -0
- package/dist/lib/intent-reviewer.d.ts +14 -0
- package/dist/lib/intent-reviewer.js +148 -0
- package/dist/lib/intent-reviewer.js.map +1 -0
- package/dist/lib/intent-types.d.ts +89 -0
- package/dist/lib/intent-types.js +5 -0
- package/dist/lib/intent-types.js.map +1 -0
- package/dist/lib/inventory-extractor.js +9 -22
- package/dist/lib/inventory-extractor.js.map +1 -1
- package/dist/lib/llm-provider.d.ts +23 -0
- package/dist/lib/llm-provider.js +130 -0
- package/dist/lib/llm-provider.js.map +1 -0
- package/dist/lib/remediator.js +20 -28
- package/dist/lib/remediator.js.map +1 -1
- package/dist/lib/requirements-generator.js +14 -19
- package/dist/lib/requirements-generator.js.map +1 -1
- package/dist/lib/spec-synthesizer.js +10 -19
- package/dist/lib/spec-synthesizer.js.map +1 -1
- package/dist/runtime/agents/planner-agent.d.ts +5 -2
- package/dist/runtime/agents/planner-agent.js +232 -1
- package/dist/runtime/agents/planner-agent.js.map +1 -1
- package/dist/runtime/app-create-orchestrator.d.ts +9 -1
- package/dist/runtime/app-create-orchestrator.js +265 -87
- package/dist/runtime/app-create-orchestrator.js.map +1 -1
- package/dist/runtime/check-catalog.js +5 -3
- package/dist/runtime/check-catalog.js.map +1 -1
- package/dist/runtime/check-definitions.d.ts +10 -0
- package/dist/runtime/check-definitions.js +52 -2
- package/dist/runtime/check-definitions.js.map +1 -1
- package/dist/runtime/composition-verifier.js +8 -12
- package/dist/runtime/composition-verifier.js.map +1 -1
- package/dist/runtime/gap-detector.js +8 -10
- package/dist/runtime/gap-detector.js.map +1 -1
- package/dist/runtime/input-validator.d.ts +7 -0
- package/dist/runtime/input-validator.js +162 -0
- package/dist/runtime/input-validator.js.map +1 -0
- package/dist/runtime/model-router.d.ts +10 -0
- package/dist/runtime/model-router.js +42 -0
- package/dist/runtime/model-router.js.map +1 -0
- package/dist/runtime/pattern-extractor.js +8 -10
- package/dist/runtime/pattern-extractor.js.map +1 -1
- package/dist/runtime/planner.js +11 -16
- package/dist/runtime/planner.js.map +1 -1
- package/dist/runtime/prompt-guard.d.ts +2 -0
- package/dist/runtime/prompt-guard.js +180 -0
- package/dist/runtime/prompt-guard.js.map +1 -0
- package/dist/runtime/prompt-safety-analyzer.js +8 -13
- package/dist/runtime/prompt-safety-analyzer.js.map +1 -1
- package/dist/runtime/reasoner.js +19 -33
- package/dist/runtime/reasoner.js.map +1 -1
- package/dist/runtime/rule-meta-verifier.js +9 -11
- package/dist/runtime/rule-meta-verifier.js.map +1 -1
- package/dist/runtime/safe-executor.d.ts +23 -0
- package/dist/runtime/safe-executor.js +151 -0
- package/dist/runtime/safe-executor.js.map +1 -0
- package/dist/runtime/specialized-agent.js +10 -14
- package/dist/runtime/specialized-agent.js.map +1 -1
- package/dist/runtime/strategy-library.js +8 -10
- package/dist/runtime/strategy-library.js.map +1 -1
- package/dist/runtime/supabase-experience-store.js.map +1 -1
- package/dist/runtime/supabase-provisioner.d.ts +35 -0
- package/dist/runtime/supabase-provisioner.js +192 -0
- package/dist/runtime/supabase-provisioner.js.map +1 -0
- package/dist/runtime/types.d.ts +116 -0
- package/dist/sdk/forward-verify.js +16 -33
- package/dist/sdk/forward-verify.js.map +1 -1
- package/package.json +2 -1
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { MODEL } from './anthropic.js';
|
|
2
|
+
import { getProvider } from './llm-provider.js';
|
|
2
3
|
// ---------------------------------------------------------------------------
|
|
3
4
|
// Domain patterns — embedded knowledge from Assay benchmark findings
|
|
4
5
|
// ---------------------------------------------------------------------------
|
|
@@ -243,27 +244,17 @@ async function deriveSpecConstraints(specs, task, language, onProgress) {
|
|
|
243
244
|
if (specs.length === 0) {
|
|
244
245
|
return { constraints: [], inputTokens: 0, outputTokens: 0 };
|
|
245
246
|
}
|
|
246
|
-
|
|
247
|
-
onProgress?.('Deriving spec constraints via Claude (streaming)...');
|
|
247
|
+
onProgress?.('Deriving spec constraints via Claude...');
|
|
248
248
|
const specsText = specs
|
|
249
249
|
.map((s) => `[${s.id}] (${s.category}, ${s.severity}) ${s.description}\n Assertion: ${s.assertion}`)
|
|
250
250
|
.join('\n\n');
|
|
251
|
-
const
|
|
251
|
+
const result = await getProvider().complete({
|
|
252
252
|
model: MODEL,
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
{
|
|
257
|
-
role: 'user',
|
|
258
|
-
content: `Generate constraints for the following coding task and specifications.\n\nLanguage: ${language}\nTask: ${task}\n\nSpecifications:\n${specsText}`,
|
|
259
|
-
},
|
|
260
|
-
],
|
|
261
|
-
});
|
|
262
|
-
let rawText = '';
|
|
263
|
-
stream.on('text', (text) => {
|
|
264
|
-
rawText += text;
|
|
253
|
+
maxTokens: 8_000,
|
|
254
|
+
systemPrompt: CONSTRAINT_SYSTEM_PROMPT,
|
|
255
|
+
userPrompt: `Generate constraints for the following coding task and specifications.\n\nLanguage: ${language}\nTask: ${task}\n\nSpecifications:\n${specsText}`,
|
|
265
256
|
});
|
|
266
|
-
const
|
|
257
|
+
const rawText = result.content;
|
|
267
258
|
onProgress?.('Parsing constraint results...');
|
|
268
259
|
let jsonText = stripCodeFences(rawText);
|
|
269
260
|
jsonText = repairTruncatedArray(jsonText, onProgress);
|
|
@@ -290,8 +281,8 @@ async function deriveSpecConstraints(specs, task, language, onProgress) {
|
|
|
290
281
|
}
|
|
291
282
|
return {
|
|
292
283
|
constraints,
|
|
293
|
-
inputTokens:
|
|
294
|
-
outputTokens:
|
|
284
|
+
inputTokens: result.inputTokens ?? 0,
|
|
285
|
+
outputTokens: result.outputTokens ?? 0,
|
|
295
286
|
};
|
|
296
287
|
}
|
|
297
288
|
// ---------------------------------------------------------------------------
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"constraint-engine.js","sourceRoot":"","sources":["../../src/lib/constraint-engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"constraint-engine.js","sourceRoot":"","sources":["../../src/lib/constraint-engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AACvC,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAUhD,8EAA8E;AAC9E,qEAAqE;AACrE,8EAA8E;AAE9E,MAAM,eAAe,GAGhB;IACH;QACE,QAAQ,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,CAAC;QAC5C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,+DAA+D,EAAE;YAC9F,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,gEAAgE,EAAE,OAAO,EAAE,2CAA2C,EAAE;SAC1J;KACF;IACD;QACE,QAAQ,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,CAAC;QACtD,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,2DAA2D,EAAE;YAC1F,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,wDAAwD,EAAE;SAC5F;KACF;IACD;QACE,QAAQ,EAAE,CAAC,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAE,YAAY,CAAC;QAC1D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,gEAAgE,EAAE;YAC/F,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,gEAAgE,EAAE,OAAO,EAAE,4DAA4D,EAAE;SAC3K;KACF;IACD;QACE,QAAQ,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC;QAC7C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,kEAAkE,EAAE;YACjG,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,qDAAqD,EAAE;YACxF,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,iDAAiD,EAAE;SACnF;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,CAAC;QACpE,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,yCAAyC,EAAE;YACxE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6DAA6D,EAAE;SAC/F;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,CAAC;QAC1D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,4DAA4D,EAAE;YAC3F,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,8BAA8B,EAAE;SAC9D;KACF;IACD;QACE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC;QAC7C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,kDAAkD,EAAE;YACjF,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,qDAAqD,EAAE;SACrF;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC;QACvC,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,yFAAyF,EAAE;SAC7H;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,aAAa,CAAC;QAC/D,WA
AW,EAAE;YACX,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,2DAA2D,EAAE,OAAO,EAAE,gBAAgB,EAAE;YACzH,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6EAA6E,EAAE;SAC/G;KACF;IACD;QACE,QAAQ,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC;QAC9C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,6FAA6F,EAAE,OAAO,EAAE,cAAc,EAAE;YACrJ,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,4DAA4D,EAAE;SAC5F;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC;QACnD,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,sDAAsD,EAAE;YACrF,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,uDAAuD,EAAE;YAC1F,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,oFAAoF,EAAE;SACtH;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,CAAC;QAC/C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,6CAA6C,EAAE;YAC5E,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,4DAA4D,EAAE;SAChG;KACF;IACD;QACE,QAAQ,EAAE,CAAC,WAAW,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,CAAC;QACjE,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,+CAA+C,EAAE;YAC9E,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,2DAA2D,EAAE;YAC5F,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,qFAAqF,EAAE;SACvH;KACF;IACD;QACE,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,CAAC;QAC1D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,+BAA+B,EAAE;YAC9D,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,8DAA8D,EAAE;SAChG;KACF;IACD;QACE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,CAAC;QAC3D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,2DAA2D,EAAE;YAC1F,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,oEAAoE,EAAE;SACxG;KACF;IACD;QACE,QAAQ,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,YAAY,EAAE,SAAS,CAAC;QAClE,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,4CAA4C,EAAE;SAC5E;KACF;IACD;QACE,QAAQ,EAAE,CAAC,KAAK,EAAE,aAAa,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC;QAC7D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,wEAAwE,EAAE;YACvG,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,wEAAwE,EAAE;SAC5G;KACF;IACD;QACE,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,WAAW,CAAC;QAC7D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,4DAA4D,EAAE;YAC3F,EAAE,I
AAI,EAAE,MAAM,EAAE,WAAW,EAAE,0EAA0E,EAAE;SAC1G;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,6CAA6C;AAC7C,8EAA8E;AAE9E,MAAM,wBAAwB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAiC/B,CAAC;AAQH,MAAM,WAAW,GAAqB,CAAC,MAAM,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;AAErE,SAAS,kBAAkB,CAAC,GAAkB;IAC5C,IAAI,CAAC,GAAG,CAAC,WAAW,IAAI,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAEzE,MAAM,IAAI,GAAG,WAAW,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAsB,CAAC;QAC3D,CAAC,CAAE,GAAG,CAAC,IAAuB;QAC9B,CAAC,CAAC,MAAM,CAAC;IAEX,OAAO;QACL,IAAI;QACJ,WAAW,EAAE,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE;QACnC,OAAO,EAAE,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS;KAC1E,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,0BAA0B;AAC1B,8EAA8E;AAE9E,SAAS,mBAAmB,CAAC,IAAY;IACvC,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,OAAO,GAA4C,EAAE,CAAC;IAC5D,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;QACtC,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACtE,IAAI,CAAC,SAAS;YAAE,SAAS;QAEzB,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;YACpC,6BAA6B;YAC7B,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC;gBAAE,SAAS;YACtC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;YAExB,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,MAAM,EAAE,QAA4B;aACrC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AAE9E,SAAS,eAAe,CAAC,IAAY;IACnC,IAAI,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE3B,4BAA4B;IAC5B,IAAI,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,YAAY,KAAK,CAAC,CAAC,EAAE,CAAC;YACxB,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,4BAA4B;IAC5B,MAAM,SAAS,GAAG,QAAQ,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;IAC9C,IAAI,SAAS,KAAK,CAAC,CAAC,EAAE,CAAC;QACrB,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAC1C,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC;
AAED,SAAS,oBAAoB,CAAC,QAAgB,EAAE,UAAkC;IAChF,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE5C,MAAM,cAAc,GAAG,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACjD,IAAI,cAAc,KAAK,CAAC,CAAC,EAAE,CAAC;QAC1B,UAAU,EAAE,CAAC,8DAA8D,CAAC,CAAC;QAC7E,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC;IACvD,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,8EAA8E;AAC9E,mCAAmC;AACnC,8EAA8E;AAE9E,KAAK,UAAU,qBAAqB,CAClC,KAAiB,EACjB,IAAY,EACZ,QAAgB,EAChB,UAAkC;IAElC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,WAAW,EAAE,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,UAAU,EAAE,CAAC,yCAAyC,CAAC,CAAC;IAExD,MAAM,SAAS,GAAG,KAAK;SACpB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,WAAW,kBAAkB,CAAC,CAAC,SAAS,EAAE,CAAC;SACpG,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,MAAM,MAAM,GAAG,MAAM,WAAW,EAAE,CAAC,QAAQ,CAAC;QAC1C,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,KAAK;QAChB,YAAY,EAAE,wBAAwB;QACtC,UAAU,EAAE,uFAAuF,QAAQ,WAAW,IAAI,wBAAwB,SAAS,EAAE;KAC9J,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;IAE/B,UAAU,EAAE,CAAC,+BAA+B,CAAC,CAAC;IAE9C,IAAI,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IACxC,QAAQ,GAAG,oBAAoB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAEtD,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,yDAAyD;YACzD,oBAAoB,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAC7C,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IAED,MAAM,WAAW,GAA4C,EAAE,CAAC;IAChE,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,MAAM,SAAS,GAAG,kBAAkB,CAAC,GAAoB,CAAC,CAAC;QAC3D,IAAI,SAAS,EAAE,CAAC;YACd,WAAW,CAAC,IAAI,CAAC;gBACf,GAAG,SAAS;gBACZ,MAAM,EAAE,MAA0B;aACnC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO;QACL,WAAW;QACX,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,CAAC;QACpC,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,CAAC;KACvC,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,cAAc;AACd,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,IAAY,EACZ,QAAgB,E
AChB,KAAiB,EACjB,UAAkC;IAElC,2BAA2B;IAC3B,UAAU,EAAE,CAAC,6BAA6B,CAAC,CAAC;IAC5C,MAAM,iBAAiB,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IACpD,UAAU,EAAE,CAAC,WAAW,iBAAiB,CAAC,MAAM,wBAAwB,CAAC,CAAC;IAE1E,qCAAqC;IACrC,MAAM,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,YAAY,EAAE,GAC/D,MAAM,qBAAqB,CAAC,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;IACjE,UAAU,EAAE,CAAC,WAAW,eAAe,CAAC,MAAM,sBAAsB,CAAC,CAAC;IAEtE,6DAA6D;IAC7D,MAAM,cAAc,GAAG,CAAC,GAAG,iBAAiB,EAAE,GAAG,eAAe,CAAC,CAAC;IAClE,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,OAAO,GAA4C,EAAE,CAAC;IAE5D,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,CAAC,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC;QACxC,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC5B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACd,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,mDAAmD;IACnD,MAAM,QAAQ,GAA2B,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC9D,EAAE,EAAE,OAAO,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;QAC3C,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,WAAW,EAAE,CAAC,CAAC,WAAW;QAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,MAAM,EAAE,CAAC,CAAC,MAAO;KAClB,CAAC,CAAC,CAAC;IAEJ,UAAU,EAAE,CAAC,UAAU,QAAQ,CAAC,MAAM,qCAAqC,CAAC,CAAC;IAE7E,OAAO;QACL,IAAI;QACJ,WAAW,EAAE,QAAQ;QACrB,gBAAgB,EAAE,QAAQ,CAAC,MAAM;QACjC,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACrC,WAAW;QACX,YAAY;KACb,CAAC;AACJ,CAAC"}
|
|
@@ -30,7 +30,7 @@ export interface FormalVerificationStats {
|
|
|
30
30
|
disagreements: number;
|
|
31
31
|
formal_overrides: number;
|
|
32
32
|
}
|
|
33
|
-
export type FormalCheckType = 'function_exists' | 'parameter_check' | 'error_handling' | 'null_check' | 'type_annotation' | 'sql_parameterized' | 'input_validation';
|
|
33
|
+
export type FormalCheckType = 'function_exists' | 'parameter_check' | 'error_handling' | 'null_check' | 'type_annotation' | 'sql_parameterized' | 'input_validation' | 'arithmetic_correctness' | 'api_misuse' | 'undefined_reference';
|
|
34
34
|
export interface FormalCheckResult {
|
|
35
35
|
claimId: string;
|
|
36
36
|
checkType: FormalCheckType;
|
|
@@ -4,6 +4,155 @@
|
|
|
4
4
|
* This is a local copy of api/lib/formal-verifier.ts for use within the SDK.
|
|
5
5
|
* The canonical version lives at api/lib/formal-verifier.ts.
|
|
6
6
|
*/
|
|
7
|
+
// ── Safe Arithmetic Evaluator ────────────────────────────────
|
|
8
|
+
function safeEvalArithmetic(expr) {
|
|
9
|
+
const src = expr.replace(/,/g, '').trim();
|
|
10
|
+
if (src.length === 0)
|
|
11
|
+
return null;
|
|
12
|
+
let pos = 0;
|
|
13
|
+
function peek() { return src[pos] ?? ''; }
|
|
14
|
+
function advance() { return src[pos++] ?? ''; }
|
|
15
|
+
function skipWS() { while (pos < src.length && src[pos] === ' ')
|
|
16
|
+
pos++; }
|
|
17
|
+
function parseNumber() {
|
|
18
|
+
skipWS();
|
|
19
|
+
let num = '';
|
|
20
|
+
while (pos < src.length && (src[pos] >= '0' && src[pos] <= '9' || src[pos] === '.')) {
|
|
21
|
+
num += advance();
|
|
22
|
+
}
|
|
23
|
+
if (num === '' || num === '.')
|
|
24
|
+
return null;
|
|
25
|
+
const val = parseFloat(num);
|
|
26
|
+
return isNaN(val) ? null : val;
|
|
27
|
+
}
|
|
28
|
+
function parsePrimary() {
|
|
29
|
+
skipWS();
|
|
30
|
+
if (peek() === '(') {
|
|
31
|
+
advance();
|
|
32
|
+
const val = parseExpression();
|
|
33
|
+
if (val === null)
|
|
34
|
+
return null;
|
|
35
|
+
skipWS();
|
|
36
|
+
if (peek() !== ')')
|
|
37
|
+
return null;
|
|
38
|
+
advance();
|
|
39
|
+
return val;
|
|
40
|
+
}
|
|
41
|
+
if (peek() >= '0' && peek() <= '9' || peek() === '.') {
|
|
42
|
+
return parseNumber();
|
|
43
|
+
}
|
|
44
|
+
return null;
|
|
45
|
+
}
|
|
46
|
+
function parsePower() {
|
|
47
|
+
const base = parsePrimary();
|
|
48
|
+
if (base === null)
|
|
49
|
+
return null;
|
|
50
|
+
skipWS();
|
|
51
|
+
if (pos + 1 < src.length && src[pos] === '*' && src[pos + 1] === '*') {
|
|
52
|
+
pos += 2;
|
|
53
|
+
const exp = parseUnary();
|
|
54
|
+
if (exp === null)
|
|
55
|
+
return null;
|
|
56
|
+
const result = Math.pow(base, exp);
|
|
57
|
+
if (!isFinite(result))
|
|
58
|
+
return null;
|
|
59
|
+
return result;
|
|
60
|
+
}
|
|
61
|
+
return base;
|
|
62
|
+
}
|
|
63
|
+
function parseUnary() {
|
|
64
|
+
skipWS();
|
|
65
|
+
if (peek() === '-') {
|
|
66
|
+
advance();
|
|
67
|
+
const val = parsePower();
|
|
68
|
+
return val === null ? null : -val;
|
|
69
|
+
}
|
|
70
|
+
if (peek() === '+') {
|
|
71
|
+
advance();
|
|
72
|
+
return parsePower();
|
|
73
|
+
}
|
|
74
|
+
return parsePower();
|
|
75
|
+
}
|
|
76
|
+
function parseTerm() {
|
|
77
|
+
let left = parseUnary();
|
|
78
|
+
if (left === null)
|
|
79
|
+
return null;
|
|
80
|
+
while (true) {
|
|
81
|
+
skipWS();
|
|
82
|
+
if (pos + 1 < src.length && src[pos] === '/' && src[pos + 1] === '/') {
|
|
83
|
+
pos += 2;
|
|
84
|
+
const right = parseUnary();
|
|
85
|
+
if (right === null || right === 0)
|
|
86
|
+
return null;
|
|
87
|
+
left = Math.floor(left / right);
|
|
88
|
+
}
|
|
89
|
+
else if (peek() === '*' && (pos + 1 >= src.length || src[pos + 1] !== '*')) {
|
|
90
|
+
advance();
|
|
91
|
+
const right = parseUnary();
|
|
92
|
+
if (right === null)
|
|
93
|
+
return null;
|
|
94
|
+
left = left * right;
|
|
95
|
+
}
|
|
96
|
+
else if (peek() === '/' && (pos + 1 >= src.length || src[pos + 1] !== '/')) {
|
|
97
|
+
advance();
|
|
98
|
+
const right = parseUnary();
|
|
99
|
+
if (right === null || right === 0)
|
|
100
|
+
return null;
|
|
101
|
+
left = left / right;
|
|
102
|
+
}
|
|
103
|
+
else if (peek() === '%') {
|
|
104
|
+
advance();
|
|
105
|
+
const right = parseUnary();
|
|
106
|
+
if (right === null || right === 0)
|
|
107
|
+
return null;
|
|
108
|
+
left = left % right;
|
|
109
|
+
}
|
|
110
|
+
else {
|
|
111
|
+
break;
|
|
112
|
+
}
|
|
113
|
+
if (!isFinite(left))
|
|
114
|
+
return null;
|
|
115
|
+
}
|
|
116
|
+
return left;
|
|
117
|
+
}
|
|
118
|
+
function parseExpression() {
|
|
119
|
+
let left = parseTerm();
|
|
120
|
+
if (left === null)
|
|
121
|
+
return null;
|
|
122
|
+
while (true) {
|
|
123
|
+
skipWS();
|
|
124
|
+
if (peek() === '+') {
|
|
125
|
+
advance();
|
|
126
|
+
const right = parseTerm();
|
|
127
|
+
if (right === null)
|
|
128
|
+
return null;
|
|
129
|
+
left = left + right;
|
|
130
|
+
}
|
|
131
|
+
else if (peek() === '-') {
|
|
132
|
+
advance();
|
|
133
|
+
const right = parseTerm();
|
|
134
|
+
if (right === null)
|
|
135
|
+
return null;
|
|
136
|
+
left = left - right;
|
|
137
|
+
}
|
|
138
|
+
else {
|
|
139
|
+
break;
|
|
140
|
+
}
|
|
141
|
+
if (!isFinite(left))
|
|
142
|
+
return null;
|
|
143
|
+
}
|
|
144
|
+
return left;
|
|
145
|
+
}
|
|
146
|
+
const result = parseExpression();
|
|
147
|
+
if (result === null)
|
|
148
|
+
return null;
|
|
149
|
+
skipWS();
|
|
150
|
+
if (pos < src.length)
|
|
151
|
+
return null;
|
|
152
|
+
if (!isFinite(result))
|
|
153
|
+
return null;
|
|
154
|
+
return result;
|
|
155
|
+
}
|
|
7
156
|
// ── Preprocessing ────────────────────────────────────────────
|
|
8
157
|
function preprocessCode(code) {
|
|
9
158
|
let result = code;
|
|
@@ -21,6 +170,58 @@ function preprocessCode(code) {
|
|
|
21
170
|
result = result.replace(/`(?:[^`\\]|\\.)*`/g, '`__STR__`');
|
|
22
171
|
return result;
|
|
23
172
|
}
|
|
173
|
+
// ── Known-Bad API List ───────────────────────────────────────
|
|
174
|
+
// Conservative v1: only APIs that unambiguously don't exist in the target language.
|
|
175
|
+
const KNOWN_BAD_APIS = {
|
|
176
|
+
// JavaScript/TypeScript — non-existent array methods
|
|
177
|
+
'.flatten(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: '.flat(' },
|
|
178
|
+
'.contains(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: '.includes(' },
|
|
179
|
+
// Non-existent Object methods
|
|
180
|
+
'Object.fromPairs(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: 'Object.fromEntries(' },
|
|
181
|
+
'Object.pairs(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: 'Object.entries(' },
|
|
182
|
+
'Object.size(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: 'Object.keys(...).length' },
|
|
183
|
+
'Object.length': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: 'Object.keys(...).length' },
|
|
184
|
+
// Non-existent string methods (JS)
|
|
185
|
+
'.trimLeft(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: '.trimStart(' },
|
|
186
|
+
'.trimRight(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: '.trimEnd(' },
|
|
187
|
+
// Python — common LLM hallucinations (method names only, no type prefix)
|
|
188
|
+
'.has_key(': { languages: ['python', 'py'], correct: '"key" in dict' },
|
|
189
|
+
'.iteritems(': { languages: ['python', 'py'], correct: '.items()' },
|
|
190
|
+
'.itervalues(': { languages: ['python', 'py'], correct: '.values()' },
|
|
191
|
+
'.iterkeys(': { languages: ['python', 'py'], correct: '.keys()' },
|
|
192
|
+
'.remove_at(': { languages: ['python', 'py'], correct: '.pop(index)' },
|
|
193
|
+
};
|
|
194
|
+
// ── Built-in Allowlists ──────────────────────────────────────
|
|
195
|
+
const JS_BUILTINS = new Set([
|
|
196
|
+
'console', 'window', 'document', 'navigator', 'location', 'history',
|
|
197
|
+
'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
|
|
198
|
+
'Promise', 'JSON', 'Math', 'Date', 'RegExp', 'Error', 'TypeError',
|
|
199
|
+
'Array', 'Object', 'String', 'Number', 'Boolean', 'Symbol', 'Map', 'Set',
|
|
200
|
+
'WeakMap', 'WeakSet', 'Proxy', 'Reflect',
|
|
201
|
+
'parseInt', 'parseFloat', 'isNaN', 'isFinite', 'NaN', 'Infinity',
|
|
202
|
+
'undefined', 'null', 'true', 'false',
|
|
203
|
+
'require', 'module', 'exports', '__dirname', '__filename',
|
|
204
|
+
'process', 'Buffer', 'global', 'globalThis',
|
|
205
|
+
'fetch', 'Request', 'Response', 'Headers', 'URL', 'URLSearchParams',
|
|
206
|
+
'TextEncoder', 'TextDecoder', 'AbortController', 'FormData', 'Blob', 'File',
|
|
207
|
+
'Event', 'EventTarget', 'CustomEvent',
|
|
208
|
+
'alert', 'confirm', 'prompt',
|
|
209
|
+
'React', 'Component', 'Fragment', 'useState', 'useEffect', 'useRef',
|
|
210
|
+
'useCallback', 'useMemo', 'useContext', 'useReducer',
|
|
211
|
+
'describe', 'it', 'test', 'expect', 'beforeEach', 'afterEach', 'jest', 'vi',
|
|
212
|
+
]);
|
|
213
|
+
const PY_BUILTINS = new Set([
|
|
214
|
+
'print', 'len', 'range', 'enumerate', 'zip', 'map', 'filter',
|
|
215
|
+
'int', 'float', 'str', 'bool', 'list', 'dict', 'set', 'tuple',
|
|
216
|
+
'type', 'isinstance', 'issubclass', 'hasattr', 'getattr', 'setattr',
|
|
217
|
+
'open', 'input', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
|
|
218
|
+
'any', 'all', 'iter', 'next', 'super', 'property', 'staticmethod',
|
|
219
|
+
'classmethod', 'None', 'True', 'False', 'self', 'cls',
|
|
220
|
+
'Exception', 'ValueError', 'TypeError', 'KeyError', 'IndexError',
|
|
221
|
+
'ImportError', 'AttributeError', 'RuntimeError', 'StopIteration',
|
|
222
|
+
'os', 'sys', 'json', 'math', 're', 'datetime', 'collections',
|
|
223
|
+
'functools', 'itertools', 'typing', 'pathlib', 'unittest', 'pytest',
|
|
224
|
+
]);
|
|
24
225
|
// ── Claim Classification ─────────────────────────────────────
|
|
25
226
|
function classifyClaim(claim) {
|
|
26
227
|
const text = `${claim.description} ${claim.assertion}`.toLowerCase();
|
|
@@ -73,6 +274,22 @@ function classifyClaim(claim) {
|
|
|
73
274
|
if (/(?:validates?\s+(?:user\s+)?input|input\s+validation|sanitizes?\s+input|validates?\s+(?:before|prior))/i.test(text)) {
|
|
74
275
|
return { claim, formallyVerifiable: true, checkType: 'input_validation' };
|
|
75
276
|
}
|
|
277
|
+
// Arithmetic correctness checks
|
|
278
|
+
if (/(?:calculates?|computes?|evaluates?\s+to|sums?|totals?|multiplies?|divides?|averages?|rounds?)/i.test(text) ||
|
|
279
|
+
/(?:returns?\s+(?:the\s+)?(?:sum|product|average|total|difference|ratio|percentage|remainder|quotient))/i.test(text) ||
|
|
280
|
+
/(?:result\s+(?:is|equals?|should\s+be)\s+\d)/i.test(text)) {
|
|
281
|
+
return { claim, formallyVerifiable: true, checkType: 'arithmetic_correctness' };
|
|
282
|
+
}
|
|
283
|
+
// API misuse checks
|
|
284
|
+
if (/(?:uses?|calls?|invokes?)\s+(?:the\s+)?(?:api|method|function)\s+['"`]?(\w+(?:\.\w+)*)['"`]?/i.test(text) ||
|
|
285
|
+
/(?:calls?\s+\w+\.\w+)|(?:uses?\s+\w+\.\w+\s*\()/i.test(originalText)) {
|
|
286
|
+
return { claim, formallyVerifiable: true, checkType: 'api_misuse' };
|
|
287
|
+
}
|
|
288
|
+
// Undefined reference checks
|
|
289
|
+
if (/(?:references?|uses?|accesses?)\s+(?:variable|function|import|module|identifier)\s+['"`]?(\w+)['"`]?/i.test(text) ||
|
|
290
|
+
/(?:variable|function|import)\s+['"`]?(\w+)['"`]?\s+(?:is\s+)?(?:defined|declared|imported|available)/i.test(text)) {
|
|
291
|
+
return { claim, formallyVerifiable: true, checkType: 'undefined_reference' };
|
|
292
|
+
}
|
|
76
293
|
return { claim, formallyVerifiable: false };
|
|
77
294
|
}
|
|
78
295
|
// ── Structural Verifiers ─────────────────────────────────────
|
|
@@ -399,6 +616,234 @@ function checkInputValidation(code, language) {
|
|
|
399
616
|
confidence: 1,
|
|
400
617
|
};
|
|
401
618
|
}
|
|
619
|
+
// ── Arithmetic Correctness ───────────────────────────────────
|
|
620
|
+
/**
 * Deterministic arithmetic check for a single claim.
 *
 * Works in three phases:
 *   A. Scan the preprocessed code for `name = <expr>` assignments and keep those
 *      whose right-hand side `safeEvalArithmetic` can evaluate (non-null result).
 *   B. Pull every unsigned number, and an optional `<expr> = <result>` statement,
 *      out of the claim text (`description` + `assertion`).
 *   C. If the claim states an explicit expression, verify it directly. Otherwise
 *      look for a variable named in the claim whose evaluated value disagrees with
 *      the numbers the claim mentions.
 *
 * @param {string} code - Source code under verification.
 * @param {string} language - Language hint; compared case-insensitively.
 * @param {{id: *, description: string, assertion: string}} claim - Claim being checked.
 * @returns {{claimId: *, checkType: string, verdict: string, evidence: string, confidence: number}}
 *   Result record with `checkType` 'arithmetic_correctness' and verdict 'PASS' or 'FAIL'.
 */
function checkArithmeticCorrectness(code, language, claim) {
    const clean = preprocessCode(code);
    const text = `${claim.description} ${claim.assertion}`;
    // Normalise the hint the same way the sibling verifiers do, so that
    // 'TypeScript' / 'Python' select the language-specific pattern instead of
    // silently falling through to the generic one. Non-string hints still fall
    // through to the generic pattern rather than throwing.
    const langLower = String(language ?? '').toLowerCase();
    const buildResult = (verdict, evidence) => ({
        claimId: claim.id,
        checkType: 'arithmetic_correctness',
        verdict,
        evidence,
        confidence: 1,
    });

    // Phase A: Extract constant expressions from code
    let assignPatterns;
    if (['typescript', 'javascript', 'ts', 'js', 'tsx', 'jsx'].includes(langLower)) {
        assignPatterns = [/(?:const|let|var)\s+(\w+)\s*(?::\s*\w+)?\s*=\s*([^;{\n]+)/g];
    }
    else if (['python', 'py'].includes(langLower)) {
        assignPatterns = [/^(\w+)\s*=\s*([^#\n]+)/gm];
    }
    else if (['go', 'golang'].includes(langLower)) {
        assignPatterns = [/(?:const|var)\s+(\w+)\s*(?:\w+)?\s*=\s*([^\n]+)/g];
    }
    else if (['java', 'kotlin', 'scala'].includes(langLower)) {
        assignPatterns = [/(?:final\s+)?(?:\w+)\s+(\w+)\s*=\s*([^;]+)/g];
    }
    else {
        assignPatterns = [/(?:const|let|var|final)?\s*(\w+)\s*=\s*([^;\n]+)/g];
    }
    const assignments = [];
    for (const pattern of assignPatterns) {
        // matchAll iterates over a private copy of the regex, so no lastIndex
        // state leaks between iterations or calls.
        for (const m of clean.matchAll(pattern)) {
            const name = m[1];
            const rhs = m[2].trim();
            // Skip right-hand sides that start like a string/array/object/paren
            // literal or a declaration/constructor/async keyword.
            if (/^['"`\[{(]|^(?:function|def|class|new|await|async)\b/.test(rhs)) {
                continue;
            }
            // Skip boolean / null-like literals.
            if (/^(?:true|false|null|undefined|None|nil)\b/i.test(rhs)) {
                continue;
            }
            const value = safeEvalArithmetic(rhs);
            if (value !== null) {
                assignments.push({ name, expr: rhs, value });
            }
        }
    }

    // Phase B: Extract expected values from claim text
    // Note: the number pattern has no sign, so claim numbers are never negative.
    const claimNumbers = [...text.matchAll(/\b(\d+\.?\d*)\b/g)].map(m => parseFloat(m[1]));
    const claimExprMatch = text.match(/(\d[\d\s.+\-*/%()]*\d)\s*(?:=|equals?|is)\s*(\d+\.?\d*)/i);

    // Phase C: Compare
    if (claimExprMatch) {
        const claimExpr = claimExprMatch[1].trim();
        const claimResult = parseFloat(claimExprMatch[2]);
        const evaluated = safeEvalArithmetic(claimExpr);
        if (evaluated !== null && !Number.isNaN(claimResult)) {
            // Tolerance comparison: floating-point results are not compared with ===.
            const matches = Math.abs(evaluated - claimResult) < 1e-9;
            return buildResult(matches ? 'PASS' : 'FAIL', matches
                ? `Arithmetic verified: ${claimExpr} = ${evaluated}`
                : `Arithmetic error: ${claimExpr} = ${evaluated}, but claim states ${claimResult}`);
        }
    }
    for (const assign of assignments) {
        const claimReferencesVar = new RegExp(`\\b${assign.name}\\b`, 'i').test(text);
        const claimReferencesValue = claimNumbers.some(n => Math.abs(n - assign.value) < 1e-9);
        if (claimReferencesVar && claimNumbers.length > 0) {
            // A claim number "contradicts" when it differs from the evaluated value
            // but lies within a window of 10x that value. For zero or negative
            // values the window is empty, so no contradiction is ever reported;
            // this is kept deliberately because claim numbers are parsed unsigned.
            const contradicts = claimNumbers.some(n => {
                const distance = Math.abs(n - assign.value);
                return distance > 1e-9 && distance < assign.value * 10;
            });
            if (contradicts && !claimReferencesValue) {
                return buildResult('FAIL', `Variable '${assign.name}' evaluates to ${assign.value} (from: ${assign.expr}), but claim expects a different value`);
            }
        }
    }
    // NOTE(review): the fallback reports PASS with confidence 1 even when nothing
    // could be evaluated; how the caller weighs that is outside this block.
    return buildResult('PASS', assignments.length > 0
        ? `Found ${assignments.length} constant expression(s) in code, no contradictions with claim`
        : 'No deterministically evaluable constant expressions found — deferring to LLM');
}
|
|
705
|
+
// ── API Misuse Check ─────────────────────────────────────────
|
|
706
|
+
/**
 * Flags calls to APIs that are listed in KNOWN_BAD_APIS for the given language.
 *
 * The preprocessed code is searched for each table key as a plain substring;
 * only entries whose `languages` list contains the lowercased language hint
 * are considered. The first matching entry (in table order) produces a FAIL.
 *
 * @param {string} code - Source code under verification.
 * @param {string} language - Language hint; lowercased before lookup.
 * @param {{id: *}} claim - Claim being checked (only `id` is read here).
 * @returns {{claimId: *, checkType: string, verdict: string, evidence: string, confidence: number}}
 *   Result record with `checkType` 'api_misuse'.
 */
function checkApiMisuse(code, language, claim) {
    const sanitized = preprocessCode(code);
    const normalizedLang = language.toLowerCase();
    const offending = Object.entries(KNOWN_BAD_APIS).find(([apiToken, meta]) => (
        meta.languages.length !== 0 &&
        meta.languages.includes(normalizedLang) &&
        sanitized.includes(apiToken)
    ));
    if (offending === undefined) {
        return {
            claimId: claim.id,
            checkType: 'api_misuse',
            verdict: 'PASS',
            evidence: 'No known non-existent API calls detected',
            confidence: 1,
        };
    }
    const [apiToken, meta] = offending;
    // Parentheses are stripped from the table key so the message shows a bare name.
    const displayName = apiToken.replace(/[()]/g, '');
    return {
        claimId: claim.id,
        checkType: 'api_misuse',
        verdict: 'FAIL',
        evidence: `Code uses '${displayName}' which does not exist in ${language}. Use '${meta.correct}' instead.`,
        confidence: 1,
    };
}
|
|
732
|
+
// ── Undefined Reference Check ────────────────────────────────
|
|
733
|
+
/**
 * Checks whether the identifier named in a claim is declared in the code.
 *
 * Steps:
 *   1. Extract one identifier from the claim text (`description` + `assertion`).
 *      When none can be extracted the check passes and defers.
 *   2. Pass immediately if the identifier is in the built-in set for the language.
 *   3. Collect declared names from the preprocessed code with regex heuristics
 *      (variables, functions, classes, imports, parameters).
 *   4. With fewer than 3 declarations the code is treated as a fragment and the
 *      check passes and defers. Otherwise the verdict depends on whether the
 *      identifier is declared, absent from the code, or used without declaration.
 *
 * @param {string} code - Source code under verification.
 * @param {string} language - Language hint; lowercased before comparison.
 * @param {{id: *, description: string, assertion: string}} claim - Claim being checked.
 * @returns {{claimId: *, checkType: string, verdict: string, evidence: string, confidence: number}}
 *   Result record with `checkType` 'undefined_reference'.
 */
function checkUndefinedReference(code, language, claim) {
    const text = `${claim.description} ${claim.assertion}`;
    const identMatch = text.match(/(?:references?|uses?|accesses?)\s+(?:variable|function|import|module|identifier)\s+['"`]?(\w+)['"`]?/i) || text.match(/(?:variable|function|import)\s+['"`]?(\w+)['"`]?\s+(?:is\s+)?(?:defined|declared|imported|available)/i);
    const buildResult = (verdict, evidence) => ({
        claimId: claim.id,
        checkType: 'undefined_reference',
        verdict,
        evidence,
        confidence: 1,
    });
    if (!identMatch) {
        return buildResult('PASS', 'No specific identifier extracted from claim — deferring to LLM');
    }
    const identifier = identMatch[1];
    const clean = preprocessCode(code);
    const langLower = language.toLowerCase();
    const isJsLike = ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'].includes(langLower);
    const isPython = ['python', 'py'].includes(langLower);
    const builtins = isJsLike ? JS_BUILTINS : isPython ? PY_BUILTINS : new Set();
    if (builtins.has(identifier)) {
        return buildResult('PASS', `'${identifier}' is a built-in identifier in ${language}`);
    }

    const declarations = new Set();
    // Records each parameter name from a comma-separated parameter list. Strips
    // destructuring brackets, JS rest (`...`) and Python `*` / `**` prefixes, and
    // anything after a type annotation or default value.
    const addParamNames = (paramList) => {
        for (const rawParam of paramList.split(',')) {
            const name = rawParam
                .replace(/[{}\[\]]/g, ' ')
                .trim()
                .replace(/^(?:\.\.\.|\*{1,2})/, '')
                .split(/[\s:=]/)[0];
            if (/^\w+$/.test(name)) {
                declarations.add(name);
            }
        }
    };
    // Records the local binding of each entry in an import list (`a`, `a as b`).
    const addImportedNames = (importList) => {
        for (const entry of importList.split(',')) {
            const local = entry.replace(/[()\\]/g, ' ').trim().split(/\s+as\s+/).pop()?.trim();
            if (local) {
                declarations.add(local);
            }
        }
    };

    if (isJsLike) {
        for (const m of clean.matchAll(/(?:const|let|var)\s+(\w+)/g)) {
            declarations.add(m[1]);
        }
        for (const m of clean.matchAll(/function\s+(\w+)/g)) {
            declarations.add(m[1]);
        }
        for (const m of clean.matchAll(/class\s+(\w+)/g)) {
            declarations.add(m[1]);
        }
        // Named imports, with or without a leading default import.
        for (const m of clean.matchAll(/import\s+(?:\w+\s*,\s*)?\{([^}]+)\}/g)) {
            addImportedNames(m[1]);
        }
        // Default import: `import X from` or `import X, { ... } from`.
        for (const m of clean.matchAll(/import\s+(\w+)(?:\s*,|\s+from\b)/g)) {
            declarations.add(m[1]);
        }
        // Namespace import: `import * as ns from`.
        for (const m of clean.matchAll(/import\s+\*\s+as\s+(\w+)/g)) {
            declarations.add(m[1]);
        }
        // Parameters of named and anonymous function declarations/expressions.
        for (const m of clean.matchAll(/\bfunction\b\s*\*?\s*\w*\s*\(([^)]*)\)/g)) {
            addParamNames(m[1]);
        }
        // Arrow-function parameters sit BEFORE `=>`: `(a, b) =>` ...
        for (const m of clean.matchAll(/\(([^()]*)\)\s*=>/g)) {
            addParamNames(m[1]);
        }
        // ... or a single bare parameter: `x =>`.
        for (const m of clean.matchAll(/(\w+)\s*=>/g)) {
            declarations.add(m[1]);
        }
    }
    else if (isPython) {
        for (const m of clean.matchAll(/def\s+(\w+)/g)) {
            declarations.add(m[1]);
        }
        for (const m of clean.matchAll(/class\s+(\w+)/g)) {
            declarations.add(m[1]);
        }
        for (const m of clean.matchAll(/^(\w+)\s*=/gm)) {
            declarations.add(m[1]);
        }
        for (const m of clean.matchAll(/import\s+(\w+)/g)) {
            declarations.add(m[1]);
        }
        // `import pkg.mod as alias` binds the alias.
        for (const m of clean.matchAll(/import\s+[\w.]+\s+as\s+(\w+)/g)) {
            declarations.add(m[1]);
        }
        // `from pkg.sub import a, b as c` — the module path may be dotted or relative.
        for (const m of clean.matchAll(/from\s+[\w.]+\s+import\s+(.+)/g)) {
            addImportedNames(m[1]);
        }
        // Function parameters.
        for (const m of clean.matchAll(/def\s+\w+\s*\(([^)]*)\)/g)) {
            addParamNames(m[1]);
        }
    }
    else {
        for (const m of clean.matchAll(/(?:const|let|var|function|class|def|func|fn)\s+(\w+)/g)) {
            declarations.add(m[1]);
        }
        for (const m of clean.matchAll(/^(\w+)\s*=/gm)) {
            declarations.add(m[1]);
        }
    }

    // Too few declarations to judge: treat the code as a fragment and defer.
    if (declarations.size < 3) {
        return buildResult('PASS', `Code has fewer than 3 declarations (likely a fragment) — deferring to LLM`);
    }
    if (declarations.has(identifier)) {
        return buildResult('PASS', `'${identifier}' is declared in the source code`);
    }
    // `identifier` was captured with \w+, so it contains no regex metacharacters.
    const identRegex = new RegExp(`\\b${identifier}\\b`);
    if (!identRegex.test(clean)) {
        return buildResult('FAIL', `'${identifier}' is not found anywhere in the source code`);
    }
    return buildResult('FAIL', `'${identifier}' is referenced but not declared in the source code`);
}
|
|
402
847
|
// ── Main Entry Point ─────────────────────────────────────────
|
|
403
848
|
export function runFormalVerification(code, language, claims, llmVerifications) {
|
|
404
849
|
const classified = claims.map(c => classifyClaim(c));
|
|
@@ -431,6 +876,15 @@ export function runFormalVerification(code, language, claims, llmVerifications)
|
|
|
431
876
|
case 'input_validation':
|
|
432
877
|
result = checkInputValidation(code, language);
|
|
433
878
|
break;
|
|
879
|
+
case 'arithmetic_correctness':
|
|
880
|
+
result = checkArithmeticCorrectness(code, language, c.claim);
|
|
881
|
+
break;
|
|
882
|
+
case 'api_misuse':
|
|
883
|
+
result = checkApiMisuse(code, language, c.claim);
|
|
884
|
+
break;
|
|
885
|
+
case 'undefined_reference':
|
|
886
|
+
result = checkUndefinedReference(code, language, c.claim);
|
|
887
|
+
break;
|
|
434
888
|
default:
|
|
435
889
|
continue;
|
|
436
890
|
}
|