tryassay 0.22.0 → 0.22.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/demo/css/style.css +495 -836
- package/demo/index.html +40 -184
- package/demo/js/chat.js +385 -142
- package/demo/js/preview.js +456 -0
- package/demo/js/sse-client.js +262 -135
- package/demo/js/state.js +11 -1
- package/demo/js/timeline.js +57 -371
- package/dist/api/server.d.ts +2 -0
- package/dist/api/server.js +63 -19
- package/dist/api/server.js.map +1 -1
- package/dist/cli.js +2 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/assess.d.ts +2 -0
- package/dist/commands/assess.js +132 -164
- package/dist/commands/assess.js.map +1 -1
- package/dist/commands/demo.js +259 -9
- package/dist/commands/demo.js.map +1 -1
- package/dist/lib/__tests__/arithmetic-quick-test.d.ts +6 -0
- package/dist/lib/__tests__/arithmetic-quick-test.js +197 -0
- package/dist/lib/__tests__/arithmetic-quick-test.js.map +1 -0
- package/dist/lib/__tests__/arithmetic-real-llm-test.d.ts +13 -0
- package/dist/lib/__tests__/arithmetic-real-llm-test.js +284 -0
- package/dist/lib/__tests__/arithmetic-real-llm-test.js.map +1 -0
- package/dist/lib/__tests__/arithmetic-value-demo.d.ts +10 -0
- package/dist/lib/__tests__/arithmetic-value-demo.js +193 -0
- package/dist/lib/__tests__/arithmetic-value-demo.js.map +1 -0
- package/dist/lib/__tests__/flow-to-claims.test.d.ts +1 -0
- package/dist/lib/__tests__/flow-to-claims.test.js +91 -0
- package/dist/lib/__tests__/flow-to-claims.test.js.map +1 -0
- package/dist/lib/__tests__/formal-verifier-api-misuse.test.d.ts +9 -0
- package/dist/lib/__tests__/formal-verifier-api-misuse.test.js +391 -0
- package/dist/lib/__tests__/formal-verifier-api-misuse.test.js.map +1 -0
- package/dist/lib/__tests__/formal-verifier-arithmetic.test.d.ts +7 -0
- package/dist/lib/__tests__/formal-verifier-arithmetic.test.js +318 -0
- package/dist/lib/__tests__/formal-verifier-arithmetic.test.js.map +1 -0
- package/dist/lib/__tests__/intent-extractor.test.d.ts +1 -0
- package/dist/lib/__tests__/intent-extractor.test.js +97 -0
- package/dist/lib/__tests__/intent-extractor.test.js.map +1 -0
- package/dist/lib/__tests__/intent-reviewer.test.d.ts +1 -0
- package/dist/lib/__tests__/intent-reviewer.test.js +55 -0
- package/dist/lib/__tests__/intent-reviewer.test.js.map +1 -0
- package/dist/lib/__tests__/mr-gsm8k-benchmark.d.ts +11 -0
- package/dist/lib/__tests__/mr-gsm8k-benchmark.js +224 -0
- package/dist/lib/__tests__/mr-gsm8k-benchmark.js.map +1 -0
- package/dist/lib/anthropic.js +25 -33
- package/dist/lib/anthropic.js.map +1 -1
- package/dist/lib/assessment-reporter.js +9 -13
- package/dist/lib/assessment-reporter.js.map +1 -1
- package/dist/lib/claim-extractor.js +10 -19
- package/dist/lib/claim-extractor.js.map +1 -1
- package/dist/lib/code-verifier.js +16 -36
- package/dist/lib/code-verifier.js.map +1 -1
- package/dist/lib/constraint-engine.js +10 -19
- package/dist/lib/constraint-engine.js.map +1 -1
- package/dist/lib/formal-verifier.d.ts +1 -1
- package/dist/lib/formal-verifier.js +454 -0
- package/dist/lib/formal-verifier.js.map +1 -1
- package/dist/lib/guided-generator.js +19 -37
- package/dist/lib/guided-generator.js.map +1 -1
- package/dist/lib/intent-extractor.d.ts +47 -0
- package/dist/lib/intent-extractor.js +432 -0
- package/dist/lib/intent-extractor.js.map +1 -0
- package/dist/lib/intent-reviewer.d.ts +14 -0
- package/dist/lib/intent-reviewer.js +148 -0
- package/dist/lib/intent-reviewer.js.map +1 -0
- package/dist/lib/intent-types.d.ts +89 -0
- package/dist/lib/intent-types.js +5 -0
- package/dist/lib/intent-types.js.map +1 -0
- package/dist/lib/inventory-extractor.js +9 -22
- package/dist/lib/inventory-extractor.js.map +1 -1
- package/dist/lib/llm-provider.d.ts +23 -0
- package/dist/lib/llm-provider.js +130 -0
- package/dist/lib/llm-provider.js.map +1 -0
- package/dist/lib/remediator.js +20 -28
- package/dist/lib/remediator.js.map +1 -1
- package/dist/lib/requirements-generator.js +14 -19
- package/dist/lib/requirements-generator.js.map +1 -1
- package/dist/lib/spec-synthesizer.js +10 -19
- package/dist/lib/spec-synthesizer.js.map +1 -1
- package/dist/runtime/app-create-orchestrator.d.ts +5 -1
- package/dist/runtime/app-create-orchestrator.js +114 -39
- package/dist/runtime/app-create-orchestrator.js.map +1 -1
- package/dist/runtime/check-catalog.js +5 -3
- package/dist/runtime/check-catalog.js.map +1 -1
- package/dist/runtime/check-definitions.d.ts +10 -0
- package/dist/runtime/check-definitions.js +52 -2
- package/dist/runtime/check-definitions.js.map +1 -1
- package/dist/runtime/composition-verifier.js +8 -12
- package/dist/runtime/composition-verifier.js.map +1 -1
- package/dist/runtime/gap-detector.js +8 -10
- package/dist/runtime/gap-detector.js.map +1 -1
- package/dist/runtime/input-validator.d.ts +7 -0
- package/dist/runtime/input-validator.js +162 -0
- package/dist/runtime/input-validator.js.map +1 -0
- package/dist/runtime/model-router.d.ts +10 -0
- package/dist/runtime/model-router.js +42 -0
- package/dist/runtime/model-router.js.map +1 -0
- package/dist/runtime/pattern-extractor.js +8 -10
- package/dist/runtime/pattern-extractor.js.map +1 -1
- package/dist/runtime/planner.js +11 -16
- package/dist/runtime/planner.js.map +1 -1
- package/dist/runtime/prompt-guard.d.ts +2 -0
- package/dist/runtime/prompt-guard.js +180 -0
- package/dist/runtime/prompt-guard.js.map +1 -0
- package/dist/runtime/prompt-safety-analyzer.js +8 -13
- package/dist/runtime/prompt-safety-analyzer.js.map +1 -1
- package/dist/runtime/reasoner.js +19 -33
- package/dist/runtime/reasoner.js.map +1 -1
- package/dist/runtime/rule-meta-verifier.js +9 -11
- package/dist/runtime/rule-meta-verifier.js.map +1 -1
- package/dist/runtime/safe-executor.d.ts +23 -0
- package/dist/runtime/safe-executor.js +151 -0
- package/dist/runtime/safe-executor.js.map +1 -0
- package/dist/runtime/specialized-agent.js +10 -14
- package/dist/runtime/specialized-agent.js.map +1 -1
- package/dist/runtime/strategy-library.js +8 -10
- package/dist/runtime/strategy-library.js.map +1 -1
- package/dist/runtime/supabase-experience-store.js.map +1 -1
- package/dist/runtime/supabase-provisioner.d.ts +35 -0
- package/dist/runtime/supabase-provisioner.js +192 -0
- package/dist/runtime/supabase-provisioner.js.map +1 -0
- package/dist/runtime/types.d.ts +88 -0
- package/dist/sdk/forward-verify.js +16 -33
- package/dist/sdk/forward-verify.js.map +1 -1
- package/package.json +1 -1
- package/demo/data/demo-events.json +0 -103
- package/demo/js/demo-mode.js +0 -107
- package/demo/js/orb.js +0 -634
- package/demo/js/question-cards.js +0 -207
- package/demo/js/voice.js +0 -154
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { MODEL } from './anthropic.js';
|
|
2
|
+
import { getProvider } from './llm-provider.js';
|
|
2
3
|
// ---------------------------------------------------------------------------
|
|
3
4
|
// Domain patterns — embedded knowledge from Assay benchmark findings
|
|
4
5
|
// ---------------------------------------------------------------------------
|
|
@@ -243,27 +244,17 @@ async function deriveSpecConstraints(specs, task, language, onProgress) {
|
|
|
243
244
|
if (specs.length === 0) {
|
|
244
245
|
return { constraints: [], inputTokens: 0, outputTokens: 0 };
|
|
245
246
|
}
|
|
246
|
-
|
|
247
|
-
onProgress?.('Deriving spec constraints via Claude (streaming)...');
|
|
247
|
+
onProgress?.('Deriving spec constraints via Claude...');
|
|
248
248
|
const specsText = specs
|
|
249
249
|
.map((s) => `[${s.id}] (${s.category}, ${s.severity}) ${s.description}\n Assertion: ${s.assertion}`)
|
|
250
250
|
.join('\n\n');
|
|
251
|
-
const
|
|
251
|
+
const result = await getProvider().complete({
|
|
252
252
|
model: MODEL,
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
{
|
|
257
|
-
role: 'user',
|
|
258
|
-
content: `Generate constraints for the following coding task and specifications.\n\nLanguage: ${language}\nTask: ${task}\n\nSpecifications:\n${specsText}`,
|
|
259
|
-
},
|
|
260
|
-
],
|
|
261
|
-
});
|
|
262
|
-
let rawText = '';
|
|
263
|
-
stream.on('text', (text) => {
|
|
264
|
-
rawText += text;
|
|
253
|
+
maxTokens: 8_000,
|
|
254
|
+
systemPrompt: CONSTRAINT_SYSTEM_PROMPT,
|
|
255
|
+
userPrompt: `Generate constraints for the following coding task and specifications.\n\nLanguage: ${language}\nTask: ${task}\n\nSpecifications:\n${specsText}`,
|
|
265
256
|
});
|
|
266
|
-
const
|
|
257
|
+
const rawText = result.content;
|
|
267
258
|
onProgress?.('Parsing constraint results...');
|
|
268
259
|
let jsonText = stripCodeFences(rawText);
|
|
269
260
|
jsonText = repairTruncatedArray(jsonText, onProgress);
|
|
@@ -290,8 +281,8 @@ async function deriveSpecConstraints(specs, task, language, onProgress) {
|
|
|
290
281
|
}
|
|
291
282
|
return {
|
|
292
283
|
constraints,
|
|
293
|
-
inputTokens:
|
|
294
|
-
outputTokens:
|
|
284
|
+
inputTokens: result.inputTokens ?? 0,
|
|
285
|
+
outputTokens: result.outputTokens ?? 0,
|
|
295
286
|
};
|
|
296
287
|
}
|
|
297
288
|
// ---------------------------------------------------------------------------
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"constraint-engine.js","sourceRoot":"","sources":["../../src/lib/constraint-engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"constraint-engine.js","sourceRoot":"","sources":["../../src/lib/constraint-engine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AACvC,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAUhD,8EAA8E;AAC9E,qEAAqE;AACrE,8EAA8E;AAE9E,MAAM,eAAe,GAGhB;IACH;QACE,QAAQ,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,UAAU,EAAE,IAAI,CAAC;QAC5C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,+DAA+D,EAAE;YAC9F,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,gEAAgE,EAAE,OAAO,EAAE,2CAA2C,EAAE;SAC1J;KACF;IACD;QACE,QAAQ,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,CAAC;QACtD,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,2DAA2D,EAAE;YAC1F,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,wDAAwD,EAAE;SAC5F;KACF;IACD;QACE,QAAQ,EAAE,CAAC,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAE,YAAY,CAAC;QAC1D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,gEAAgE,EAAE;YAC/F,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,gEAAgE,EAAE,OAAO,EAAE,4DAA4D,EAAE;SAC3K;KACF;IACD;QACE,QAAQ,EAAE,CAAC,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC;QAC7C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,kEAAkE,EAAE;YACjG,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,qDAAqD,EAAE;YACxF,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,iDAAiD,EAAE;SACnF;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,CAAC;QACpE,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,yCAAyC,EAAE;YACxE,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6DAA6D,EAAE;SAC/F;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,CAAC;QAC1D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,4DAA4D,EAAE;YAC3F,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,8BAA8B,EAAE;SAC9D;KACF;IACD;QACE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC;QAC7C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,kDAAkD,EAAE;YACjF,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,qDAAqD,EAAE;SACrF;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC;QACvC,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,yFAAyF,EAAE;SAC7H;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,aAAa,CAAC;QAC/D,WA
AW,EAAE;YACX,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,2DAA2D,EAAE,OAAO,EAAE,gBAAgB,EAAE;YACzH,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,6EAA6E,EAAE;SAC/G;KACF;IACD;QACE,QAAQ,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC;QAC9C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,6FAA6F,EAAE,OAAO,EAAE,cAAc,EAAE;YACrJ,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,4DAA4D,EAAE;SAC5F;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC;QACnD,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,sDAAsD,EAAE;YACrF,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,uDAAuD,EAAE;YAC1F,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,oFAAoF,EAAE;SACtH;KACF;IACD;QACE,QAAQ,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,CAAC;QAC/C,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,6CAA6C,EAAE;YAC5E,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,4DAA4D,EAAE;SAChG;KACF;IACD;QACE,QAAQ,EAAE,CAAC,WAAW,EAAE,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,CAAC;QACjE,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,+CAA+C,EAAE;YAC9E,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,2DAA2D,EAAE;YAC5F,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,qFAAqF,EAAE;SACvH;KACF;IACD;QACE,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,CAAC;QAC1D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,+BAA+B,EAAE;YAC9D,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,8DAA8D,EAAE;SAChG;KACF;IACD;QACE,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,CAAC;QAC3D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,2DAA2D,EAAE;YAC1F,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,oEAAoE,EAAE;SACxG;KACF;IACD;QACE,QAAQ,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,YAAY,EAAE,SAAS,CAAC;QAClE,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,4CAA4C,EAAE;SAC5E;KACF;IACD;QACE,QAAQ,EAAE,CAAC,KAAK,EAAE,aAAa,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC;QAC7D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,wEAAwE,EAAE;YACvG,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,wEAAwE,EAAE;SAC5G;KACF;IACD;QACE,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,WAAW,CAAC;QAC7D,WAAW,EAAE;YACX,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,4DAA4D,EAAE;YAC3F,EAAE,I
AAI,EAAE,MAAM,EAAE,WAAW,EAAE,0EAA0E,EAAE;SAC1G;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,6CAA6C;AAC7C,8EAA8E;AAE9E,MAAM,wBAAwB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAiC/B,CAAC;AAQH,MAAM,WAAW,GAAqB,CAAC,MAAM,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;AAErE,SAAS,kBAAkB,CAAC,GAAkB;IAC5C,IAAI,CAAC,GAAG,CAAC,WAAW,IAAI,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAEzE,MAAM,IAAI,GAAG,WAAW,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAsB,CAAC;QAC3D,CAAC,CAAE,GAAG,CAAC,IAAuB;QAC9B,CAAC,CAAC,MAAM,CAAC;IAEX,OAAO;QACL,IAAI;QACJ,WAAW,EAAE,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE;QACnC,OAAO,EAAE,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,SAAS;KAC1E,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,0BAA0B;AAC1B,8EAA8E;AAE9E,SAAS,mBAAmB,CAAC,IAAY;IACvC,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,OAAO,GAA4C,EAAE,CAAC;IAC5D,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;QACtC,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACtE,IAAI,CAAC,SAAS;YAAE,SAAS;QAEzB,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;YACpC,6BAA6B;YAC7B,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC;gBAAE,SAAS;YACtC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;YAExB,OAAO,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,MAAM,EAAE,QAA4B;aACrC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AAE9E,SAAS,eAAe,CAAC,IAAY;IACnC,IAAI,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE3B,4BAA4B;IAC5B,IAAI,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,YAAY,KAAK,CAAC,CAAC,EAAE,CAAC;YACxB,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,4BAA4B;IAC5B,MAAM,SAAS,GAAG,QAAQ,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;IAC9C,IAAI,SAAS,KAAK,CAAC,CAAC,EAAE,CAAC;QACrB,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;IAC1C,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC;
AAED,SAAS,oBAAoB,CAAC,QAAgB,EAAE,UAAkC;IAChF,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE5C,MAAM,cAAc,GAAG,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACjD,IAAI,cAAc,KAAK,CAAC,CAAC,EAAE,CAAC;QAC1B,UAAU,EAAE,CAAC,8DAA8D,CAAC,CAAC;QAC7E,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC;IACvD,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,8EAA8E;AAC9E,mCAAmC;AACnC,8EAA8E;AAE9E,KAAK,UAAU,qBAAqB,CAClC,KAAiB,EACjB,IAAY,EACZ,QAAgB,EAChB,UAAkC;IAElC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,WAAW,EAAE,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,UAAU,EAAE,CAAC,yCAAyC,CAAC,CAAC;IAExD,MAAM,SAAS,GAAG,KAAK;SACpB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,QAAQ,KAAK,CAAC,CAAC,WAAW,kBAAkB,CAAC,CAAC,SAAS,EAAE,CAAC;SACpG,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,MAAM,MAAM,GAAG,MAAM,WAAW,EAAE,CAAC,QAAQ,CAAC;QAC1C,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,KAAK;QAChB,YAAY,EAAE,wBAAwB;QACtC,UAAU,EAAE,uFAAuF,QAAQ,WAAW,IAAI,wBAAwB,SAAS,EAAE;KAC9J,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;IAE/B,UAAU,EAAE,CAAC,+BAA+B,CAAC,CAAC;IAE9C,IAAI,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IACxC,QAAQ,GAAG,oBAAoB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAEtD,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,yDAAyD;YACzD,oBAAoB,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAC7C,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IAED,MAAM,WAAW,GAA4C,EAAE,CAAC;IAChE,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QACzB,MAAM,SAAS,GAAG,kBAAkB,CAAC,GAAoB,CAAC,CAAC;QAC3D,IAAI,SAAS,EAAE,CAAC;YACd,WAAW,CAAC,IAAI,CAAC;gBACf,GAAG,SAAS;gBACZ,MAAM,EAAE,MAA0B;aACnC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO;QACL,WAAW;QACX,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,CAAC;QACpC,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,CAAC;KACvC,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,cAAc;AACd,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,IAAY,EACZ,QAAgB,E
AChB,KAAiB,EACjB,UAAkC;IAElC,2BAA2B;IAC3B,UAAU,EAAE,CAAC,6BAA6B,CAAC,CAAC;IAC5C,MAAM,iBAAiB,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;IACpD,UAAU,EAAE,CAAC,WAAW,iBAAiB,CAAC,MAAM,wBAAwB,CAAC,CAAC;IAE1E,qCAAqC;IACrC,MAAM,EAAE,WAAW,EAAE,eAAe,EAAE,WAAW,EAAE,YAAY,EAAE,GAC/D,MAAM,qBAAqB,CAAC,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;IACjE,UAAU,EAAE,CAAC,WAAW,eAAe,CAAC,MAAM,sBAAsB,CAAC,CAAC;IAEtE,6DAA6D;IAC7D,MAAM,cAAc,GAAG,CAAC,GAAG,iBAAiB,EAAE,GAAG,eAAe,CAAC,CAAC;IAClE,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,OAAO,GAA4C,EAAE,CAAC;IAE5D,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;QAC/B,MAAM,GAAG,GAAG,CAAC,CAAC,WAAW,CAAC,WAAW,EAAE,CAAC;QACxC,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAS;QAC5B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACd,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,mDAAmD;IACnD,MAAM,QAAQ,GAA2B,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC9D,EAAE,EAAE,OAAO,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;QAC3C,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,WAAW,EAAE,CAAC,CAAC,WAAW;QAC1B,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,MAAM,EAAE,CAAC,CAAC,MAAO;KAClB,CAAC,CAAC,CAAC;IAEJ,UAAU,EAAE,CAAC,UAAU,QAAQ,CAAC,MAAM,qCAAqC,CAAC,CAAC;IAE7E,OAAO;QACL,IAAI;QACJ,WAAW,EAAE,QAAQ;QACrB,gBAAgB,EAAE,QAAQ,CAAC,MAAM;QACjC,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACrC,WAAW;QACX,YAAY;KACb,CAAC;AACJ,CAAC"}
|
|
@@ -30,7 +30,7 @@ export interface FormalVerificationStats {
|
|
|
30
30
|
disagreements: number;
|
|
31
31
|
formal_overrides: number;
|
|
32
32
|
}
|
|
33
|
-
export type FormalCheckType = 'function_exists' | 'parameter_check' | 'error_handling' | 'null_check' | 'type_annotation' | 'sql_parameterized' | 'input_validation';
|
|
33
|
+
export type FormalCheckType = 'function_exists' | 'parameter_check' | 'error_handling' | 'null_check' | 'type_annotation' | 'sql_parameterized' | 'input_validation' | 'arithmetic_correctness' | 'api_misuse' | 'undefined_reference';
|
|
34
34
|
export interface FormalCheckResult {
|
|
35
35
|
claimId: string;
|
|
36
36
|
checkType: FormalCheckType;
|
|
@@ -4,6 +4,155 @@
|
|
|
4
4
|
* This is a local copy of api/lib/formal-verifier.ts for use within the SDK.
|
|
5
5
|
* The canonical version lives at api/lib/formal-verifier.ts.
|
|
6
6
|
*/
|
|
7
|
+
// ── Safe Arithmetic Evaluator ────────────────────────────────
|
|
8
|
+
/**
 * Evaluates a constant arithmetic expression with a hand-written
 * recursive-descent parser. No `eval` / `Function` is involved, so arbitrary
 * text can be passed in safely.
 *
 * Grammar, from lowest to highest precedence:
 *   expression := term  (('+' | '-') term)*
 *   term       := unary (('*' | '/' | '//' | '%') unary)*
 *   unary      := ('-' | '+')? power
 *   power      := primary ('**' unary)?        (right-associative)
 *   primary    := number | '(' expression ')'
 *
 * Notes on semantics:
 *   - Every comma is removed before parsing, so "1,000" reads as 1000.
 *   - `//` is floor division (Math.floor of the quotient).
 *   - `%` is the JavaScript remainder operator.
 *   - Division, floor division and remainder by zero yield null.
 *   - Any non-finite intermediate or final result yields null.
 *   - Numbers are plain decimals (digits with at most one '.'); exponent
 *     notation such as "1e5" is not recognised and yields null.
 *
 * @param {string} expr - Candidate arithmetic expression.
 * @returns {number|null} The numeric value, or null when the input is not a
 *   string, is empty, or is not a fully parseable arithmetic expression.
 */
function safeEvalArithmetic(expr) {
    // Non-string input cannot be an expression; report "not evaluable"
    // instead of throwing on `.replace`.
    if (typeof expr !== 'string')
        return null;
    const src = expr.replace(/,/g, '').trim();
    if (src.length === 0)
        return null;
    let pos = 0;
    function peek() { return src[pos] ?? ''; }
    function advance() { return src[pos++] ?? ''; }
    function isDigit(ch) { return ch >= '0' && ch <= '9'; }
    function skipWS() {
        while (pos < src.length && src[pos] === ' ')
            pos++;
    }
    function parseNumber() {
        skipWS();
        let num = '';
        let dots = 0;
        while (pos < src.length && (isDigit(src[pos]) || src[pos] === '.')) {
            if (src[pos] === '.')
                dots++;
            num += advance();
        }
        // parseFloat stops at the second '.', so "1.2.3" would silently
        // become 1.2; reject such malformed literals explicitly.
        if (num === '' || num === '.' || dots > 1)
            return null;
        const val = parseFloat(num);
        return Number.isNaN(val) ? null : val;
    }
    function parsePrimary() {
        skipWS();
        if (peek() === '(') {
            advance();
            const val = parseExpression();
            if (val === null)
                return null;
            skipWS();
            if (peek() !== ')')
                return null;
            advance();
            return val;
        }
        if (isDigit(peek()) || peek() === '.') {
            return parseNumber();
        }
        return null;
    }
    function parsePower() {
        const base = parsePrimary();
        if (base === null)
            return null;
        skipWS();
        if (pos + 1 < src.length && src[pos] === '*' && src[pos + 1] === '*') {
            pos += 2;
            // Recursing through parseUnary makes '**' right-associative and
            // allows a signed exponent such as "2 ** -1".
            const exp = parseUnary();
            if (exp === null)
                return null;
            const result = Math.pow(base, exp);
            if (!Number.isFinite(result))
                return null;
            return result;
        }
        return base;
    }
    function parseUnary() {
        skipWS();
        if (peek() === '-') {
            advance();
            // The sign is applied after the power, so "-2 ** 2" is -4.
            const val = parsePower();
            return val === null ? null : -val;
        }
        if (peek() === '+') {
            advance();
            return parsePower();
        }
        return parsePower();
    }
    function parseTerm() {
        let left = parseUnary();
        if (left === null)
            return null;
        while (true) {
            skipWS();
            if (pos + 1 < src.length && src[pos] === '/' && src[pos + 1] === '/') {
                pos += 2;
                const right = parseUnary();
                if (right === null || right === 0)
                    return null;
                left = Math.floor(left / right);
            }
            else if (peek() === '*' && (pos + 1 >= src.length || src[pos + 1] !== '*')) {
                // A single '*' only; '**' belongs to parsePower.
                advance();
                const right = parseUnary();
                if (right === null)
                    return null;
                left = left * right;
            }
            else if (peek() === '/' && (pos + 1 >= src.length || src[pos + 1] !== '/')) {
                advance();
                const right = parseUnary();
                if (right === null || right === 0)
                    return null;
                left = left / right;
            }
            else if (peek() === '%') {
                advance();
                const right = parseUnary();
                if (right === null || right === 0)
                    return null;
                left = left % right;
            }
            else {
                break;
            }
            if (!Number.isFinite(left))
                return null;
        }
        return left;
    }
    function parseExpression() {
        let left = parseTerm();
        if (left === null)
            return null;
        while (true) {
            skipWS();
            if (peek() === '+') {
                advance();
                const right = parseTerm();
                if (right === null)
                    return null;
                left = left + right;
            }
            else if (peek() === '-') {
                advance();
                const right = parseTerm();
                if (right === null)
                    return null;
                left = left - right;
            }
            else {
                break;
            }
            if (!Number.isFinite(left))
                return null;
        }
        return left;
    }
    const result = parseExpression();
    if (result === null)
        return null;
    skipWS();
    // Trailing, unconsumed input means the text was not a pure expression.
    if (pos < src.length)
        return null;
    if (!Number.isFinite(result))
        return null;
    return result;
}
|
|
7
156
|
// ── Preprocessing ────────────────────────────────────────────
|
|
8
157
|
function preprocessCode(code) {
|
|
9
158
|
let result = code;
|
|
@@ -21,6 +170,58 @@ function preprocessCode(code) {
|
|
|
21
170
|
result = result.replace(/`(?:[^`\\]|\\.)*`/g, '`__STR__`');
|
|
22
171
|
return result;
|
|
23
172
|
}
|
|
173
|
+
// ── Known-Bad API List ───────────────────────────────────────
|
|
174
|
+
// Conservative v1: only APIs that unambiguously don't exist in the target language.
|
|
175
|
+
/**
 * Call-site fragments that are treated as non-existent APIs, keyed by the
 * literal text to look for. Each entry lists the language identifiers it
 * applies to and the suggested replacement in `correct`.
 *
 * `.trimLeft(` / `.trimRight(` are intentionally NOT listed: they are real
 * String.prototype methods (legacy aliases of trimStart / trimEnd), so they
 * do not meet the "unambiguously doesn't exist" bar for this table.
 *
 * NOTE(review): `.contains(` does exist on DOM objects (Node.contains,
 * DOMTokenList.contains, e.g. `el.classList.contains(...)`); confirm that the
 * consumer of this table does not flag those call sites.
 */
const KNOWN_BAD_APIS = {
    // JavaScript/TypeScript — non-existent array methods
    '.flatten(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: '.flat(' },
    '.contains(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: '.includes(' },
    // Non-existent Object methods
    'Object.fromPairs(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: 'Object.fromEntries(' },
    'Object.pairs(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: 'Object.entries(' },
    'Object.size(': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: 'Object.keys(...).length' },
    'Object.length': { languages: ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'], correct: 'Object.keys(...).length' },
    // Python — common LLM hallucinations (method names only, no type prefix)
    '.has_key(': { languages: ['python', 'py'], correct: '"key" in dict' },
    '.iteritems(': { languages: ['python', 'py'], correct: '.items()' },
    '.itervalues(': { languages: ['python', 'py'], correct: '.values()' },
    '.iterkeys(': { languages: ['python', 'py'], correct: '.keys()' },
    '.remove_at(': { languages: ['python', 'py'], correct: '.pop(index)' },
};
|
|
194
|
+
// ── Built-in Allowlists ──────────────────────────────────────
|
|
195
|
+
/**
 * Identifiers treated as always-available in JavaScript/TypeScript code.
 * Members are grouped by origin below; insertion order is kept stable.
 */
const JS_BUILTINS = new Set([
    // Browser host objects
    'console',
    'window',
    'document',
    'navigator',
    'location',
    'history',
    // Timers
    'setTimeout',
    'setInterval',
    'clearTimeout',
    'clearInterval',
    // Core language objects and constructors
    'Promise',
    'JSON',
    'Math',
    'Date',
    'RegExp',
    'Error',
    'TypeError',
    'Array',
    'Object',
    'String',
    'Number',
    'Boolean',
    'Symbol',
    'Map',
    'Set',
    'WeakMap',
    'WeakSet',
    'Proxy',
    'Reflect',
    // Global functions and numeric constants
    'parseInt',
    'parseFloat',
    'isNaN',
    'isFinite',
    'NaN',
    'Infinity',
    // Literal keywords
    'undefined',
    'null',
    'true',
    'false',
    // CommonJS module scope
    'require',
    'module',
    'exports',
    '__dirname',
    '__filename',
    // Node.js / universal globals
    'process',
    'Buffer',
    'global',
    'globalThis',
    // Fetch and URL APIs
    'fetch',
    'Request',
    'Response',
    'Headers',
    'URL',
    'URLSearchParams',
    // Encoding, abort and binary/form data APIs
    'TextEncoder',
    'TextDecoder',
    'AbortController',
    'FormData',
    'Blob',
    'File',
    // Events
    'Event',
    'EventTarget',
    'CustomEvent',
    // Browser dialogs
    'alert',
    'confirm',
    'prompt',
    // React
    'React',
    'Component',
    'Fragment',
    'useState',
    'useEffect',
    'useRef',
    'useCallback',
    'useMemo',
    'useContext',
    'useReducer',
    // Test-runner globals
    'describe',
    'it',
    'test',
    'expect',
    'beforeEach',
    'afterEach',
    'jest',
    'vi',
]);
|
|
213
|
+
/**
 * Identifiers treated as always-available in Python code.
 * Members are grouped by kind below; insertion order is kept stable.
 */
const PY_BUILTINS = new Set([
    // Built-in functions: I/O and iteration helpers
    'print',
    'len',
    'range',
    'enumerate',
    'zip',
    'map',
    'filter',
    // Built-in types
    'int',
    'float',
    'str',
    'bool',
    'list',
    'dict',
    'set',
    'tuple',
    // Introspection
    'type',
    'isinstance',
    'issubclass',
    'hasattr',
    'getattr',
    'setattr',
    // More built-in functions
    'open',
    'input',
    'sorted',
    'reversed',
    'min',
    'max',
    'sum',
    'abs',
    'any',
    'all',
    'iter',
    'next',
    'super',
    // Descriptor / decorator built-ins
    'property',
    'staticmethod',
    'classmethod',
    // Constants and conventional receiver names
    'None',
    'True',
    'False',
    'self',
    'cls',
    // Built-in exceptions
    'Exception',
    'ValueError',
    'TypeError',
    'KeyError',
    'IndexError',
    'ImportError',
    'AttributeError',
    'RuntimeError',
    'StopIteration',
    // Commonly imported module names
    'os',
    'sys',
    'json',
    'math',
    're',
    'datetime',
    'collections',
    'functools',
    'itertools',
    'typing',
    'pathlib',
    'unittest',
    'pytest',
]);
|
|
24
225
|
// ── Claim Classification ─────────────────────────────────────
|
|
25
226
|
function classifyClaim(claim) {
|
|
26
227
|
const text = `${claim.description} ${claim.assertion}`.toLowerCase();
|
|
@@ -73,6 +274,22 @@ function classifyClaim(claim) {
|
|
|
73
274
|
if (/(?:validates?\s+(?:user\s+)?input|input\s+validation|sanitizes?\s+input|validates?\s+(?:before|prior))/i.test(text)) {
|
|
74
275
|
return { claim, formallyVerifiable: true, checkType: 'input_validation' };
|
|
75
276
|
}
|
|
277
|
+
// Arithmetic correctness checks
|
|
278
|
+
if (/(?:calculates?|computes?|evaluates?\s+to|sums?|totals?|multiplies?|divides?|averages?|rounds?)/i.test(text) ||
|
|
279
|
+
/(?:returns?\s+(?:the\s+)?(?:sum|product|average|total|difference|ratio|percentage|remainder|quotient))/i.test(text) ||
|
|
280
|
+
/(?:result\s+(?:is|equals?|should\s+be)\s+\d)/i.test(text)) {
|
|
281
|
+
return { claim, formallyVerifiable: true, checkType: 'arithmetic_correctness' };
|
|
282
|
+
}
|
|
283
|
+
// API misuse checks
|
|
284
|
+
if (/(?:uses?|calls?|invokes?)\s+(?:the\s+)?(?:api|method|function)\s+['"`]?(\w+(?:\.\w+)*)['"`]?/i.test(text) ||
|
|
285
|
+
/(?:calls?\s+\w+\.\w+)|(?:uses?\s+\w+\.\w+\s*\()/i.test(originalText)) {
|
|
286
|
+
return { claim, formallyVerifiable: true, checkType: 'api_misuse' };
|
|
287
|
+
}
|
|
288
|
+
// Undefined reference checks
|
|
289
|
+
if (/(?:references?|uses?|accesses?)\s+(?:variable|function|import|module|identifier)\s+['"`]?(\w+)['"`]?/i.test(text) ||
|
|
290
|
+
/(?:variable|function|import)\s+['"`]?(\w+)['"`]?\s+(?:is\s+)?(?:defined|declared|imported|available)/i.test(text)) {
|
|
291
|
+
return { claim, formallyVerifiable: true, checkType: 'undefined_reference' };
|
|
292
|
+
}
|
|
76
293
|
return { claim, formallyVerifiable: false };
|
|
77
294
|
}
|
|
78
295
|
// ── Structural Verifiers ─────────────────────────────────────
|
|
@@ -399,6 +616,234 @@ function checkInputValidation(code, language) {
|
|
|
399
616
|
confidence: 1,
|
|
400
617
|
};
|
|
401
618
|
}
|
|
619
|
+
// ── Arithmetic Correctness ───────────────────────────────────
|
|
620
|
+
/**
 * Deterministic arithmetic check for a single claim.
 *
 * Phase A collects assignments from `preprocessCode(code)` whose right-hand side
 * `safeEvalArithmetic` can evaluate (it signals "not evaluable" with `null`).
 * Phase B extracts every number from the claim text and, if present, an explicit
 * "<expression> = / equals / is <number>" statement.
 * Phase C verifies the explicit statement first; otherwise it looks for a claim
 * that names an evaluated variable but only quotes different numbers.
 *
 * @param {string} code - Source code under verification.
 * @param {string} language - Language name or extension alias; compared case-insensitively.
 * @param {{id: *, description: string, assertion: string}} claim - Claim being checked.
 * @returns {{claimId: *, checkType: string, verdict: string, evidence: string, confidence: number}}
 *   A result with verdict 'PASS' or 'FAIL'. PASS is also the fall-through when nothing
 *   contradicts the claim or nothing evaluable was found.
 */
function checkArithmeticCorrectness(code, language, claim) {
    const clean = preprocessCode(code);
    const text = `${claim.description} ${claim.assertion}`;
    // The alias lists below are lowercase; normalise so "JavaScript" or "Python" select the
    // language-specific pattern, as the sibling checks already do. A missing language
    // still falls through to the generic pattern instead of throwing.
    const langLower = String(language ?? '').toLowerCase();
    // Phase A: Extract constant expressions from code
    const assignments = [];
    let assignPatterns;
    if (['typescript', 'javascript', 'ts', 'js', 'tsx', 'jsx'].includes(langLower)) {
        assignPatterns = [/(?:const|let|var)\s+(\w+)\s*(?::\s*\w+)?\s*=\s*([^;{\n]+)/g];
    }
    else if (['python', 'py'].includes(langLower)) {
        assignPatterns = [/^(\w+)\s*=\s*([^#\n]+)/gm];
    }
    else if (['go', 'golang'].includes(langLower)) {
        assignPatterns = [/(?:const|var)\s+(\w+)\s*(?:\w+)?\s*=\s*([^\n]+)/g];
    }
    else if (['java', 'kotlin', 'scala'].includes(langLower)) {
        assignPatterns = [/(?:final\s+)?(?:\w+)\s+(\w+)\s*=\s*([^;]+)/g];
    }
    else {
        assignPatterns = [/(?:const|let|var|final)?\s*(\w+)\s*=\s*([^;\n]+)/g];
    }
    for (const pattern of assignPatterns) {
        // matchAll iterates over a private copy of the regex, so no lastIndex state is shared.
        for (const m of clean.matchAll(pattern)) {
            const name = m[1];
            const rhs = m[2].trim();
            // Skip right-hand sides that open with a string/array/object/paren or a
            // definition/construction keyword.
            if (/^['"`\[{(]|^(?:function|def|class|new|await|async)\b/.test(rhs)) {
                continue;
            }
            // Skip boolean and null-like literals.
            if (/^(?:true|false|null|undefined|None|nil)\b/i.test(rhs)) {
                continue;
            }
            const value = safeEvalArithmetic(rhs);
            if (value !== null) {
                assignments.push({ name, expr: rhs, value });
            }
        }
    }
    // Phase B: Extract expected values from claim text
    // Note: only unsigned digit runs are captured, so a leading minus sign is dropped.
    const claimNumbers = [...text.matchAll(/\b(\d+\.?\d*)\b/g)].map(m => parseFloat(m[1]));
    const claimExprMatch = text.match(/(\d[\d\s.+\-*/%()]*\d)\s*(?:=|equals?|is)\s*(\d+\.?\d*)/i);
    // Phase C: Compare
    if (claimExprMatch) {
        const claimExpr = claimExprMatch[1].trim();
        const claimResult = parseFloat(claimExprMatch[2]);
        const evaluated = safeEvalArithmetic(claimExpr);
        if (evaluated !== null && !Number.isNaN(claimResult)) {
            // Compare with a tolerance rather than ===.
            const matches = Math.abs(evaluated - claimResult) < 1e-9;
            return {
                claimId: claim.id,
                checkType: 'arithmetic_correctness',
                verdict: matches ? 'PASS' : 'FAIL',
                evidence: matches
                    ? `Arithmetic verified: ${claimExpr} = ${evaluated}`
                    : `Arithmetic error: ${claimExpr} = ${evaluated}, but claim states ${claimResult}`,
                confidence: 1,
            };
        }
    }
    for (const assign of assignments) {
        const claimReferencesVar = new RegExp(`\\b${assign.name}\\b`, 'i').test(text);
        const claimReferencesValue = claimNumbers.some(n => Math.abs(n - assign.value) < 1e-9);
        if (claimReferencesVar && claimNumbers.length > 0) {
            // A claim number "contradicts" only when it differs from the evaluated value
            // by less than 10x that value.
            // NOTE(review): the bound is `assign.value * 10`, so it can never be satisfied
            // when the evaluated value is zero or negative — such constants are never flagged.
            const contradicts = claimNumbers.some(n => {
                return Math.abs(n - assign.value) > 1e-9 && Math.abs(n - assign.value) < assign.value * 10;
            });
            if (contradicts && !claimReferencesValue) {
                return {
                    claimId: claim.id,
                    checkType: 'arithmetic_correctness',
                    verdict: 'FAIL',
                    evidence: `Variable '${assign.name}' evaluates to ${assign.value} (from: ${assign.expr}), but claim expects a different value`,
                    confidence: 1,
                };
            }
        }
    }
    return {
        claimId: claim.id,
        checkType: 'arithmetic_correctness',
        verdict: 'PASS',
        evidence: assignments.length > 0
            ? `Found ${assignments.length} constant expression(s) in code, no contradictions with claim`
            : 'No deterministically evaluable constant expressions found — deferring to LLM',
        confidence: 1,
    };
}
|
|
705
|
+
// ── API Misuse Check ─────────────────────────────────────────
|
|
706
|
+
/**
 * Scans the code for any entry of KNOWN_BAD_APIS that applies to the given language.
 *
 * An entry applies when its `languages` list is non-empty and contains the lowercased
 * `language`; it is a hit when its key occurs as a substring of `preprocessCode(code)`.
 * The first hit, in KNOWN_BAD_APIS iteration order, produces a FAIL; no hit produces a PASS.
 *
 * @param {string} code - Source code under verification.
 * @param {string} language - Language name; lowercased before lookup, echoed verbatim in evidence.
 * @param {{id: *}} claim - Claim being checked; only its `id` is read.
 * @returns {{claimId: *, checkType: string, verdict: string, evidence: string, confidence: number}}
 */
function checkApiMisuse(code, language, claim) {
    const source = preprocessCode(code);
    const normalizedLang = language.toLowerCase();
    const offender = Object.entries(KNOWN_BAD_APIS).find(([apiName, meta]) => {
        const appliesToLanguage = meta.languages.length !== 0 && meta.languages.includes(normalizedLang);
        return appliesToLanguage && source.includes(apiName);
    });
    if (offender === undefined) {
        return {
            claimId: claim.id,
            checkType: 'api_misuse',
            verdict: 'PASS',
            evidence: 'No known non-existent API calls detected',
            confidence: 1,
        };
    }
    const [apiName, meta] = offender;
    // Parentheses are stripped from the key so the message shows a bare API name.
    const displayName = apiName.replace(/[()]/g, '');
    return {
        claimId: claim.id,
        checkType: 'api_misuse',
        verdict: 'FAIL',
        evidence: `Code uses '${displayName}' which does not exist in ${language}. Use '${meta.correct}' instead.`,
        confidence: 1,
    };
}
|
|
732
|
+
// ── Undefined Reference Check ────────────────────────────────
|
|
733
|
+
/**
 * Checks whether the identifier named in a claim is declared in the source code.
 *
 * Flow:
 *  1. Extract one identifier from the claim text; if none is found, PASS (deferred).
 *  2. PASS if the identifier is in the built-in set for the language (JS_BUILTINS /
 *     PY_BUILTINS; other languages have no built-in set).
 *  3. Harvest declared names from `preprocessCode(code)` with language-specific regexes.
 *  4. With fewer than 3 declarations the code is treated as a fragment and PASSes (deferred).
 *  5. PASS if the identifier was harvested; otherwise FAIL, with evidence distinguishing
 *     "not found anywhere" from "referenced but not declared".
 *
 * The harvesting is regex-based and approximate: it does not model scope, and it does not
 * see destructured `const`/`let` bindings or class members.
 *
 * @param {string} code - Source code under verification.
 * @param {string} language - Language name or extension alias; matched case-insensitively.
 * @param {{id: *, description: string, assertion: string}} claim - Claim being checked.
 * @returns {{claimId: *, checkType: string, verdict: string, evidence: string, confidence: number}}
 */
function checkUndefinedReference(code, language, claim) {
    // Every outcome has the same shape; only verdict and evidence differ.
    const buildResult = (verdict, evidence) => ({
        claimId: claim.id,
        checkType: 'undefined_reference',
        verdict,
        evidence,
        confidence: 1,
    });
    const text = `${claim.description} ${claim.assertion}`;
    const identMatch = text.match(/(?:references?|uses?|accesses?)\s+(?:variable|function|import|module|identifier)\s+['"`]?(\w+)['"`]?/i)
        || text.match(/(?:variable|function|import)\s+['"`]?(\w+)['"`]?\s+(?:is\s+)?(?:defined|declared|imported|available)/i);
    if (!identMatch) {
        return buildResult('PASS', 'No specific identifier extracted from claim — deferring to LLM');
    }
    const identifier = identMatch[1];
    const clean = preprocessCode(code);
    const langLower = language.toLowerCase();
    const isJsLike = ['js', 'ts', 'jsx', 'tsx', 'javascript', 'typescript'].includes(langLower);
    const isPython = ['python', 'py'].includes(langLower);
    let builtins = new Set();
    if (isJsLike) {
        builtins = JS_BUILTINS;
    }
    else if (isPython) {
        builtins = PY_BUILTINS;
    }
    if (builtins.has(identifier)) {
        return buildResult('PASS', `'${identifier}' is a built-in identifier in ${language}`);
    }
    const declarations = new Set();
    // Record a harvested name, ignoring empty or non-identifier tokens (e.g. "*", "{",
    // fragments of default values) so they do not inflate the fragment threshold below.
    const declare = (rawName) => {
        const name = rawName?.trim();
        if (name && /^[\w$]+$/.test(name)) {
            declarations.add(name);
        }
    };
    if (isJsLike) {
        for (const m of clean.matchAll(/(?:const|let|var)\s+(\w+)/g)) {
            declare(m[1]);
        }
        for (const m of clean.matchAll(/function\s+(\w+)/g)) {
            declare(m[1]);
        }
        for (const m of clean.matchAll(/class\s+(\w+)/g)) {
            declare(m[1]);
        }
        // Named imports, including `import Default, { a, b as c } from` and `import type { T }`.
        for (const m of clean.matchAll(/\bimport\s+(?:type\s+)?(?:\w+\s*,\s*)?\{([^}]+)\}/g)) {
            for (const spec of m[1].split(',')) {
                // The local binding is whatever follows `as`, if present.
                declare(spec.trim().replace(/^type\s+/, '').split(/\s+as\s+/).pop());
            }
        }
        // Default imports, whether followed directly by `from` or by `, { ... }` / `, * as ns`.
        for (const m of clean.matchAll(/\bimport\s+(?:type\s+)?(\w+)\s*(?:,|\bfrom\b)/g)) {
            declare(m[1]);
        }
        // Namespace imports: `import * as ns from`.
        for (const m of clean.matchAll(/\bimport\s+(?:\w+\s*,\s*)?\*\s*as\s+(\w+)/g)) {
            declare(m[1]);
        }
        // Parameter lists: named or anonymous `function (...)`, and `(...) =>` arrows
        // (optionally with a TypeScript return-type annotation before the arrow).
        // Arrow parameters sit BEFORE `=>`; the parenthesised text after it is the body.
        const paramLists = [
            ...clean.matchAll(/\bfunction\b\s*\*?\s*\w*\s*\(([^)]*)\)/g),
            ...clean.matchAll(/\(([^)]*)\)\s*(?::[^=(){};]+)?=>/g),
        ];
        for (const m of paramLists) {
            for (const rawParam of m[1].split(',')) {
                // Drop rest/destructuring punctuation, then cut at the first type
                // annotation, default value, optional marker or closing bracket.
                declare(rawParam.trim().replace(/^[.{\[\s]+/, '').split(/[\s:=?}\]]/)[0]);
            }
        }
        // Single unparenthesised arrow parameter: `x => ...`.
        for (const m of clean.matchAll(/(\w+)\s*=>/g)) {
            declare(m[1]);
        }
    }
    else if (isPython) {
        for (const m of clean.matchAll(/def\s+(\w+)/g)) {
            declare(m[1]);
        }
        for (const m of clean.matchAll(/class\s+(\w+)/g)) {
            declare(m[1]);
        }
        // NOTE(review): only assignments starting at column 0 are harvested, so
        // function-local variables are not treated as declared — confirm this is intended.
        for (const m of clean.matchAll(/^(\w+)\s*=/gm)) {
            declare(m[1]);
        }
        // One pass covers `import a, b.c as d` and the name list of `from pkg.mod import ...`
        // (dotted or relative module, optionally parenthesised over several lines).
        for (const m of clean.matchAll(/\bimport\s+(\([^)]*\)|[^\n#;]+)/g)) {
            for (const spec of m[1].replace(/[()]/g, ' ').split(',')) {
                const parsed = /^([\w.]+)(?:\s+as\s+(\w+))?/.exec(spec.trim());
                if (parsed) {
                    // `import a.b` binds `a`; keep the imported name AND any alias so a
                    // claim may refer to either.
                    declare(parsed[1].split('.')[0]);
                    declare(parsed[2]);
                }
            }
        }
    }
    else {
        for (const m of clean.matchAll(/(?:const|let|var|function|class|def|func|fn)\s+(\w+)/g)) {
            declare(m[1]);
        }
        for (const m of clean.matchAll(/^(\w+)\s*=/gm)) {
            declare(m[1]);
        }
    }
    if (declarations.size < 3) {
        return buildResult('PASS', `Code has fewer than 3 declarations (likely a fragment) — deferring to LLM`);
    }
    if (declarations.has(identifier)) {
        return buildResult('PASS', `'${identifier}' is declared in the source code`);
    }
    // `identifier` was captured with \w+, so it is safe to embed in a RegExp unescaped.
    const identRegex = new RegExp(`\\b${identifier}\\b`);
    if (!identRegex.test(clean)) {
        return buildResult('FAIL', `'${identifier}' is not found anywhere in the source code`);
    }
    return buildResult('FAIL', `'${identifier}' is referenced but not declared in the source code`);
}
|
|
402
847
|
// ── Main Entry Point ─────────────────────────────────────────
|
|
403
848
|
export function runFormalVerification(code, language, claims, llmVerifications) {
|
|
404
849
|
const classified = claims.map(c => classifyClaim(c));
|
|
@@ -431,6 +876,15 @@ export function runFormalVerification(code, language, claims, llmVerifications)
|
|
|
431
876
|
case 'input_validation':
|
|
432
877
|
result = checkInputValidation(code, language);
|
|
433
878
|
break;
|
|
879
|
+
case 'arithmetic_correctness':
|
|
880
|
+
result = checkArithmeticCorrectness(code, language, c.claim);
|
|
881
|
+
break;
|
|
882
|
+
case 'api_misuse':
|
|
883
|
+
result = checkApiMisuse(code, language, c.claim);
|
|
884
|
+
break;
|
|
885
|
+
case 'undefined_reference':
|
|
886
|
+
result = checkUndefinedReference(code, language, c.claim);
|
|
887
|
+
break;
|
|
434
888
|
default:
|
|
435
889
|
continue;
|
|
436
890
|
}
|