@vextlabs/theron-cli 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/profiles/methodologies/build_domains.d.ts +6 -0
- package/dist/profiles/methodologies/build_domains.js +170 -0
- package/dist/profiles/methodologies/build_domains.js.map +1 -0
- package/dist/profiles/methodologies/regulated_domains.d.ts +6 -0
- package/dist/profiles/methodologies/regulated_domains.js +153 -0
- package/dist/profiles/methodologies/regulated_domains.js.map +1 -0
- package/dist/profiles/methodologies/research_domains.d.ts +8 -0
- package/dist/profiles/methodologies/research_domains.js +179 -0
- package/dist/profiles/methodologies/research_domains.js.map +1 -0
- package/dist/profiles/methodologies/strategy_domains.d.ts +15 -0
- package/dist/profiles/methodologies/strategy_domains.js +193 -0
- package/dist/profiles/methodologies/strategy_domains.js.map +1 -0
- package/dist/profiles/seeds.js +210 -85
- package/dist/profiles/seeds.js.map +1 -1
- package/dist/verifiers/calc_gate.d.ts +2 -0
- package/dist/verifiers/calc_gate.js +112 -0
- package/dist/verifiers/calc_gate.js.map +1 -0
- package/dist/verifiers/citation_gate.d.ts +2 -0
- package/dist/verifiers/citation_gate.js +130 -0
- package/dist/verifiers/citation_gate.js.map +1 -0
- package/dist/verifiers/evidence_gate.d.ts +2 -0
- package/dist/verifiers/evidence_gate.js +108 -0
- package/dist/verifiers/evidence_gate.js.map +1 -0
- package/dist/verifiers/index.js +8 -0
- package/dist/verifiers/index.js.map +1 -1
- package/dist/verifiers/source_gate.d.ts +2 -0
- package/dist/verifiers/source_gate.js +126 -0
- package/dist/verifiers/source_gate.js.map +1 -0
- package/package.json +2 -2
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
// Calc-gate verifier — the deterministic teeth behind the numeric
|
|
2
|
+
// methodology's "show the work" invariant.
|
|
3
|
+
//
|
|
4
|
+
// The throughline for finance / data / math (and the numeric trades —
|
|
5
|
+
// trader / engineer / realtor) is: a number you COMPUTED must arrive with
|
|
6
|
+
// the computation that produced it. A bare asserted result figure — "the
|
|
7
|
+
// IRR is 14.3%", "the LTV/CAC is 3.2x", "the NPV is $1.24M", "the deflection
|
|
8
|
+
// is 4.7mm" — with no derivation is the failure mode this gate catches. It
|
|
9
|
+
// runs on the numeric profiles only.
|
|
10
|
+
//
|
|
11
|
+
// How it differs from arithmetic_recheck (the existing numeric kernel):
|
|
12
|
+
// - arithmetic_recheck takes an explicit "X op Y = Z" line and re-evaluates
|
|
13
|
+
// it — it catches WRONG math that's already shown.
|
|
14
|
+
// - calc_gate catches MISSING math: a derived headline figure asserted with
|
|
15
|
+
// no visible derivation at all. The two are complementary — recheck
|
|
16
|
+
// proves shown work is right; calc_gate proves the work was shown.
|
|
17
|
+
//
|
|
18
|
+
// How it works:
|
|
19
|
+
// 1. Scan the output for a COMPUTED-RESULT assertion: a named computed
|
|
20
|
+
// quantity (IRR, NPV, ROI, LTV/CAC, margin, CAGR, payback, deflection,
|
|
21
|
+
// stress, mean, median, std dev, p-value, total, etc.) stated as a
|
|
22
|
+
// concrete value ("= 14.3%", "is $1.24M", "comes to 3.2x").
|
|
23
|
+
// 2. If such a claim is present, check for DERIVATION evidence anywhere in
|
|
24
|
+
// the output: an arithmetic line (a + b = c), a formula, a fenced code/
|
|
25
|
+
// calc block, a python calculator call, a stated set of inputs, or a
|
|
26
|
+
// worked table. Any of these proves the number wasn't conjured.
|
|
27
|
+
// 3. A computed figure asserted with NO derivation is a BLOCK — fed back so
|
|
28
|
+
// the model shows the steps before the number stands.
|
|
29
|
+
//
|
|
30
|
+
// Conservative by design: it only fires when a RESULT-NAMED quantity is
|
|
31
|
+
// stated as a hard value. Round-number context ("we have ~10 customers"),
|
|
32
|
+
// inputs the user gave ("at a 10% discount rate"), questions, and figures the
|
|
33
|
+
// model explicitly flags as estimates/assumptions all pass. The guard below
|
|
34
|
+
// keeps it off honest "rough estimate" language.
|
|
35
|
+
const APPLIES_TO_PROFILES = new Set([
|
|
36
|
+
"finance",
|
|
37
|
+
"data",
|
|
38
|
+
"math",
|
|
39
|
+
"trader",
|
|
40
|
+
"engineer",
|
|
41
|
+
"realtor",
|
|
42
|
+
]);
|
|
43
|
+
// Only gate substantive outputs — a one-line "the total is 5" aside isn't a
|
|
44
|
+
// derivation-worthy result.
|
|
45
|
+
const MIN_LENGTH_TO_GATE = 160;
|
|
46
|
+
// COMPUTED-RESULT assertion: a named, derived quantity stated as a concrete
|
|
47
|
+
// value. The quantity name is what marks it as COMPUTED (you don't "measure"
|
|
48
|
+
// an IRR, you compute it), so a bare value next to one of these names implies
|
|
49
|
+
// a calculation must exist.
|
|
50
|
+
const NAMED_RESULT = "irr|npv|roi|roic|roe|cagr|ltv\\s*/?\\s*cac|ltv|cac|payback(?:\\s+period)?|gross\\s+margin|net\\s+margin|ebitda|arr|mrr|burn\\s+rate|runway|wacc|dcf|cap\\s+rate|cash[- ]on[- ]cash|dscr|noi|break[- ]even|deflection|bending\\s+(?:moment|stress)|shear|torque|stress|strain|moment\\s+of\\s+inertia|resistance|voltage|current|the\\s+mean|the\\s+median|the\\s+average|std(?:\\.|\\s+dev)?|standard\\s+deviation|variance|p[- ]value|correlation|r\\^?2|r[- ]squared|the\\s+total|the\\s+sum|the\\s+result|the\\s+(?:projected|expected)\\s+\\w+";
|
|
51
|
+
// A concrete value token: a number with optional currency/unit/percent/x.
|
|
52
|
+
// VALUE is a regex source fragment (embedded into CLAIM_PATTERNS with the "i"
// flag) for one concrete figure: optional sign, optional currency symbol,
// digits with thousands separators / decimals, and an optional unit suffix.
//  - A minus is accepted on either side of the currency symbol so "-$1.2M"
//    is recognised as a value, not only "$-1.2M".
//  - Units are listed longest-first so "250 mpa" / "18 months" are captured
//    whole instead of stopping at the first letter ("250 m").
//  - The (?![a-z]) lookahead stops a unit letter being taken from the start
//    of the next word ("5 and" must not yield the unit "a").
// The unit group is optional, so whether a number matches is unchanged; only
// the extent of the captured text differs.
const VALUE = "-?[\\$€£]?-?\\d[\\d,]*(?:\\.\\d+)?(?:\\s*(?:%|bps|mpa|psi|ohm|years?|months?|days?|mm|cm|kg|nm|kn|bn|mn|x|m|n|v|a|ω|k|b)(?![a-z]))?";
|
|
53
|
+
const CLAIM_PATTERNS = [
|
|
54
|
+
// "the IRR is 14.3%", "NPV = $1.24M", "LTV/CAC comes to 3.2x"
|
|
55
|
+
new RegExp(`\\b(?:${NAMED_RESULT})\\b[^.\\n]{0,30}\\b(?:is|=|comes?\\s+(?:to|out\\s+to)|works?\\s+out\\s+to|equals?|totals?|of)\\b\\s*(?:approximately\\s+|roughly\\s+|about\\s+|~)?${VALUE}`, "i"),
|
|
56
|
+
// "= 14.3%" immediately following a named result within a tight window
|
|
57
|
+
new RegExp(`\\b(?:${NAMED_RESULT})\\b[^=\\n]{0,20}=\\s*${VALUE}`, "i"),
|
|
58
|
+
];
|
|
59
|
+
// GUARD: the model being honest that a figure is a rough estimate / placeholder
|
|
60
|
+
// / illustrative, or asking for inputs, doesn't need a full derivation shown —
|
|
61
|
+
// that's honest uncertainty, which we reward. Also guards values the user
|
|
62
|
+
// themselves supplied as an input ("at your stated 10% rate").
|
|
63
|
+
// Case-insensitive matcher for honest-uncertainty or user-supplied-input
// phrasing. Contractions accept a straight (') or typographic (\u2019)
// apostrophe, or none. "your stated/given/specified/provided ..." is included
// so a figure the user supplied ("at your stated 10% rate") is recognised.
const GUARD = /\b(?:rough(?:ly)?\s+estimate|ballpark|placeholder|illustrative|for\s+example|let['\u2019]?s\s+(?:say|assume)|hypothetical|order\s+of\s+magnitude|i\s+don['\u2019]?t\s+have|i\s+can['\u2019]?t\s+(?:compute|calculate)|need\s+(?:the|more)\s+(?:inputs?|data|numbers?)|what\s+(?:is|are)\s+your|you\s+(?:said|gave|specified|provided)|your\s+(?:stated|given|specified|provided)|as\s+you\s+(?:stated|noted))\b/i;
|
|
64
|
+
// DERIVATION evidence — proof the number came from a calculation.
|
|
65
|
+
const DERIVATION_EVIDENCE = [
|
|
66
|
+
/[\$€£]?-?\d[\d,]*(?:\.\d+)?\s*[+\-*/×÷]\s*[\$€£]?-?\d[\d,]*(?:\.\d+)?/, // an arithmetic expression a op b
|
|
67
|
+
/=\s*[\$€£]?-?\d[\d,]*(?:\.\d+)?\s*[+\-*/×÷]/, // = a op b ...
|
|
68
|
+
/```[\s\S]*?```/, // a fenced calc / code block
|
|
69
|
+
/\bpython3?\b[^.\n]{0,40}\b(?:-c|print|=)/i, // a python calculator call
|
|
70
|
+
/\b(?:formula|equation|derivation|calculation|computed\s+as|worked\s+out|step\s*\d|step[- ]by[- ]step)\b/i,
|
|
71
|
+
/\b(?:where|given|inputs?|assumptions?)\b\s*[:\-][^.\n]{0,40}=/i, // a "given: x = ..., y = ..." inputs block
|
|
72
|
+
/\|\s*[-:]+\s*\|/, // a markdown table separator (a worked table)
|
|
73
|
+
/\b(?:sum|total|mean|average)\s*(?:of)?\s*\([^)]*[+,][^)]*\)/i, // sum(a, b, c) / mean(...)
|
|
74
|
+
/\(\s*[\$€£]?-?\d[\d,]*(?:\.\d+)?[^)]*[+\-*/×÷][^)]*\)/, // a parenthesized expression (a + b)
|
|
75
|
+
];
|
|
76
|
+
function anyMatch(patterns, text) {
|
|
77
|
+
return patterns.some((re) => re.test(text));
|
|
78
|
+
}
|
|
79
|
+
export const calcGateKernel = {
|
|
80
|
+
slug: "calc_gate",
|
|
81
|
+
describe: "Numeric domains: a computed result figure (IRR, NPV, margin, deflection, mean, …) must show the calculation/derivation, not just assert the number",
|
|
82
|
+
async run(ctx) {
|
|
83
|
+
if (!APPLIES_TO_PROFILES.has(ctx.profile))
|
|
84
|
+
return [];
|
|
85
|
+
const text = ctx.assistantText;
|
|
86
|
+
if (text.length < MIN_LENGTH_TO_GATE)
|
|
87
|
+
return [];
|
|
88
|
+
// Is there a named computed-result assertion at all?
|
|
89
|
+
const claimMatch = CLAIM_PATTERNS.map((re) => re.exec(text)).find(Boolean);
|
|
90
|
+
if (!claimMatch)
|
|
91
|
+
return [];
|
|
92
|
+
// Guard: rough estimate / user-supplied input / "I can't compute" — these
|
|
93
|
+
// don't need a derivation shown. Inspect a window around the match.
|
|
94
|
+
const idx = claimMatch.index ?? 0;
|
|
95
|
+
const window = text.slice(Math.max(0, idx - 90), idx + (claimMatch[0]?.length ?? 0) + 90);
|
|
96
|
+
if (GUARD.test(window))
|
|
97
|
+
return [];
|
|
98
|
+
// Does the output show the work anywhere?
|
|
99
|
+
if (anyMatch(DERIVATION_EVIDENCE, text))
|
|
100
|
+
return [];
|
|
101
|
+
return [
|
|
102
|
+
{
|
|
103
|
+
severity: "block",
|
|
104
|
+
kernel: "calc_gate",
|
|
105
|
+
message: `A computed figure ("${claimMatch[0].slice(0, 60).trim()}…") is asserted with no visible calculation. ` +
|
|
106
|
+
`Per the methodology for numeric work: show the work — the arithmetic, formula, inputs, or a worked table that produces the number. ` +
|
|
107
|
+
`A bare result the reader can't reproduce is a hypothesis, not an answer.`,
|
|
108
|
+
},
|
|
109
|
+
];
|
|
110
|
+
},
|
|
111
|
+
};
|
|
112
|
+
//# sourceMappingURL=calc_gate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"calc_gate.js","sourceRoot":"","sources":["../../src/verifiers/calc_gate.ts"],"names":[],"mappings":"AAAA,kEAAkE;AAClE,2CAA2C;AAC3C,EAAE;AACF,sEAAsE;AACtE,0EAA0E;AAC1E,yEAAyE;AACzE,6EAA6E;AAC7E,2EAA2E;AAC3E,qCAAqC;AACrC,EAAE;AACF,wEAAwE;AACxE,8EAA8E;AAC9E,uDAAuD;AACvD,8EAA8E;AAC9E,wEAAwE;AACxE,uEAAuE;AACvE,EAAE;AACF,gBAAgB;AAChB,yEAAyE;AACzE,4EAA4E;AAC5E,wEAAwE;AACxE,iEAAiE;AACjE,6EAA6E;AAC7E,6EAA6E;AAC7E,0EAA0E;AAC1E,qEAAqE;AACrE,8EAA8E;AAC9E,2DAA2D;AAC3D,EAAE;AACF,wEAAwE;AACxE,0EAA0E;AAC1E,8EAA8E;AAC9E,4EAA4E;AAC5E,iDAAiD;AAIjD,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,SAAS;IACT,MAAM;IACN,MAAM;IACN,QAAQ;IACR,UAAU;IACV,SAAS;CACV,CAAC,CAAC;AAEH,4EAA4E;AAC5E,4BAA4B;AAC5B,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B,4EAA4E;AAC5E,6EAA6E;AAC7E,8EAA8E;AAC9E,4BAA4B;AAC5B,MAAM,YAAY,GAChB,ohBAAohB,CAAC;AAEvhB,0EAA0E;AAC1E,MAAM,KAAK,GAAG,wHAAwH,CAAC;AAEvI,MAAM,cAAc,GAAa;IAC/B,8DAA8D;IAC9D,IAAI,MAAM,CAAC,SAAS,YAAY,sJAAsJ,KAAK,EAAE,EAAE,GAAG,CAAC;IACnM,uEAAuE;IACvE,IAAI,MAAM,CAAC,SAAS,YAAY,yBAAyB,KAAK,EAAE,EAAE,GAAG,CAAC;CACvE,CAAC;AAEF,gFAAgF;AAChF,+EAA+E;AAC/E,0EAA0E;AAC1E,+DAA+D;AAC/D,MAAM,KAAK,GACT,+UAA+U,CAAC;AAElV,kEAAkE;AAClE,MAAM,mBAAmB,GAAa;IACpC,uEAAuE,EAAE,kCAAkC;IAC3G,6CAA6C,EAA2B,eAAe;IACvF,gBAAgB,EAAwD,6BAA6B;IACrG,2CAA2C,EAA6B,2BAA2B;IACnG,0GAA0G;IAC1G,gEAAgE,EAAS,2CAA2C;IACpH,iBAAiB,EAAuD,8CAA8C;IACtH,8DAA8D,EAAU,2BAA2B;IACnG,uDAAuD,EAAkB,qCAAqC;CAC/G,CAAC;AAEF,SAAS,QAAQ,CAAC,QAAkB,EAAE,IAAY;IAChD,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,MAAM,cAAc,GAAa;IACtC,IAAI,EAAE,WAAW;IACjB,QAAQ,EACN,oJAAoJ;IACtJ,KAAK,CAAC,GAAG,CAAC,GAAoB;QAC5B,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC;YAAE,OAAO,EAAE,CAAC;QACrD,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,kBAAkB;YAAE,OAAO,EAAE,CAAC;QAEhD,qDAAqD;QACrD,MAAM,UAAU,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3E,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;Q
AE3B,0EAA0E;QAC1E,oEAAoE;QACpE,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,IAAI,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,EAAE,CAAC,EAAE,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QAC1F,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;YAAE,OAAO,EAAE,CAAC;QAElC,0CAA0C;QAC1C,IAAI,QAAQ,CAAC,mBAAmB,EAAE,IAAI,CAAC;YAAE,OAAO,EAAE,CAAC;QAEnD,OAAO;YACL;gBACE,QAAQ,EAAE,OAAO;gBACjB,MAAM,EAAE,WAAW;gBACnB,OAAO,EACL,uBAAuB,UAAU,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,+CAA+C;oBACvG,qIAAqI;oBACrI,0EAA0E;aAC7E;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
// Citation-gate verifier — the deterministic teeth behind the
|
|
2
|
+
// regulated/research methodology's "cite the source" invariant.
|
|
3
|
+
//
|
|
4
|
+
// The throughline across legal / medical / science / research / finance-fact
|
|
5
|
+
// is: a stated FACT, PRECEDENT, or FIGURE must point at a source. Reciting a
|
|
6
|
+
// statute, a holding, a clinical guideline, a study result, or a hard number
|
|
7
|
+
// from memory — with nothing to verify it against — is the failure mode this
|
|
8
|
+
// gate catches. It runs on the regulated + research profiles only.
|
|
9
|
+
//
|
|
10
|
+
// How it differs from citation_presence (the existing softer check):
|
|
11
|
+
// - citation_presence is a length-triggered "did you cite ANYTHING?" warn.
|
|
12
|
+
// It fires whenever a long output has zero citations.
|
|
13
|
+
// - citation_gate is CLAIM-triggered and BLOCKS. It only fires when the
|
|
14
|
+
// output actually ASSERTS a specific fact/precedent/figure (the sentence
|
|
15
|
+
// that needs a source), and then proves that sentence carries one. This
|
|
16
|
+
// is the harder gate: a 100-word answer that confidently states a holding
|
|
17
|
+
// with no cite is blocked even though it's under the presence threshold.
|
|
18
|
+
//
|
|
19
|
+
// How it works:
|
|
20
|
+
// 1. Scan the output for ASSERTION language that introduces a verifiable
|
|
21
|
+
// fact: a ruled/held case, an approved/indicated drug, a study that
|
|
22
|
+
// "found"/"showed" a result, a statute that "requires", a figure stated
|
|
23
|
+
// as established ("the FDA approved", "the standard rate is 21%").
|
|
24
|
+
// 2. If such a claim is present, check for a CITATION token anywhere a
|
|
25
|
+
// reader could chase it down: a statute § / a case name / a DOI / a
|
|
26
|
+
// named source / a URL / an inline (Author Year).
|
|
27
|
+
// 3. A specific fact/precedent/figure claim WITHOUT a citation is a BLOCK
|
|
28
|
+
// — fed back so the model must attach the source before the claim
|
|
29
|
+
// stands.
|
|
30
|
+
//
|
|
31
|
+
// Conservative by design: it only fires on AFFIRMATIVE, specific assertions.
|
|
32
|
+
// Hedged statements ("I don't have the cite handy"), questions, the model's
|
|
33
|
+
// own disclaimers, and "I can't verify this" all pass. The negation/hedge
|
|
34
|
+
// guard below keeps it from false-firing on honest uncertainty — which is
|
|
35
|
+
// exactly the behavior we want to reward, not punish.
|
|
36
|
+
const APPLIES_TO_PROFILES = new Set([
|
|
37
|
+
"legal",
|
|
38
|
+
"medical",
|
|
39
|
+
"research",
|
|
40
|
+
"academic",
|
|
41
|
+
"policy",
|
|
42
|
+
"science",
|
|
43
|
+
"finance",
|
|
44
|
+
"architect",
|
|
45
|
+
// education profile (slug `tutor`): teach only what you can source/derive —
|
|
46
|
+
// a confident factual/historical/scientific claim still needs a citation.
|
|
47
|
+
"tutor",
|
|
48
|
+
]);
|
|
49
|
+
// Only gate substantive outputs — a one-line "let me look that up" is not a
|
|
50
|
+
// fact assertion that needs a cite.
|
|
51
|
+
const MIN_LENGTH_TO_GATE = 180;
|
|
52
|
+
// ASSERTION language: the model stating a specific, verifiable
|
|
53
|
+
// fact / precedent / figure as established. Crafted to catch affirmative
|
|
54
|
+
// regulated claims while ignoring intentions ("let me find the statute"),
|
|
55
|
+
// questions, and the model's own uncertainty (handled by HEDGE_GUARD).
|
|
56
|
+
const CLAIM_PATTERNS = [
|
|
57
|
+
// A court RULED / HELD / a case ESTABLISHED something — a precedent claim.
|
|
58
|
+
/\b(?:the\s+court|supreme\s+court|holding|precedent)\b[^.\n]{0,80}\b(?:held|ruled|found|established|decided|affirmed|reversed|struck\s+down|established\s+that)\b/i,
|
|
59
|
+
/\b(?:held|ruled|established|decided|affirmed|reversed)\b[^.\n]{0,60}\b(?:that|in|under|the\s+defendant|the\s+plaintiff)\b/i,
|
|
60
|
+
// A statute / regulation / standard REQUIRES / MANDATES / PROHIBITS — a legal-figure claim.
|
|
61
|
+
/\b(?:statute|regulation|law|code|rule|act|directive|the\s+IBC|the\s+ADA|HIPAA|GDPR|the\s+FDA|the\s+SEC|the\s+IRS)\b[^.\n]{0,80}\b(?:requires?|mandates?|prohibits?|specifies?|states?\s+that|sets?\s+(?:a|the)\s+limit|caps?)\b/i,
|
|
62
|
+
// A drug / treatment is APPROVED / INDICATED / CONTRAINDICATED — a clinical claim.
|
|
63
|
+
/\b(?:approved|indicated|contraindicated|first[- ]line|recommended\s+(?:dose|dosage)|the\s+guideline)\b[^.\n]{0,80}\b(?:for|in|by\s+the\s+FDA|treatment|patients?|is\s+\d)\b/i,
|
|
64
|
+
// A STUDY / TRIAL / META-ANALYSIS FOUND / SHOWED / DEMONSTRATED a result — a research-fact claim.
|
|
65
|
+
/\b(?:a\s+)?(?:study|trial|meta[- ]analysis|paper|the\s+research|rct|randomized\s+controlled\s+trial|systematic\s+review)\b[^.\n]{0,60}\b(?:found|showed|demonstrated|reported|concluded|established|revealed)\b/i,
|
|
66
|
+
// An established FIGURE stated as fact: "the standard rate is 21%", "mortality
|
|
67
|
+
// was 4.2%", "n = 1,204 patients", "the prevalence is 1 in 3,000".
|
|
68
|
+
/\b(?:the\s+(?:standard|statutory|federal|approved|recommended|established|baseline))\b[^.\n]{0,50}\b(?:rate|limit|threshold|dose|level|value|figure|percentage)\b[^.\n]{0,30}\bis\b[^.\n]{0,20}[\d$]/i,
|
|
69
|
+
/\b(?:mortality|incidence|prevalence|efficacy|sensitivity|specificity|the\s+rate|the\s+risk|survival)\b[^.\n]{0,40}\b(?:was|is|of)\b\s*[\d$][\d.,]*\s*%?/i,
|
|
70
|
+
];
|
|
71
|
+
// HEDGE / NEGATION guard: if the matched sentence reads as the model being
|
|
72
|
+
// HONEST about not having the source, asking a question, or stating the
|
|
73
|
+
// opposite, it's not a claim that needs proof — it's exactly the "state
|
|
74
|
+
// uncertainty honestly" behavior we reward. Pass it cleanly.
|
|
75
|
+
// Case-insensitive matcher for hedged or uncertain phrasing. Contractions
// accept a straight (') or typographic (\u2019) apostrophe, or none, so
// "I don\u2019t have" is treated the same as "I don't have".
const HEDGE_GUARD = /\b(?:i\s+don['\u2019]?t\s+have|i\s+can['\u2019]?t\s+(?:find|locate|verify|confirm)|not\s+(?:sure|certain|aware|in\s+the)|unsure|unclear|no\s+(?:source|citation|record|study)|cannot\s+(?:cite|verify|confirm)|unverified|unsourced|i\s+believe|i\s+think|may\s+(?:be|have)|might\s+(?:be|have)|appears?\s+to|seems?\s+to|if\s+i\s+recall|as\s+far\s+as\s+i\s+know|point\s+me\s+at|do\s+you\s+have\s+the)\b/i;
|
|
76
|
+
// CITATION tokens — anywhere the claim is anchored to something a reader can
|
|
77
|
+
// chase. Reuses the recognized forms from citation_presence and adds DOI +
|
|
78
|
+
// named-reporter + URL.
|
|
79
|
+
const CITATION_EVIDENCE = [
|
|
80
|
+
/\[[^\]]*§[^\]]*\]/, // [Source §1.2(b)]
|
|
81
|
+
/§\s*\d/, // a bare § N citation
|
|
82
|
+
/\b\d+\s+[A-Z][A-Za-z.]+\s+\d+\b/, // reporter cite: 123 U.S. 456 / 410 F.3d 100
|
|
83
|
+
/\b[A-Z][A-Za-z.&'-]+\s+v\.?\s+[A-Z][A-Za-z.&'-]+/, // Case v. Name
|
|
84
|
+
/\b10\.\d{4,9}\/[-._;()/:A-Z0-9]+/i, // a DOI (10.xxxx/...)
|
|
85
|
+
/\bdoi:\s*\S+/i, // doi: ...
|
|
86
|
+
/\b(?:PMID|PMCID|NCT|arXiv)\s*:?\s*\d/i, // PubMed / trial / arXiv IDs
|
|
87
|
+
/\bhttps?:\/\/\S+/, // a URL
|
|
88
|
+
/\[[^\]]+\]\(\S+\)/, // a markdown link
|
|
89
|
+
/\[[A-Z][a-z]+(?:\s+et\s+al\.?)?\s+\d{4}\]/, // [Smith 2022]
|
|
90
|
+
/\([A-Z][a-z]+(?:\s+et\s+al\.?)?,?\s+\d{4}\)/, // (Smith, 2022)
|
|
91
|
+
/\b(?:USC|U\.S\.C\.|CFR|C\.F\.R\.|IBC|ADA|ISO|IEC|ASTM|NFPA)\s*§?\s*\d/i, // standard/code cite
|
|
92
|
+
];
|
|
93
|
+
function anyMatch(patterns, text) {
|
|
94
|
+
return patterns.some((re) => re.test(text));
|
|
95
|
+
}
|
|
96
|
+
export const citationGateKernel = {
|
|
97
|
+
slug: "citation_gate",
|
|
98
|
+
describe: "Regulated/research: a stated fact, precedent, or figure must carry a citation (statute §, case, DOI, source, or URL)",
|
|
99
|
+
async run(ctx) {
|
|
100
|
+
if (!APPLIES_TO_PROFILES.has(ctx.profile))
|
|
101
|
+
return [];
|
|
102
|
+
const text = ctx.assistantText;
|
|
103
|
+
if (text.length < MIN_LENGTH_TO_GATE)
|
|
104
|
+
return [];
|
|
105
|
+
// Is there an affirmative fact/precedent/figure assertion at all?
|
|
106
|
+
const claimMatch = CLAIM_PATTERNS.map((re) => re.exec(text)).find(Boolean);
|
|
107
|
+
if (!claimMatch)
|
|
108
|
+
return [];
|
|
109
|
+
// Hedge/negation guard: if the matched sentence is the model being honest
|
|
110
|
+
// about NOT having the source (or asking for it), don't gate it. Inspect
|
|
111
|
+
// a window around the match.
|
|
112
|
+
const idx = claimMatch.index ?? 0;
|
|
113
|
+
const window = text.slice(Math.max(0, idx - 90), idx + (claimMatch[0]?.length ?? 0) + 90);
|
|
114
|
+
if (HEDGE_GUARD.test(window))
|
|
115
|
+
return [];
|
|
116
|
+
// Does the output carry a citation the reader could chase down?
|
|
117
|
+
if (anyMatch(CITATION_EVIDENCE, text))
|
|
118
|
+
return [];
|
|
119
|
+
return [
|
|
120
|
+
{
|
|
121
|
+
severity: "block",
|
|
122
|
+
kernel: "citation_gate",
|
|
123
|
+
message: `A specific fact/precedent/figure ("${claimMatch[0].slice(0, 60).trim()}…") is stated with no citation. ` +
|
|
124
|
+
`Per the methodology for regulated/research work: read the source and cite it (statute §, case name, DOI, named study, or URL) ` +
|
|
125
|
+
`before asserting it. If you don't have the source, say so honestly instead of stating it as established.`,
|
|
126
|
+
},
|
|
127
|
+
];
|
|
128
|
+
},
|
|
129
|
+
};
|
|
130
|
+
//# sourceMappingURL=citation_gate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citation_gate.js","sourceRoot":"","sources":["../../src/verifiers/citation_gate.ts"],"names":[],"mappings":"AAAA,8DAA8D;AAC9D,gEAAgE;AAChE,EAAE;AACF,6EAA6E;AAC7E,6EAA6E;AAC7E,6EAA6E;AAC7E,6EAA6E;AAC7E,mEAAmE;AACnE,EAAE;AACF,qEAAqE;AACrE,6EAA6E;AAC7E,0DAA0D;AAC1D,0EAA0E;AAC1E,6EAA6E;AAC7E,4EAA4E;AAC5E,8EAA8E;AAC9E,6EAA6E;AAC7E,EAAE;AACF,gBAAgB;AAChB,2EAA2E;AAC3E,yEAAyE;AACzE,6EAA6E;AAC7E,wEAAwE;AACxE,yEAAyE;AACzE,yEAAyE;AACzE,uDAAuD;AACvD,4EAA4E;AAC5E,uEAAuE;AACvE,eAAe;AACf,EAAE;AACF,6EAA6E;AAC7E,4EAA4E;AAC5E,0EAA0E;AAC1E,0EAA0E;AAC1E,sDAAsD;AAItD,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,OAAO;IACP,SAAS;IACT,UAAU;IACV,UAAU;IACV,QAAQ;IACR,SAAS;IACT,SAAS;IACT,WAAW;IACX,4EAA4E;IAC5E,0EAA0E;IAC1E,OAAO;CACR,CAAC,CAAC;AAEH,4EAA4E;AAC5E,oCAAoC;AACpC,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B,+DAA+D;AAC/D,yEAAyE;AACzE,0EAA0E;AAC1E,uEAAuE;AACvE,MAAM,cAAc,GAAa;IAC/B,2EAA2E;IAC3E,mKAAmK;IACnK,4HAA4H;IAC5H,4FAA4F;IAC5F,kOAAkO;IAClO,mFAAmF;IACnF,8KAA8K;IAC9K,kGAAkG;IAClG,kNAAkN;IAClN,+EAA+E;IAC/E,mEAAmE;IACnE,uMAAuM;IACvM,0JAA0J;CAC3J,CAAC;AAEF,2EAA2E;AAC3E,wEAAwE;AACxE,wEAAwE;AACxE,6DAA6D;AAC7D,MAAM,WAAW,GACf,8XAA8X,CAAC;AAEjY,6EAA6E;AAC7E,2EAA2E;AAC3E,wBAAwB;AACxB,MAAM,iBAAiB,GAAa;IAClC,mBAAmB,EAAkC,mBAAmB;IACxE,QAAQ,EAA6C,sBAAsB;IAC3E,iCAAiC,EAAoB,6CAA6C;IAClG,kDAAkD,EAAE,eAAe;IACnE,mCAAmC,EAAkB,sBAAsB;IAC3E,eAAe,EAAsC,WAAW;IAChE,uCAAuC,EAAc,6BAA6B;IAClF,kBAAkB,EAAmC,QAAQ;IAC7D,mBAAmB,EAAkC,kBAAkB;IACvE,2CAA2C,EAAU,eAAe;IACpE,6CAA6C,EAAQ,gBAAgB;IACrE,wEAAwE,EAAE,qBAAqB;CAChG,CAAC;AAEF,SAAS,QAAQ,CAAC,QAAkB,EAAE,IAAY;IAChD,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAa;IAC1C,IAAI,EAAE,eAAe;IACrB,QAAQ,EACN,sHAAsH;IACxH,KAAK,CAAC,GAAG,CAAC,GAAoB;QAC5B,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC;YAAE,OAAO,EAAE,CAAC;QACrD,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,kBAAkB;YAAE,OAAO,EAAE,CAAC;QAEhD,kEAAkE;QAClE,MAAM,UAAU,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EA
AE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3E,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;QAE3B,0EAA0E;QAC1E,yEAAyE;QACzE,6BAA6B;QAC7B,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,IAAI,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,EAAE,CAAC,EAAE,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QAC1F,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC;YAAE,OAAO,EAAE,CAAC;QAExC,gEAAgE;QAChE,IAAI,QAAQ,CAAC,iBAAiB,EAAE,IAAI,CAAC;YAAE,OAAO,EAAE,CAAC;QAEjD,OAAO;YACL;gBACE,QAAQ,EAAE,OAAO;gBACjB,MAAM,EAAE,eAAe;gBACvB,OAAO,EACL,sCAAsC,UAAU,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,kCAAkC;oBACzG,gIAAgI;oBAChI,0GAA0G;aAC7G;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
// Evidence-gate verifier — the deterministic teeth behind the pentest /
|
|
2
|
+
// security methodology's "confirm with evidence" invariant.
|
|
3
|
+
//
|
|
4
|
+
// The methodology says: NEVER claim a vulnerability without the exact
|
|
5
|
+
// request that triggers it AND the response/error that proves it. This
|
|
6
|
+
// kernel enforces that gate mechanically rather than trusting the model
|
|
7
|
+
// to self-police. It runs on the `pentest` and `security` profiles only.
|
|
8
|
+
//
|
|
9
|
+
// How it works:
|
|
10
|
+
// 1. Scan the output for VULN-CLAIM language ("SQL injection confirmed",
|
|
11
|
+
// "auth bypass found", "vulnerable to XSS", "IDOR present", etc.).
|
|
12
|
+
// 2. If a claim is present, scan for EVIDENCE alongside it: a concrete
|
|
13
|
+
// request (curl / HTTP method + path / a payload string) AND a
|
|
14
|
+
// concrete observed response (status code, error string, extracted
|
|
15
|
+
// row, token contents, a fenced response block, etc.).
|
|
16
|
+
// 3. A claim WITHOUT both halves of the evidence is a BLOCK — it gets
|
|
17
|
+
// fed back into the next turn so the model must go prove it (re-run
|
|
18
|
+
// the request, paste the response) before the finding stands.
|
|
19
|
+
//
|
|
20
|
+
// This is intentionally conservative: it only fires when the output
|
|
21
|
+
// actually asserts a vulnerability. Recon, baselining, scoping, and
|
|
22
|
+
// "I tested X and it's NOT vulnerable" never trip it. The point is to
|
|
23
|
+
// stop unproven "found a critical SQLi" claims, not to slow down honest
|
|
24
|
+
// investigation.
|
|
25
|
+
const APPLIES_TO_PROFILES = new Set(["pentest", "security"]);
|
|
26
|
+
// A claim is only gated if the output is substantive enough to be a
|
|
27
|
+
// finding rather than a passing aside. Short "let me test for SQLi"
|
|
28
|
+
// lines are not claims.
|
|
29
|
+
const MIN_LENGTH_TO_GATE = 200;
|
|
30
|
+
// VULN-CLAIM language: the model asserting a vulnerability EXISTS /
|
|
31
|
+
// is confirmed / was exploited. Crafted to catch affirmative findings
|
|
32
|
+
// while ignoring negative results ("not vulnerable") and intentions
|
|
33
|
+
// ("let's test for SQLi"), which are handled by NEGATION_NEAR below.
|
|
34
|
+
const CLAIM_PATTERNS = [
|
|
35
|
+
/\b(?:sql\s*injection|sqli|xss|cross[- ]site\s+scripting|csrf|ssrf|idor|bola|rce|remote\s+code\s+execution|auth(?:entication|orization)?\s+bypass|privilege\s+escalation|path\s+traversal|directory\s+traversal|lfi|rfi|xxe|open\s+redirect|insecure\s+deserialization|command\s+injection|broken\s+access\s+control)\b[^.\n]{0,80}\b(?:confirmed|found|present|exists?|works?|succeeds?|is\s+exploitable|vulnerab(?:le|ility)|triggered|exploited|verified)\b/i,
|
|
36
|
+
/\b(?:vulnerab(?:le|ility))\b[^.\n]{0,60}\b(?:to|confirmed|found|present)\b/i,
|
|
37
|
+
/\b(?:confirmed|verified|exploited|found)\b[^.\n]{0,40}\b(?:sql\s*injection|sqli|xss|csrf|ssrf|idor|bola|rce|auth(?:entication|orization)?\s+bypass|privilege\s+escalation|path\s+traversal|command\s+injection|access\s+control)\b/i,
|
|
38
|
+
// A structured finding header asserting severity — "Severity: Critical"
|
|
39
|
+
// implies a positive claim. Only the "Severity: <level>" form is matched; a bare "**Critical**" is not.
|
|
40
|
+
/\bseverity\b\s*[:\-]\s*(critical|high|medium|med|low)\b/i,
|
|
41
|
+
];
|
|
42
|
+
// Negation guard: if the matched sentence is a NEGATIVE result, it's not
|
|
43
|
+
// a claim that needs proof. "/login is NOT vulnerable to SQLi" should
|
|
44
|
+
// pass cleanly.
|
|
45
|
+
// Case-insensitive matcher for negative-result phrasing. Contractions accept
// a straight (') or typographic (\u2019) apostrophe, or none. The "n't" forms
// of do/be (doesn't, didn't, aren't, weren't) are included because the
// spelled-out "not" already counts as a negation.
const NEGATION_NEAR = /\b(not|isn['\u2019]?t|wasn['\u2019]?t|aren['\u2019]?t|weren['\u2019]?t|doesn['\u2019]?t|didn['\u2019]?t|no\s+evidence|appears?\s+safe|does\s+not\s+appear|could\s+not|couldn['\u2019]?t|unable\s+to|ruled\s+out|false\s+positive)\b/i;
|
|
46
|
+
// EVIDENCE — a concrete REQUEST. The exact thing that was sent.
|
|
47
|
+
const REQUEST_EVIDENCE = [
|
|
48
|
+
/\bcurl\b/i, // a curl invocation
|
|
49
|
+
/\b(GET|POST|PUT|DELETE|PATCH)\s+\/?\S/, // METHOD /path
|
|
50
|
+
/\?\w+=/, // a query param payload (?q=...)
|
|
51
|
+
/https?:\/\/\S+/, // a full URL under test
|
|
52
|
+
/\bpayload\b\s*[:\-]/i, // an explicit payload line
|
|
53
|
+
/['"`][^'"`\n]*(?:'|--|\bUNION\b|\bSELECT\b|<script|\.\.\/)[^'"`\n]*['"`]/i, // an injection string
|
|
54
|
+
];
|
|
55
|
+
// EVIDENCE — a concrete RESPONSE / observation that PROVES it.
|
|
56
|
+
const RESPONSE_EVIDENCE = [
|
|
57
|
+
/\b(?:HTTP\/?\d?\.?\d?\s*)?(?:status\s*(?:code)?\s*[:=]?\s*)?\b(200|201|301|302|400|401|403|404|419|500|502|503)\b/,
|
|
58
|
+
/\b(?:SQLITE_ERROR|SQL\s*syntax|ORA-\d+|PG::|MySQLSyntaxError|near\s+["'][^"']*["']:\s*syntax\s+error|unterminated\s+(?:quoted\s+)?string|column\s+count|unrecognized\s+token)\b/i,
|
|
59
|
+
/\b(?:returned|returns|response(?:\s+(?:was|is|body))?|error(?:\s+(?:was|is|message))?|output(?:\s+(?:was|is))?)\b\s*[:\-]/i,
|
|
60
|
+
/```[\s\S]*?```/, // a fenced response/error block
|
|
61
|
+
/\b\d+\s+rows?\b/i, // "3 rows" — baseline/extract evidence
|
|
62
|
+
/\beyJ[A-Za-z0-9_-]{8,}/, // a JWT (header.payload...) inline
|
|
63
|
+
/\b(?:extracted|leaked|dumped|recovered)\b[^.\n]{0,40}\b(?:user|password|hash|token|email|row|record|column)\b/i,
|
|
64
|
+
];
|
|
65
|
+
function anyMatch(patterns, text) {
|
|
66
|
+
return patterns.some((re) => re.test(text));
|
|
67
|
+
}
|
|
68
|
+
export const evidenceGateKernel = {
|
|
69
|
+
slug: "evidence_gate",
|
|
70
|
+
describe: "Pentest/security: a vulnerability CLAIM must carry the exact request + the proving response/error",
|
|
71
|
+
async run(ctx) {
|
|
72
|
+
if (!APPLIES_TO_PROFILES.has(ctx.profile))
|
|
73
|
+
return [];
|
|
74
|
+
const text = ctx.assistantText;
|
|
75
|
+
if (text.length < MIN_LENGTH_TO_GATE)
|
|
76
|
+
return [];
|
|
77
|
+
// Is there an affirmative vulnerability claim at all?
|
|
78
|
+
const claimMatch = CLAIM_PATTERNS.map((re) => re.exec(text)).find(Boolean);
|
|
79
|
+
if (!claimMatch)
|
|
80
|
+
return [];
|
|
81
|
+
// Negation guard: if the matched claim sentence reads as a NEGATIVE
|
|
82
|
+
// result, it's not a finding that needs evidence. Inspect a window
|
|
83
|
+
// around the match.
|
|
84
|
+
const idx = claimMatch.index ?? 0;
|
|
85
|
+
const window = text.slice(Math.max(0, idx - 80), idx + (claimMatch[0]?.length ?? 0) + 80);
|
|
86
|
+
if (NEGATION_NEAR.test(window))
|
|
87
|
+
return [];
|
|
88
|
+
const hasRequest = anyMatch(REQUEST_EVIDENCE, text);
|
|
89
|
+
const hasResponse = anyMatch(RESPONSE_EVIDENCE, text);
|
|
90
|
+
if (hasRequest && hasResponse)
|
|
91
|
+
return [];
|
|
92
|
+
const missing = [];
|
|
93
|
+
if (!hasRequest)
|
|
94
|
+
missing.push("the exact request (curl / METHOD path / payload)");
|
|
95
|
+
if (!hasResponse)
|
|
96
|
+
missing.push("the proving response (status code, error, extracted data, or a fenced response block)");
|
|
97
|
+
return [
|
|
98
|
+
{
|
|
99
|
+
severity: "block",
|
|
100
|
+
kernel: "evidence_gate",
|
|
101
|
+
message: `Vulnerability claim ("${claimMatch[0].slice(0, 60).trim()}…") is missing ${missing.join(" and ")}. ` +
|
|
102
|
+
`Per the methodology: never claim a vuln without the exact request AND the response/error that proves it. ` +
|
|
103
|
+
`Re-run the request, paste both halves of the evidence, then restate the finding.`,
|
|
104
|
+
},
|
|
105
|
+
];
|
|
106
|
+
},
|
|
107
|
+
};
|
|
108
|
+
//# sourceMappingURL=evidence_gate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evidence_gate.js","sourceRoot":"","sources":["../../src/verifiers/evidence_gate.ts"],"names":[],"mappings":"AAAA,wEAAwE;AACxE,4DAA4D;AAC5D,EAAE;AACF,sEAAsE;AACtE,uEAAuE;AACvE,wEAAwE;AACxE,yEAAyE;AACzE,EAAE;AACF,gBAAgB;AAChB,2EAA2E;AAC3E,wEAAwE;AACxE,yEAAyE;AACzE,oEAAoE;AACpE,wEAAwE;AACxE,4DAA4D;AAC5D,wEAAwE;AACxE,yEAAyE;AACzE,mEAAmE;AACnE,EAAE;AACF,oEAAoE;AACpE,oEAAoE;AACpE,sEAAsE;AACtE,wEAAwE;AACxE,iBAAiB;AAIjB,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC,CAAC;AAE7D,oEAAoE;AACpE,oEAAoE;AACpE,wBAAwB;AACxB,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B,oEAAoE;AACpE,sEAAsE;AACtE,oEAAoE;AACpE,sEAAsE;AACtE,MAAM,cAAc,GAAa;IAC/B,gcAAgc;IAChc,6EAA6E;IAC7E,qOAAqO;IACrO,wEAAwE;IACxE,iEAAiE;IACjE,0DAA0D;CAC3D,CAAC;AAEF,yEAAyE;AACzE,sEAAsE;AACtE,gBAAgB;AAChB,MAAM,aAAa,GAAG,4IAA4I,CAAC;AAEnK,gEAAgE;AAChE,MAAM,gBAAgB,GAAa;IACjC,WAAW,EAAoC,oBAAoB;IACnE,uCAAuC,EAAS,eAAe;IAC/D,QAAQ,EAAwC,iCAAiC;IACjF,gBAAgB,EAAgC,wBAAwB;IACxE,sBAAsB,EAA2B,2BAA2B;IAC5E,2EAA2E,EAAE,sBAAsB;CACpG,CAAC;AAEF,+DAA+D;AAC/D,MAAM,iBAAiB,GAAa;IAClC,mHAAmH;IACnH,kLAAkL;IAClL,4HAA4H;IAC5H,gBAAgB,EAAiC,gCAAgC;IACjF,kBAAkB,EAA8B,uCAAuC;IACvF,wBAAwB,EAAyB,mCAAmC;IACpF,gHAAgH;CACjH,CAAC;AAEF,SAAS,QAAQ,CAAC,QAAkB,EAAE,IAAY;IAChD,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAa;IAC1C,IAAI,EAAE,eAAe;IACrB,QAAQ,EACN,mGAAmG;IACrG,KAAK,CAAC,GAAG,CAAC,GAAoB;QAC5B,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC;YAAE,OAAO,EAAE,CAAC;QACrD,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,kBAAkB;YAAE,OAAO,EAAE,CAAC;QAEhD,sDAAsD;QACtD,MAAM,UAAU,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3E,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;QAE3B,oEAAoE;QACpE,mEAAmE;QACnE,oBAAoB;QACpB,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,IAAI,CAAC,CAAC;QAClC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,EAAE,CAAC,EAAE,GAAG,GAAG,CAAC,UAAU,C
AAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QAC1F,IAAI,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC;YAAE,OAAO,EAAE,CAAC;QAE1C,MAAM,UAAU,GAAG,QAAQ,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAC;QACpD,MAAM,WAAW,GAAG,QAAQ,CAAC,iBAAiB,EAAE,IAAI,CAAC,CAAC;QAEtD,IAAI,UAAU,IAAI,WAAW;YAAE,OAAO,EAAE,CAAC;QAEzC,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,IAAI,CAAC,UAAU;YAAE,OAAO,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;QAClF,IAAI,CAAC,WAAW;YAAE,OAAO,CAAC,IAAI,CAAC,uFAAuF,CAAC,CAAC;QAExH,OAAO;YACL;gBACE,QAAQ,EAAE,OAAO;gBACjB,MAAM,EAAE,eAAe;gBACvB,OAAO,EACL,yBAAyB,UAAU,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,kBAAkB,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI;oBACrG,2GAA2G;oBAC3G,kFAAkF;aACrF;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
|
package/dist/verifiers/index.js
CHANGED
|
@@ -19,6 +19,10 @@ import { emDashCheckKernel } from "./em_dash_check.js";
|
|
|
19
19
|
import { aiIsmCheckKernel } from "./ai_ism_check.js";
|
|
20
20
|
import { citationPresenceKernel } from "./citation_presence.js";
|
|
21
21
|
import { arithmeticRecheckKernel } from "./arithmetic_recheck.js";
|
|
22
|
+
import { evidenceGateKernel } from "./evidence_gate.js";
|
|
23
|
+
import { citationGateKernel } from "./citation_gate.js";
|
|
24
|
+
import { calcGateKernel } from "./calc_gate.js";
|
|
25
|
+
import { sourceGateKernel } from "./source_gate.js";
|
|
22
26
|
// Map slug → kernel. Profiles reference these via Profile.verifiers[].
|
|
23
27
|
const REGISTRY = new Map();
|
|
24
28
|
// Store a verifier kernel in REGISTRY under its own `slug`; a later call with
// the same slug replaces the earlier entry (Map.set semantics).
function register(v) {
    const { slug } = v;
    REGISTRY.set(slug, v);
}
|
|
@@ -30,6 +34,10 @@ register(emDashCheckKernel);
|
|
|
30
34
|
register(aiIsmCheckKernel);
|
|
31
35
|
register(citationPresenceKernel);
|
|
32
36
|
register(arithmeticRecheckKernel);
|
|
37
|
+
register(evidenceGateKernel);
|
|
38
|
+
register(citationGateKernel);
|
|
39
|
+
register(calcGateKernel);
|
|
40
|
+
register(sourceGateKernel);
|
|
33
41
|
// Aliases for verifier slugs referenced in seeds.ts that share a kernel
|
|
34
42
|
// (e.g. assumption_marked / advice_disclaimer / confidence_marked all
|
|
35
43
|
// run via style_lint with a different config — the verifier reads the
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/verifiers/index.ts"],"names":[],"mappings":"AAAA,uCAAuC;AACvC,EAAE;AACF,sEAAsE;AACtE,mEAAmE;AACnE,8DAA8D;AAC9D,qEAAqE;AACrE,EAAE;AACF,gEAAgE;AAChE,iEAAiE;AACjE,EAAE;AACF,sEAAsE;AACtE,mEAAmE;AACnE,wCAAwC;AAExC,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AACvC,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/verifiers/index.ts"],"names":[],"mappings":"AAAA,uCAAuC;AACvC,EAAE;AACF,sEAAsE;AACtE,mEAAmE;AACnE,8DAA8D;AAC9D,qEAAqE;AACrE,EAAE;AACF,gEAAgE;AAChE,iEAAiE;AACjE,EAAE;AACF,sEAAsE;AACtE,mEAAmE;AACnE,wCAAwC;AAExC,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AACvC,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAC;AAChE,OAAO,EAAE,uBAAuB,EAAE,MAAM,yBAAyB,CAAC;AAClE,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAGpD,uEAAuE;AACvE,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAoB,CAAC;AAC7C,SAAS,QAAQ,CAAC,CAAW,IAAU,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;AAEjE,QAAQ,CAAC,eAAe,CAAC,CAAC;AAC1B,QAAQ,CAAC,UAAU,CAAC,CAAC;AACrB,QAAQ,CAAC,eAAe,CAAC,CAAC;AAC1B,QAAQ,CAAC,eAAe,CAAC,CAAC;AAC1B,QAAQ,CAAC,iBAAiB,CAAC,CAAC;AAC5B,QAAQ,CAAC,gBAAgB,CAAC,CAAC;AAC3B,QAAQ,CAAC,sBAAsB,CAAC,CAAC;AACjC,QAAQ,CAAC,uBAAuB,CAAC,CAAC;AAClC,QAAQ,CAAC,kBAAkB,CAAC,CAAC;AAC7B,QAAQ,CAAC,kBAAkB,CAAC,CAAC;AAC7B,QAAQ,CAAC,cAAc,CAAC,CAAC;AACzB,QAAQ,CAAC,gBAAgB,CAAC,CAAC;AAE3B,wEAAwE;AACxE,sEAAsE;AACtE,sEAAsE;AACtE,iDAAiD;AACjD,KAAK,MAAM,KAAK,IAAI;IAClB,iBAAiB;IACjB,mBAAmB;IACnB,mBAAmB;IACnB,mBAAmB;IACnB,mBAAmB;IACnB,kBAAkB;IAClB,2BAA2B;IAC3B,YAAY;CACb,EAAE,CAAC;IACF,mEAAmE;IACnE,qEAAqE;IACrE,oEAAoE;IACpE,0CAA0C;IAC1C,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,GAAG,eAAe,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;AAC3D,CAAC;AAID;;qEAEqE;AACrE,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,KAA2B,EAC3B,GAAoB;IAEpB,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAC5C,MAAM,OAAO,GAAG,KAAK;SAClB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SAC3B,MAAM,CAAC,CAAC,CAAC,EAAiB,EAAE,CAAC,CAAC,KAAK,SAAS,CAAC,CAAC;IACjD,IAAI,O
AAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;QACtB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC;gBAChC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC;gBACV,IAAI,OAAO,CAAkB,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,CAAC;aAC/E,CAAC,CAAC;YACH,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,0DAA0D;YAC1D,OAAO,CAAC;oBACN,QAAQ,EAAE,MAAe;oBACzB,MAAM,EAAE,CAAC,CAAC,IAAI;oBACd,OAAO,EAAE,qBAAqB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;iBACjF,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CACH,CAAC;IACF,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC;AAED,gEAAgE;AAChE,MAAM,UAAU,eAAe,CAAC,MAAuB;IAKrD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAClF,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC;IAC5D,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC;IAC1D,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,QAAQ,CAAC,CAAC;IAC5D,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,KAAK,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,OAAO,CAAC,CAAC;IACzD,OAAO;QACL,EAAE;QACF,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YACxB,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;YAC7E,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC/C,OAAO,KAAK,GAAG,IAAI,CAAC,CAAC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC;QACpD,CAAC,CAAC;KACH,CAAC;AACJ,CAAC;AAED;;0EAE0E;AAC1E,MAAM,UAAU,iBAAiB,CAAC,MAAuB;IACvD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC;IAC5D,IAAI,MAAM,CAAC
,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACnC,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACjD,OAAO,MAAM,CAAC,CAAC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC;IAC9C,CAAC,CAAC,CAAC;IACH,OAAO;QACL,gEAAgE;QAChE,8FAA8F;QAC9F,GAAG,KAAK;QACR,EAAE;KACH,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
// Source-gate verifier — the soft teeth behind the "don't recite from
|
|
2
|
+
// memory" move that runs across every Claude-quality domain.
|
|
3
|
+
//
|
|
4
|
+
// The throughline: when you assert a CANONICAL fact about a specific system,
|
|
5
|
+
// spec, standard, API, config, or codebase — the kind of fact that has an
|
|
6
|
+
// authoritative source you could open and read — you should have READ that
|
|
7
|
+
// source this turn, not recited it from training memory (which drifts,
|
|
8
|
+
// version-skews, and hallucinates exact signatures/flags/section numbers).
|
|
9
|
+
// This is the cheapest, highest-leverage discipline in the whole set: read
|
|
10
|
+
// the source before asserting.
|
|
11
|
+
//
|
|
12
|
+
// Unlike citation_gate (hard BLOCK on regulated facts) and calc_gate (hard
|
|
13
|
+
// BLOCK on bare computed numbers), this one only WARNS. Canonical recall is
|
|
14
|
+
// often fine — the model frequently does know the exact flag — and a hard
|
|
15
|
+
// block here would be too noisy. The warn nudges the model (and the user)
|
|
16
|
+
// toward "let me Read the actual file/spec to confirm" without halting the
|
|
17
|
+
// loop.
|
|
18
|
+
//
|
|
19
|
+
// How it works:
|
|
20
|
+
// 1. Scan the output for CANONICAL-RECALL language: a confident assertion
|
|
21
|
+
// about a specific named system/spec/standard's exact behavior — "the
|
|
22
|
+
// RFC says", "per the spec", "the default value is", "the flag is named",
|
|
23
|
+
// "section 4.2 of the standard", "the function signature is", "the env
|
|
24
|
+
// var is", "the config key is".
|
|
25
|
+
// 2. Check whether the turn shows evidence the SOURCE was actually consulted:
|
|
26
|
+
// a Read/Grep tool ran on a relevant file (touchedFiles or a read marker
|
|
27
|
+
// in the text), a fenced quote FROM the source, a URL/cite, or explicit
|
|
28
|
+
// "I read/checked the source" language.
|
|
29
|
+
// 3. A canonical-recall assertion with NO sign the source was read is a WARN
|
|
30
|
+
// — surfaced so the model self-corrects toward reading it, never blocking.
|
|
31
|
+
//
|
|
32
|
+
// Conservative by design: it only fires on assertions about a SPECIFIC named
|
|
33
|
+
// source's exact contents, and the source-consulted evidence is generous
|
|
34
|
+
// (reading ANY file this turn, quoting, or linking all count). General
|
|
35
|
+
// explanations, opinions, and the model's own reasoning never trip it.
|
|
36
|
+
// Broad set — the "read the source first" discipline is universal, but it's
|
|
37
|
+
// most load-bearing where exact canonical detail matters: code/spec/standards
|
|
38
|
+
// work and the research/regulated lanes. Writing/creative/therapy don't have a
|
|
39
|
+
// "canonical source" to recite, so they're excluded to keep noise down.
|
|
40
|
+
// Profile slugs the source gate runs for. Only membership matters to the
// gate; the sub-lists just keep the original rationale next to the slugs.
const APPLIES_TO_PROFILES = (() => {
    const core = [
        "code",
        "security",
        "pentest",
        "ops",
        "engineer",
        "architect",
        "data",
        "legal",
        "research",
        "academic",
        "science",
        "policy",
        "medical",
    ];
    // education profile (slug `tutor`): read the source before teaching it.
    const education = ["tutor"];
    // strategy/design domains: a recommendation or design choice that recites a
    // market "fact" / canonical pattern from memory should reference its source.
    const strategyAndDesign = ["design", "business", "product"];
    return new Set([...core, ...education, ...strategyAndDesign]);
})();
|
|
62
|
+
// Minimum assistant-text length (string `.length`) for the gate to run at
// all; shorter replies return no findings before any pattern is evaluated.
const MIN_LENGTH_TO_GATE = 160;
|
|
63
|
+
// CANONICAL-RECALL language: a confident assertion about a specific named
|
|
64
|
+
// system/spec/standard's exact contents — the kind of thing you'd want to
|
|
65
|
+
// open the source to confirm rather than recite.
|
|
66
|
+
// Regexes that recognise canonical-recall assertions. None of them carries
// the `g`/`y` flag, so `.test()` / `.exec()` are stateless and the patterns
// can be shared safely across calls.
//
// Word-boundary note: `\b` only matches between a word character and a
// non-word character. Alternatives that begin or end with punctuation
// (`§`, a backtick) are therefore kept OUTSIDE the `\b`-guarded groups,
// otherwise they could only match when glued directly to a word character.
const CLAIM_PATTERNS = [
    // "the RFC/spec/standard says/states/requires/defines ..."
    /\b(?:the\s+)?(?:RFC\s*\d*|spec(?:ification)?|standard|protocol|the\s+docs?|documentation|the\s+manual|man\s+page|the\s+API\s+docs?)\b[^.\n]{0,40}\b(?:says?|states?|requires?|defines?|specifies?|mandates?|describes?|documents?)\b/i,
    // "section/§ 4.2 of the standard ..." — a precise section reference asserted from memory.
    // `§` is matched without a leading `\b` so that " § 4.2" (preceded by whitespace) is caught.
    /(?:\b(?:section|clause|article|chapter)|§)\s*\d[\d.]*\b[^.\n]{0,30}\b(?:of\s+the|requires?|states?|says?|covers?|defines?)\b/i,
    // "the default (value|setting|port|timeout) is X" — an exact canonical default
    /\bthe\s+default\s+(?:value|setting|port|timeout|behavior|config(?:uration)?|option)\b[^.\n]{0,20}\bis\b/i,
    // "the flag/env var/config key/parameter is named X" — exact identifier recall.
    // The "is `" alternative has no trailing `\b` so identifiers that start with
    // punctuation (e.g. `--verbose`) are caught as well.
    /\b(?:the\s+)?(?:flag|env(?:ironment)?\s+var(?:iable)?|config(?:uration)?\s+(?:key|option|value)|parameter|argument|option|field|header|method|endpoint)\b[^.\n]{0,20}\b(?:(?:is\s+(?:named|called)|defaults?\s+to|must\s+be|should\s+be|takes?)\b|is\s+`)/i,
    // "the function/signature/return type is ..." — exact API shape from memory
    /\b(?:the\s+)?(?:function\s+signature|method\s+signature|return\s+type|the\s+signature|the\s+interface|the\s+schema|the\s+type)\b[^.\n]{0,20}\bis\b/i,
    // "per the OWASP/ISO/IEEE/NIST/POSIX ... " naming a standards body's exact requirement
    /\b(?:per|according\s+to|under)\s+(?:the\s+)?(?:OWASP|ISO|IEC|IEEE|NIST|POSIX|W3C|ECMA|ANSI|the\s+RFC)\b[^.\n]{0,40}\b(?:requires?|says?|states?|defines?|mandates?|category)\b/i,
];
|
|
80
|
+
// Source-consulted evidence — any sign the model actually opened the source
|
|
81
|
+
// this turn rather than reciting it.
|
|
82
|
+
// Regexes whose match in the assistant text counts as evidence that a source
// was consulted. Deliberately generous: any single hit suppresses the warning.
// None carries the `g`/`y` flag, so `.test()` is stateless.
const SOURCE_CONSULTED = [
    /```[\s\S]*?```/, // a fenced quote FROM the source
    /\bhttps?:\/\/\S+/, // a link to the source
    /\[[^\]]+\]\(\S+\)/, // a markdown link
    /\b(?:I\s+(?:read|checked|opened|grepped|searched|verified|confirmed)|reading|let\s+me\s+(?:read|check|grep|open))\b[^.\n]{0,40}\b(?:the\s+)?(?:file|source|spec|docs?|RFC|standard|code|config|man\s+page|repo)\b/i,
    // "from the file …" / "in `x.ts` line 12". The trailing `\b` applies only to
    // the word alternatives: a closing backtick followed by whitespace or
    // punctuation has no word boundary, so guarding it with `\b` would make the
    // backtick form unmatchable in ordinary prose.
    /\b(?:from|in|per)\s+(?:the\s+)?(?:(?:file|source|repo|line\s+\d+)\b|`[^`]+`)/i,
    /\b[\w./-]+\.(?:ts|tsx|js|jsx|py|go|rs|java|c|cpp|h|json|ya?ml|toml|md|txt|conf|cfg|ini|sh)\b/i, // a concrete filename referenced
    /\b(?:line|L)\s*\d+\b/i, // a line-number reference (read evidence)
];
|
|
91
|
+
// True as soon as one regex in `patterns` matches `text`; false when none do
// (including when `patterns` is empty). Patterns are tried in array order.
function anyMatch(patterns, text) {
    for (const pattern of patterns) {
        if (pattern.test(text)) {
            return true;
        }
    }
    return false;
}
|
|
94
|
+
/**
 * Source-gate verifier kernel. Emits at most one WARN finding when the
 * assistant text contains a canonical-recall claim and nothing in the turn
 * indicates that a source was consulted. It never emits a blocking finding.
 */
export const sourceGateKernel = {
    slug: "source_gate",
    describe: "Canonical recall: an assertion about a specific system/spec/standard's exact contents should reference reading the source — warns when it recites from memory",
    /**
     * @param ctx Verifier context. Reads `ctx.profile`, `ctx.assistantText`
     *   and `ctx.touchedFiles`.
     * @returns {Promise<Array<{severity: string, kernel: string, message: string}>>}
     *   An empty array when the gate does not apply or is satisfied, otherwise
     *   a single-element array holding the warn finding.
     */
    async run(ctx) {
        if (!APPLIES_TO_PROFILES.has(ctx.profile))
            return [];
        const text = ctx.assistantText;
        // A turn with no assistant text has nothing to gate; bail out instead
        // of throwing on `.length` of undefined.
        if (typeof text !== "string" || text.length < MIN_LENGTH_TO_GATE)
            return [];
        // Is there a canonical-recall assertion at all? Patterns are tried in
        // list order and the scan stops at the first one that matches.
        let claimMatch = null;
        for (const re of CLAIM_PATTERNS) {
            claimMatch = re.exec(text);
            if (claimMatch)
                break;
        }
        if (!claimMatch)
            return [];
        // If the model READ a file this turn (Read/Edit on a real file), the
        // source was consulted by definition — don't warn.
        if (ctx.touchedFiles && ctx.touchedFiles.length > 0)
            return [];
        // Did the text itself show the source was consulted (quote / link /
        // filename / "I read the file")?
        if (anyMatch(SOURCE_CONSULTED, text))
            return [];
        return [
            {
                severity: "warn",
                kernel: "source_gate",
                message: `Canonical claim ("${claimMatch[0].slice(0, 60).trim()}…") was asserted with no sign the source was read. ` +
                    `Don't recite specs/standards/signatures/defaults from memory — they drift and version-skew. ` +
                    `Read the actual file/spec/docs and quote or cite it, or flag the claim as from-memory and unverified.`,
            },
        ];
    },
};
|
|
126
|
+
//# sourceMappingURL=source_gate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"source_gate.js","sourceRoot":"","sources":["../../src/verifiers/source_gate.ts"],"names":[],"mappings":"AAAA,sEAAsE;AACtE,6DAA6D;AAC7D,EAAE;AACF,6EAA6E;AAC7E,0EAA0E;AAC1E,2EAA2E;AAC3E,uEAAuE;AACvE,2EAA2E;AAC3E,2EAA2E;AAC3E,+BAA+B;AAC/B,EAAE;AACF,2EAA2E;AAC3E,4EAA4E;AAC5E,0EAA0E;AAC1E,0EAA0E;AAC1E,2EAA2E;AAC3E,QAAQ;AACR,EAAE;AACF,gBAAgB;AAChB,4EAA4E;AAC5E,2EAA2E;AAC3E,+EAA+E;AAC/E,4EAA4E;AAC5E,qCAAqC;AACrC,gFAAgF;AAChF,8EAA8E;AAC9E,6EAA6E;AAC7E,6CAA6C;AAC7C,+EAA+E;AAC/E,gFAAgF;AAChF,EAAE;AACF,6EAA6E;AAC7E,yEAAyE;AACzE,uEAAuE;AACvE,uEAAuE;AAIvE,4EAA4E;AAC5E,8EAA8E;AAC9E,+EAA+E;AAC/E,wEAAwE;AACxE,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,MAAM;IACN,UAAU;IACV,SAAS;IACT,KAAK;IACL,UAAU;IACV,WAAW;IACX,MAAM;IACN,OAAO;IACP,UAAU;IACV,UAAU;IACV,SAAS;IACT,QAAQ;IACR,SAAS;IACT,wEAAwE;IACxE,OAAO;IACP,4EAA4E;IAC5E,6EAA6E;IAC7E,QAAQ;IACR,UAAU;IACV,SAAS;CACV,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B,0EAA0E;AAC1E,0EAA0E;AAC1E,iDAAiD;AACjD,MAAM,cAAc,GAAa;IAC/B,2DAA2D;IAC3D,uOAAuO;IACvO,yFAAyF;IACzF,2HAA2H;IAC3H,+EAA+E;IAC/E,0GAA0G;IAC1G,+EAA+E;IAC/E,wPAAwP;IACxP,4EAA4E;IAC5E,qJAAqJ;IACrJ,uFAAuF;IACvF,iLAAiL;CAClL,CAAC;AAEF,4EAA4E;AAC5E,qCAAqC;AACrC,MAAM,gBAAgB,GAAa;IACjC,gBAAgB,EAA+C,iCAAiC;IAChG,kBAAkB,EAA6C,uBAAuB;IACtF,mBAAmB,EAA4C,kBAAkB;IACjF,oNAAoN;IACpN,2EAA2E,EAAE,0CAA0C;IACvH,+FAA+F,EAAE,iCAAiC;IAClI,uBAAuB,EAAwC,0CAA0C;CAC1G,CAAC;AAEF,SAAS,QAAQ,CAAC,QAAkB,EAAE,IAAY;IAChD,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,MAAM,gBAAgB,GAAa;IACxC,IAAI,EAAE,aAAa;IACnB,QAAQ,EACN,+JAA+J;IACjK,KAAK,CAAC,GAAG,CAAC,GAAoB;QAC5B,IAAI,CAAC,mBAAmB,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC;YAAE,OAAO,EAAE,CAAC;QACrD,MAAM,IAAI,GAAG,GAAG,CAAC,aAAa,CAAC;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,kBAAkB;YAAE,OAAO,EAAE,CAAC;QAEhD,gDAAgD;QAChD,MAAM,UAAU,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3E,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;QAE3B,qEAAqE;QACrE,mDAA
mD;QACnD,IAAI,GAAG,CAAC,YAAY,IAAI,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,EAAE,CAAC;QAE/D,oEAAoE;QACpE,iCAAiC;QACjC,IAAI,QAAQ,CAAC,gBAAgB,EAAE,IAAI,CAAC;YAAE,OAAO,EAAE,CAAC;QAEhD,OAAO;YACL;gBACE,QAAQ,EAAE,MAAM;gBAChB,MAAM,EAAE,aAAa;gBACrB,OAAO,EACL,qBAAqB,UAAU,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,qDAAqD;oBAC3G,8FAA8F;oBAC9F,uGAAuG;aAC1G;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vextlabs/theron-cli",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Theron CLI — an agentic coding assistant that verifies its own output. Runs an agentic tool loop in your terminal, then runs post-turn checks (types, tests, lint, arithmetic, citations) over the files it touched; blocking failures feed the next turn so it self-corrects. Across a 33-profile domain council. From Vext Labs.",
|
|
5
5
|
"author": "Vext Labs Inc.",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
"build": "tsc -p .",
|
|
30
30
|
"dev": "tsc -p . --watch",
|
|
31
31
|
"start": "node ./bin/theron.js",
|
|
32
|
-
"test": "npm run build && node test/sweep.mjs",
|
|
32
|
+
"test": "npm run build && node test/sweep.mjs && node test/pentest.unit.test.mjs && node test/verifiers.unit.test.mjs",
|
|
33
33
|
"test:integration": "npm run build && THERON_RUN_INTEGRATION=1 node test/integration.mjs",
|
|
34
34
|
"prepublishOnly": "npm run build",
|
|
35
35
|
"release": "npm version patch && npm publish",
|