@beingmartinbmc/ojas 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +308 -0
- package/dist/aahar/index.d.ts +179 -0
- package/dist/aahar/index.d.ts.map +1 -0
- package/dist/aahar/index.js +657 -0
- package/dist/aahar/index.js.map +1 -0
- package/dist/aahar/scoring.d.ts +85 -0
- package/dist/aahar/scoring.d.ts.map +1 -0
- package/dist/aahar/scoring.js +268 -0
- package/dist/aahar/scoring.js.map +1 -0
- package/dist/agni/index.d.ts +113 -0
- package/dist/agni/index.d.ts.map +1 -0
- package/dist/agni/index.js +328 -0
- package/dist/agni/index.js.map +1 -0
- package/dist/agni/model-router.d.ts +77 -0
- package/dist/agni/model-router.d.ts.map +1 -0
- package/dist/agni/model-router.js +163 -0
- package/dist/agni/model-router.js.map +1 -0
- package/dist/agni/response-distiller.d.ts +37 -0
- package/dist/agni/response-distiller.d.ts.map +1 -0
- package/dist/agni/response-distiller.js +193 -0
- package/dist/agni/response-distiller.js.map +1 -0
- package/dist/agni/tiktoken-adapter.d.ts +55 -0
- package/dist/agni/tiktoken-adapter.d.ts.map +1 -0
- package/dist/agni/tiktoken-adapter.js +113 -0
- package/dist/agni/tiktoken-adapter.js.map +1 -0
- package/dist/chikitsa/index.d.ts +130 -0
- package/dist/chikitsa/index.d.ts.map +1 -0
- package/dist/chikitsa/index.js +565 -0
- package/dist/chikitsa/index.js.map +1 -0
- package/dist/demo.d.ts +15 -0
- package/dist/demo.d.ts.map +1 -0
- package/dist/demo.js +278 -0
- package/dist/demo.js.map +1 -0
- package/dist/index.d.ts +201 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +588 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp/audit.d.ts +39 -0
- package/dist/mcp/audit.d.ts.map +1 -0
- package/dist/mcp/audit.js +73 -0
- package/dist/mcp/audit.js.map +1 -0
- package/dist/mcp/contracts.d.ts +76 -0
- package/dist/mcp/contracts.d.ts.map +1 -0
- package/dist/mcp/contracts.js +44 -0
- package/dist/mcp/contracts.js.map +1 -0
- package/dist/mcp/envelope.d.ts +107 -0
- package/dist/mcp/envelope.d.ts.map +1 -0
- package/dist/mcp/envelope.js +162 -0
- package/dist/mcp/envelope.js.map +1 -0
- package/dist/mcp/registry.d.ts +110 -0
- package/dist/mcp/registry.d.ts.map +1 -0
- package/dist/mcp/registry.js +258 -0
- package/dist/mcp/registry.js.map +1 -0
- package/dist/mcp/server.d.ts +26 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +107 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/tools/agent.d.ts +4 -0
- package/dist/mcp/tools/agent.d.ts.map +1 -0
- package/dist/mcp/tools/agent.js +300 -0
- package/dist/mcp/tools/agent.js.map +1 -0
- package/dist/mcp/tools/context.d.ts +4 -0
- package/dist/mcp/tools/context.d.ts.map +1 -0
- package/dist/mcp/tools/context.js +261 -0
- package/dist/mcp/tools/context.js.map +1 -0
- package/dist/mcp/tools/index.d.ts +5 -0
- package/dist/mcp/tools/index.d.ts.map +1 -0
- package/dist/mcp/tools/index.js +20 -0
- package/dist/mcp/tools/index.js.map +1 -0
- package/dist/mcp/tools/memory.d.ts +4 -0
- package/dist/mcp/tools/memory.d.ts.map +1 -0
- package/dist/mcp/tools/memory.js +220 -0
- package/dist/mcp/tools/memory.js.map +1 -0
- package/dist/mcp/tools/output.d.ts +4 -0
- package/dist/mcp/tools/output.d.ts.map +1 -0
- package/dist/mcp/tools/output.js +206 -0
- package/dist/mcp/tools/output.js.map +1 -0
- package/dist/mcp/tools/recovery.d.ts +4 -0
- package/dist/mcp/tools/recovery.d.ts.map +1 -0
- package/dist/mcp/tools/recovery.js +165 -0
- package/dist/mcp/tools/recovery.js.map +1 -0
- package/dist/mcp/tools/registrar.d.ts +4 -0
- package/dist/mcp/tools/registrar.d.ts.map +1 -0
- package/dist/mcp/tools/registrar.js +17 -0
- package/dist/mcp/tools/registrar.js.map +1 -0
- package/dist/mcp/tools/report.d.ts +4 -0
- package/dist/mcp/tools/report.d.ts.map +1 -0
- package/dist/mcp/tools/report.js +68 -0
- package/dist/mcp/tools/report.js.map +1 -0
- package/dist/mcp/tools/shared.d.ts +37 -0
- package/dist/mcp/tools/shared.d.ts.map +1 -0
- package/dist/mcp/tools/shared.js +214 -0
- package/dist/mcp/tools/shared.js.map +1 -0
- package/dist/mcp/trace.d.ts +47 -0
- package/dist/mcp/trace.d.ts.map +1 -0
- package/dist/mcp/trace.js +216 -0
- package/dist/mcp/trace.js.map +1 -0
- package/dist/nidra/index.d.ts +275 -0
- package/dist/nidra/index.d.ts.map +1 -0
- package/dist/nidra/index.js +889 -0
- package/dist/nidra/index.js.map +1 -0
- package/dist/persistence/migrations.d.ts +10 -0
- package/dist/persistence/migrations.d.ts.map +1 -0
- package/dist/persistence/migrations.js +77 -0
- package/dist/persistence/migrations.js.map +1 -0
- package/dist/persistence/sqlite.d.ts +30 -0
- package/dist/persistence/sqlite.d.ts.map +1 -0
- package/dist/persistence/sqlite.js +209 -0
- package/dist/persistence/sqlite.js.map +1 -0
- package/dist/persistence/types.d.ts +104 -0
- package/dist/persistence/types.d.ts.map +1 -0
- package/dist/persistence/types.js +5 -0
- package/dist/persistence/types.js.map +1 -0
- package/dist/pulse/index.d.ts +144 -0
- package/dist/pulse/index.d.ts.map +1 -0
- package/dist/pulse/index.js +453 -0
- package/dist/pulse/index.js.map +1 -0
- package/dist/raksha/classifiers/http-classifier.d.ts +26 -0
- package/dist/raksha/classifiers/http-classifier.d.ts.map +1 -0
- package/dist/raksha/classifiers/http-classifier.js +62 -0
- package/dist/raksha/classifiers/http-classifier.js.map +1 -0
- package/dist/raksha/classifiers/index.d.ts +5 -0
- package/dist/raksha/classifiers/index.d.ts.map +1 -0
- package/dist/raksha/classifiers/index.js +8 -0
- package/dist/raksha/classifiers/index.js.map +1 -0
- package/dist/raksha/classifiers/onnx-classifier.d.ts +41 -0
- package/dist/raksha/classifiers/onnx-classifier.d.ts.map +1 -0
- package/dist/raksha/classifiers/onnx-classifier.js +99 -0
- package/dist/raksha/classifiers/onnx-classifier.js.map +1 -0
- package/dist/raksha/hallucination-detectors.d.ts +106 -0
- package/dist/raksha/hallucination-detectors.d.ts.map +1 -0
- package/dist/raksha/hallucination-detectors.js +327 -0
- package/dist/raksha/hallucination-detectors.js.map +1 -0
- package/dist/raksha/index.d.ts +168 -0
- package/dist/raksha/index.d.ts.map +1 -0
- package/dist/raksha/index.js +597 -0
- package/dist/raksha/index.js.map +1 -0
- package/dist/raksha/prompt-injection-detectors.d.ts +30 -0
- package/dist/raksha/prompt-injection-detectors.d.ts.map +1 -0
- package/dist/raksha/prompt-injection-detectors.js +153 -0
- package/dist/raksha/prompt-injection-detectors.js.map +1 -0
- package/dist/types.d.ts +1115 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +71 -0
- package/dist/types.js.map +1 -0
- package/dist/util/calibration.d.ts +32 -0
- package/dist/util/calibration.d.ts.map +1 -0
- package/dist/util/calibration.js +108 -0
- package/dist/util/calibration.js.map +1 -0
- package/dist/util/id.d.ts +2 -0
- package/dist/util/id.d.ts.map +1 -0
- package/dist/util/id.js +9 -0
- package/dist/util/id.js.map +1 -0
- package/dist/vyayam/index.d.ts +76 -0
- package/dist/vyayam/index.d.ts.map +1 -0
- package/dist/vyayam/index.js +528 -0
- package/dist/vyayam/index.js.map +1 -0
- package/dist/vyayam/tool-fault-proxy.d.ts +95 -0
- package/dist/vyayam/tool-fault-proxy.d.ts.map +1 -0
- package/dist/vyayam/tool-fault-proxy.js +170 -0
- package/dist/vyayam/tool-fault-proxy.js.map +1 -0
- package/docs/ARCHITECTURE.md +162 -0
- package/docs/BACKLOG.md +342 -0
- package/docs/CONFIGURATION.md +305 -0
- package/docs/EVIDENCE.md +232 -0
- package/docs/EVIDENCE_MATRIX.md +293 -0
- package/docs/KNOWN_FAILURES.md +367 -0
- package/docs/MCP.md +614 -0
- package/docs/MODULES.md +368 -0
- package/docs/SECURITY.md +251 -0
- package/docs/TRUST.md +88 -0
- package/docs/assets/ojas-hero.png +0 -0
- package/package.json +101 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Built-in `HallucinationDetector` implementations.
|
|
4
|
+
*
|
|
5
|
+
* These detectors are deliberately **dep-free and runtime-fast**. They
|
|
6
|
+
* are not state-of-the-art; their purpose is to give Ojas users a
|
|
7
|
+
* sensible default and a working contract so they can later plug in
|
|
8
|
+
* heavier ML-backed detectors (encoder grounding models, LLM judges)
|
|
9
|
+
* via the same interface.
|
|
10
|
+
*
|
|
11
|
+
* Three detectors are shipped:
|
|
12
|
+
*
|
|
13
|
+
* 1. **`BestOfNInconsistencyDetector`** (black-box consistency)
|
|
14
|
+
* Scores how much the agent's output disagrees with alternative
|
|
15
|
+
* samples of the same prompt. We use a character-shingle Jaccard
|
|
16
|
+
* distance rather than embeddings so we don't need ML. Risk =
|
|
17
|
+
* mean pairwise distance from `output` to each sample, clamped
|
|
18
|
+
* to [0, 1].
|
|
19
|
+
*
|
|
20
|
+
* 2. **`ClaimLevelDetector`** (long-text grounding)
|
|
21
|
+
* Splits `output` into sentence-claims, then for each claim
|
|
22
|
+
* measures n-gram overlap with each `context` item. A claim is
|
|
23
|
+
* "unsupported" when its best-match overlap is below a
|
|
24
|
+
* threshold. Risk = fraction of unsupported claims, weighted by
|
|
25
|
+
* claim length.
|
|
26
|
+
*
|
|
27
|
+
* 3. **`AbstentionDetector`** (refusal recognition)
|
|
28
|
+
* Treats appropriate abstention as a **positive** signal: if
|
|
29
|
+
* the matched hedging / refusal text reaches the configured share of the output, returns risk `1 - share` and
|
|
30
|
+
* `abstention: true`. The caller can then route the task to a
|
|
31
|
+
* stronger model rather than penalising the abstention.
|
|
32
|
+
*
|
|
33
|
+
* Composition: callers wanting an ensemble can wrap these via
|
|
34
|
+
* `EnsembleHallucinationDetector` (also shipped).
|
|
35
|
+
*/
|
|
36
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
37
|
+
exports.EnsembleHallucinationDetector = exports.AbstentionDetector = exports.ClaimLevelDetector = exports.BestOfNInconsistencyDetector = void 0;
|
|
38
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
|
39
|
+
/**
 * Clamp a score into the closed unit interval [0, 1].
 *
 * @param {number} v - Candidate score.
 * @returns {number} `v` limited to [0, 1]; 0 when `v` is NaN, ±Infinity or not a number.
 */
function clamp01(v) {
    // Number.isFinite does not coerce, so strings and NaN fall through to 0.
    if (Number.isFinite(v)) {
        if (v <= 0) {
            return 0;
        }
        return v >= 1 ? 1 : v;
    }
    return 0;
}
|
|
44
|
+
/**
 * Canonicalise text for fuzzy matching.
 *
 * Steps, in order: lowercase, replace every run of Unicode punctuation or
 * symbol characters with a single space, collapse whitespace runs to one
 * space, and trim both ends.
 *
 * @param {string} text - Raw text.
 * @returns {string} The canonical form (may be the empty string).
 */
function normalize(text) {
    const lowered = text.toLowerCase();
    const withoutPunctuation = lowered.replace(/[\p{P}\p{S}]+/gu, ' ');
    const singleSpaced = withoutPunctuation.replace(/\s+/g, ' ');
    return singleSpaced.trim();
}
|
|
52
|
+
/**
 * Collect the distinct character shingles (substrings of length `k`) of the
 * normalised form of `text`.
 *
 * When the normalised text is no longer than `k`, the result is a
 * single-element set holding the whole normalised text (which is the empty
 * string for empty input).
 *
 * @param {string} text - Raw text; it is passed through `normalize` first.
 * @param {number} [k=4] - Shingle length in UTF-16 code units.
 * @returns {Set<string>} The set of shingles.
 */
function charShingles(text, k = 4) {
    const norm = normalize(text);
    const shingles = new Set();
    if (norm.length <= k) {
        shingles.add(norm);
        return shingles;
    }
    const lastStart = norm.length - k;
    let start = 0;
    while (start <= lastStart) {
        shingles.add(norm.slice(start, start + k));
        start += 1;
    }
    return shingles;
}
|
|
63
|
+
/**
 * Jaccard similarity |A ∩ B| / |A ∪ B| of two sets.
 *
 * @param {Set<*>} a - First set.
 * @param {Set<*>} b - Second set.
 * @returns {number} Similarity in [0, 1]; two empty sets count as identical (1).
 */
function jaccard(a, b) {
    const bothEmpty = a.size === 0 && b.size === 0;
    if (bothEmpty) {
        return 1;
    }
    const shared = [...a].reduce((count, item) => (b.has(item) ? count + 1 : count), 0);
    const unionSize = a.size + b.size - shared;
    if (unionSize === 0) {
        return 1;
    }
    return shared / unionSize;
}
|
|
74
|
+
/**
 * Break a paragraph into rough sentence-level claims without a real
 * sentence segmenter.
 *
 * A boundary is whitespace that follows `.`, `!` or `?` and is followed by
 * an ASCII capital letter or a digit. Each piece is trimmed and empty
 * pieces are dropped. Abbreviation-heavy text may come out slightly
 * over-fragmented.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Trimmed, non-empty claims in their original order.
 */
function splitClaims(text) {
    const claims = [];
    for (const piece of text.split(/(?<=[.!?])\s+(?=[A-Z\d])/g)) {
        const trimmed = piece.trim();
        if (trimmed.length > 0) {
            claims.push(trimmed);
        }
    }
    return claims;
}
|
|
87
|
+
/**
 * Black-box consistency detector.
 *
 * Scores how far `output` is from alternative samples of the same prompt,
 * using the character-shingle Jaccard distance (1 - similarity). The risk is
 * the mean distance from `output` to each sample.
 */
class BestOfNInconsistencyDetector {
    name = 'bestofn/n-gram-jaccard';
    policy;
    /**
     * @param {{shingleK?: number, minSamples?: number}} [policy] - Optional
     *   overrides; `shingleK` defaults to 4 and `minSamples` to 2.
     */
    constructor(policy = {}) {
        this.policy = {
            shingleK: policy.shingleK ?? 4,
            minSamples: policy.minSamples ?? 2,
        };
    }
    /**
     * @param {{output: string, samples?: string[]}} input - The output under
     *   test and the alternative generations to compare it with.
     * @returns {Promise<{riskScore: number, confidence: number, detectedBy: string, reasons: string[]}>}
     *   Risk 0 with confidence 0.05 when there are too few samples; otherwise
     *   the mean distance, with confidence `samples.length / 10` capped at 1.
     */
    async detect(input) {
        const samples = input.samples ?? [];
        // A mean over zero samples is 0/0. Always require at least one sample,
        // even when the policy asks for 0 (or is NaN), so NaN never reaches
        // the reasons text.
        const requiredSamples = Math.max(1, this.policy.minSamples) || 1;
        if (samples.length < requiredSamples) {
            return {
                riskScore: 0,
                confidence: 0.05,
                detectedBy: this.name,
                reasons: [
                    `BestOfNInconsistencyDetector requires ${requiredSamples}+ samples; got ${samples.length}. ` +
                        'Score is meaningless without alternative generations to compare against.',
                ],
            };
        }
        const outputShingles = charShingles(input.output, this.policy.shingleK);
        let distSum = 0;
        let distMax = 0;
        for (const sample of samples) {
            const sampleShingles = charShingles(sample, this.policy.shingleK);
            const dist = 1 - jaccard(outputShingles, sampleShingles);
            distSum += dist;
            if (dist > distMax) {
                distMax = dist;
            }
        }
        const meanDist = distSum / samples.length;
        return {
            riskScore: clamp01(meanDist),
            // Confidence rises with sample size, saturating around n=10.
            confidence: clamp01(samples.length / 10),
            detectedBy: this.name,
            reasons: [
                `Mean pairwise character-shingle Jaccard distance from output to ${samples.length} samples = ${meanDist.toFixed(3)} (max = ${distMax.toFixed(3)}).`,
            ],
        };
    }
}
|
|
132
|
+
exports.BestOfNInconsistencyDetector = BestOfNInconsistencyDetector;
|
|
133
|
+
/**
 * Long-text grounding detector.
 *
 * Splits `output` into sentence-level claims and, for each claim, measures
 * the fraction of its character shingles found in the best-matching context
 * item. A claim is unsupported when that fraction is below
 * `groundingThreshold`. The overall risk is the share of unsupported claims,
 * weighted by claim length in characters.
 */
class ClaimLevelDetector {
    name = 'claim-level/n-gram-grounding';
    policy;
    /**
     * @param {{shingleK?: number, groundingThreshold?: number}} [policy] -
     *   Optional overrides; `shingleK` defaults to 4 and `groundingThreshold`
     *   to 0.25.
     */
    constructor(policy = {}) {
        this.policy = {
            shingleK: policy.shingleK ?? 4,
            groundingThreshold: policy.groundingThreshold ?? 0.25,
        };
    }
    /**
     * @param {{output: string, context?: string[]}} input - The output under
     *   test and the context items to ground it against.
     * @returns {Promise<object>} An assessment with `riskScore`, `confidence`,
     *   `detectedBy` and `reasons`. When both context and claims are present
     *   it also carries `claims`, one `{claim, riskScore, groundedIn}` entry
     *   per claim.
     */
    async detect(input) {
        const context = input.context ?? [];
        if (context.length === 0) {
            return {
                riskScore: 0,
                confidence: 0.05,
                detectedBy: this.name,
                reasons: [
                    'ClaimLevelDetector requires at least one context item to ground claims against. Score is meaningless without context.',
                ],
            };
        }
        const { shingleK, groundingThreshold } = this.policy;
        const ctxShingles = context.map((c, i) => ({ idx: `ctx-${i}`, shingles: charShingles(c, shingleK) }));
        const claims = splitClaims(input.output);
        if (claims.length === 0) {
            return {
                riskScore: 0,
                confidence: 0.05,
                detectedBy: this.name,
                reasons: ['Output produced no parseable claims; treating as null-output, no risk.'],
            };
        }
        const claimAssessments = [];
        let unsupportedWeight = 0;
        let unsupportedCount = 0;
        let totalWeight = 0;
        for (const claim of claims) {
            const claimShingles = charShingles(claim, shingleK);
            // Best overlap = the highest fraction of claim shingles found in any single context item.
            let bestOverlap = 0;
            let bestIdx;
            // A claim with no shingles cannot overlap anything; skip the scan.
            if (claimShingles.size > 0) {
                for (const { idx, shingles } of ctxShingles) {
                    let inter = 0;
                    for (const sh of claimShingles) {
                        if (shingles.has(sh)) {
                            inter += 1;
                        }
                    }
                    const overlap = inter / claimShingles.size;
                    if (overlap > bestOverlap) {
                        bestOverlap = overlap;
                        bestIdx = idx;
                    }
                }
            }
            // Per-claim risk falls linearly from 1 (no overlap) to 0 (overlap at or above the threshold).
            const claimRisk = clamp01(1 - bestOverlap / groundingThreshold);
            const weight = Math.max(1, claim.length);
            totalWeight += weight;
            // Use one predicate for both the weighted risk and the reported
            // count, so the summary line always agrees with the score.
            if (bestOverlap < groundingThreshold) {
                unsupportedWeight += weight;
                unsupportedCount += 1;
            }
            claimAssessments.push({
                claim,
                riskScore: claimRisk,
                groundedIn: bestIdx ? [bestIdx] : [],
            });
        }
        const overallRisk = totalWeight === 0 ? 0 : unsupportedWeight / totalWeight;
        return {
            riskScore: clamp01(overallRisk),
            confidence: clamp01(claims.length / 5), // saturates around 5 claims
            detectedBy: this.name,
            reasons: [
                `${unsupportedCount}/${claims.length} claim(s) had best-match grounding overlap below ${groundingThreshold.toFixed(2)} against ${context.length} context item(s).`,
            ],
            claims: claimAssessments,
        };
    }
}
|
|
210
|
+
exports.ClaimLevelDetector = ClaimLevelDetector;
|
|
211
|
+
// ─── 3. AbstentionDetector ───────────────────────────────────────────────────
|
|
212
|
+
/**
 * Canonical English abstention / hedging phrasings, matched
 * case-insensitively. The list is deliberately conservative: missing a
 * hedge is preferred over mistaking a substantive answer for one.
 */
const ABSTENTION_PATTERNS = [
    /\bi (?:do not|don't|cannot|can't|won't) (?:know|determine|tell|say|answer|verify)\b/i,
    /\bi'?m not (?:sure|certain|confident|able)\b/i,
    /\b(?:unable to|not able to) (?:answer|determine|confirm|verify|provide)\b/i,
    /\binsufficient (?:context|information|data|evidence)\b/i,
    /\b(?:no|not enough) (?:context|information|evidence) (?:to|provided)\b/i,
    /\bi need more (?:context|information|details)\b/i,
    /\bplease provide (?:more|additional) (?:context|details|information)\b/i,
    /\b(?:cannot|can'?t) (?:find|locate) (?:this|that|the answer|relevant)\b/i,
    /\bi (?:do not|don't) have (?:access to|enough information)\b/i,
];
/**
 * Refusal-recognition detector.
 *
 * Takes the first match of each abstention pattern, sums the matched
 * characters, and flags the output as an abstention when that sum reaches
 * `minAbstentionFraction` of the output length.
 */
class AbstentionDetector {
    name = 'abstention/canonical-hedges';
    policy;
    /**
     * @param {{minAbstentionFraction?: number}} [policy] - Optional override;
     *   `minAbstentionFraction` defaults to 0.15.
     */
    constructor(policy = {}) {
        const { minAbstentionFraction = null } = policy;
        this.policy = {
            minAbstentionFraction: minAbstentionFraction ?? 0.15,
        };
    }
    /**
     * @param {{output: string}} input - The output under test.
     * @returns {Promise<{riskScore: number, confidence: number, detectedBy: string, reasons: string[], abstention: boolean}>}
     *   An assessment whose `abstention` flag says whether the output reads
     *   as a refusal. Empty output is an abstention with confidence 1.
     */
    async detect(input) {
        const text = input.output;
        const detectedBy = this.name;
        if (text.length === 0) {
            return {
                riskScore: 0,
                confidence: 1,
                detectedBy,
                reasons: ['Empty output — trivially an abstention.'],
                abstention: true,
            };
        }
        // Only the first occurrence of each pattern is counted.
        const hits = ABSTENTION_PATTERNS
            .map((pattern) => pattern.exec(text))
            .filter((match) => Boolean(match));
        const matchedChars = hits.reduce((total, match) => total + match[0].length, 0);
        const fraction = matchedChars / text.length;
        if (fraction >= this.policy.minAbstentionFraction) {
            return {
                // The larger the abstaining share of the text, the lower the risk.
                riskScore: clamp01(1 - fraction),
                confidence: 0.9,
                detectedBy,
                reasons: hits.map((match) => `Matched abstention pattern: "${match[0]}".`),
                abstention: true,
            };
        }
        return {
            riskScore: 0,
            confidence: 0.05,
            detectedBy,
            reasons: ['Output does not appear to be an abstention.'],
            abstention: false,
        };
    }
}
|
|
271
|
+
exports.AbstentionDetector = AbstentionDetector;
|
|
272
|
+
/**
 * Combines several hallucination detectors into one.
 *
 * All detectors run concurrently. The first one that reports an abstention
 * with confidence >= 0.5 decides the result on its own. Otherwise the risk
 * is the average of the individual risks weighted by `weight * confidence`,
 * and the confidence is the plain mean of the individual confidences.
 */
class EnsembleHallucinationDetector {
    name;
    entries;
    /**
     * @param {Array<{detector: object, weight?: number}>} entries - Detectors
     *   to combine; a missing `weight` defaults to 1.
     * @throws {Error} When `entries` is empty.
     */
    constructor(entries) {
        if (entries.length === 0) {
            throw new Error('EnsembleHallucinationDetector requires at least one entry.');
        }
        this.entries = entries.map(({ detector, weight }) => ({ detector, weight: weight ?? 1 }));
        const memberNames = this.entries.map(({ detector }) => detector.name);
        this.name = `ensemble[${memberNames.join(',')}]`;
    }
    /**
     * @param {object} input - Detector input, forwarded unchanged to every member.
     * @returns {Promise<object>} The combined assessment. `abstention: true`
     *   is set only on the short-circuit path.
     */
    async detect(input) {
        const pending = this.entries.map(async (entry) => {
            const assessment = await entry.detector.detect(input);
            return { assessment, weight: entry.weight };
        });
        const results = await Promise.all(pending);
        // Short-circuit: the first confident abstention wins outright.
        for (const { assessment } of results) {
            if (assessment.abstention && assessment.confidence >= 0.5) {
                return {
                    riskScore: assessment.riskScore,
                    confidence: assessment.confidence,
                    detectedBy: this.name,
                    reasons: [
                        'Ensemble short-circuited on abstention.',
                        ...assessment.reasons,
                    ],
                    abstention: true,
                };
            }
        }
        // Each detector contributes to the risk in proportion to weight * confidence.
        let effectiveTotal = 0;
        let weightedRisk = 0;
        let confidenceTotal = 0;
        const reasons = [];
        for (const { assessment, weight } of results) {
            const { riskScore, confidence, detectedBy } = assessment;
            const effective = weight * confidence;
            effectiveTotal += effective;
            weightedRisk += effective * riskScore;
            // Confidence is averaged without weights: every member counts equally.
            confidenceTotal += confidence;
            reasons.push(`[${detectedBy}] risk=${riskScore.toFixed(3)} conf=${confidence.toFixed(2)} weight=${weight}`);
        }
        const overallRisk = effectiveTotal === 0 ? 0 : weightedRisk / effectiveTotal;
        const overallConfidence = confidenceTotal / results.length;
        return {
            riskScore: clamp01(overallRisk),
            confidence: clamp01(overallConfidence),
            detectedBy: this.name,
            reasons,
        };
    }
}
|
|
326
|
+
exports.EnsembleHallucinationDetector = EnsembleHallucinationDetector;
|
|
327
|
+
//# sourceMappingURL=hallucination-detectors.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hallucination-detectors.js","sourceRoot":"","sources":["../../src/raksha/hallucination-detectors.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;;;AASH,gFAAgF;AAEhF,SAAS,OAAO,CAAC,CAAS;IACxB,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AACrC,CAAC;AAED,sFAAsF;AACtF,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,WAAW,EAAE;SACb,OAAO,CAAC,iBAAiB,EAAE,GAAG,CAAC;SAC/B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,qEAAqE;AACrE,SAAS,YAAY,CAAC,IAAY,EAAE,CAAC,GAAG,CAAC;IACvC,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC7B,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC7C,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAC;IAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QAC7C,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,2CAA2C;AAC3C,SAAS,OAAO,CAAI,CAAS,EAAE,CAAS;IACtC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,IAAI,CAAC;QAAE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAAE,KAAK,IAAI,CAAC,CAAC;IAC5C,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,KAAK,CAAC;IACtC,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC;AACzC,CAAC;AAED;;;;;;GAMG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,OAAO,IAAI;SACR,KAAK,CAAC,2BAA2B,CAAC;SAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACjC,CAAC;AAeD,MAAa,4BAA4B;IAC9B,IAAI,GAAG,wBAAwB,CAAC;IACxB,MAAM,CAAkC;IAEzD,YAAY,SAAgC,EAAE;QAC5C,IAAI,CAAC,MAAM,GAAG;YACZ,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,CAAC;YAC9B,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,CAAC;SACnC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAiC;QAC5C,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC;QACpC,IAAI,OAAO,C
AAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;YAC5C,OAAO;gBACL,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,IAAI;gBAChB,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,OAAO,EAAE;oBACP,yCAAyC,IAAI,CAAC,MAAM,CAAC,UAAU,kBAAkB,OAAO,CAAC,MAAM,IAAI;wBACjG,0EAA0E;iBAC7E;aACF,CAAC;QACJ,CAAC;QAED,MAAM,cAAc,GAAG,YAAY,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACxE,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,SAAS,GAAG,YAAY,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YACxD,MAAM,GAAG,GAAG,OAAO,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;YAC/C,MAAM,IAAI,GAAG,CAAC,GAAG,GAAG,CAAC;YACrB,OAAO,IAAI,IAAI,CAAC;YAChB,IAAI,IAAI,GAAG,OAAO;gBAAE,OAAO,GAAG,IAAI,CAAC;QACrC,CAAC;QACD,MAAM,QAAQ,GAAG,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC;QAE1C,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,QAAQ,CAAC;YAC5B,6DAA6D;YAC7D,UAAU,EAAE,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,EAAE,CAAC;YACxC,UAAU,EAAE,IAAI,CAAC,IAAI;YACrB,OAAO,EAAE;gBACP,mEAAmE,OAAO,CAAC,MAAM,cAAc,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;aACpJ;SACF,CAAC;IACJ,CAAC;CACF;AA/CD,oEA+CC;AAeD,MAAa,kBAAkB;IACpB,IAAI,GAAG,8BAA8B,CAAC;IAC9B,MAAM,CAAqC;IAE5D,YAAY,SAAmC,EAAE;QAC/C,IAAI,CAAC,MAAM,GAAG;YACZ,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,CAAC;YAC9B,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,IAAI;SACtD,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAiC;QAC5C,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,IAAI,EAAE,CAAC;QACpC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACL,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,IAAI;gBAChB,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,OAAO,EAAE;oBACP,uHAAuH;iBACxH;aACF,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,OAAO,CAAC,EAAE,EAAE,QAAQ,EAAE,YAAY,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;QAClH,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACzC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO;gBACL,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,IAAI;gBAChB,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,OAAO,EAAE,CAAC,wEAAwE,CAAC;aACpF,CAAC;QACJ,CAAC;QAED,MAAM,g
BAAgB,GAAsB,EAAE,CAAC;QAC/C,IAAI,iBAAiB,GAAG,CAAC,CAAC;QAC1B,IAAI,WAAW,GAAG,CAAC,CAAC;QAEpB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,aAAa,GAAG,YAAY,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAChE,0FAA0F;YAC1F,IAAI,WAAW,GAAG,CAAC,CAAC;YACpB,IAAI,OAA2B,CAAC;YAChC,KAAK,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,IAAI,WAAW,EAAE,CAAC;gBAC5C,IAAI,aAAa,CAAC,IAAI,KAAK,CAAC;oBAAE,SAAS;gBACvC,IAAI,KAAK,GAAG,CAAC,CAAC;gBACd,KAAK,MAAM,EAAE,IAAI,aAAa;oBAAE,IAAI,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;wBAAE,KAAK,IAAI,CAAC,CAAC;gBACjE,MAAM,OAAO,GAAG,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC;gBAC3C,IAAI,OAAO,GAAG,WAAW,EAAE,CAAC;oBAC1B,WAAW,GAAG,OAAO,CAAC;oBACtB,OAAO,GAAG,GAAG,CAAC;gBAChB,CAAC;YACH,CAAC;YAED,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,GAAG,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC;YAC5E,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;YACzC,WAAW,IAAI,MAAM,CAAC;YACtB,IAAI,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,EAAE,CAAC;gBACjD,iBAAiB,IAAI,MAAM,CAAC;YAC9B,CAAC;YAED,gBAAgB,CAAC,IAAI,CAAC;gBACpB,KAAK;gBACL,SAAS,EAAE,SAAS;gBACpB,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE;aACrC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,WAAW,GAAG,WAAW,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,iBAAiB,GAAG,WAAW,CAAC;QAC5E,MAAM,gBAAgB,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;QAEjF,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,WAAW,CAAC;YAC/B,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,4BAA4B;YACpE,UAAU,EAAE,IAAI,CAAC,IAAI;YACrB,OAAO,EAAE;gBACP,GAAG,gBAAgB,IAAI,MAAM,CAAC,MAAM,oDAAoD,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,OAAO,CAAC,MAAM,mBAAmB;aAC/K;YACD,MAAM,EAAE,gBAAgB;SACzB,CAAC;IACJ,CAAC;CACF;AAlFD,gDAkFC;AAED,gFAAgF;AAEhF;;;;GAIG;AACH,MAAM,mBAAmB,GAAa;IACpC,sFAAsF;IACtF,+CAA+C;IAC/C,4EAA4E;IAC5E,yDAAyD;IACzD,yEAAyE;IACzE,kDAAkD;IAClD,yEAAyE;IACzE,0EAA0E;IAC1E,+DAA+D;CAChE,CAAC;AAYF,MAAa,kBAAkB;IACpB,IAAI,GAAG,6BAA6B,CAAC;IAC7B,MAAM,CAAqC;IAE5D,YAAY,SAAmC,EAAE;QAC/C,IAAI,CAAC,MAAM,GAAG;YACZ,qBAAqB,EAAE,MAAM,CAAC,qBAAqB,IAA
I,IAAI;SAC5D,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAiC;QAC5C,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC;QAC1B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO;gBACL,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,CAAC;gBACb,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,OAAO,EAAE,CAAC,yCAAyC,CAAC;gBACpD,UAAU,EAAE,IAAI;aACjB,CAAC;QACJ,CAAC;QAED,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,KAAK,MAAM,EAAE,IAAI,mBAAmB,EAAE,CAAC;YACrC,MAAM,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,IAAI,CAAC,EAAE,CAAC;gBACN,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;gBAC5B,OAAO,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzD,CAAC;QACH,CAAC;QAED,MAAM,QAAQ,GAAG,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC;QAC5C,MAAM,YAAY,GAAG,QAAQ,IAAI,IAAI,CAAC,MAAM,CAAC,qBAAqB,CAAC;QAEnE,OAAO;YACL,gEAAgE;YAChE,gEAAgE;YAChE,SAAS,EAAE,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;YACnD,UAAU,EAAE,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI;YACrC,UAAU,EAAE,IAAI,CAAC,IAAI;YACrB,OAAO,EAAE,YAAY;gBACnB,CAAC,CAAC,OAAO;gBACT,CAAC,CAAC,CAAC,6CAA6C,CAAC;YACnD,UAAU,EAAE,YAAY;SACzB,CAAC;IACJ,CAAC;CACF;AA/CD,gDA+CC;AAsBD,MAAa,6BAA6B;IAC/B,IAAI,CAAS;IACL,OAAO,CAAyC;IAEjE,YAAY,OAAqC;QAC/C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,4DAA4D,CAAC,CAAC;QAChF,CAAC;QACD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACjC,QAAQ,EAAE,CAAC,CAAC,QAAQ;YACpB,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC;SACtB,CAAC,CAAC,CAAC;QACJ,IAAI,CAAC,IAAI,GAAG,YAAY,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;IAChF,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAiC;QAC5C,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,GAAG,CACnC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YAC7B,UAAU,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC;YAC1C,MAAM,EAAE,CAAC,CAAC,MAAM;SACjB,CAAC,CAAC,CACJ,CAAC;QAEF,sEAAsE;QACtE,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,UAAU,IAAI,CAAC,CAAC,UAAU,CAAC,UAAU,IAAI,GAAG,CAA
C,CAAC;QACtG,IAAI,UAAU,EAAE,CAAC;YACf,OAAO;gBACL,SAAS,EAAE,UAAU,CAAC,UAAU,CAAC,SAAS;gBAC1C,UAAU,EAAE,UAAU,CAAC,UAAU,CAAC,UAAU;gBAC5C,UAAU,EAAE,IAAI,CAAC,IAAI;gBACrB,OAAO,EAAE;oBACP,yCAAyC;oBACzC,GAAG,UAAU,CAAC,UAAU,CAAC,OAAO;iBACjC;gBACD,UAAU,EAAE,IAAI;aACjB,CAAC;QACJ,CAAC;QAED,2EAA2E;QAC3E,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,UAAU,CAAC,UAAU,CAAC;YACrD,SAAS,IAAI,SAAS,CAAC;YACvB,OAAO,IAAI,SAAS,GAAG,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC;YAC9C,OAAO,CAAC,IAAI,CACV,IAAI,CAAC,CAAC,UAAU,CAAC,UAAU,UAAU,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,UAAU,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,MAAM,EAAE,CACvI,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,SAAS,CAAC;QAC9D,oEAAoE;QACpE,iEAAiE;QACjE,MAAM,iBAAiB,GACrB,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;QAEpF,OAAO;YACL,SAAS,EAAE,OAAO,CAAC,WAAW,CAAC;YAC/B,UAAU,EAAE,OAAO,CAAC,iBAAiB,CAAC;YACtC,UAAU,EAAE,IAAI,CAAC,IAAI;YACrB,OAAO;SACR,CAAC;IACJ,CAAC;CACF;AAhED,sEAgEC"}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Ojas Raksha (ओजस रक्षा) — AI Immune Defense System
|
|
3
|
+
*
|
|
4
|
+
* Detects harmful cognitive inputs, protects instruction hierarchy,
|
|
5
|
+
* and quarantines prompt injection / memory poisoning attempts.
|
|
6
|
+
*/
|
|
7
|
+
import { ContextItem, DefenseHealth, DefensePolicy, HallucinationAssessment, HallucinationDetector, HallucinationDetectorInput, HealthEvent, HealthRecommendation, PromptInjectionClassifier, PromptInjectionDetector, ThreatAssessment } from '../types';
|
|
8
|
+
import type { RakshaStateSnapshot } from '../persistence/types';
|
|
9
|
+
export declare function normalizeForScan(content: string): string;
|
|
10
|
+
export declare function expandBase64(content: string): string | null;
|
|
11
|
+
export interface RakshaOptions {
|
|
12
|
+
/**
|
|
13
|
+
* Hallucination detector for `Raksha.detectHallucination()`. Defaults
|
|
14
|
+
* to an ensemble of `BestOfNInconsistencyDetector + ClaimLevelDetector
|
|
15
|
+
* + AbstentionDetector`. The ensemble short-circuits on confident
|
|
16
|
+
* abstention and weights each detector by its self-reported
|
|
17
|
+
* confidence — so a consistency detector that received no samples
|
|
18
|
+
* does not pollute the score.
|
|
19
|
+
*
|
|
20
|
+
* Plug in heavier ML-backed external detectors by implementing
|
|
21
|
+
* `HallucinationDetector` and passing them here.
|
|
22
|
+
*/
|
|
23
|
+
hallucinationDetector?: HallucinationDetector;
|
|
24
|
+
/**
|
|
25
|
+
* Detector stack used by `scanItem()` for input-side prompt injection
|
|
26
|
+
* and unsafe-instruction detection. Defaults to rule patterns plus a
|
|
27
|
+
* deterministic semantic-intent detector for policy laundering.
|
|
28
|
+
*/
|
|
29
|
+
promptInjectionDetector?: PromptInjectionDetector;
|
|
30
|
+
/**
|
|
31
|
+
* Async ML-backed classifiers for prompt injection. Run *after* the
|
|
32
|
+
* deterministic detector stack; the highest probability across all
|
|
33
|
+
* classifiers and the rule-based score wins. This lets Raksha catch
|
|
34
|
+
* attacks that bypass regex/pattern detectors (roleplay, recursive
|
|
35
|
+
* obfuscation, indirect multi-document injection).
|
|
36
|
+
*/
|
|
37
|
+
classifiers?: PromptInjectionClassifier[];
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Default detector — the ensemble that Raksha uses if the caller did
|
|
41
|
+
* not supply one. Exported so callers can compose with their own
|
|
42
|
+
* detectors via `EnsembleHallucinationDetector`.
|
|
43
|
+
*/
|
|
44
|
+
export declare function defaultHallucinationDetector(): HallucinationDetector;
|
|
45
|
+
export declare function defaultPromptInjectionDetector(): PromptInjectionDetector;
|
|
46
|
+
export declare class Raksha {
|
|
47
|
+
private policy;
|
|
48
|
+
private assessments;
|
|
49
|
+
private events;
|
|
50
|
+
private readonly hallucinationDetector;
|
|
51
|
+
private readonly promptInjectionDetector;
|
|
52
|
+
private readonly classifiers;
|
|
53
|
+
constructor(policy?: Partial<DefensePolicy>, options?: RakshaOptions);
|
|
54
|
+
/**
|
|
55
|
+
* Name of the configured hallucination detector. Useful for telemetry
|
|
56
|
+
* and report stamping so operators know which detector produced the
|
|
57
|
+
* `riskScore` they're seeing.
|
|
58
|
+
*/
|
|
59
|
+
getHallucinationDetectorName(): string;
|
|
60
|
+
getPromptInjectionDetectorName(): string;
|
|
61
|
+
/**
|
|
62
|
+
* Score an agent's **output** for hallucination risk, optionally
|
|
63
|
+
* grounded in retrieved context and/or compared against alternative
|
|
64
|
+
* samples of the same prompt. Returns the detector's assessment with
|
|
65
|
+
* a wrapper that:
|
|
66
|
+
* - guards against thrown errors (detectors are expected not to
|
|
67
|
+
* throw, but we apply defence-in-depth);
|
|
68
|
+
* - emits a `hallucination_detected` Pulse event when risk is high
|
|
69
|
+
* AND confidence is non-trivial (so we don't emit noise for
|
|
70
|
+
* no-signal calls).
|
|
71
|
+
*
|
|
72
|
+
* The emission threshold is the same `warnThreshold` Raksha uses for
|
|
73
|
+
* input-side prompt-injection assessments, for consistency.
|
|
74
|
+
*/
|
|
75
|
+
detectHallucination(input: HallucinationDetectorInput, opts?: {
|
|
76
|
+
agentId?: string;
|
|
77
|
+
}): Promise<{
|
|
78
|
+
assessment: HallucinationAssessment;
|
|
79
|
+
event?: HealthEvent;
|
|
80
|
+
}>;
|
|
81
|
+
private validatePolicy;
|
|
82
|
+
private enforceRetention;
|
|
83
|
+
/**
|
|
84
|
+
* Pure threat-scan: scores an item and returns the assessment (plus a quarantine
|
|
85
|
+
* event if applicable). Does NOT touch internal state. Use this for read-only
|
|
86
|
+
* checks like MCP `ojas_scan_for_injection` so that simply evaluating
|
|
87
|
+
* candidate content never pollutes defense health.
|
|
88
|
+
*
|
|
89
|
+
* Bypass-reduction pipeline applied before regex matching:
|
|
90
|
+
* 1. `normalizeForScan(content)` — NFKC + zero-width strip + homoglyph fold.
|
|
91
|
+
* Catches `іgnore previous instructions` (Cyrillic), `i\u200Bgnore`,
|
|
92
|
+
* and full-width "IGNORE".
|
|
93
|
+
* 2. `expandBase64(normalized)` — decodes base64-looking tokens once
|
|
94
|
+
* (non-recursive, bounded) and concatenates the decoded text for a
|
|
95
|
+
* second-pass scan. Catches the canonical
|
|
96
|
+
* `aWdub3JlIHByZXZpb3VzIGluc3RydWN0aW9ucw==` bypass.
|
|
97
|
+
*
|
|
98
|
+
* Detection is a deterministic detector stack, not a security boundary.
|
|
99
|
+
* See `docs/KNOWN_FAILURES.md` for what remains (indirect, multi-turn,
|
|
100
|
+
* and novel semantic injection outside the current detector rules).
|
|
101
|
+
*/
|
|
102
|
+
scanItem(item: ContextItem, opts?: {
|
|
103
|
+
agentId?: string;
|
|
104
|
+
}): {
|
|
105
|
+
assessment: ThreatAssessment;
|
|
106
|
+
event?: HealthEvent;
|
|
107
|
+
};
|
|
108
|
+
/**
|
|
109
|
+
* Async variant of `scanItem` that also runs any configured ML classifiers.
|
|
110
|
+
* The final riskScore is the max of the deterministic detector score and
|
|
111
|
+
* the highest classifier probability. Quarantine is re-evaluated after
|
|
112
|
+
* the merge so a classifier can elevate a below-threshold item into
|
|
113
|
+
* quarantine.
|
|
114
|
+
*/
|
|
115
|
+
scanItemAsync(item: ContextItem, opts?: {
|
|
116
|
+
agentId?: string;
|
|
117
|
+
signal?: AbortSignal;
|
|
118
|
+
}): Promise<{
|
|
119
|
+
assessment: ThreatAssessment;
|
|
120
|
+
event?: HealthEvent;
|
|
121
|
+
}>;
|
|
122
|
+
/** Whether this Raksha instance has async classifiers configured. */
|
|
123
|
+
hasClassifiers(): boolean;
|
|
124
|
+
/**
|
|
125
|
+
* Recording variant: scans the item AND appends the assessment (and any event)
|
|
126
|
+
* to internal state so it contributes to future defense health. Use this when
|
|
127
|
+
* the scan represents real agent intake; use `scanItem` for read-only checks.
|
|
128
|
+
*/
|
|
129
|
+
assessItem(item: ContextItem, opts?: {
|
|
130
|
+
agentId?: string;
|
|
131
|
+
}): ThreatAssessment;
|
|
132
|
+
/**
|
|
133
|
+
* Async recording variant used when classifiers are configured.
|
|
134
|
+
*/
|
|
135
|
+
assessItemAsync(item: ContextItem, opts?: {
|
|
136
|
+
agentId?: string;
|
|
137
|
+
signal?: AbortSignal;
|
|
138
|
+
}): Promise<ThreatAssessment>;
|
|
139
|
+
defendContext(items: ContextItem[], opts?: {
|
|
140
|
+
agentId?: string;
|
|
141
|
+
}): {
|
|
142
|
+
safe: ContextItem[];
|
|
143
|
+
quarantined: ContextItem[];
|
|
144
|
+
assessments: ThreatAssessment[];
|
|
145
|
+
events: HealthEvent[];
|
|
146
|
+
};
|
|
147
|
+
/**
|
|
148
|
+
* Async version of `defendContext` that runs ML classifiers on each item.
|
|
149
|
+
*/
|
|
150
|
+
defendContextAsync(items: ContextItem[], opts?: {
|
|
151
|
+
agentId?: string;
|
|
152
|
+
signal?: AbortSignal;
|
|
153
|
+
}): Promise<{
|
|
154
|
+
safe: ContextItem[];
|
|
155
|
+
quarantined: ContextItem[];
|
|
156
|
+
assessments: ThreatAssessment[];
|
|
157
|
+
events: HealthEvent[];
|
|
158
|
+
}>;
|
|
159
|
+
assess(): DefenseHealth;
|
|
160
|
+
recommend(): HealthRecommendation[];
|
|
161
|
+
getEvents(): readonly Readonly<HealthEvent>[];
|
|
162
|
+
getAssessments(): readonly Readonly<ThreatAssessment>[];
|
|
163
|
+
exportState(): RakshaStateSnapshot;
|
|
164
|
+
importState(snapshot: Partial<RakshaStateSnapshot> | undefined): void;
|
|
165
|
+
getPolicy(): DefensePolicy;
|
|
166
|
+
updatePolicy(updates: Partial<DefensePolicy>): void;
|
|
167
|
+
}
|
|
168
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/raksha/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EACL,WAAW,EAEX,aAAa,EACb,aAAa,EACb,uBAAuB,EACvB,qBAAqB,EACrB,0BAA0B,EAC1B,WAAW,EACX,oBAAoB,EAEpB,yBAAyB,EACzB,uBAAuB,EACvB,gBAAgB,EAEjB,MAAM,UAAU,CAAC;AAClB,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAuIhE,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAWxD;AAgCD,wBAAgB,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAgC3D;AAED,MAAM,WAAW,aAAa;IAC5B;;;;;;;;;;OAUG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAC9C;;;;OAIG;IACH,uBAAuB,CAAC,EAAE,uBAAuB,CAAC;IAClD;;;;;;OAMG;IACH,WAAW,CAAC,EAAE,yBAAyB,EAAE,CAAC;CAC3C;AAED;;;;GAIG;AACH,wBAAgB,4BAA4B,IAAI,qBAAqB,CAMpE;AAED,wBAAgB,8BAA8B,IAAI,uBAAuB,CAKxE;AAED,qBAAa,MAAM;IACjB,OAAO,CAAC,MAAM,CAAgB;IAC9B,OAAO,CAAC,WAAW,CAA0B;IAC7C,OAAO,CAAC,MAAM,CAAqB;IACnC,OAAO,CAAC,QAAQ,CAAC,qBAAqB,CAAwB;IAC9D,OAAO,CAAC,QAAQ,CAAC,uBAAuB,CAA0B;IAClE,OAAO,CAAC,QAAQ,CAAC,WAAW,CAA8B;gBAE9C,MAAM,GAAE,OAAO,CAAC,aAAa,CAAM,EAAE,OAAO,GAAE,aAAkB;IAO5E;;;;OAIG;IACH,4BAA4B,IAAI,MAAM;IAItC,8BAA8B,IAAI,MAAM;IAIxC;;;;;;;;;;;;;OAaG;IACG,mBAAmB,CACvB,KAAK,EAAE,0BAA0B,EACjC,IAAI,GAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAA;KAAO,GAC9B,OAAO,CAAC;QAAE,UAAU,EAAE,uBAAuB,CAAC;QAAC,KAAK,CAAC,EAAE,WAAW,CAAA;KAAE,CAAC;IA+CxE,OAAO,CAAC,cAAc;IAiBtB,OAAO,CAAC,gBAAgB;IAWxB;;;;;;;;;;;;;;;;;;OAkBG;IACH,QAAQ,CAAC,IAAI,EAAE,WAAW,EAAE,IAAI,GAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAA;KAAO,GAAG;QAAE,UAAU,EAAE,gBAAgB,CAAC;QAAC,KAAK,CAAC,EAAE,WAAW,CAAA;KAAE;IA0DnH;;;;;;OAMG;IACG,aAAa,CACjB,IAAI,EAAE,WAAW,EACjB,IAAI,GAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,WAAW,CAAA;KAAO,GACpD,OAAO,CAAC;QAAE,UAAU,EAAE,gBAAgB,CAAC;QAAC,KAAK,CAAC,EAAE,WAAW,CAAA;KAAE,CAAC;IAyDjE,qEAAqE;IACrE,cAAc,IAAI,OAAO;IAIzB;;;;OAIG;IACH,UAAU,CAAC,IAAI,EAAE,WAAW,EAAE,IAAI,GAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAA;KAAO,GAAG,gBAAgB;IAQhF;;OAEG;IACG,eAAe,CAAC,IAAI,EAAE,WAAW,EAAE,IAAI,GAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,WAAW,CAAA;KAAO,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAQ1H,aAAa,CACX,KAAK,EAAE,WA
AW,EAAE,EACpB,IAAI,GAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAA;KAAO,GAC9B;QAAE,IAAI,EAAE,WAAW,EAAE,CAAC;QAAC,WAAW,EAAE,WAAW,EAAE,CAAC;QAAC,WAAW,EAAE,gBAAgB,EAAE,CAAC;QAAC,MAAM,EAAE,WAAW,EAAE,CAAA;KAAE;IAyB9G;;OAEG;IACG,kBAAkB,CACtB,KAAK,EAAE,WAAW,EAAE,EACpB,IAAI,GAAE;QAAE,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,WAAW,CAAA;KAAO,GACpD,OAAO,CAAC;QAAE,IAAI,EAAE,WAAW,EAAE,CAAC;QAAC,WAAW,EAAE,WAAW,EAAE,CAAC;QAAC,WAAW,EAAE,gBAAgB,EAAE,CAAC;QAAC,MAAM,EAAE,WAAW,EAAE,CAAA;KAAE,CAAC;IAyBvH,MAAM,IAAI,aAAa;IA0BvB,SAAS,IAAI,oBAAoB,EAAE;IA6BnC,SAAS,IAAI,SAAS,QAAQ,CAAC,WAAW,CAAC,EAAE;IAI7C,cAAc,IAAI,SAAS,QAAQ,CAAC,gBAAgB,CAAC,EAAE;IAIvD,WAAW,IAAI,mBAAmB;IAOlC,WAAW,CAAC,QAAQ,EAAE,OAAO,CAAC,mBAAmB,CAAC,GAAG,SAAS,GAAG,IAAI;IAMrE,SAAS,IAAI,aAAa;IAI1B,YAAY,CAAC,OAAO,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,IAAI;CAIpD"}
|