hallucination-validator 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -24
- package/dist/index.d.ts +30 -0
- package/{index.js → dist/index.js} +21 -55
- package/package.json +20 -4
- package/test/index.test.js +0 -57
package/README.md
CHANGED
|
@@ -1,23 +1,24 @@
|
|
|
1
1
|
# hallucination-validator
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+

|
|
4
6
|
|
|
5
|
-
|
|
7
|
+
**AI Output Validator for Security & Fact-Checking**
|
|
6
8
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
LLMs often:
|
|
10
|
-
1. Invent URLs that look real but are dead (or worse, hijacked).
|
|
11
|
-
2. Suggest "standard" but insecure code (like `eval()`).
|
|
12
|
-
3. Fabricate quotes from documents they are summarizing.
|
|
9
|
+
`hallucination-validator` is a comprehensive library for validating Large Language Model (LLM) outputs. It prevents common AI risks such as link hallucination (linkrot/hijacking), dangerous code generation, and context fabrication.
|
|
13
10
|
|
|
14
|
-
|
|
11
|
+
## Why use this?
|
|
12
|
+
LLMs are confident but often incorrect. Security risks arise when:
|
|
13
|
+
1. **Hallucinated URLs** point to non-existent domains that can be hijacked by attackers.
|
|
14
|
+
2. **Generated Code** contains dangerous patterns like `eval()` or `exec()`.
|
|
15
|
+
3. **Fabricated Quotes** mislead users by misrepresenting source material.
|
|
15
16
|
|
|
16
17
|
## Features
|
|
17
18
|
|
|
18
|
-
* **Link
|
|
19
|
-
* **Code Safety
|
|
20
|
-
* **Quote Verification**:
|
|
19
|
+
* **Link Integrity**: Extracts URLs and performs async HEAD requests, reporting any link that does not return a successful (2xx) response or that fails to resolve.
|
|
20
|
+
* **Code Safety**: Scans generated code for unsafe JavaScript patterns in both Node.js and browser contexts (`eval`, `child_process`, `innerHTML`, `document.write`).
|
|
21
|
+
* **Fuzzy Quote Verification**: Verifies if a quoted string exists in the source text, tolerant of minor AI alterations or typos.
|
|
21
22
|
|
|
22
23
|
## Installation
|
|
23
24
|
|
|
@@ -27,22 +28,22 @@ npm install hallucination-validator
|
|
|
27
28
|
|
|
28
29
|
## Usage
|
|
29
30
|
|
|
30
|
-
```
|
|
31
|
-
|
|
31
|
+
```typescript
|
|
32
|
+
import HallucinationValidator from 'hallucination-validator';
|
|
32
33
|
|
|
33
34
|
const validator = new HallucinationValidator();
|
|
34
35
|
|
|
35
36
|
// 1. Validate Links
|
|
36
37
|
const text = "Check out https://valid.com and http://broken.link/404";
|
|
37
38
|
validator.validateLinks(text).then(broken => {
|
|
38
|
-
console.log(broken);
|
|
39
|
+
console.log('Broken Links:', broken);
|
|
39
40
|
// [{ url: 'http://broken.link/404', status: 404, ... }]
|
|
40
41
|
});
|
|
41
42
|
|
|
42
43
|
// 2. Scan Code
|
|
43
|
-
const
|
|
44
|
-
const risks = validator.scanCodeSafety(
|
|
45
|
-
console.log(risks);
|
|
44
|
+
const script = "function run() { eval(input); }";
|
|
45
|
+
const risks = validator.scanCodeSafety(script);
|
|
46
|
+
console.log('Risks:', risks);
|
|
46
47
|
// ['eval()']
|
|
47
48
|
|
|
48
49
|
// 3. Verify Quotes
|
|
@@ -50,15 +51,15 @@ const source = "The quick brown fox jumps over the lazy dog.";
|
|
|
50
51
|
const aiQuote = "The quick brown fox jumped over a lazy cat.";
|
|
51
52
|
|
|
52
53
|
const isValid = validator.verifyQuote(aiQuote, source);
|
|
53
|
-
console.log(isValid);
|
|
54
|
-
// false
|
|
54
|
+
console.log('Quote Valid:', isValid);
|
|
55
|
+
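// true — after normalization the quote is 8 edits away over 42 characters (≈0.19), which is within the default 0.2 threshold; pass a stricter threshold (e.g. 0.1) to reject it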
|
|
55
56
|
```
|
|
56
57
|
|
|
57
58
|
## Dependencies
|
|
58
|
-
|
|
59
|
-
*
|
|
60
|
-
* **Node 18+**: uses native `fetch`.
|
|
59
|
+
* `fast-levenshtein`: For robust string comparison algorithms.
|
|
60
|
+
* **Node.js 18+**: Requires native `fetch` API.
|
|
61
61
|
|
|
62
62
|
## License
|
|
63
63
|
|
|
64
|
-
MIT
|
|
64
|
+
MIT © Godfrey Lebo
|
|
65
|
+
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
declare class HallucinationValidator {
|
|
2
|
+
/**
|
|
3
|
+
* Scans text for URLs and verifies they are reachable (200 OK).
|
|
4
|
+
* @param {string} text
|
|
5
|
+
* @returns {Promise<Array<HallucinationValidator.BrokenLink>>} List of broken links
|
|
6
|
+
*/
|
|
7
|
+
validateLinks(text: string): Promise<Array<HallucinationValidator.BrokenLink>>;
|
|
8
|
+
/**
|
|
9
|
+
* Scans code snippets for potentially dangerous patterns often hallucinated.
|
|
10
|
+
* @param {string} code
|
|
11
|
+
* @returns {Array<string>} List of found dangerous patterns
|
|
12
|
+
*/
|
|
13
|
+
scanCodeSafety(code: string): Array<string>;
|
|
14
|
+
/**
|
|
15
|
+
* Verifies if a quote exists within a source context, allowing for typo tolerance.
|
|
16
|
+
* @param {string} quote
|
|
17
|
+
* @param {string} context
|
|
18
|
+
* @param {number} threshold - Allowed matching distance ratio (0.0 to 1.0)
|
|
19
|
+
* @returns {boolean}
|
|
20
|
+
*/
|
|
21
|
+
verifyQuote(quote: string, context: string, threshold?: number): boolean;
|
|
22
|
+
}
|
|
23
|
+
declare namespace HallucinationValidator {
|
|
24
|
+
interface BrokenLink {
|
|
25
|
+
url: string;
|
|
26
|
+
status: number;
|
|
27
|
+
error?: string;
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
export = HallucinationValidator;
|
|
@@ -1,40 +1,35 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
"use strict";
|
|
2
|
+
const levenshtein = require("fast-levenshtein");
|
|
3
3
|
class HallucinationValidator {
|
|
4
|
-
|
|
5
4
|
/**
|
|
6
5
|
* Scans text for URLs and verifies they are reachable (200 OK).
|
|
7
|
-
* @param {string} text
|
|
8
|
-
* @returns {Promise<Array<
|
|
6
|
+
* @param {string} text
|
|
7
|
+
* @returns {Promise<Array<HallucinationValidator.BrokenLink>>} List of broken links
|
|
9
8
|
*/
|
|
10
9
|
async validateLinks(text) {
|
|
11
10
|
const urlRegex = /https?:\/\/[^\s)]+/g;
|
|
12
11
|
const matches = text.match(urlRegex) || [];
|
|
13
12
|
const brokenLinks = [];
|
|
14
|
-
|
|
15
13
|
// De-duplicate URLs
|
|
16
14
|
const uniqueUrls = [...new Set(matches)];
|
|
17
|
-
|
|
18
15
|
for (const url of uniqueUrls) {
|
|
19
16
|
// Remove trailing punctuation often captured by regex
|
|
20
17
|
const cleanUrl = url.replace(/[.,;]$/, '');
|
|
21
|
-
|
|
22
18
|
try {
|
|
23
19
|
const response = await fetch(cleanUrl, { method: 'HEAD' });
|
|
24
20
|
if (!response.ok) {
|
|
25
21
|
brokenLinks.push({ url: cleanUrl, status: response.status, error: 'Non-200 status' });
|
|
26
22
|
}
|
|
27
|
-
}
|
|
23
|
+
}
|
|
24
|
+
catch (err) {
|
|
28
25
|
brokenLinks.push({ url: cleanUrl, status: 0, error: err.message });
|
|
29
26
|
}
|
|
30
27
|
}
|
|
31
|
-
|
|
32
28
|
return brokenLinks;
|
|
33
29
|
}
|
|
34
|
-
|
|
35
30
|
/**
|
|
36
31
|
* Scans code snippets for potentially dangerous patterns often hallucinated.
|
|
37
|
-
* @param {string} code
|
|
32
|
+
* @param {string} code
|
|
38
33
|
* @returns {Array<string>} List of found dangerous patterns
|
|
39
34
|
*/
|
|
40
35
|
scanCodeSafety(code) {
|
|
@@ -46,7 +41,6 @@ class HallucinationValidator {
|
|
|
46
41
|
{ pattern: /innerHTML/g, name: 'innerHTML usage' },
|
|
47
42
|
{ pattern: /document\.write/g, name: 'document.write' }
|
|
48
43
|
];
|
|
49
|
-
|
|
50
44
|
const findings = [];
|
|
51
45
|
for (const { pattern, name } of dangerousPatterns) {
|
|
52
46
|
if (pattern.test(code)) {
|
|
@@ -55,71 +49,43 @@ class HallucinationValidator {
|
|
|
55
49
|
}
|
|
56
50
|
return findings;
|
|
57
51
|
}
|
|
58
|
-
|
|
59
52
|
/**
|
|
60
53
|
* Verifies if a quote exists within a source context, allowing for typo tolerance.
|
|
61
|
-
* @param {string} quote
|
|
62
|
-
* @param {string} context
|
|
54
|
+
* @param {string} quote
|
|
55
|
+
* @param {string} context
|
|
63
56
|
* @param {number} threshold - Allowed matching distance ratio (0.0 to 1.0)
|
|
64
57
|
* @returns {boolean}
|
|
65
58
|
*/
|
|
66
59
|
verifyQuote(quote, context, threshold = 0.2) {
|
|
67
|
-
if (!quote || !context)
|
|
68
|
-
|
|
60
|
+
if (!quote || !context)
|
|
61
|
+
return false;
|
|
69
62
|
// 1. Direct inclusion check (fastest)
|
|
70
|
-
if (context.includes(quote))
|
|
71
|
-
|
|
63
|
+
if (context.includes(quote))
|
|
64
|
+
return true;
|
|
72
65
|
// 2. Normalized inclusion check (ignore case/punctuation)
|
|
73
66
|
const normalize = (s) => s.toLowerCase().replace(/[^\w\s]/g, '').replace(/\s+/g, ' ');
|
|
74
67
|
const nQuote = normalize(quote);
|
|
75
68
|
const nContext = normalize(context);
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
69
|
+
if (nContext.includes(nQuote))
|
|
70
|
+
return true;
|
|
79
71
|
// 3. Fuzzy match
|
|
80
|
-
// We scan the context for a window of text similar in length to the quote
|
|
81
|
-
// and check Levenshtein distance. This is O(N*M) worst case but valid for verification.
|
|
82
72
|
const qLen = nQuote.length;
|
|
83
73
|
const cLen = nContext.length;
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if (cLen < qLen * (1 - threshold)) return false;
|
|
87
|
-
|
|
88
|
-
// Sliding window scan
|
|
89
|
-
// To be performant, we only check windows starting at word boundaries
|
|
90
|
-
// or just brute force every N chars if high precision needed.
|
|
91
|
-
// For "Senior Dev" approach: Let's use a simplified heuristic.
|
|
92
|
-
// If the context is massive, real fuzzy search is complex.
|
|
93
|
-
// We will do a character-by-character validation for small/medium texts.
|
|
94
|
-
|
|
95
|
-
// WindowStep: 1 char is safest.
|
|
96
|
-
// Limit: This is sync and blocking.
|
|
97
|
-
// If usage assumes short texts (like checking a paragraph), this is fine.
|
|
98
|
-
|
|
74
|
+
if (cLen < qLen * (1 - threshold))
|
|
75
|
+
return false;
|
|
99
76
|
let minDistance = Infinity;
|
|
100
|
-
|
|
101
|
-
// Safety cap: if context > 10kb, maybe warn or skip fuzzy?
|
|
102
|
-
// Let's implement looking for the best window.
|
|
103
|
-
|
|
104
77
|
for (let i = 0; i <= cLen - qLen; i++) {
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
78
|
+
if (nContext[i] !== nQuote[0])
|
|
79
|
+
continue;
|
|
108
80
|
const window = nContext.substring(i, i + qLen);
|
|
109
81
|
const dist = levenshtein.get(nQuote, window);
|
|
110
82
|
if (dist < minDistance) {
|
|
111
83
|
minDistance = dist;
|
|
112
84
|
}
|
|
113
|
-
if (minDistance / qLen <= threshold)
|
|
85
|
+
if (minDistance / qLen <= threshold)
|
|
86
|
+
return true;
|
|
114
87
|
}
|
|
115
|
-
|
|
116
|
-
// Check allows for slightly larger or smaller windows?
|
|
117
|
-
// nQuote might be missing a word, so window size in context might vary.
|
|
118
|
-
// But `verifyQuote` usually implies checking if the STATED quote is in source.
|
|
119
|
-
// So checking strict length window is a fair approximation for "is this string present".
|
|
120
|
-
|
|
121
88
|
return (minDistance / qLen) <= threshold;
|
|
122
89
|
}
|
|
123
90
|
}
|
|
124
|
-
|
|
125
91
|
module.exports = HallucinationValidator;
|
package/package.json
CHANGED
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hallucination-validator",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Validates AI outputs for linkrot, dangerous code, and hallucinations.",
|
|
5
|
-
"main": "index.js",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"files": [
|
|
8
|
+
"dist"
|
|
9
|
+
],
|
|
6
10
|
"scripts": {
|
|
7
|
-
"
|
|
11
|
+
"build": "rimraf dist && tsc",
|
|
12
|
+
"prepublishOnly": "npm run build",
|
|
13
|
+
"test": "npm run build && node --test"
|
|
8
14
|
},
|
|
9
15
|
"keywords": [
|
|
10
16
|
"ai",
|
|
@@ -16,10 +22,20 @@
|
|
|
16
22
|
],
|
|
17
23
|
"author": "Godfrey Lebo <emorylebo@gmail.com>",
|
|
18
24
|
"license": "MIT",
|
|
25
|
+
"repository": {
|
|
26
|
+
"type": "git",
|
|
27
|
+
"url": "git+https://github.com/emorilebo/hallucination-validator.git"
|
|
28
|
+
},
|
|
19
29
|
"dependencies": {
|
|
20
30
|
"fast-levenshtein": "^3.0.0"
|
|
21
31
|
},
|
|
22
32
|
"engines": {
|
|
23
33
|
"node": ">=18.0.0"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"@types/fast-levenshtein": "^0.0.4",
|
|
37
|
+
"@types/node": "^25.0.3",
|
|
38
|
+
"rimraf": "^6.1.2",
|
|
39
|
+
"typescript": "^5.9.3"
|
|
24
40
|
}
|
|
25
|
-
}
|
|
41
|
+
}
|
package/test/index.test.js
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
const { test, mock } = require('node:test');
|
|
2
|
-
const assert = require('node:assert');
|
|
3
|
-
const HallucinationValidator = require('../index.js');
|
|
4
|
-
|
|
5
|
-
test('validateLinks detects broken links', async (t) => {
|
|
6
|
-
// Mock global fetch
|
|
7
|
-
const originalFetch = global.fetch;
|
|
8
|
-
|
|
9
|
-
global.fetch = async (url) => {
|
|
10
|
-
if (url.includes('google.com')) {
|
|
11
|
-
return { ok: true, status: 200 };
|
|
12
|
-
}
|
|
13
|
-
if (url.includes('broken.link')) {
|
|
14
|
-
return { ok: false, status: 404 };
|
|
15
|
-
}
|
|
16
|
-
throw new Error('Network error');
|
|
17
|
-
};
|
|
18
|
-
|
|
19
|
-
const validator = new HallucinationValidator();
|
|
20
|
-
const text = "Check this out: https://google.com and https://broken.link/resource.";
|
|
21
|
-
|
|
22
|
-
const results = await validator.validateLinks(text);
|
|
23
|
-
|
|
24
|
-
assert.strictEqual(results.length, 1);
|
|
25
|
-
assert.strictEqual(results[0].url, 'https://broken.link/resource');
|
|
26
|
-
assert.strictEqual(results[0].status, 404);
|
|
27
|
-
|
|
28
|
-
// Restore fetch
|
|
29
|
-
global.fetch = originalFetch;
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
test('scanCodeSafety finds dangerous coding patterns', (t) => {
|
|
33
|
-
const validator = new HallucinationValidator();
|
|
34
|
-
const unsafeCode = `
|
|
35
|
-
function bad() {
|
|
36
|
-
eval("alert('hacked')");
|
|
37
|
-
const cp = require('child_process');
|
|
38
|
-
}
|
|
39
|
-
`;
|
|
40
|
-
|
|
41
|
-
const findings = validator.scanCodeSafety(unsafeCode);
|
|
42
|
-
assert.ok(findings.includes('eval()'));
|
|
43
|
-
assert.ok(findings.includes('child_process'));
|
|
44
|
-
assert.strictEqual(findings.length, 2);
|
|
45
|
-
});
|
|
46
|
-
|
|
47
|
-
test('verifyQuote confirms existence of fuzzy quote', (t) => {
|
|
48
|
-
const validator = new HallucinationValidator();
|
|
49
|
-
const context = "The quick brown fox jumps over the lazy dog.";
|
|
50
|
-
const exactQuote = "quick brown fox";
|
|
51
|
-
const fuzzyQuote = "quick brwn fox"; // typo
|
|
52
|
-
const wrongQuote = "lazy cat";
|
|
53
|
-
|
|
54
|
-
assert.strictEqual(validator.verifyQuote(exactQuote, context), true);
|
|
55
|
-
assert.strictEqual(validator.verifyQuote(fuzzyQuote, context, 0.3), true);
|
|
56
|
-
assert.strictEqual(validator.verifyQuote(wrongQuote, context), false);
|
|
57
|
-
});
|