hallucination-validator 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,64 @@
# hallucination-validator

**AI Output Validator for Security and Fact-Checking**

`hallucination-validator` is a Node.js library that validates the output of Large Language Models (LLMs) to catch "hallucinations" that are actually security risks. It checks for broken links, scans for dangerous code patterns, and verifies that quotes actually exist in your source context.

## Why use this?

LLMs often:
1. Invent URLs that look real but are dead (or worse, hijacked).
2. Suggest "standard" but insecure code (like `eval()`).
3. Fabricate quotes from documents they are summarizing.

This package provides a programmatic way to flag these issues before showing the response to a user.

## Features

* **Link Validation**: Extracts URLs, sends a HEAD request to each, and flags any that fail or return a non-2xx status.
* **Code Safety Scanning**: Detects `eval()`, `child_process`, and other dangerous Node.js/JS patterns.
* **Quote Verification**: Fuzzy matching to verify that a quoted string actually exists in the provided source context.

## Installation

```bash
npm install hallucination-validator
```

## Usage

```javascript
const HallucinationValidator = require('hallucination-validator');

const validator = new HallucinationValidator();

// 1. Validate Links (URLs must include a protocol, e.g. https://)
const text = "Check out https://example.com and https://broken.link/404";
validator.validateLinks(text).then(broken => {
  console.log(broken);
  // [{ url: 'https://broken.link/404', status: 404, ... }]
});

// 2. Scan Code
const code = "function run() { eval(input); }";
const risks = validator.scanCodeSafety(code);
console.log(risks);
// ['eval()']

// 3. Verify Quotes
const source = "The quick brown fox jumps over the lazy dog.";
const aiQuote = "The lazy dog jumps over the quick brown fox.";

// An optional third argument sets the edit-distance tolerance (default 0.2).
const isValid = validator.verifyQuote(aiQuote, source);
console.log(isValid);
// false (no close match in the source)
```

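The three checks can also be combined into a single review step before an answer reaches the user. The sketch below is illustrative rather than part of the package's API; `reviewAnswer`, `llmAnswer`, `sourceDoc`, and `quotedClaims` are placeholder names.

```javascript
const HallucinationValidator = require('hallucination-validator');

async function reviewAnswer(llmAnswer, sourceDoc, quotedClaims) {
  const validator = new HallucinationValidator();
  const issues = [];

  // Dead or unreachable URLs mentioned in the answer
  const brokenLinks = await validator.validateLinks(llmAnswer);
  if (brokenLinks.length > 0) issues.push({ type: 'broken-links', details: brokenLinks });

  // Dangerous patterns in any code the answer contains
  const codeRisks = validator.scanCodeSafety(llmAnswer);
  if (codeRisks.length > 0) issues.push({ type: 'unsafe-code', details: codeRisks });

  // Quoted claims that cannot be found (even fuzzily) in the source document
  for (const quote of quotedClaims) {
    if (!validator.verifyQuote(quote, sourceDoc)) {
      issues.push({ type: 'unverified-quote', details: quote });
    }
  }

  return issues; // an empty array means all checks passed
}
```
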
## Dependencies

* `fast-levenshtein`: Levenshtein distance for fuzzy quote matching.
* **Node 18+**: link validation uses the native `fetch` API.

## License

MIT
package/index.js ADDED
@@ -0,0 +1,125 @@
const levenshtein = require('fast-levenshtein');

class HallucinationValidator {

  /**
   * Scans text for URLs and verifies that each one responds successfully (2xx) to a HEAD request.
   * @param {string} text
   * @returns {Promise<Array<{url: string, status: number, error: string}>>} List of broken links
   */
  async validateLinks(text) {
    const urlRegex = /https?:\/\/[^\s)]+/g;
    const matches = text.match(urlRegex) || [];
    const brokenLinks = [];

    // De-duplicate URLs
    const uniqueUrls = [...new Set(matches)];

    for (const url of uniqueUrls) {
      // Remove trailing punctuation often captured by the regex
      const cleanUrl = url.replace(/[.,;]$/, '');

      try {
        // HEAD keeps requests light; note that the few servers that reject HEAD will be reported as broken.
        const response = await fetch(cleanUrl, { method: 'HEAD' });
        if (!response.ok) {
          brokenLinks.push({ url: cleanUrl, status: response.status, error: 'Non-200 status' });
        }
      } catch (err) {
        brokenLinks.push({ url: cleanUrl, status: 0, error: err.message });
      }
    }

    return brokenLinks;
  }

  /**
   * Scans code snippets for potentially dangerous patterns that LLMs commonly suggest.
   * @param {string} code
   * @returns {Array<string>} List of found dangerous patterns
   */
  scanCodeSafety(code) {
    // No `g` flag: `RegExp.test` with a global flag is stateful, and a single match is all we need.
    const dangerousPatterns = [
      { pattern: /eval\s*\(/, name: 'eval()' },
      { pattern: /child_process/, name: 'child_process' },
      { pattern: /exec\s*\(/, name: 'exec()' },
      { pattern: /spawn\s*\(/, name: 'spawn()' },
      { pattern: /innerHTML/, name: 'innerHTML usage' },
      { pattern: /document\.write/, name: 'document.write' }
    ];

    const findings = [];
    for (const { pattern, name } of dangerousPatterns) {
      if (pattern.test(code)) {
        findings.push(name);
      }
    }
    return findings;
  }

  /**
   * Verifies that a quote exists within a source context, allowing for typo tolerance.
   * @param {string} quote
   * @param {string} context
   * @param {number} threshold - Maximum allowed edit distance as a fraction of the quote length (0.0 to 1.0)
   * @returns {boolean}
   */
  verifyQuote(quote, context, threshold = 0.2) {
    if (!quote || !context) return false;

    // 1. Direct inclusion check (fastest)
    if (context.includes(quote)) return true;

    // 2. Normalized inclusion check (ignore case, punctuation, extra whitespace)
    const normalize = (s) => s.toLowerCase().replace(/[^\w\s]/g, '').replace(/\s+/g, ' ');
    const nQuote = normalize(quote);
    const nContext = normalize(context);

    if (nContext.includes(nQuote)) return true;

    // 3. Fuzzy match: slide a quote-length window across the context and take the
    // smallest Levenshtein distance. This is synchronous and roughly quadratic per
    // window, which is fine for paragraph-sized inputs but will block on huge contexts.
    const qLen = nQuote.length;
    const cLen = nContext.length;

    // The context cannot contain the quote if it is shorter than the quote
    // (beyond the allowed tolerance).
    if (cLen < qLen * (1 - threshold)) return false;

    let minDistance = Infinity;

    for (let i = 0; i <= cLen - qLen; i++) {
      // Heuristic: only score windows that start with the quote's first character.
      // This skips most positions, but a quote whose first character was altered
      // will never be scored and is therefore rejected.
      if (nContext[i] !== nQuote[0]) continue;

      const window = nContext.substring(i, i + qLen);
      const dist = levenshtein.get(nQuote, window);
      if (dist < minDistance) {
        minDistance = dist;
      }
      // Early exit as soon as any window is within tolerance.
      if (minDistance / qLen <= threshold) return true;
    }

    // Only fixed-length windows are scored, so a quote with words added or removed
    // relative to the source may drift outside its window. For the core question
    // ("is this stated quote present in the source?") that approximation is acceptable.
    return (minDistance / qLen) <= threshold;
  }
}

module.exports = HallucinationValidator;
package/package.json ADDED
@@ -0,0 +1,25 @@
{
  "name": "hallucination-validator",
  "version": "1.0.0",
  "description": "Validates AI outputs for linkrot, dangerous code, and hallucinations.",
  "main": "index.js",
  "scripts": {
    "test": "node --test"
  },
  "keywords": [
    "ai",
    "security",
    "hallucination",
    "validation",
    "llm",
    "fact-check"
  ],
  "author": "Godfrey Lebo <emorylebo@gmail.com>",
  "license": "MIT",
  "dependencies": {
    "fast-levenshtein": "^3.0.0"
  },
  "engines": {
    "node": ">=18.0.0"
  }
}
@@ -0,0 +1,57 @@
const { test } = require('node:test');
const assert = require('node:assert');
const HallucinationValidator = require('../index.js');

test('validateLinks detects broken links', async (t) => {
  // Mock global fetch
  const originalFetch = global.fetch;

  global.fetch = async (url) => {
    if (url.includes('google.com')) {
      return { ok: true, status: 200 };
    }
    if (url.includes('broken.link')) {
      return { ok: false, status: 404 };
    }
    throw new Error('Network error');
  };

  const validator = new HallucinationValidator();
  const text = "Check this out: https://google.com and https://broken.link/resource.";

  const results = await validator.validateLinks(text);

  assert.strictEqual(results.length, 1);
  assert.strictEqual(results[0].url, 'https://broken.link/resource');
  assert.strictEqual(results[0].status, 404);

  // Restore fetch
  global.fetch = originalFetch;
});

test('scanCodeSafety finds dangerous coding patterns', (t) => {
  const validator = new HallucinationValidator();
  const unsafeCode = `
    function bad() {
      eval("alert('hacked')");
      const cp = require('child_process');
    }
  `;

  const findings = validator.scanCodeSafety(unsafeCode);
  assert.ok(findings.includes('eval()'));
  assert.ok(findings.includes('child_process'));
  assert.strictEqual(findings.length, 2);
});

test('verifyQuote confirms existence of fuzzy quote', (t) => {
  const validator = new HallucinationValidator();
  const context = "The quick brown fox jumps over the lazy dog.";
  const exactQuote = "quick brown fox";
  const fuzzyQuote = "quick brwn fox"; // typo
  const wrongQuote = "lazy cat";

  assert.strictEqual(validator.verifyQuote(exactQuote, context), true);
  assert.strictEqual(validator.verifyQuote(fuzzyQuote, context, 0.3), true);
  assert.strictEqual(validator.verifyQuote(wrongQuote, context), false);
});
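
// A possible additional case (a sketch, reusing the imports above): the default
// threshold should reject a quote whose wording is heavily rearranged, even though
// every word appears somewhere in the source.
test('verifyQuote rejects a rearranged quote at the default threshold', () => {
  const validator = new HallucinationValidator();
  const context = "The quick brown fox jumps over the lazy dog.";
  const rearranged = "The lazy dog jumps over the quick brown fox.";

  assert.strictEqual(validator.verifyQuote(rearranged, context), false);
});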