hallucination-validator 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,23 +1,24 @@
1
1
  # hallucination-validator
2
2
 
3
- **AI Output Validator for Security and Fact-Checking**
3
+ ![NPM Version](https://img.shields.io/npm/v/hallucination-validator)
4
+ ![License](https://img.shields.io/npm/l/hallucination-validator)
5
+ ![TypeScript](https://img.shields.io/badge/types-included-blue)
4
6
 
5
- `hallucination-validator` is a Node.js library that validates the output of Large Language Models (LLMs) to catch "hallucinations" that are actually security risks. It checks for broken links, dangerous code patterns, and verifies that quotes actually exist in your source context.
7
+ **AI Output Validator for Security & Fact-Checking**
6
8
 
7
- ## Why use this?
8
-
9
- LLMs often:
10
- 1. Invent URLs that look real but are dead (or worse, hijacked).
11
- 2. Suggest "standard" but insecure code (like `eval()`).
12
- 3. Fabricate quotes from documents they are summarizing.
9
+ `hallucination-validator` is a comprehensive library for validating Large Language Model (LLM) outputs. It prevents common AI risks such as link hallucination (linkrot/hijacking), dangerous code generation, and context fabrication.
13
10
 
14
- This package provides a programmatic way to flag these issues before showing the response to a user.
11
+ ## Why use this?
12
+ LLMs are confident but often incorrect. Security risks arise when:
13
+ 1. **Hallucinated URLs** point to non-existent domains that can be hijacked by attackers.
14
+ 2. **Generated Code** contains dangerous patterns like `eval()` or `exec()`.
15
+ 3. **Fabricated Quotes** mislead users by misrepresenting source material.
15
16
 
16
17
  ## Features
17
18
 
18
- * **Link Validation**: Extracts URLs and pings them (HEAD request) to ensure they are 200 OK.
19
- * **Code Safety Scanning**: Detects `eval()`, `child_process`, and other dangerous Node.js/JS patterns.
20
- * **Quote Verification**: Fuzzy-matching to verify that a quoted string actually exists in the provided context source.
19
+ * **Link Integrity**: Extracts URLs and sends an async HEAD request to each one, reporting any that fail to connect or return a non-2xx status.
20
+ * **Code Safety**: Scans generated code for unsafe JavaScript and Node.js patterns (`eval`, `child_process`, `document.write`).
21
+ * **Fuzzy Quote Verification**: Verifies if a quoted string exists in the source text, tolerant of minor AI alterations or typos.
21
22
 
22
23
  ## Installation
23
24
 
@@ -27,22 +28,22 @@ npm install hallucination-validator
27
28
 
28
29
  ## Usage
29
30
 
30
- ```javascript
31
- const HallucinationValidator = require('hallucination-validator');
31
+ ```typescript
32
+ import HallucinationValidator from 'hallucination-validator';
32
33
 
33
34
  const validator = new HallucinationValidator();
34
35
 
35
36
  // 1. Validate Links
36
37
  const text = "Check out https://valid.com and http://broken.link/404";
37
38
  validator.validateLinks(text).then(broken => {
38
- console.log(broken);
39
+ console.log('Broken Links:', broken);
39
40
  // [{ url: 'http://broken.link/404', status: 404, ... }]
40
41
  });
41
42
 
42
43
  // 2. Scan Code
43
- const code = "function run() { eval(input); }";
44
- const risks = validator.scanCodeSafety(code);
45
- console.log(risks);
44
+ const script = "function run() { eval(input); }";
45
+ const risks = validator.scanCodeSafety(script);
46
+ console.log('Risks:', risks);
46
47
  // ['eval()']
47
48
 
48
49
  // 3. Verify Quotes
@@ -50,15 +51,15 @@ const source = "The quick brown fox jumps over the lazy dog.";
50
51
  const aiQuote = "The quick brown fox jumped over a lazy cat.";
51
52
 
52
53
  const isValid = validator.verifyQuote(aiQuote, source);
53
- console.log(isValid);
54
- // false (too different)
54
+ console.log('Quote Valid:', isValid);
55
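+ // true with the default threshold of 0.2 (the quote is within 20% edit distance of the source); pass a lower threshold to reject near-misses like this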
55
56
  ```
56
57
 
57
58
  ## Dependencies
58
-
59
- * `fast-levenshtein`: For robust string comparison.
60
- * **Node 18+**: uses native `fetch`.
59
+ * `fast-levenshtein`: For robust string comparison algorithms.
60
+ * **Node.js 18+**: Requires native `fetch` API.
61
61
 
62
62
  ## License
63
63
 
64
- MIT
64
+ MIT © Godfrey Lebo
65
+
@@ -0,0 +1,30 @@
1
+ declare class HallucinationValidator {
2
+ /**
3
+ * Scans text for URLs and verifies they are reachable (200 OK).
4
+ * @param {string} text
5
+ * @returns {Promise<Array<HallucinationValidator.BrokenLink>>} List of broken links
6
+ */
7
+ validateLinks(text: string): Promise<Array<HallucinationValidator.BrokenLink>>;
8
+ /**
9
+ * Scans code snippets for potentially dangerous patterns often hallucinated.
10
+ * @param {string} code
11
+ * @returns {Array<string>} List of found dangerous patterns
12
+ */
13
+ scanCodeSafety(code: string): Array<string>;
14
+ /**
15
+ * Verifies if a quote exists within a source context, allowing for typo tolerance.
16
+ * @param {string} quote
17
+ * @param {string} context
18
+ * @param {number} threshold - Allowed matching distance ratio (0.0 to 1.0)
19
+ * @returns {boolean}
20
+ */
21
+ verifyQuote(quote: string, context: string, threshold?: number): boolean;
22
+ }
23
+ declare namespace HallucinationValidator {
24
+ interface BrokenLink {
25
+ url: string;
26
+ status: number;
27
+ error?: string;
28
+ }
29
+ }
30
+ export = HallucinationValidator;
@@ -1,40 +1,35 @@
1
- const levenshtein = require('fast-levenshtein');
2
-
1
+ "use strict";
2
+ const levenshtein = require("fast-levenshtein");
3
3
  class HallucinationValidator {
4
-
5
4
  /**
6
5
  * Scans text for URLs and verifies they are reachable (200 OK).
7
- * @param {string} text
8
- * @returns {Promise<Array<{url: string, status: number, error: string}>>} List of broken links
6
+ * @param {string} text
7
+ * @returns {Promise<Array<HallucinationValidator.BrokenLink>>} List of broken links
9
8
  */
10
9
  async validateLinks(text) {
11
10
  const urlRegex = /https?:\/\/[^\s)]+/g;
12
11
  const matches = text.match(urlRegex) || [];
13
12
  const brokenLinks = [];
14
-
15
13
  // De-duplicate URLs
16
14
  const uniqueUrls = [...new Set(matches)];
17
-
18
15
  for (const url of uniqueUrls) {
19
16
  // Remove trailing punctuation often captured by regex
20
17
  const cleanUrl = url.replace(/[.,;]$/, '');
21
-
22
18
  try {
23
19
  const response = await fetch(cleanUrl, { method: 'HEAD' });
24
20
  if (!response.ok) {
25
21
  brokenLinks.push({ url: cleanUrl, status: response.status, error: 'Non-200 status' });
26
22
  }
27
- } catch (err) {
23
+ }
24
+ catch (err) {
28
25
  brokenLinks.push({ url: cleanUrl, status: 0, error: err.message });
29
26
  }
30
27
  }
31
-
32
28
  return brokenLinks;
33
29
  }
34
-
35
30
  /**
36
31
  * Scans code snippets for potentially dangerous patterns often hallucinated.
37
- * @param {string} code
32
+ * @param {string} code
38
33
  * @returns {Array<string>} List of found dangerous patterns
39
34
  */
40
35
  scanCodeSafety(code) {
@@ -46,7 +41,6 @@ class HallucinationValidator {
46
41
  { pattern: /innerHTML/g, name: 'innerHTML usage' },
47
42
  { pattern: /document\.write/g, name: 'document.write' }
48
43
  ];
49
-
50
44
  const findings = [];
51
45
  for (const { pattern, name } of dangerousPatterns) {
52
46
  if (pattern.test(code)) {
@@ -55,71 +49,43 @@ class HallucinationValidator {
55
49
  }
56
50
  return findings;
57
51
  }
58
-
59
52
  /**
60
53
  * Verifies if a quote exists within a source context, allowing for typo tolerance.
61
- * @param {string} quote
62
- * @param {string} context
54
+ * @param {string} quote
55
+ * @param {string} context
63
56
  * @param {number} threshold - Allowed matching distance ratio (0.0 to 1.0)
64
57
  * @returns {boolean}
65
58
  */
66
59
  verifyQuote(quote, context, threshold = 0.2) {
67
- if (!quote || !context) return false;
68
-
60
+ if (!quote || !context)
61
+ return false;
69
62
  // 1. Direct inclusion check (fastest)
70
- if (context.includes(quote)) return true;
71
-
63
+ if (context.includes(quote))
64
+ return true;
72
65
  // 2. Normalized inclusion check (ignore case/punctuation)
73
66
  const normalize = (s) => s.toLowerCase().replace(/[^\w\s]/g, '').replace(/\s+/g, ' ');
74
67
  const nQuote = normalize(quote);
75
68
  const nContext = normalize(context);
76
-
77
- if (nContext.includes(nQuote)) return true;
78
-
69
+ if (nContext.includes(nQuote))
70
+ return true;
79
71
  // 3. Fuzzy match
80
- // We scan the context for a window of text similar in length to the quote
81
- // and check Levenshtein distance. This is O(N*M) worst case but valid for verification.
82
72
  const qLen = nQuote.length;
83
73
  const cLen = nContext.length;
84
-
85
- // Optimization: Don't scan if size diff is huge or context smaller logic
86
- if (cLen < qLen * (1 - threshold)) return false;
87
-
88
- // Sliding window scan
89
- // To be performant, we only check windows starting at word boundaries
90
- // or just brute force every N chars if high precision needed.
91
- // For "Senior Dev" approach: Let's use a simplified heuristic.
92
- // If the context is massive, real fuzzy search is complex.
93
- // We will do a character-by-character validation for small/medium texts.
94
-
95
- // WindowStep: 1 char is safest.
96
- // Limit: This is sync and blocking.
97
- // If usage assumes short texts (like checking a paragraph), this is fine.
98
-
74
+ if (cLen < qLen * (1 - threshold))
75
+ return false;
99
76
  let minDistance = Infinity;
100
-
101
- // Safety cap: if context > 10kb, maybe warn or skip fuzzy?
102
- // Let's implement looking for the best window.
103
-
104
77
  for (let i = 0; i <= cLen - qLen; i++) {
105
- // Heuristic: check first char matches to skip redundant calc
106
- if (nContext[i] !== nQuote[0]) continue;
107
-
78
+ if (nContext[i] !== nQuote[0])
79
+ continue;
108
80
  const window = nContext.substring(i, i + qLen);
109
81
  const dist = levenshtein.get(nQuote, window);
110
82
  if (dist < minDistance) {
111
83
  minDistance = dist;
112
84
  }
113
- if (minDistance / qLen <= threshold) return true;
85
+ if (minDistance / qLen <= threshold)
86
+ return true;
114
87
  }
115
-
116
- // Check allows for slightly larger or smaller windows?
117
- // nQuote might be missing a word, so window size in context might vary.
118
- // But `verifyQuote` usually implies checking if the STATED quote is in source.
119
- // So checking strict length window is a fair approximation for "is this string present".
120
-
121
88
  return (minDistance / qLen) <= threshold;
122
89
  }
123
90
  }
124
-
125
91
  module.exports = HallucinationValidator;
package/package.json CHANGED
@@ -1,10 +1,16 @@
1
1
  {
2
2
  "name": "hallucination-validator",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Validates AI outputs for linkrot, dangerous code, and hallucinations.",
5
- "main": "index.js",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "files": [
8
+ "dist"
9
+ ],
6
10
  "scripts": {
7
- "test": "node --test"
11
+ "build": "rimraf dist && tsc",
12
+ "prepublishOnly": "npm run build",
13
+ "test": "npm run build && node --test"
8
14
  },
9
15
  "keywords": [
10
16
  "ai",
@@ -16,10 +22,20 @@
16
22
  ],
17
23
  "author": "Godfrey Lebo <emorylebo@gmail.com>",
18
24
  "license": "MIT",
25
+ "repository": {
26
+ "type": "git",
27
+ "url": "git+https://github.com/emorilebo/hallucination-validator.git"
28
+ },
19
29
  "dependencies": {
20
30
  "fast-levenshtein": "^3.0.0"
21
31
  },
22
32
  "engines": {
23
33
  "node": ">=18.0.0"
34
+ },
35
+ "devDependencies": {
36
+ "@types/fast-levenshtein": "^0.0.4",
37
+ "@types/node": "^25.0.3",
38
+ "rimraf": "^6.1.2",
39
+ "typescript": "^5.9.3"
24
40
  }
25
- }
41
+ }
@@ -1,57 +0,0 @@
1
- const { test, mock } = require('node:test');
2
- const assert = require('node:assert');
3
- const HallucinationValidator = require('../index.js');
4
-
5
- test('validateLinks detects broken links', async (t) => {
6
- // Mock global fetch
7
- const originalFetch = global.fetch;
8
-
9
- global.fetch = async (url) => {
10
- if (url.includes('google.com')) {
11
- return { ok: true, status: 200 };
12
- }
13
- if (url.includes('broken.link')) {
14
- return { ok: false, status: 404 };
15
- }
16
- throw new Error('Network error');
17
- };
18
-
19
- const validator = new HallucinationValidator();
20
- const text = "Check this out: https://google.com and https://broken.link/resource.";
21
-
22
- const results = await validator.validateLinks(text);
23
-
24
- assert.strictEqual(results.length, 1);
25
- assert.strictEqual(results[0].url, 'https://broken.link/resource');
26
- assert.strictEqual(results[0].status, 404);
27
-
28
- // Restore fetch
29
- global.fetch = originalFetch;
30
- });
31
-
32
- test('scanCodeSafety finds dangerous coding patterns', (t) => {
33
- const validator = new HallucinationValidator();
34
- const unsafeCode = `
35
- function bad() {
36
- eval("alert('hacked')");
37
- const cp = require('child_process');
38
- }
39
- `;
40
-
41
- const findings = validator.scanCodeSafety(unsafeCode);
42
- assert.ok(findings.includes('eval()'));
43
- assert.ok(findings.includes('child_process'));
44
- assert.strictEqual(findings.length, 2);
45
- });
46
-
47
- test('verifyQuote confirms existence of fuzzy quote', (t) => {
48
- const validator = new HallucinationValidator();
49
- const context = "The quick brown fox jumps over the lazy dog.";
50
- const exactQuote = "quick brown fox";
51
- const fuzzyQuote = "quick brwn fox"; // typo
52
- const wrongQuote = "lazy cat";
53
-
54
- assert.strictEqual(validator.verifyQuote(exactQuote, context), true);
55
- assert.strictEqual(validator.verifyQuote(fuzzyQuote, context, 0.3), true);
56
- assert.strictEqual(validator.verifyQuote(wrongQuote, context), false);
57
- });