bekindprofanityfilter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/CONTRIBUTORS.md +106 -0
  2. package/LICENSE +22 -0
  3. package/README.md +1015 -0
  4. package/allprofanity.config.example.json +35 -0
  5. package/bin/init.js +49 -0
  6. package/config.schema.json +163 -0
  7. package/dist/algos/aho-corasick.d.ts +75 -0
  8. package/dist/algos/aho-corasick.js +238 -0
  9. package/dist/algos/aho-corasick.js.map +1 -0
  10. package/dist/algos/bloom-filter.d.ts +103 -0
  11. package/dist/algos/bloom-filter.js +208 -0
  12. package/dist/algos/bloom-filter.js.map +1 -0
  13. package/dist/algos/context-patterns.d.ts +102 -0
  14. package/dist/algos/context-patterns.js +484 -0
  15. package/dist/algos/context-patterns.js.map +1 -0
  16. package/dist/index.d.ts +1332 -0
  17. package/dist/index.js +2631 -0
  18. package/dist/index.js.map +1 -0
  19. package/dist/innocence-scoring.d.ts +23 -0
  20. package/dist/innocence-scoring.js +118 -0
  21. package/dist/innocence-scoring.js.map +1 -0
  22. package/dist/language-detector.d.ts +162 -0
  23. package/dist/language-detector.js +952 -0
  24. package/dist/language-detector.js.map +1 -0
  25. package/dist/language-dicts.d.ts +60 -0
  26. package/dist/language-dicts.js +2718 -0
  27. package/dist/language-dicts.js.map +1 -0
  28. package/dist/languages/arabic-words.d.ts +10 -0
  29. package/dist/languages/arabic-words.js +1649 -0
  30. package/dist/languages/arabic-words.js.map +1 -0
  31. package/dist/languages/bengali-words.d.ts +10 -0
  32. package/dist/languages/bengali-words.js +1696 -0
  33. package/dist/languages/bengali-words.js.map +1 -0
  34. package/dist/languages/brazilian-words.d.ts +10 -0
  35. package/dist/languages/brazilian-words.js +2122 -0
  36. package/dist/languages/brazilian-words.js.map +1 -0
  37. package/dist/languages/chinese-words.d.ts +10 -0
  38. package/dist/languages/chinese-words.js +2728 -0
  39. package/dist/languages/chinese-words.js.map +1 -0
  40. package/dist/languages/english-primary-all-languages.d.ts +23 -0
  41. package/dist/languages/english-primary-all-languages.js +36894 -0
  42. package/dist/languages/english-primary-all-languages.js.map +1 -0
  43. package/dist/languages/english-words.d.ts +5 -0
  44. package/dist/languages/english-words.js +5156 -0
  45. package/dist/languages/english-words.js.map +1 -0
  46. package/dist/languages/french-words.d.ts +10 -0
  47. package/dist/languages/french-words.js +2326 -0
  48. package/dist/languages/french-words.js.map +1 -0
  49. package/dist/languages/german-words.d.ts +10 -0
  50. package/dist/languages/german-words.js +2633 -0
  51. package/dist/languages/german-words.js.map +1 -0
  52. package/dist/languages/hindi-words.d.ts +10 -0
  53. package/dist/languages/hindi-words.js +2341 -0
  54. package/dist/languages/hindi-words.js.map +1 -0
  55. package/dist/languages/innocent-words.d.ts +41 -0
  56. package/dist/languages/innocent-words.js +109 -0
  57. package/dist/languages/innocent-words.js.map +1 -0
  58. package/dist/languages/italian-words.d.ts +10 -0
  59. package/dist/languages/italian-words.js +2287 -0
  60. package/dist/languages/italian-words.js.map +1 -0
  61. package/dist/languages/japanese-words.d.ts +11 -0
  62. package/dist/languages/japanese-words.js +2557 -0
  63. package/dist/languages/japanese-words.js.map +1 -0
  64. package/dist/languages/korean-words.d.ts +10 -0
  65. package/dist/languages/korean-words.js +2509 -0
  66. package/dist/languages/korean-words.js.map +1 -0
  67. package/dist/languages/russian-words.d.ts +10 -0
  68. package/dist/languages/russian-words.js +2175 -0
  69. package/dist/languages/russian-words.js.map +1 -0
  70. package/dist/languages/spanish-words.d.ts +11 -0
  71. package/dist/languages/spanish-words.js +2536 -0
  72. package/dist/languages/spanish-words.js.map +1 -0
  73. package/dist/languages/tamil-words.d.ts +10 -0
  74. package/dist/languages/tamil-words.js +1722 -0
  75. package/dist/languages/tamil-words.js.map +1 -0
  76. package/dist/languages/telugu-words.d.ts +10 -0
  77. package/dist/languages/telugu-words.js +1739 -0
  78. package/dist/languages/telugu-words.js.map +1 -0
  79. package/dist/romanization-detector.d.ts +50 -0
  80. package/dist/romanization-detector.js +779 -0
  81. package/dist/romanization-detector.js.map +1 -0
  82. package/package.json +79 -0
@@ -0,0 +1,35 @@
1
+ {
2
+ "$schema": "./config.schema.json",
3
+ "algorithm": {
4
+ "matching": "hybrid",
5
+ "useAhoCorasick": true,
6
+ "useBloomFilter": true,
7
+ "useContextAnalysis": true
8
+ },
9
+ "bloomFilter": {
10
+ "enabled": true,
11
+ "expectedItems": 10000,
12
+ "falsePositiveRate": 0.01
13
+ },
14
+ "ahoCorasick": {
15
+ "enabled": true,
16
+ "prebuild": true
17
+ },
18
+ "contextAnalysis": {
19
+ "enabled": true,
20
+ "contextWindow": 50,
21
+ "languages": ["en"],
22
+ "scoreThreshold": 0.5
23
+ },
24
+ "profanityDetection": {
25
+ "enableLeetSpeak": true,
26
+ "caseSensitive": false,
27
+ "strictMode": false,
28
+ "detectPartialWords": false,
29
+ "defaultPlaceholder": "*"
30
+ },
31
+ "performance": {
32
+ "cacheSize": 1000,
33
+ "enableCaching": true
34
+ }
35
+ }
package/bin/init.js ADDED
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync, writeFileSync, existsSync } from 'fs';
4
+ import { fileURLToPath } from 'url';
5
+ import { dirname, join } from 'path';
6
+
7
+ const __filename = fileURLToPath(import.meta.url);
8
+ const __dirname = dirname(__filename);
9
+
10
// Names of the files this script creates in the current working directory.
const configFileName = 'allprofanity.config.json';
const schemaFileName = 'config.schema.json';
// Name this package is published under; used in the printed import example so
// that the instructions point at this package rather than a different one.
const packageName = 'bekindprofanityfilter';

// Refuse to replace a configuration the user already has.
if (existsSync(configFileName)) {
  console.log(`❌ ${configFileName} already exists in current directory`);
  console.log(' Delete it first or use a different name');
  process.exit(1);
}

try {
  // The example config and the schema ship one directory above this script.
  const examplePath = join(__dirname, '..', 'allprofanity.config.example.json');
  const schemaPath = join(__dirname, '..', 'config.schema.json');

  // Read both sources before writing anything, so a missing source file does
  // not leave a half-initialised directory behind.
  const configContent = readFileSync(examplePath, 'utf-8');
  const schemaContent = readFileSync(schemaPath, 'utf-8');

  // 'wx' creates the file exclusively: if the config appeared between the
  // existsSync check above and this write, the call fails instead of
  // silently overwriting it.
  writeFileSync(configFileName, configContent, { flag: 'wx' });
  writeFileSync(schemaFileName, schemaContent);

  console.log('✅ BeKind configuration files created!');
  console.log('');
  console.log('Created files:');
  console.log(` 📄 ${configFileName} - Main configuration`);
  console.log(` 📄 ${schemaFileName} - JSON schema for IDE autocomplete`);
  console.log('');
  console.log('Next steps:');
  console.log(' 1. Edit allprofanity.config.json to customize settings');
  console.log(' 2. Import and use:');
  console.log('');
  console.log(` import { BeKind } from "${packageName}";`);
  console.log(' import config from "./allprofanity.config.json";');
  console.log(' const filter = BeKind.fromConfig(config);');
  console.log('');
} catch (error) {
  // Anything can be thrown in JavaScript; only Error instances carry .message.
  const reason = error instanceof Error ? error.message : String(error);
  console.error('❌ Error creating config files:', reason);
  process.exit(1);
}
@@ -0,0 +1,163 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "title": "BeKind Configuration",
4
+ "description": "Configuration schema for BeKind advanced algorithms and detection settings",
5
+ "type": "object",
6
+ "properties": {
7
+ "algorithm": {
8
+ "type": "object",
9
+ "description": "Algorithm selection and configuration",
10
+ "properties": {
11
+ "matching": {
12
+ "type": "string",
13
+ "enum": ["trie", "aho-corasick", "hybrid"],
14
+ "default": "trie",
15
+ "description": "Primary matching algorithm: trie (default), aho-corasick (faster for many patterns), or hybrid"
16
+ },
17
+ "useAhoCorasick": {
18
+ "type": "boolean",
19
+ "default": false,
20
+ "description": "Enable Aho-Corasick algorithm for multi-pattern matching"
21
+ },
22
+ "useBloomFilter": {
23
+ "type": "boolean",
24
+ "default": false,
25
+ "description": "Enable Bloom Filter for fast pre-filtering (quickly rules out words that are definitely not in the dictionary)"
26
+ },
27
+ "useContextAnalysis": {
28
+ "type": "boolean",
29
+ "default": false,
30
+ "description": "Enable context-aware analysis to reduce false positives"
31
+ }
32
+ },
33
+ "required": ["matching"]
34
+ },
35
+ "bloomFilter": {
36
+ "type": "object",
37
+ "description": "Bloom filter configuration",
38
+ "properties": {
39
+ "enabled": {
40
+ "type": "boolean",
41
+ "default": false,
42
+ "description": "Enable/disable Bloom filter"
43
+ },
44
+ "expectedItems": {
45
+ "type": "number",
46
+ "minimum": 100,
47
+ "maximum": 1000000,
48
+ "default": 10000,
49
+ "description": "Expected number of items in the filter"
50
+ },
51
+ "falsePositiveRate": {
52
+ "type": "number",
53
+ "minimum": 0.001,
54
+ "maximum": 0.1,
55
+ "default": 0.01,
56
+ "description": "Acceptable false positive rate (0.01 = 1%)"
57
+ }
58
+ }
59
+ },
60
+ "ahoCorasick": {
61
+ "type": "object",
62
+ "description": "Aho-Corasick algorithm configuration",
63
+ "properties": {
64
+ "enabled": {
65
+ "type": "boolean",
66
+ "default": false,
67
+ "description": "Enable/disable Aho-Corasick algorithm"
68
+ },
69
+ "prebuild": {
70
+ "type": "boolean",
71
+ "default": true,
72
+ "description": "Prebuild the automaton at initialization (recommended for production)"
73
+ }
74
+ }
75
+ },
76
+ "contextAnalysis": {
77
+ "type": "object",
78
+ "description": "Context-aware analysis configuration",
79
+ "properties": {
80
+ "enabled": {
81
+ "type": "boolean",
82
+ "default": false,
83
+ "description": "Enable/disable context analysis"
84
+ },
85
+ "contextWindow": {
86
+ "type": "number",
87
+ "minimum": 10,
88
+ "maximum": 200,
89
+ "default": 50,
90
+ "description": "Number of characters to analyze before/after match"
91
+ },
92
+ "languages": {
93
+ "type": "array",
94
+ "items": {
95
+ "type": "string",
96
+ "enum": ["en", "hi", "fr", "de", "es", "bn", "ta", "te", "*"]
97
+ },
98
+ "default": ["en"],
99
+ "description": "Languages for context pattern matching"
100
+ },
101
+ "scoreThreshold": {
102
+ "type": "number",
103
+ "minimum": 0,
104
+ "maximum": 1,
105
+ "default": 0.5,
106
+ "description": "Minimum confidence score to flag as profanity (0-1)"
107
+ }
108
+ }
109
+ },
110
+ "profanityDetection": {
111
+ "type": "object",
112
+ "description": "Core profanity detection settings",
113
+ "properties": {
114
+ "enableLeetSpeak": {
115
+ "type": "boolean",
116
+ "default": true,
117
+ "description": "Enable leet-speak normalization (f#ck, 4ss, etc.)"
118
+ },
119
+ "caseSensitive": {
120
+ "type": "boolean",
121
+ "default": false,
122
+ "description": "Enable case-sensitive matching"
123
+ },
124
+ "strictMode": {
125
+ "type": "boolean",
126
+ "default": false,
127
+ "description": "Require word boundaries for matches"
128
+ },
129
+ "detectPartialWords": {
130
+ "type": "boolean",
131
+ "default": false,
132
+ "description": "Detect profanity within larger words"
133
+ },
134
+ "defaultPlaceholder": {
135
+ "type": "string",
136
+ "minLength": 1,
137
+ "maxLength": 1,
138
+ "default": "*",
139
+ "description": "Default character for censoring"
140
+ }
141
+ }
142
+ },
143
+ "performance": {
144
+ "type": "object",
145
+ "description": "Performance optimization settings",
146
+ "properties": {
147
+ "cacheSize": {
148
+ "type": "number",
149
+ "minimum": 0,
150
+ "maximum": 100000,
151
+ "default": 1000,
152
+ "description": "Size of result cache (0 = disabled)"
153
+ },
154
+ "enableCaching": {
155
+ "type": "boolean",
156
+ "default": false,
157
+ "description": "Enable result caching for repeated checks"
158
+ }
159
+ }
160
+ }
161
+ },
162
+ "required": ["algorithm", "profanityDetection"]
163
+ }
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Aho-Corasick algorithm implementation for efficient multi-pattern string matching
3
+ */
4
+ export interface Match { // One occurrence of a pattern found in the searched text.
5
+ pattern: string; // The pattern text that was found.
6
+ start: number; // Index in the searched text where the occurrence begins (inclusive).
7
+ end: number; // Index just past the last matched character (exclusive), so end - start equals pattern.length.
8
+ patternIndex: number; // Position of the pattern in the automaton's pattern list (see getPatterns()).
9
+ }
10
+ export declare class AhoCorasick { // Multi-pattern matcher: patterns are stored in a list and compiled into an automaton before searching.
11
+ private root;
12
+ private patterns;
13
+ private compiled;
14
+ constructor(patterns?: string[]); // Copies the given patterns; the automaton is built immediately when at least one pattern is supplied.
15
+ /**
16
+ * Create a new trie node
+ * (no children, empty output lists, no failure link, not marked as a pattern end).
17
+ */
18
+ private createNode;
19
+ /**
20
+ * Add patterns to the automaton
+ *
+ * The patterns are appended to the pattern list and the automaton is marked as
+ * out of date; it is rebuilt by the next findAll(), hasMatch() or findFirst() call.
21
+ */
22
+ addPatterns(patterns: string[]): void;
23
+ /**
24
+ * Add a single pattern to the automaton
+ *
+ * An empty pattern is ignored. Otherwise the pattern is appended and the
+ * automaton is marked as out of date, to be rebuilt by the next search call.
25
+ */
26
+ addPattern(pattern: string): void;
27
+ /**
28
+ * Build the Aho-Corasick automaton
+ *
+ * Runs the three build steps in order (trie, failure links, output links)
+ * and then marks the automaton as compiled.
29
+ */
30
+ private buildAutomaton;
31
+ /**
32
+ * Build the trie structure
+ *
+ * Starts from a fresh root node and inserts every stored pattern, recording the
+ * pattern and its list index on the node where the pattern ends.
33
+ */
34
+ private buildTrie;
35
+ /**
36
+ * Build failure links using BFS
+ *
+ * Children of the root fail to the root; deeper nodes follow their parent's
+ * failure chain until a node with the same outgoing character is found.
37
+ */
38
+ private buildFailureLinks;
39
+ /**
40
+ * Build output links for failure transitions
+ *
+ * Appends the outputs of each node's failure target to the node's own outputs,
+ * so every pattern ending at a position can be read from a single node.
41
+ */
42
+ private buildOutputLinks;
43
+ /**
44
+ * Find all pattern matches in the given text
+ *
+ * Rebuilds the automaton first if patterns changed since the last build.
+ * Matches are reported in the order their end positions occur in the text;
+ * overlapping matches are all included.
45
+ */
46
+ findAll(text: string): Match[];
47
+ /**
48
+ * Check if text contains any patterns
+ *
+ * Rebuilds the automaton first if needed and stops scanning at the first hit.
49
+ */
50
+ hasMatch(text: string): boolean;
51
+ /**
52
+ * Find first match in text
+ *
+ * Returns the match whose end position comes earliest in the text, or null
+ * when no pattern occurs. If several patterns end at that position, the first
+ * entry of that node's output list is returned.
53
+ */
54
+ findFirst(text: string): Match | null;
55
+ /**
56
+ * Get the patterns stored in this automaton
+ *
+ * Returns a copy; modifying the returned array does not affect the automaton.
57
+ */
58
+ getPatterns(): string[];
59
+ /**
60
+ * Clear all patterns and reset the automaton
61
+ */
62
+ clear(): void;
63
+ /**
64
+ * Get statistics about the automaton
+ *
+ * nodeCount includes the root node; averagePatternLength is 0 when no
+ * patterns are stored.
65
+ */
66
+ getStats(): {
67
+ patternCount: number;
68
+ nodeCount: number;
69
+ averagePatternLength: number;
70
+ };
71
+ /**
72
+ * Count total nodes in the trie
+ * (the given node plus all of its descendants).
73
+ */
74
+ private countNodes;
75
+ }
@@ -0,0 +1,238 @@
1
/**
 * Aho-Corasick algorithm implementation for efficient multi-pattern string matching.
 *
 * Patterns and searched text are both processed one UTF-16 code unit at a
 * time. Keeping the two sides on the same unit is what lets patterns that
 * contain characters outside the Basic Multilingual Plane (e.g. emoji) match,
 * and it means the `start`/`end` offsets of a match are ordinary string
 * indices: `text.slice(match.start, match.end) === match.pattern`.
 */
export class AhoCorasick {
  /** True while the automaton reflects the current pattern list. */
  compiled = false;
  /** Patterns in insertion order; a match's `patternIndex` indexes this list. */
  patterns = [];
  /** Root node of the trie / automaton. */
  root = this.createNode();

  /**
   * @param patterns Initial patterns. They are copied, and the automaton is
   *   built right away when at least one pattern is given.
   */
  constructor(patterns = []) {
    this.patterns = Array.from(patterns);
    if (this.patterns.length > 0) {
      this.buildAutomaton();
    }
  }

  /**
   * Create a new trie node
   */
  createNode() {
    return {
      children: new Map(),
      output: [],
      outputIndices: [],
      failure: null,
      isEndOfPattern: false,
    };
  }

  /**
   * Add patterns to the automaton.
   *
   * The patterns are appended as given (so `patternIndex` values stay aligned
   * with the order of insertion) and the automaton is rebuilt lazily by the
   * next search. Empty strings stay in the list but never produce matches.
   */
  addPatterns(patterns) {
    // A plain loop instead of push(...patterns): spreading a very large word
    // list into call arguments can exceed the engine's argument limit.
    for (const pattern of patterns) {
      this.patterns.push(pattern);
    }
    this.compiled = false;
  }

  /**
   * Add a single pattern to the automaton. Empty patterns are ignored.
   */
  addPattern(pattern) {
    if (pattern && pattern.length > 0) {
      this.patterns.push(pattern);
      this.compiled = false;
    }
  }

  /**
   * Build the Aho-Corasick automaton: trie, then failure links, then output links.
   */
  buildAutomaton() {
    this.buildTrie();
    this.buildFailureLinks();
    this.buildOutputLinks();
    this.compiled = true;
  }

  /**
   * Rebuild the automaton if patterns changed since the last build.
   */
  ensureCompiled() {
    if (!this.compiled) {
      this.buildAutomaton();
    }
  }

  /**
   * Build the trie structure from the stored patterns.
   */
  buildTrie() {
    this.root = this.createNode();
    for (let index = 0; index < this.patterns.length; index++) {
      const pattern = this.patterns[index];
      // An empty pattern would turn the root into an output node and make
      // every position of every text "match"; skip it.
      if (pattern.length === 0) {
        continue;
      }
      let node = this.root;
      // Index by code unit (not `for...of`, which walks code points) so the
      // trie keys are the same units the search loops read via text[i].
      for (let k = 0; k < pattern.length; k++) {
        const unit = pattern[k];
        let next = node.children.get(unit);
        if (next === undefined) {
          next = this.createNode();
          node.children.set(unit, next);
        }
        node = next;
      }
      node.isEndOfPattern = true;
      node.output.push(pattern);
      node.outputIndices.push(index);
    }
  }

  /**
   * Build failure links using BFS
   */
  buildFailureLinks() {
    const queue = [];
    // Depth-1 nodes always fall back to the root.
    for (const child of this.root.children.values()) {
      child.failure = this.root;
      queue.push(child);
    }
    // Walk the queue with a read index; Array#shift would re-pack the array
    // on every step and make the traversal quadratic for large dictionaries.
    for (let head = 0; head < queue.length; head++) {
      const node = queue[head];
      for (const [unit, child] of node.children) {
        queue.push(child);
        let fallback = node.failure;
        while (fallback !== null && !fallback.children.has(unit)) {
          fallback = fallback.failure;
        }
        child.failure = fallback !== null ? fallback.children.get(unit) : this.root;
      }
    }
  }

  /**
   * Build output links for failure transitions
   */
  buildOutputLinks() {
    const queue = Array.from(this.root.children.values());
    for (let head = 0; head < queue.length; head++) {
      const node = queue[head];
      // BFS order guarantees the failure target (which is shallower) already
      // holds its complete output list when it is copied here.
      const fallback = node.failure;
      if (fallback && fallback.output.length > 0) {
        for (let k = 0; k < fallback.output.length; k++) {
          node.output.push(fallback.output[k]);
          node.outputIndices.push(fallback.outputIndices[k]);
        }
      }
      for (const child of node.children.values()) {
        queue.push(child);
      }
    }
  }

  /**
   * Advance the automaton by one code unit.
   *
   * @returns the state reached from `state` on `unit`; the root when no
   *   transition exists anywhere along the failure chain.
   */
  step(state, unit) {
    while (state !== this.root && !state.children.has(unit)) {
      state = state.failure;
    }
    const next = state.children.get(unit);
    return next !== undefined ? next : state;
  }

  /**
   * Find all pattern matches in the given text.
   *
   * @returns every occurrence (overlaps included), ordered by end position.
   *   `start` is inclusive and `end` exclusive.
   */
  findAll(text) {
    this.ensureCompiled();
    const matches = [];
    let state = this.root;
    for (let i = 0; i < text.length; i++) {
      state = this.step(state, text[i]);
      // Report all patterns that end at this position
      for (let j = 0; j < state.output.length; j++) {
        const pattern = state.output[j];
        matches.push({
          pattern,
          start: i - pattern.length + 1,
          end: i + 1,
          patternIndex: state.outputIndices[j],
        });
      }
    }
    return matches;
  }

  /**
   * Check if text contains any patterns
   */
  hasMatch(text) {
    this.ensureCompiled();
    let state = this.root;
    for (let i = 0; i < text.length; i++) {
      state = this.step(state, text[i]);
      if (state.output.length > 0) {
        return true;
      }
    }
    return false;
  }

  /**
   * Find first match in text.
   *
   * @returns the match with the earliest end position, or null when no
   *   pattern occurs in the text.
   */
  findFirst(text) {
    this.ensureCompiled();
    let state = this.root;
    for (let i = 0; i < text.length; i++) {
      state = this.step(state, text[i]);
      if (state.output.length > 0) {
        const pattern = state.output[0];
        return {
          pattern,
          start: i - pattern.length + 1,
          end: i + 1,
          patternIndex: state.outputIndices[0],
        };
      }
    }
    return null;
  }

  /**
   * Get a copy of the patterns stored in this automaton
   */
  getPatterns() {
    return [...this.patterns];
  }

  /**
   * Clear all patterns and reset the automaton
   */
  clear() {
    this.patterns = [];
    this.root = this.createNode();
    this.compiled = false;
  }

  /**
   * Get statistics about the automaton.
   *
   * The automaton is brought up to date first so `nodeCount` describes the
   * current pattern list rather than the trie of an earlier build.
   */
  getStats() {
    this.ensureCompiled();
    const patternCount = this.patterns.length;
    let totalLength = 0;
    for (const pattern of this.patterns) {
      totalLength += pattern.length;
    }
    return {
      patternCount,
      nodeCount: this.countNodes(this.root),
      averagePatternLength: patternCount > 0 ? totalLength / patternCount : 0,
    };
  }

  /**
   * Count total nodes in the trie (the given node plus all descendants).
   */
  countNodes(node) {
    // Explicit stack instead of recursion so very long patterns cannot
    // exhaust the call stack.
    let count = 0;
    const stack = [node];
    while (stack.length > 0) {
      const current = stack.pop();
      count++;
      for (const child of current.children.values()) {
        stack.push(child);
      }
    }
    return count;
  }
}
238
+ //# sourceMappingURL=aho-corasick.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"aho-corasick.js","sourceRoot":"","sources":["../../src/algos/aho-corasick.ts"],"names":[],"mappings":"AAAA;;GAEG;AAiBH,MAAM,OAAO,WAAW;IAKtB,YAAY,WAAqB,EAAE;QAF3B,aAAQ,GAAY,KAAK,CAAC;QAGhC,IAAI,CAAC,QAAQ,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;QAC9B,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAC9B,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE;YACvB,IAAI,CAAC,cAAc,EAAE,CAAC;SACvB;IACH,CAAC;IAED;;OAEG;IACK,UAAU;QAChB,OAAO;YACL,QAAQ,EAAE,IAAI,GAAG,EAAoB;YACrC,MAAM,EAAE,EAAE;YACV,aAAa,EAAE,EAAE;YACjB,OAAO,EAAE,IAAI;YACb,cAAc,EAAE,KAAK;SACtB,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW,CAAC,QAAkB;QAC5B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,CAAC;QAChC,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,OAAe;QACxB,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE;YACjC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC5B,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;SACvB;IACH,CAAC;IAED;;OAEG;IACK,cAAc;QACpB,IAAI,CAAC,SAAS,EAAE,CAAC;QACjB,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACzB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACxB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;IACvB,CAAC;IAED;;OAEG;IACK,SAAS;QACf,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAE9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YACjC,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC;YAExB,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE;gBAC1B,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;oBAC/B,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;iBAC/C;gBACD,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;aACvC;YAED,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;YAC9B,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC7B,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;SAC/B;IACH,CAAC;IAED;;OAEG;IACK,iBAAiB;QACvB,MAAM,KAAK,GAAe,EAAE,CAAC;QAE7B,6CAA6C;QAC7C,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE;YAC/C,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC;YAC1B,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACnB;QAED,uCAAuC;QACvC,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;YACvB,MAA
M,OAAO,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;YAE/B,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,OAAO,CAAC,QAAQ,EAAE;gBAC5C,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAElB,IAAI,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;gBAC9B,OAAO,OAAO,KAAK,IAAI,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;oBACtD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;iBAC3B;gBAED,KAAK,CAAC,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;aACnE;SACF;IACH,CAAC;IAED;;OAEG;IACK,gBAAgB;QACtB,MAAM,KAAK,GAAe,EAAE,CAAC;QAE7B,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE;YAC/C,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;SACnB;QAED,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;YACvB,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,EAAG,CAAC;YAE/B,2CAA2C;YAC3C,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBACxD,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;gBAC/C,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;aAC9D;YAED,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE;gBAC7C,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aACnB;SACF;IACH,CAAC;IAED;;OAEG;IACH,OAAO,CAAC,IAAY;QAClB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;YAClB,IAAI,CAAC,cAAc,EAAE,CAAC;SACvB;QAED,MAAM,OAAO,GAAY,EAAE,CAAC;QAC5B,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC;QAExB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAErB,gEAAgE;YAChE,OAAO,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAC3D,OAAO,GAAG,OAAO,CAAC,OAAQ,CAAC;aAC5B;YAED,uCAAuC;YACvC,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAC9B,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;aACvC;YAED,gDAAgD;YAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC9C,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBAClC,MAAM,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;gBAC9C,MAAM,KAAK,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;gBAErC,OAAO,CAAC,IAAI,CAAC;oBACX,OAAO;oBACP,KA
AK;oBACL,GAAG,EAAE,CAAC,GAAG,CAAC;oBACV,YAAY;iBACb,CAAC,CAAC;aACJ;SACF;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,IAAY;QACnB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;YAClB,IAAI,CAAC,cAAc,EAAE,CAAC;SACvB;QAED,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC;QAExB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAErB,OAAO,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAC3D,OAAO,GAAG,OAAO,CAAC,OAAQ,CAAC;aAC5B;YAED,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAC9B,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;aACvC;YAED,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBAC7B,OAAO,IAAI,CAAC;aACb;SACF;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,IAAY;QACpB,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;YAClB,IAAI,CAAC,cAAc,EAAE,CAAC;SACvB;QAED,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC;QAExB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAErB,OAAO,OAAO,KAAK,IAAI,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAC3D,OAAO,GAAG,OAAO,CAAC,OAAQ,CAAC;aAC5B;YAED,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;gBAC9B,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;aACvC;YAED,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE;gBAC7B,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBAClC,MAAM,YAAY,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;gBAC9C,MAAM,KAAK,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;gBAErC,OAAO;oBACL,OAAO;oBACP,KAAK;oBACL,GAAG,EAAE,CAAC,GAAG,CAAC;oBACV,YAAY;iBACb,CAAC;aACH;SACF;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,QAAQ,GAAG,EAAE,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;IACxB,CAAC;IAED;;OAEG;IACH,QAAQ;QAKN,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,oBAAoB,GACxB,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC;Y
ACtB,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;gBACnD,IAAI,CAAC,QAAQ,CAAC,MAAM;YACtB,CAAC,CAAC,CAAC,CAAC;QAER,OAAO;YACL,YAAY,EAAE,IAAI,CAAC,QAAQ,CAAC,MAAM;YAClC,SAAS;YACT,oBAAoB;SACrB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,IAAc;QAC/B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE;YAC1C,KAAK,IAAI,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;SACjC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;CACF"}
@@ -0,0 +1,103 @@
1
+ /**
2
+ * Bloom Filter implementation for efficient set membership testing
3
+ */
4
+ export declare class BloomFilter { // NOTE(review): only the declarations are visible here; behavioural notes below are hedged where they go beyond the signatures.
5
+ private bitArray;
6
+ private size;
7
+ private hashCount;
8
+ private itemCount;
9
+ constructor(expectedItems: number, falsePositiveRate?: number); // falsePositiveRate is optional; its default is set in the implementation — TODO confirm the value.
10
+ /**
11
+ * Calculate optimal bit array size
12
+ */
13
+ private calculateOptimalSize;
14
+ /**
15
+ * Calculate optimal number of hash functions
16
+ */
17
+ private calculateOptimalHashCount;
18
+ /**
19
+ * Hash function 1 (FNV-1a variant)
20
+ */
21
+ private hash1;
22
+ /**
23
+ * Hash function 2 (djb2 variant)
24
+ */
25
+ private hash2;
26
+ /**
27
+ * Generate k hash values for an item using double hashing
+ * (presumably combining hash1 and hash2, with k being hashCount — confirm against the implementation).
28
+ */
29
+ private getHashes;
30
+ /**
31
+ * Set a bit in the bit array
32
+ */
33
+ private setBit;
34
+ /**
35
+ * Get a bit from the bit array
36
+ */
37
+ private getBit;
38
+ /**
39
+ * Add an item to the bloom filter
40
+ */
41
+ add(item: string): void;
42
+ /**
43
+ * Add multiple items to the bloom filter
44
+ */
45
+ addAll(items: string[]): void;
46
+ /**
47
+ * Test if an item might be in the set
+ *
+ * NOTE(review): for a standard Bloom filter, `true` can be a false positive
+ * while `false` means the item was never added — confirm this implementation
+ * follows that contract.
48
+ */
49
+ mightContain(item: string): boolean;
50
+ /**
51
+ * Test multiple items at once
+ *
+ * Returns a single boolean for the whole list; presumably true when at least
+ * one item might be contained — confirm against the implementation.
52
+ */
53
+ mightContainAny(items: string[]): boolean;
54
+ /**
55
+ * Filter items that might be in the set
+ *
+ * Takes a list of items and returns a list of items.
56
+ */
57
+ filter(items: string[]): string[];
58
+ /**
59
+ * Clear the bloom filter
60
+ */
61
+ clear(): void;
62
+ /**
63
+ * Get current false positive probability
64
+ */
65
+ getCurrentFalsePositiveRate(): number;
66
+ /**
67
+ * Get bloom filter statistics
+ *
+ * All fields of the returned object are numbers.
68
+ */
69
+ getStats(): {
70
+ size: number;
71
+ hashCount: number;
72
+ itemCount: number;
73
+ bitsSet: number;
74
+ loadFactor: number;
75
+ estimatedFalsePositiveRate: number;
76
+ };
77
+ /**
78
+ * Serialize bloom filter to JSON
+ *
+ * The returned object has the same shape that fromJSON() accepts.
79
+ */
80
+ toJSON(): {
81
+ size: number;
82
+ hashCount: number;
83
+ itemCount: number;
84
+ bitArray: number[];
85
+ };
86
+ /**
87
+ * Deserialize bloom filter from JSON
+ *
+ * Accepts an object with the same fields that toJSON() returns.
88
+ */
89
+ static fromJSON(data: {
90
+ size: number;
91
+ hashCount: number;
92
+ itemCount: number;
93
+ bitArray: number[];
94
+ }): BloomFilter;
95
+ /**
96
+ * Union operation with another bloom filter
+ *
+ * Returns a BloomFilter. NOTE(review): presumably both filters must share the
+ * same size and hashCount — confirm how a mismatch is handled.
97
+ */
98
+ union(other: BloomFilter): BloomFilter;
99
+ /**
100
+ * Intersection operation with another bloom filter
+ *
+ * Returns a BloomFilter. NOTE(review): presumably both filters must share the
+ * same size and hashCount — confirm how a mismatch is handled.
101
+ */
102
+ intersect(other: BloomFilter): BloomFilter;
103
+ }