bekindprofanityfilter 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,16 +1,16 @@
1
1
  {
2
2
  "name": "bekindprofanityfilter",
3
- "version": "0.0.5",
3
+ "version": "0.0.6",
4
4
  "description": "A multi-language profanity filter with romanization detection, language-aware innocence scoring, leet-speak detection, and cross-language collision handling. Forked from AllProfanity.",
5
5
  "main": "dist/cjs/index.js",
6
- "module": "dist/esm/index.js",
6
+ "module": "dist/esm.min.js",
7
7
  "types": "dist/esm/index.d.ts",
8
8
  "type": "module",
9
9
  "exports": {
10
10
  ".": {
11
11
  "import": {
12
12
  "types": "./dist/esm/index.d.ts",
13
- "default": "./dist/esm/index.js"
13
+ "default": "./dist/esm.min.js"
14
14
  },
15
15
  "require": {
16
16
  "types": "./dist/esm/index.d.ts",
@@ -19,7 +19,7 @@
19
19
  }
20
20
  },
21
21
  "scripts": {
22
- "build": "tsc && esbuild dist/esm/index.js --bundle --minify --platform=node --format=cjs --outfile=dist/cjs/index.js --packages=bundle && echo '{\"type\":\"commonjs\"}' > dist/cjs/package.json",
22
+ "build": "tsc && esbuild dist/esm/index.js --bundle --minify --platform=node --format=esm --outfile=dist/esm.min.js --packages=bundle && esbuild dist/esm/index.js --bundle --minify --platform=node --format=cjs --outfile=dist/cjs/index.js --packages=bundle && echo '{\"type\":\"commonjs\"}' > dist/cjs/package.json",
23
23
  "test": "jest",
24
24
  "test:watch": "jest --watch",
25
25
  "test:coverage": "jest --coverage",
@@ -75,9 +75,8 @@
75
75
  "typescript": "^4.0.0"
76
76
  },
77
77
  "files": [
78
- "dist/esm/**/*.js",
78
+ "dist/esm.min.js",
79
79
  "dist/esm/**/*.d.ts",
80
- "!dist/esm/languages/*-words.js",
81
80
  "!dist/esm/languages/*-words.d.ts",
82
81
  "dist/cjs/index.js",
83
82
  "dist/cjs/package.json",
@@ -1,238 +0,0 @@
1
/**
 * Aho-Corasick automaton for finding every occurrence of many patterns in a
 * text during a single left-to-right scan.
 *
 * Patterns and text are both consumed as UTF-16 code units, so the `start`
 * and `end` offsets in reported matches are ordinary string indices
 * (`text.slice(start, end) === pattern`).
 *
 * The automaton is compiled lazily: mutating the pattern list only marks it
 * stale, and the next query (or `getStats`) rebuilds it.
 */
export class AhoCorasick {
  /**
   * @param {string[]} [patterns=[]] Initial patterns. When non-empty the
   *   automaton is compiled immediately.
   */
  constructor(patterns = []) {
    this.compiled = false;
    this.patterns = [...patterns];
    this.root = this.createNode();
    if (patterns.length > 0) {
      this.buildAutomaton();
    }
  }

  /**
   * Create an empty trie node.
   * @returns {{children: Map, output: string[], outputIndices: number[], failure: object|null, isEndOfPattern: boolean}}
   */
  createNode() {
    return {
      children: new Map(),
      output: [],
      outputIndices: [],
      failure: null,
      isEndOfPattern: false,
    };
  }

  /**
   * Append several patterns and mark the automaton as needing a rebuild.
   * Empty entries are kept in the pattern list (so `patternIndex` values stay
   * aligned with the caller's array) but are ignored when matching.
   * @param {string[]} patterns
   */
  addPatterns(patterns) {
    this.patterns.push(...patterns);
    this.compiled = false;
  }

  /**
   * Append one pattern. Empty or missing patterns are ignored.
   * @param {string} pattern
   */
  addPattern(pattern) {
    if (pattern && pattern.length > 0) {
      this.patterns.push(pattern);
      this.compiled = false;
    }
  }

  /**
   * Rebuild the whole automaton (trie, failure links, merged outputs) from
   * the current pattern list.
   */
  buildAutomaton() {
    this.buildTrie();
    this.buildFailureLinks();
    this.buildOutputLinks();
    this.compiled = true;
  }

  /**
   * Rebuild the automaton only when the pattern list changed since the last
   * compile.
   */
  ensureCompiled() {
    if (!this.compiled) {
      this.buildAutomaton();
    }
  }

  /**
   * Build the trie from scratch, one edge per UTF-16 code unit.
   */
  buildTrie() {
    this.root = this.createNode();
    for (let i = 0; i < this.patterns.length; i++) {
      const pattern = this.patterns[i];
      // An empty pattern would make the root terminal and produce a
      // zero-length match at every text position, so it is skipped here.
      if (!pattern || pattern.length === 0) {
        continue;
      }
      let current = this.root;
      // Index by code unit (not `for...of`, which yields code points) so the
      // trie agrees with the `text[i]` scan used by the search methods;
      // otherwise patterns containing surrogate pairs could never match.
      for (let k = 0; k < pattern.length; k++) {
        const unit = pattern[k];
        let next = current.children.get(unit);
        if (next === undefined) {
          next = this.createNode();
          current.children.set(unit, next);
        }
        current = next;
      }
      current.isEndOfPattern = true;
      current.output.push(pattern);
      current.outputIndices.push(i);
    }
  }

  /**
   * Collect every non-root node in breadth-first order.
   * A read cursor is used instead of `Array.prototype.shift`, which would
   * make the traversal quadratic for large dictionaries.
   * @returns {object[]}
   */
  nodesInBfsOrder() {
    const order = [...this.root.children.values()];
    for (let head = 0; head < order.length; head++) {
      for (const child of order[head].children.values()) {
        order.push(child);
      }
    }
    return order;
  }

  /**
   * Compute failure links breadth-first: each node points at the node for
   * the longest proper suffix of its path that is also a path in the trie.
   */
  buildFailureLinks() {
    // Depth-1 nodes always fall back to the root.
    for (const child of this.root.children.values()) {
      child.failure = this.root;
    }
    // BFS order guarantees a parent's failure link is final before its
    // children are processed.
    for (const current of this.nodesInBfsOrder()) {
      for (const [unit, child] of current.children) {
        let fallback = current.failure;
        while (fallback !== null && !fallback.children.has(unit)) {
          fallback = fallback.failure;
        }
        child.failure = fallback ? fallback.children.get(unit) : this.root;
      }
    }
  }

  /**
   * Merge each node's failure-target outputs into its own output list so a
   * search only has to inspect the current node.
   */
  buildOutputLinks() {
    // BFS order: a failure target is always shallower, so its output list is
    // already complete when it is copied.
    for (const current of this.nodesInBfsOrder()) {
      const fallback = current.failure;
      if (fallback && fallback.output.length > 0) {
        current.output.push(...fallback.output);
        current.outputIndices.push(...fallback.outputIndices);
      }
    }
  }

  /**
   * Perform one automaton transition.
   * @param {object} state Current node.
   * @param {string} unit Next UTF-16 code unit of the text.
   * @returns {object} Node reached after consuming `unit`.
   */
  advance(state, unit) {
    let node = state;
    // Follow failure links until a transition exists or the root is reached.
    while (node !== this.root && !node.children.has(unit)) {
      node = node.failure;
    }
    return node.children.has(unit) ? node.children.get(unit) : node;
  }

  /**
   * Find every pattern occurrence in `text`, including overlapping ones.
   * @param {string} text
   * @returns {{pattern: string, start: number, end: number, patternIndex: number}[]}
   *   Matches ordered by end position; `end` is exclusive.
   */
  findAll(text) {
    this.ensureCompiled();
    const matches = [];
    let current = this.root;
    for (let i = 0; i < text.length; i++) {
      current = this.advance(current, text[i]);
      // Every pattern listed on this node ends at position i.
      for (let j = 0; j < current.output.length; j++) {
        const pattern = current.output[j];
        matches.push({
          pattern,
          start: i - pattern.length + 1,
          end: i + 1,
          patternIndex: current.outputIndices[j],
        });
      }
    }
    return matches;
  }

  /**
   * Report whether `text` contains at least one pattern.
   * @param {string} text
   * @returns {boolean}
   */
  hasMatch(text) {
    this.ensureCompiled();
    let current = this.root;
    for (let i = 0; i < text.length; i++) {
      current = this.advance(current, text[i]);
      if (current.output.length > 0) {
        return true;
      }
    }
    return false;
  }

  /**
   * Return the match with the earliest end position, or `null` if none.
   * When several patterns end at that position, the first one listed on the
   * node (the pattern ending exactly at that node, if any) is returned.
   * @param {string} text
   * @returns {{pattern: string, start: number, end: number, patternIndex: number}|null}
   */
  findFirst(text) {
    this.ensureCompiled();
    let current = this.root;
    for (let i = 0; i < text.length; i++) {
      current = this.advance(current, text[i]);
      if (current.output.length > 0) {
        const pattern = current.output[0];
        return {
          pattern,
          start: i - pattern.length + 1,
          end: i + 1,
          patternIndex: current.outputIndices[0],
        };
      }
    }
    return null;
  }

  /**
   * @returns {string[]} A copy of the stored patterns.
   */
  getPatterns() {
    return [...this.patterns];
  }

  /**
   * Remove all patterns and reset the automaton to an empty, uncompiled state.
   */
  clear() {
    this.patterns = [];
    this.root = this.createNode();
    this.compiled = false;
  }

  /**
   * Summarize the automaton. Compiles first if patterns were added since the
   * last build, so `nodeCount` always reflects the current pattern list.
   * @returns {{patternCount: number, nodeCount: number, averagePatternLength: number}}
   */
  getStats() {
    this.ensureCompiled();
    const patternCount = this.patterns.length;
    const totalLength = this.patterns.reduce((sum, p) => sum + p.length, 0);
    return {
      patternCount,
      nodeCount: this.countNodes(this.root),
      averagePatternLength: patternCount > 0 ? totalLength / patternCount : 0,
    };
  }

  /**
   * Count the nodes in the subtree rooted at `node` (inclusive).
   * Iterative so very long patterns cannot exhaust the call stack.
   * @param {object} node
   * @returns {number}
   */
  countNodes(node) {
    let count = 0;
    const pending = [node];
    while (pending.length > 0) {
      const current = pending.pop();
      count++;
      for (const child of current.children.values()) {
        pending.push(child);
      }
    }
    return count;
  }
}
238
- //# sourceMappingURL=aho-corasick.js.map
@@ -1,208 +0,0 @@
1
/**
 * Bloom filter: a fixed-size probabilistic set of strings.
 *
 * `mightContain` never returns false for an item that was added, but may
 * return true for an item that was not (a false positive).
 */
export class BloomFilter {
  /**
   * @param {number} expectedItems Number of items the filter is sized for.
   *   Values below 1 (including 0) are treated as 1 so the filter always has
   *   at least one bit and one hash function.
   * @param {number} [falsePositiveRate=0.01] Target false-positive probability.
   */
  constructor(expectedItems, falsePositiveRate = 0.01) {
    this.itemCount = 0;
    // Without this clamp, expectedItems === 0 gives size 0 and hashCount NaN,
    // which makes mightContain() answer true for every input.
    const capacity = expectedItems >= 1 ? expectedItems : 1;
    this.size = this.calculateOptimalSize(capacity, falsePositiveRate);
    this.hashCount = this.calculateOptimalHashCount(this.size, capacity);
    this.bitArray = new Uint8Array(Math.ceil(this.size / 8));
  }

  /**
   * Bit count for `n` items at false-positive rate `p`:
   * m = ceil(-n * ln(p) / ln(2)^2), never less than 1.
   * @param {number} n
   * @param {number} p
   * @returns {number}
   */
  calculateOptimalSize(n, p) {
    const bits = Math.ceil((-n * Math.log(p)) / Math.log(2) ** 2);
    // p >= 1 yields zero or a negative value; fall back to a single bit.
    return bits >= 1 ? bits : 1;
  }

  /**
   * Hash-function count for `m` bits and `n` items:
   * k = ceil((m / n) * ln(2)), never less than 1.
   * @param {number} m
   * @param {number} n
   * @returns {number}
   */
  calculateOptimalHashCount(m, n) {
    const count = Math.ceil((m / n) * Math.log(2));
    return count >= 1 ? count : 1;
  }

  /**
   * First base hash (FNV-1a style xor-then-multiply), reduced modulo `size`.
   * The multiply is intentionally left as plain floating-point arithmetic:
   * changing it would move bit positions and invalidate filters previously
   * serialized with `toJSON`.
   * @param {string} item
   * @returns {number}
   */
  hash1(item) {
    let hash = 2166136261;
    for (let i = 0; i < item.length; i++) {
      hash ^= item.charCodeAt(i);
      hash *= 16777619;
    }
    return Math.abs(hash) % this.size;
  }

  /**
   * Second base hash (djb2 style), reduced modulo `size`.
   * @param {string} item
   * @returns {number}
   */
  hash2(item) {
    let hash = 5381;
    for (let i = 0; i < item.length; i++) {
      hash = (hash << 5) + hash + item.charCodeAt(i);
    }
    return Math.abs(hash) % this.size;
  }

  /**
   * Derive `hashCount` bit positions from the two base hashes by double
   * hashing: position_i = (h1 + i * h2) mod size.
   * @param {string} item
   * @returns {number[]}
   */
  getHashes(item) {
    const first = this.hash1(item);
    const second = this.hash2(item);
    const positions = [];
    for (let i = 0; i < this.hashCount; i++) {
      positions.push(Math.abs((first + i * second) % this.size));
    }
    return positions;
  }

  /**
   * Set the bit at `index`.
   * @param {number} index
   */
  setBit(index) {
    this.bitArray[Math.floor(index / 8)] |= 1 << (index % 8);
  }

  /**
   * Read the bit at `index`.
   * @param {number} index
   * @returns {boolean}
   */
  getBit(index) {
    return (this.bitArray[Math.floor(index / 8)] & (1 << (index % 8))) !== 0;
  }

  /**
   * Add one item. `itemCount` is incremented on every call, even for repeats.
   * @param {string} item
   */
  add(item) {
    for (const position of this.getHashes(item)) {
      this.setBit(position);
    }
    this.itemCount++;
  }

  /**
   * Add every item of an iterable.
   * @param {Iterable<string>} items
   */
  addAll(items) {
    for (const item of items) {
      this.add(item);
    }
  }

  /**
   * Test membership. `false` is definitive; `true` may be a false positive.
   * @param {string} item
   * @returns {boolean}
   */
  mightContain(item) {
    return this.getHashes(item).every((position) => this.getBit(position));
  }

  /**
   * @param {string[]} items
   * @returns {boolean} True when at least one item might be in the set.
   */
  mightContainAny(items) {
    return items.some((item) => this.mightContain(item));
  }

  /**
   * @param {string[]} items
   * @returns {string[]} The items that might be in the set.
   */
  filter(items) {
    return items.filter((item) => this.mightContain(item));
  }

  /**
   * Reset all bits and the item counter; size and hash count are kept.
   */
  clear() {
    this.bitArray.fill(0);
    this.itemCount = 0;
  }

  /**
   * Theoretical false-positive probability for the current item count:
   * (1 - e^(-k * n / m))^k.
   * @returns {number}
   */
  getCurrentFalsePositiveRate() {
    const ratio = this.itemCount / this.size;
    return Math.pow(1 - Math.exp(-this.hashCount * ratio), this.hashCount);
  }

  /**
   * Measure the filter's occupancy.
   * `estimatedFalsePositiveRate` is loadFactor^hashCount, based on the bits
   * actually set rather than on `itemCount`.
   * @returns {{size: number, hashCount: number, itemCount: number, bitsSet: number, loadFactor: number, estimatedFalsePositiveRate: number}}
   */
  getStats() {
    let bitsSet = 0;
    for (let i = 0; i < this.size; i++) {
      if (this.getBit(i)) {
        bitsSet++;
      }
    }
    const loadFactor = bitsSet / this.size;
    return {
      size: this.size,
      hashCount: this.hashCount,
      itemCount: this.itemCount,
      bitsSet,
      loadFactor,
      estimatedFalsePositiveRate: Math.pow(loadFactor, this.hashCount),
    };
  }

  /**
   * Plain-object snapshot suitable for `JSON.stringify`.
   * @returns {{size: number, hashCount: number, itemCount: number, bitArray: number[]}}
   */
  toJSON() {
    return {
      size: this.size,
      hashCount: this.hashCount,
      itemCount: this.itemCount,
      bitArray: Array.from(this.bitArray),
    };
  }

  /**
   * Rebuild a filter from a `toJSON` snapshot. The data is trusted as-is;
   * no consistency checks are performed.
   * @param {{size: number, hashCount: number, itemCount: number, bitArray: number[]}} data
   * @returns {BloomFilter}
   */
  static fromJSON(data) {
    // Bypass the constructor so the stored parameters are used verbatim.
    const filter = Object.create(BloomFilter.prototype);
    filter.size = data.size;
    filter.hashCount = data.hashCount;
    filter.itemCount = data.itemCount;
    filter.bitArray = new Uint8Array(data.bitArray);
    return filter;
  }

  /**
   * Shared implementation of `union` / `intersect`.
   * @param {BloomFilter} other Filter with identical size and hash count.
   * @param {string} operation Operation name used in the error message.
   * @param {number} itemCount Item count recorded on the result.
   * @param {(a: number, b: number) => number} mergeByte Byte-wise combiner.
   * @returns {BloomFilter}
   * @throws {Error} When the two filters are not parameter-compatible.
   */
  combineWith(other, operation, itemCount, mergeByte) {
    if (this.size !== other.size || this.hashCount !== other.hashCount) {
      throw new Error(`Bloom filters must have same size and hash count for ${operation} operation`);
    }
    const result = Object.create(BloomFilter.prototype);
    result.size = this.size;
    result.hashCount = this.hashCount;
    result.itemCount = itemCount;
    result.bitArray = this.bitArray.map((byte, i) => mergeByte(byte, other.bitArray[i]));
    return result;
  }

  /**
   * New filter containing everything in either filter (bitwise OR).
   * `itemCount` is the sum of both counts.
   * @param {BloomFilter} other
   * @returns {BloomFilter}
   * @throws {Error} When sizes or hash counts differ.
   */
  union(other) {
    return this.combineWith(other, "union", this.itemCount + other.itemCount, (a, b) => a | b);
  }

  /**
   * New filter approximating the items present in both filters (bitwise AND).
   * `itemCount` is an upper bound: the smaller of the two counts.
   * @param {BloomFilter} other
   * @returns {BloomFilter}
   * @throws {Error} When sizes or hash counts differ.
   */
  intersect(other) {
    return this.combineWith(
      other,
      "intersection",
      Math.min(this.itemCount, other.itemCount),
      (a, b) => a & b,
    );
  }
}
208
- //# sourceMappingURL=bloom-filter.js.map