bekindprofanityfilter 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTORS.md +106 -0
- package/LICENSE +22 -0
- package/README.md +1015 -0
- package/allprofanity.config.example.json +35 -0
- package/bin/init.js +49 -0
- package/config.schema.json +163 -0
- package/dist/algos/aho-corasick.d.ts +75 -0
- package/dist/algos/aho-corasick.js +238 -0
- package/dist/algos/aho-corasick.js.map +1 -0
- package/dist/algos/bloom-filter.d.ts +103 -0
- package/dist/algos/bloom-filter.js +208 -0
- package/dist/algos/bloom-filter.js.map +1 -0
- package/dist/algos/context-patterns.d.ts +102 -0
- package/dist/algos/context-patterns.js +484 -0
- package/dist/algos/context-patterns.js.map +1 -0
- package/dist/index.d.ts +1332 -0
- package/dist/index.js +2631 -0
- package/dist/index.js.map +1 -0
- package/dist/innocence-scoring.d.ts +23 -0
- package/dist/innocence-scoring.js +118 -0
- package/dist/innocence-scoring.js.map +1 -0
- package/dist/language-detector.d.ts +162 -0
- package/dist/language-detector.js +952 -0
- package/dist/language-detector.js.map +1 -0
- package/dist/language-dicts.d.ts +60 -0
- package/dist/language-dicts.js +2718 -0
- package/dist/language-dicts.js.map +1 -0
- package/dist/languages/arabic-words.d.ts +10 -0
- package/dist/languages/arabic-words.js +1649 -0
- package/dist/languages/arabic-words.js.map +1 -0
- package/dist/languages/bengali-words.d.ts +10 -0
- package/dist/languages/bengali-words.js +1696 -0
- package/dist/languages/bengali-words.js.map +1 -0
- package/dist/languages/brazilian-words.d.ts +10 -0
- package/dist/languages/brazilian-words.js +2122 -0
- package/dist/languages/brazilian-words.js.map +1 -0
- package/dist/languages/chinese-words.d.ts +10 -0
- package/dist/languages/chinese-words.js +2728 -0
- package/dist/languages/chinese-words.js.map +1 -0
- package/dist/languages/english-primary-all-languages.d.ts +23 -0
- package/dist/languages/english-primary-all-languages.js +36894 -0
- package/dist/languages/english-primary-all-languages.js.map +1 -0
- package/dist/languages/english-words.d.ts +5 -0
- package/dist/languages/english-words.js +5156 -0
- package/dist/languages/english-words.js.map +1 -0
- package/dist/languages/french-words.d.ts +10 -0
- package/dist/languages/french-words.js +2326 -0
- package/dist/languages/french-words.js.map +1 -0
- package/dist/languages/german-words.d.ts +10 -0
- package/dist/languages/german-words.js +2633 -0
- package/dist/languages/german-words.js.map +1 -0
- package/dist/languages/hindi-words.d.ts +10 -0
- package/dist/languages/hindi-words.js +2341 -0
- package/dist/languages/hindi-words.js.map +1 -0
- package/dist/languages/innocent-words.d.ts +41 -0
- package/dist/languages/innocent-words.js +109 -0
- package/dist/languages/innocent-words.js.map +1 -0
- package/dist/languages/italian-words.d.ts +10 -0
- package/dist/languages/italian-words.js +2287 -0
- package/dist/languages/italian-words.js.map +1 -0
- package/dist/languages/japanese-words.d.ts +11 -0
- package/dist/languages/japanese-words.js +2557 -0
- package/dist/languages/japanese-words.js.map +1 -0
- package/dist/languages/korean-words.d.ts +10 -0
- package/dist/languages/korean-words.js +2509 -0
- package/dist/languages/korean-words.js.map +1 -0
- package/dist/languages/russian-words.d.ts +10 -0
- package/dist/languages/russian-words.js +2175 -0
- package/dist/languages/russian-words.js.map +1 -0
- package/dist/languages/spanish-words.d.ts +11 -0
- package/dist/languages/spanish-words.js +2536 -0
- package/dist/languages/spanish-words.js.map +1 -0
- package/dist/languages/tamil-words.d.ts +10 -0
- package/dist/languages/tamil-words.js +1722 -0
- package/dist/languages/tamil-words.js.map +1 -0
- package/dist/languages/telugu-words.d.ts +10 -0
- package/dist/languages/telugu-words.js +1739 -0
- package/dist/languages/telugu-words.js.map +1 -0
- package/dist/romanization-detector.d.ts +50 -0
- package/dist/romanization-detector.js +779 -0
- package/dist/romanization-detector.js.map +1 -0
- package/package.json +79 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/**
 * Bloom filter for fast, memory-efficient set membership tests on strings.
 *
 * A lookup may report a false positive, but an item that was added to a
 * correctly sized (or correctly deserialized) filter is never reported as
 * absent.
 *
 * NOTE: the arithmetic in hash1(), hash2() and getHashes() decides which bits
 * an item occupies. It is intentionally left exactly as it was so that filters
 * serialized with toJSON() by earlier builds keep resolving to the same bits.
 */
export class BloomFilter {
    /**
     * Size the filter for an expected number of items and a target false
     * positive rate.
     *
     * @param {number} expectedItems - Anticipated number of distinct items.
     *   Values below 1 (including 0) are treated as 1 so the filter always has
     *   at least one bit and one hash function; previously 0 produced a filter
     *   whose lookups always answered `true`.
     * @param {number} [falsePositiveRate=0.01] - Target false positive
     *   probability, strictly between 0 and 1.
     * @throws {RangeError} If `expectedItems` is negative or not finite, or if
     *   `falsePositiveRate` is not strictly between 0 and 1.
     */
    constructor(expectedItems, falsePositiveRate = 0.01) {
        if (!Number.isFinite(expectedItems) || expectedItems < 0) {
            throw new RangeError("BloomFilter: expectedItems must be a finite, non-negative number");
        }
        if (!(falsePositiveRate > 0 && falsePositiveRate < 1)) {
            throw new RangeError("BloomFilter: falsePositiveRate must be greater than 0 and less than 1");
        }
        // Clamp so that neither the bit count nor the hash count can be 0/NaN.
        const capacity = Math.max(1, expectedItems);
        this.itemCount = 0;
        this.size = this.calculateOptimalSize(capacity, falsePositiveRate);
        this.hashCount = this.calculateOptimalHashCount(this.size, capacity);
        this.bitArray = new Uint8Array(Math.ceil(this.size / 8));
    }

    /**
     * Optimal number of bits: m = ceil(-n * ln(p) / (ln 2)^2).
     *
     * @param {number} n - Expected number of items.
     * @param {number} p - Target false positive rate.
     * @returns {number} Bit count.
     */
    calculateOptimalSize(n, p) {
        return Math.ceil((-n * Math.log(p)) / Math.log(2) ** 2);
    }

    /**
     * Optimal number of hash functions: k = ceil((m / n) * ln 2).
     *
     * @param {number} m - Number of bits in the filter.
     * @param {number} n - Expected number of items.
     * @returns {number} Hash function count.
     */
    calculateOptimalHashCount(m, n) {
        return Math.ceil((m / n) * Math.log(2));
    }

    /**
     * First base hash (FNV-1a style: XOR each UTF-16 code unit, then multiply
     * by the FNV prime).
     *
     * The multiplication is an ordinary double-precision multiply and `^=`
     * coerces to a 32-bit integer, so the result is not bit-exact FNV-1a.
     * Do not "fix" this: it would move the bits of every stored item.
     *
     * @param {string} item - Value to hash.
     * @returns {number} Index in the range [0, size).
     */
    hash1(item) {
        let hash = 2166136261;
        for (let i = 0; i < item.length; i++) {
            hash ^= item.charCodeAt(i);
            hash *= 16777619;
        }
        return Math.abs(hash) % this.size;
    }

    /**
     * Second base hash (djb2 style: hash * 33 + code unit, with the shift
     * evaluated on a 32-bit integer).
     *
     * @param {string} item - Value to hash.
     * @returns {number} Index in the range [0, size).
     */
    hash2(item) {
        let hash = 5381;
        for (let i = 0; i < item.length; i++) {
            hash = (hash << 5) + hash + item.charCodeAt(i);
        }
        return Math.abs(hash) % this.size;
    }

    /**
     * Derive the `hashCount` bit positions of an item by double hashing:
     * position_i = (hash1 + i * hash2) % size.
     *
     * @param {string} item - Value to hash.
     * @returns {number[]} Bit indices, one per hash function.
     */
    getHashes(item) {
        const hash1 = this.hash1(item);
        const hash2 = this.hash2(item);
        const hashes = [];
        for (let i = 0; i < this.hashCount; i++) {
            const hash = (hash1 + i * hash2) % this.size;
            hashes.push(Math.abs(hash));
        }
        return hashes;
    }

    /**
     * Set the bit at `index` (bit 0 is the least significant bit of byte 0).
     *
     * @param {number} index - Bit index.
     */
    setBit(index) {
        const byteIndex = Math.floor(index / 8);
        const bitIndex = index % 8;
        this.bitArray[byteIndex] |= 1 << bitIndex;
    }

    /**
     * Read the bit at `index`.
     *
     * @param {number} index - Bit index.
     * @returns {boolean} `true` when the bit is set.
     */
    getBit(index) {
        const byteIndex = Math.floor(index / 8);
        const bitIndex = index % 8;
        return (this.bitArray[byteIndex] & (1 << bitIndex)) !== 0;
    }

    /**
     * Add an item. `itemCount` counts calls, so adding the same item twice
     * counts twice.
     *
     * @param {string} item - Value to add.
     */
    add(item) {
        for (const position of this.getHashes(item)) {
            this.setBit(position);
        }
        this.itemCount++;
    }

    /**
     * Add every item of an iterable.
     *
     * @param {Iterable<string>} items - Values to add.
     */
    addAll(items) {
        for (const item of items) {
            this.add(item);
        }
    }

    /**
     * Test whether an item might have been added.
     *
     * @param {string} item - Value to look up.
     * @returns {boolean} `false` when the item was definitely never added,
     *   `true` when it may have been (subject to false positives).
     */
    mightContain(item) {
        return this.getHashes(item).every((position) => this.getBit(position));
    }

    /**
     * Test whether at least one of the items might be in the set.
     *
     * @param {string[]} items - Values to look up.
     * @returns {boolean} `true` if any item passes mightContain().
     */
    mightContainAny(items) {
        return items.some((item) => this.mightContain(item));
    }

    /**
     * Keep only the items that might be in the set.
     *
     * @param {string[]} items - Candidate values.
     * @returns {string[]} New array with the items that pass mightContain().
     */
    filter(items) {
        return items.filter((item) => this.mightContain(item));
    }

    /**
     * Reset every bit and the item counter; size and hash count are kept.
     */
    clear() {
        this.bitArray.fill(0);
        this.itemCount = 0;
    }

    /**
     * Theoretical false positive probability for the current item count:
     * (1 - e^(-k * n / m))^k.
     *
     * @returns {number} Probability in [0, 1].
     */
    getCurrentFalsePositiveRate() {
        const ratio = this.itemCount / this.size;
        return Math.pow(1 - Math.exp(-this.hashCount * ratio), this.hashCount);
    }

    /**
     * Collect occupancy statistics by scanning all `size` bits.
     *
     * @returns {{size: number, hashCount: number, itemCount: number,
     *   bitsSet: number, loadFactor: number, estimatedFalsePositiveRate: number}}
     *   `loadFactor` is bitsSet / size and `estimatedFalsePositiveRate` is
     *   loadFactor ^ hashCount.
     */
    getStats() {
        let bitsSet = 0;
        for (let i = 0; i < this.size; i++) {
            if (this.getBit(i)) {
                bitsSet++;
            }
        }
        // Guard against a zero-sized filter so the ratio never becomes NaN.
        const loadFactor = this.size > 0 ? bitsSet / this.size : 0;
        const estimatedFalsePositiveRate = Math.pow(loadFactor, this.hashCount);
        return {
            size: this.size,
            hashCount: this.hashCount,
            itemCount: this.itemCount,
            bitsSet,
            loadFactor,
            estimatedFalsePositiveRate,
        };
    }

    /**
     * Serialize to a plain, JSON-compatible object.
     *
     * @returns {{size: number, hashCount: number, itemCount: number, bitArray: number[]}}
     */
    toJSON() {
        return {
            size: this.size,
            hashCount: this.hashCount,
            itemCount: this.itemCount,
            bitArray: Array.from(this.bitArray),
        };
    }

    /**
     * Rebuild a filter from the object produced by toJSON().
     *
     * The payload is validated because a bit array that is too short for
     * `size` would make writes silently vanish and lookups return false
     * negatives.
     *
     * @param {{size: number, hashCount: number, itemCount?: number,
     *   bitArray: ArrayLike<number>}} data - Serialized filter.
     * @returns {BloomFilter} Restored filter.
     * @throws {TypeError} If `size` or `hashCount` is not a positive integer,
     *   or `bitArray` is not a byte list of exactly ceil(size / 8) entries.
     */
    static fromJSON(data) {
        if (data === null || typeof data !== "object") {
            throw new TypeError("BloomFilter.fromJSON: data must be an object");
        }
        const { size, hashCount, itemCount, bitArray } = data;
        if (!Number.isInteger(size) || size < 1) {
            throw new TypeError("BloomFilter.fromJSON: size must be a positive integer");
        }
        if (!Number.isInteger(hashCount) || hashCount < 1) {
            throw new TypeError("BloomFilter.fromJSON: hashCount must be a positive integer");
        }
        const isByteList = Array.isArray(bitArray) || ArrayBuffer.isView(bitArray);
        if (!isByteList || bitArray.length !== Math.ceil(size / 8)) {
            throw new TypeError("BloomFilter.fromJSON: bitArray length does not match size");
        }
        // Bypass the constructor: the parameters are restored, not recomputed.
        const filter = Object.create(BloomFilter.prototype);
        filter.size = size;
        filter.hashCount = hashCount;
        // A missing or malformed counter falls back to 0 instead of poisoning
        // later arithmetic with NaN/undefined.
        filter.itemCount = Number.isFinite(itemCount) && itemCount >= 0 ? itemCount : 0;
        filter.bitArray = new Uint8Array(bitArray);
        return filter;
    }

    /**
     * Union with another filter of identical size and hash count.
     *
     * The resulting `itemCount` is the sum of both counts, so items present in
     * both filters are counted twice.
     *
     * @param {BloomFilter} other - Filter to merge with.
     * @returns {BloomFilter} New filter; neither operand is modified.
     * @throws {Error} If size or hash count differ.
     */
    union(other) {
        return combineBloomFilters(this, other, "union", (a, b) => a | b, this.itemCount + other.itemCount);
    }

    /**
     * Intersection with another filter of identical size and hash count.
     *
     * The resulting `itemCount` is the smaller of both counts, i.e. an upper
     * bound on the number of shared items.
     *
     * @param {BloomFilter} other - Filter to intersect with.
     * @returns {BloomFilter} New filter; neither operand is modified.
     * @throws {Error} If size or hash count differ.
     */
    intersect(other) {
        return combineBloomFilters(this, other, "intersection", (a, b) => a & b, Math.min(this.itemCount, other.itemCount));
    }
}

/**
 * Shared implementation of BloomFilter#union and BloomFilter#intersect:
 * checks compatibility, then merges the two bit arrays byte by byte.
 *
 * @param {BloomFilter} left - Left operand.
 * @param {BloomFilter} right - Right operand.
 * @param {string} operation - Operation name used in the error message.
 * @param {(a: number, b: number) => number} mergeByte - Byte combiner.
 * @param {number} itemCount - Item count to record on the result.
 * @returns {BloomFilter} Newly allocated filter.
 */
function combineBloomFilters(left, right, operation, mergeByte, itemCount) {
    if (left.size !== right.size || left.hashCount !== right.hashCount) {
        throw new Error(`Bloom filters must have same size and hash count for ${operation} operation`);
    }
    // Build the result directly instead of running the sizing constructor and
    // discarding the array it allocates.
    const result = Object.create(BloomFilter.prototype);
    result.size = left.size;
    result.hashCount = left.hashCount;
    result.itemCount = itemCount;
    result.bitArray = new Uint8Array(left.bitArray.length);
    for (let i = 0; i < left.bitArray.length; i++) {
        result.bitArray[i] = mergeByte(left.bitArray[i], right.bitArray[i]);
    }
    return result;
}
|
|
208
|
+
//# sourceMappingURL=bloom-filter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bloom-filter.js","sourceRoot":"","sources":["../../src/algos/bloom-filter.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,OAAO,WAAW;IAMtB,YAAY,aAAqB,EAAE,oBAA4B,IAAI;QAF3D,cAAS,GAAW,CAAC,CAAC;QAG5B,wCAAwC;QACxC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,oBAAoB,CAAC,aAAa,EAAE,iBAAiB,CAAC,CAAC;QACxE,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,yBAAyB,CAAC,IAAI,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QAC1E,IAAI,CAAC,QAAQ,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;IAC3D,CAAC;IAED;;OAEG;IACK,oBAAoB,CAAC,CAAS,EAAE,CAAS;QAC/C,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IAC1D,CAAC;IAED;;OAEG;IACK,yBAAyB,CAAC,CAAS,EAAE,CAAS;QACpD,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,IAAY;QACxB,IAAI,IAAI,GAAG,UAAU,CAAC;QACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YAC3B,IAAI,IAAI,QAAQ,CAAC;SAClB;QACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,IAAY;QACxB,IAAI,IAAI,GAAG,IAAI,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YACpC,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;SAChD;QACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,SAAS,CAAC,IAAY;QAC5B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE;YACvC,MAAM,IAAI,GAAG,CAAC,KAAK,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC;YAC7C,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;SAC7B;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,KAAa;QAC1B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC
;QACxC,MAAM,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAC;QAC3B,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,QAAQ,CAAC;IAC5C,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,KAAa;QAC1B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QACxC,MAAM,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAC;QAC3B,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,QAAQ,CAAC,CAAC,KAAK,CAAC,CAAC;IAC5D,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,IAAY;QACd,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE;YACzB,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;SACnB;QACD,IAAI,CAAC,SAAS,EAAE,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAe;QACpB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE;YACxB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;SAChB;IACH,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,IAAY;QACvB,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE;YACzB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE;gBACtB,OAAO,KAAK,CAAC;aACd;SACF;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,eAAe,CAAC,KAAe;QAC7B,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;IACvD,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAe;QACpB,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;IACzD,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACtB,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,2BAA2B;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC;QACzC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;IACzE,CAAC;IAED;;OAEG;IACH,QAAQ;QAQN,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;YAClC,IAAI,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;gBAClB,OAAO,EAAE,CAAC;aACX;SACF;QAED,MAAM,UAAU,GAAG,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC;QACvC,MAAM,0BAA0B,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAExE,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,OAAO;YACP,
UAAU;YACV,0BAA0B;SAC3B,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM;QAMJ,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC;SACpC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,QAAQ,CAAC,IAKf;QACC,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACxB,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAClC,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAClC,MAAM,CAAC,QAAQ,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAkB;QACtB,IAAI,IAAI,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,SAAS,EAAE;YAClE,MAAM,IAAI,KAAK,CACb,sEAAsE,CACvE,CAAC;SACH;QAED,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;QACxC,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACxB,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAClC,MAAM,CAAC,QAAQ,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACvD,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC,SAAS,CAAC;QAEpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC7C,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;SAC3D;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACH,SAAS,CAAC,KAAkB;QAC1B,IAAI,IAAI,CAAC,IAAI,KAAK,KAAK,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,KAAK,CAAC,SAAS,EAAE;YAClE,MAAM,IAAI,KAAK,CACb,6EAA6E,CAC9E,CAAC;SACH;QAED,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;QACxC,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QACxB,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QAClC,MAAM,CAAC,QAAQ,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACvD,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;QAE7D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC7C,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;SAC3D;QAED,OAAO,MAAM,CA
AC;IAChB,CAAC;CACF"}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Universal context patterns for multi-language profanity detection
|
|
3
|
+
*/
|
|
4
|
+
export interface UniversalContextPattern {
    /** Category of contextual cue this pattern represents. */
    type:
        | "negation"
        | "possessive"
        | "compound"
        | "proper_noun"
        | "article"
        | "quotation"
        | "medical"
        | "anatomical"
        | "sexual_verb_before"
        | "sexual_verb_after"
        | "compound_slur"
        | "insult_construction"
        | "direct_address"
        | "pejorative_adj";
    /**
     * Regular expression that identifies the cue.
     * NOTE(review): presumably run against the text around a match; confirm in the implementation.
     */
    pattern: RegExp;
    /**
     * Numeric weight of the pattern.
     * NOTE(review): exact use is not visible in this declaration file; confirm in the implementation.
     */
    weight: number;
    /** Certainty adjustment: positive values boost (more likely profane), negative values reduce (more likely innocent). */
    delta: number;
    /**
     * Languages the pattern applies to.
     * NOTE(review): the identifier format (e.g. language codes vs. names) is not shown here.
     */
    languages: string[];
    /** Human-readable explanation of the pattern. */
    description: string;
    /** Sample phrases illustrating the pattern. */
    examples: string[];
}
|
|
13
|
+
export interface ContextRule {
    /** Regular expression the rule is matched with. */
    pattern: RegExp;
    /** What a match does: lower the score, raise the score, whitelist, or flag. */
    action:
        | "reduce_score"
        | "increase_score"
        | "whitelist"
        | "flag";
    /**
     * Numeric weight of the rule.
     * NOTE(review): exact use is not visible in this declaration file; confirm in the implementation.
     */
    weight: number;
    /** Certainty adjustment: positive values boost (more likely profane), negative values reduce (more likely innocent). */
    delta: number;
    /**
     * Ordering key for the rule.
     * NOTE(review): whether lower or higher values run first is not shown here; confirm in the implementation.
     */
    priority: number;
}
|
|
20
|
+
/**
 * Context patterns shared across multiple languages.
 */
export declare const UNIVERSAL_CONTEXT_PATTERNS: UniversalContextPattern[];

/**
 * Context patterns that apply to one language only, grouped under a string key.
 * NOTE(review): the key is presumably the language identifier; confirm in the implementation.
 */
export declare const LANGUAGE_SPECIFIC_PATTERNS: Record<string, UniversalContextPattern[]>;

/**
 * Context patterns used to disambiguate individual ambiguous profane words.
 * Each key is the profane word in lowercase.
 */
export declare const WORD_SPECIFIC_PATTERNS: Record<string, UniversalContextPattern[]>;
|
|
33
|
+
/**
 * Generates context rules from the registered context patterns.
 */
export declare class ContextPatternMatcher {
    private patterns;
    private languagePatterns;

    /**
     * @param languages - Optional list of languages to set the matcher up for.
     */
    constructor(languages?: string[]);

    /**
     * Build the context rules that apply to one word.
     *
     * @param word - Word to generate rules for.
     * @param languages - Optional list of languages to take into account.
     * @returns The generated rules.
     */
    generateRules(word: string, languages?: string[]): ContextRule[];

    /**
     * Priority assigned to a pattern type; reducers are ordered before boosters.
     */
    private getPriority;

    /**
     * Escapes characters that have a special meaning in regular expressions.
     */
    private escapeRegex;

    /**
     * Register a custom pattern.
     *
     * @param pattern - Pattern to add.
     */
    addPattern(pattern: UniversalContextPattern): void;

    /**
     * Register a pattern for a single language.
     *
     * @param language - Language the pattern belongs to.
     * @param pattern - Pattern to add.
     */
    addLanguagePattern(language: string, pattern: UniversalContextPattern): void;

    /**
     * Expose every registered pattern, intended for debugging.
     *
     * @returns The universal patterns and the language-specific patterns keyed by string.
     */
    getAllPatterns(): {
        universal: UniversalContextPattern[];
        languageSpecific: Map<string, UniversalContextPattern[]>;
    };
}
|
|
68
|
+
/**
 * Scores potential profanity matches by analyzing their context.
 */
export declare class ContextAnalyzer {
    private patternMatcher;
    private contextWindow;

    /**
     * @param languages - Optional list of languages to set the analyzer up for.
     */
    constructor(languages?: string[]);

    /**
     * Analyze the context around a potential profanity match
     * (legacy score-based model).
     *
     * @param text - Full text containing the match.
     * @param matchStart - Start position of the match within `text`.
     * @param matchEnd - End position of the match within `text`.
     * @param word - The matched word.
     * @returns The score, a confidence level, the rules considered together
     *   with whether each one matched, and the context string.
     */
    analyzeContext(
        text: string,
        matchStart: number,
        matchEnd: number,
        word: string,
    ): {
        score: number;
        confidence: "high" | "medium" | "low";
        appliedRules: {
            rule: ContextRule;
            matched: boolean;
        }[];
        context: string;
    };

    /**
     * Compute the certainty delta of a word from its surrounding context.
     *
     * A positive delta is a booster (more likely profane); a negative delta is
     * a reducer (more likely innocent).
     *
     * @param text - Full text containing the match.
     * @param matchStart - Start position of the match within `text`.
     * @param matchEnd - End position of the match within `text`.
     * @param word - The matched word.
     * @returns The sum of the deltas of all matching patterns.
     */
    getCertaintyDelta(text: string, matchStart: number, matchEnd: number, word: string): number;

    /**
     * Change the size of the context window.
     *
     * @param size - New window size.
     *   NOTE(review): the unit (characters vs. words) is not visible here; confirm in the implementation.
     */
    setContextWindow(size: number): void;

    /**
     * Register a custom pattern with the analyzer.
     *
     * @param pattern - Pattern to add.
     */
    addCustomPattern(pattern: UniversalContextPattern): void;
}
|