wr_vocab_blacklist 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 83cba9b3f2d5a661c2a4106ba948db0b8da5913e523a50b282495686ad06d744
|
4
|
+
data.tar.gz: d02403de8157e30b1cf4d868b9f274f536ea4b131c6aef2bddaf6b9a8520a872
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 158019d306ff7b712cd77a1ce5f2ef37644938e0a6a12ed53818a3e45b4b12c399997fa7f81099473c050d39676c47b17d466756092da29a52177e914a5d141c
|
7
|
+
data.tar.gz: 67934a1895f260bfd39b6b8b30cc32adc045576f0be1222d22669a5489941a83eae3ade1ac6b639a3cf0ed8cb813ccfc863d09c09a6e955a8849d995860a8d15
|
data/README.md
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
L2E Vocab Blacklist
|
2
|
+
==================
|
3
|
+
|
4
|
+
Detect words and partial-words that shouldn't be used on Learn2Earn's website
|
5
|
+
|
6
|
+
To be expanded to allow for different classes of banned words so
|
7
|
+
administrators may loosen or expand restrictions, or so we can be more
|
8
|
+
permissive with older students.
|
9
|
+
|
10
|
+
Use:
|
11
|
+
```
|
12
|
+
gem 'wr_vocab_blacklist', require: 'l2e_vocab_blacklist'
|
13
|
+
```
|
14
|
+
|
15
|
+
### To update and publish the gem
|
16
|
+
|
17
|
+
1. Merge pull requests, pull to local master
|
18
|
+
2. Run tests: `bundle exec rspec spec/l2e_vocab_blacklist_spec.rb`
|
19
|
+
3. Increase the gem version and date in `l2e_vocab_blacklist.gemspec`
|
20
|
+
4. Run `gem build l2e_vocab_blacklist.gemspec`
|
21
|
+
5. Remove the old `.gem` file
|
22
|
+
6. Push the new gem with: `gem push l2e_vocab_blacklist-X.X.X.gem` substituting in the gem version
|
23
|
+
1. You may need to sign in on the CLI to push the gem. Talk to Greg about this.
|
24
|
+
7. Commit and push your changes
|
@@ -0,0 +1,282 @@
|
|
1
|
+
50 Shades of Gray,15
|
2
|
+
50 Shades of Grey,15
|
3
|
+
A-Hole,10
|
4
|
+
Anal,5
|
5
|
+
Anus,5
|
6
|
+
Apeshit,15
|
7
|
+
Ass,15
|
8
|
+
Asshole,10
|
9
|
+
B*tch,15
|
10
|
+
Badass,15
|
11
|
+
Bad-ass,15
|
12
|
+
Barbiturates,5
|
13
|
+
Bastard,10
|
14
|
+
Beaner,15
|
15
|
+
Beastiality,15
|
16
|
+
Bisexual,5
|
17
|
+
BJ,15
|
18
|
+
Blowjob,15
|
19
|
+
Boner,15
|
20
|
+
Boob,10
|
21
|
+
Boobs,10
|
22
|
+
Boobies,15
|
23
|
+
Brazzer,15
|
24
|
+
Breasts,5
|
25
|
+
Bugger,15
|
26
|
+
Bullshit,15
|
27
|
+
Bunghole,15
|
28
|
+
Busty,5
|
29
|
+
Butthole,15
|
30
|
+
C*ck,15
|
31
|
+
C*cksucker,15
|
32
|
+
C*nt,15
|
33
|
+
Cannabis,5
|
34
|
+
Cervix,5
|
35
|
+
Chink,15
|
36
|
+
Chino,15
|
37
|
+
Chlamydia,10
|
38
|
+
ChoMo,15
|
39
|
+
Chulo,5
|
40
|
+
Cleavage,5
|
41
|
+
Clitoris,15
|
42
|
+
Cocaine,5
|
43
|
+
Cock,15
|
44
|
+
Cockblock,15
|
45
|
+
Cocksucker,15
|
46
|
+
Cocksucking,15
|
47
|
+
Codeine,5
|
48
|
+
Condom,15
|
49
|
+
Coolie,15
|
50
|
+
Cornholing,15
|
51
|
+
Cr*p,15
|
52
|
+
Crap,10
|
53
|
+
Crip,10
|
54
|
+
Cum,15
|
55
|
+
Cumming,15
|
56
|
+
Cumshot,15
|
57
|
+
Cumslut,15
|
58
|
+
Cum-slut,15
|
59
|
+
Cunnilingus,15
|
60
|
+
Cunt,15
|
61
|
+
Damn,5
|
62
|
+
Daterape,10
|
63
|
+
Deep Throat,15
|
64
|
+
Deepthroat,15
|
65
|
+
Dental Dam,15
|
66
|
+
Dick,15
|
67
|
+
Dicking,15
|
68
|
+
Dildo,15
|
69
|
+
Dingleberry,15
|
70
|
+
Dipshit,15
|
71
|
+
DMT,15
|
72
|
+
Doggy Style,15
|
73
|
+
Doobie,15
|
74
|
+
Douche,15
|
75
|
+
Drag Queen,5
|
76
|
+
Dyke,15
|
77
|
+
Ejaculate,10
|
78
|
+
Ejaculation,10
|
79
|
+
Erotic,15
|
80
|
+
Erotica,15
|
81
|
+
Erection,15
|
82
|
+
F*ck,15
|
83
|
+
F*cker,15
|
84
|
+
F**k,15
|
85
|
+
F**ker,15
|
86
|
+
F*@k,15
|
87
|
+
F*@ker,15
|
88
|
+
F*#k,15
|
89
|
+
F*#ker,15
|
90
|
+
F*?k,15
|
91
|
+
F*?ker,15
|
92
|
+
F*ck, 15
|
93
|
+
F*cking, 15
|
94
|
+
Fu*k,15
|
95
|
+
Fag,15
|
96
|
+
Faggot,15
|
97
|
+
Fetal,5
|
98
|
+
Fetish,10
|
99
|
+
Fifty Shades of Gray,15
|
100
|
+
Fifty Shades of Grey,15
|
101
|
+
Fisting,15
|
102
|
+
Foreskin,10
|
103
|
+
g spot,15
|
104
|
+
G-spot,15
|
105
|
+
Gang Bang,15
|
106
|
+
Genital,10
|
107
|
+
Genitals,10
|
108
|
+
Gigolo,5
|
109
|
+
Gimp,5
|
110
|
+
Golden Shower,15
|
111
|
+
Gonorrhea,5
|
112
|
+
Gook,15
|
113
|
+
Grundle,15
|
114
|
+
Hallucinogen,5
|
115
|
+
Hand Job,15
|
116
|
+
Handjob,15
|
117
|
+
Hangover,10
|
118
|
+
Hard On,5
|
119
|
+
Harem,5
|
120
|
+
Harlot,10
|
121
|
+
Heroin,5
|
122
|
+
Heroina,5
|
123
|
+
Herpes,10
|
124
|
+
Hick,5
|
125
|
+
Hickey,5
|
126
|
+
Hickie,5
|
127
|
+
Hooker,10
|
128
|
+
Horny,15
|
129
|
+
Huff,10
|
130
|
+
Huffing,10
|
131
|
+
Hump,15
|
132
|
+
Humping,15
|
133
|
+
Hungover,10
|
134
|
+
Intercourse,5
|
135
|
+
Jackass,15
|
136
|
+
Jap,15
|
137
|
+
Jerk It,15
|
138
|
+
Jerk Off,15
|
139
|
+
Jerkoff,15
|
140
|
+
Jigga,15
|
141
|
+
Joint,10
|
142
|
+
Kickass,15
|
143
|
+
Kick-ass,15
|
144
|
+
Kunt,15
|
145
|
+
Kyke,15
|
146
|
+
Labia,10
|
147
|
+
Lesbo,15
|
148
|
+
LSD,10
|
149
|
+
Lube,15
|
150
|
+
Lubricant,10
|
151
|
+
Lubrication,5
|
152
|
+
Lust,5
|
153
|
+
Lustful,5
|
154
|
+
Marijuana,10
|
155
|
+
Masturbate,10
|
156
|
+
Masturbating,10
|
157
|
+
Masturbation,10
|
158
|
+
MDMA,15
|
159
|
+
Merk,15
|
160
|
+
Meth,10
|
161
|
+
Methamphetamine,10
|
162
|
+
Milf,15
|
163
|
+
Mofo,15
|
164
|
+
Money Shot,15
|
165
|
+
Moneyshot,15
|
166
|
+
Moron,5
|
167
|
+
Motherf*cker,15
|
168
|
+
Narcotic,5
|
169
|
+
Nigga,15
|
170
|
+
Nignog,15
|
171
|
+
Nips,15
|
172
|
+
Niptip,15
|
173
|
+
Nookie,15
|
174
|
+
Nooky,15
|
175
|
+
Nude,10
|
176
|
+
Opiates,5
|
177
|
+
Opium,5
|
178
|
+
Oral sex,15
|
179
|
+
Orgasm,15
|
180
|
+
Orgasmic,15
|
181
|
+
Orgy,10
|
182
|
+
Ovaries,5
|
183
|
+
Oxy,15
|
184
|
+
Oxycodone,10
|
185
|
+
P*ss,15
|
186
|
+
Panties,10
|
187
|
+
Penile,5
|
188
|
+
Penis,10
|
189
|
+
Playboy,10
|
190
|
+
Porn,15
|
191
|
+
Pornograph,15
|
192
|
+
Preggers,15
|
193
|
+
Promiscuity, 15
|
194
|
+
Prostitute,10
|
195
|
+
Prostitution,10
|
196
|
+
Pussy,15
|
197
|
+
Queef,15
|
198
|
+
Racy,10
|
199
|
+
Rape,5
|
200
|
+
Rapist,10
|
201
|
+
Raunchy,10
|
202
|
+
Rim Job,15
|
203
|
+
Rimjob,15
|
204
|
+
Rohypnol,10
|
205
|
+
Roofie,10
|
206
|
+
S&M,15
|
207
|
+
S/M,15
|
208
|
+
Salvia,15
|
209
|
+
Sangria,5
|
210
|
+
Schlong,15
|
211
|
+
Scrotum,15
|
212
|
+
Seduce,10
|
213
|
+
Seduced,10
|
214
|
+
Seducing,10
|
215
|
+
Seductress,10
|
216
|
+
Sex,10
|
217
|
+
Sexier,10
|
218
|
+
Sexiest,10
|
219
|
+
Sexing,10
|
220
|
+
Sexting,10
|
221
|
+
Sexual,10
|
222
|
+
Sexy,10
|
223
|
+
Sh*t,15
|
224
|
+
Shank,15
|
225
|
+
Shit,15
|
226
|
+
Shitter,15
|
227
|
+
Shitting,15
|
228
|
+
Shitty,15
|
229
|
+
Shmegma,15
|
230
|
+
Shrooms,15
|
231
|
+
Slut,15
|
232
|
+
Snorting,10
|
233
|
+
Sperm,10
|
234
|
+
Spermicide,10
|
235
|
+
Splooge,15
|
236
|
+
STD,10
|
237
|
+
Strip tease,15
|
238
|
+
Stripper,10
|
239
|
+
Sucker,10
|
240
|
+
Suicide,5
|
241
|
+
Syphilis,10
|
242
|
+
T*t,15
|
243
|
+
Testes,10
|
244
|
+
Testicle,10
|
245
|
+
Testicular,10
|
246
|
+
THC,15
|
247
|
+
Thong,10
|
248
|
+
Threesome,15
|
249
|
+
Tit,10
|
250
|
+
Tittie,15
|
251
|
+
Titty,15
|
252
|
+
Tranny,15
|
253
|
+
Turd,15
|
254
|
+
Twerk,15
|
255
|
+
Uncensored,10
|
256
|
+
Urethra,10
|
257
|
+
Urine,5
|
258
|
+
Uterus,5
|
259
|
+
Vag,15
|
260
|
+
Vagina,10
|
261
|
+
Vagina,10
|
262
|
+
Vamp,5
|
263
|
+
Viagra,15
|
264
|
+
Vicodin,10
|
265
|
+
Vulva,5
|
266
|
+
Wanker,15
|
267
|
+
Weiner,10
|
268
|
+
Wetback,15
|
269
|
+
Wet dream,10
|
270
|
+
Whack Off,15
|
271
|
+
Whisky,5
|
272
|
+
Whitie,15
|
273
|
+
Whity,15
|
274
|
+
Whore,10
|
275
|
+
Whorehouse,10
|
276
|
+
Whoring,10
|
277
|
+
Wiener,10
|
278
|
+
Wigger,15
|
279
|
+
X Rated,10
|
280
|
+
XX,10
|
281
|
+
XXX,10
|
282
|
+
Zipperhead,15
|
@@ -0,0 +1 @@
|
|
1
|
+
moby dick,5
|
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'active_support/inflector'
|
2
|
+
require 'csv'
|
3
|
+
|
4
|
+
# Detects and censors blacklisted vocabulary.
#
# Word lists are loaded once, at class-definition time, from the CSV/text
# files under BLACKLIST_DIR:
#   * full_words.csv   - "word,number" rows; a row applies when its number is
#                        greater than or equal to the +age+ passed by the
#                        caller (a missing number is treated as 0).
#   * greedy_words.txt - fragments matched anywhere inside the text.
#   * whitelist.csv    - phrases that exempt the words they contain.
class VocabBlacklist

  # Returns true when +str+ contains a blacklisted word, phrase or greedy
  # fragment for the given +age+, false otherwise.
  #
  # str - text to check; nil is treated as an empty string.
  # age - String or Integer compared against the number stored with each
  #       full word (see check_full_words_csv).
  def self.blacklisted?(str, age = "0")
    # Sanitize string
    str = str.to_s.downcase.strip

    whitelisted_phrases = whitelist_matches(str)

    # Single words: blacklisted unless covered by a whitelisted phrase that
    # is present in the text.
    str.split(/[ -]/).each do |word|
      word = word.gsub(CONSIDER_REGEX, "")
      next unless check_full_words_csv(word, age)

      return true unless whitelisted_phrases.any? { |phrase| phrase.include?(word) }
    end

    # Compound (multi-word) entries are matched as substrings.
    return true if PHRASES.any? { |bad_phrase| str.include?(bad_phrase) }

    GREEDY_WORDS.any? { |s| str.include?(s) }
  end

  # Returns the WHITELIST phrases that occur in +text+ after it has been
  # downcased, stripped and reduced to the characters kept by CONSIDER_REGEX.
  def self.whitelist_matches(text)
    text = text.to_s.downcase.strip.gsub(CONSIDER_REGEX, "")
    WHITELIST.select { |whitelist_phrase| text.include?(whitelist_phrase) }
  end

  # Returns a censored copy of +str+. The caller's string is never modified.
  #
  # str          - text to censor; nil is treated as an empty string.
  # age          - see blacklisted?.
  # replace_with - replacement text. A single character is repeated to match
  #                the length of what it replaces; anything longer is
  #                inserted once per match.
  def self.censor(str, age = "0", replace_with = "*")
    # Work on a copy: the original implementation called gsub! on the
    # argument, which mutated the caller's string and raised on frozen ones.
    text = str.to_s.dup

    # match number of characters for any replace_with that is 1 character
    mask = lambda do |length|
      replace_with.length == 1 ? replace_with * length : replace_with
    end

    # Regexp.escape keeps characters such as "*" (which CONSIDER_REGEX
    # preserves) from being read as regex operators. The block form of gsub
    # inserts the replacement literally, without backslash interpretation.
    PHRASES.each do |bad_phrase|
      text.gsub!(/#{Regexp.escape(bad_phrase)}/i) { mask.call(bad_phrase.length) }
    end

    whitelisted_phrases = whitelist_matches(text)

    text.split(/ /).map do |working_word|
      working_word.split(/-/).map do |sub_working_word|
        word = sub_working_word.downcase.gsub(CONSIDER_REGEX, "")

        # Words covered by a whitelisted phrase found in the text are kept.
        next sub_working_word if whitelisted_phrases.any? { |phrase| phrase.include?(word) }

        if check_full_words_csv(word, age)
          sub_working_word = sub_working_word.gsub(/#{Regexp.escape(word)}/i) { mask.call(word.length) }
        end

        # A greedy fragment anywhere in the word blanks the whole word.
        if GREEDY_WORDS.any? { |w| word.include?(w) }
          sub_working_word = mask.call(sub_working_word.length)
        end

        sub_working_word
      end.join("-")
    end.join(" ")
  end

  # Reads +file+ as CSV and returns the first column of every non-blank row,
  # downcased and with the characters matched by CONSIDER_REGEX removed.
  def self.file_to_normalized_words(file)
    CSV.parse(File.read(file))
       .map(&:first)
       .reject { |s| s.to_s.strip.empty? }
       .map { |s| s.downcase.gsub(CONSIDER_REGEX, "") }
  end

  # Returns +words+ together with the pluralized and singularized form of
  # each (String#pluralize / String#singularize), without duplicates.
  def self.words_with_expansions(words)
    # flatten before uniq so duplicates are removed per word, not per triple
    words.flat_map { |s| [s, s.pluralize, s.singularize] }.uniq
  end

  BLACKLIST_DIR = File.join(File.dirname(__FILE__), 'l2e_vocab_blacklist/blacklists')
  # Matches every character that is NOT a digit, ASCII letter, "*" or space.
  CONSIDER_REGEX = /[^0-9a-z\* ]/i

  # Entries of full_words.csv split into multi-word phrases and single words.
  PHRASES, FULL_WORDS = file_to_normalized_words("#{BLACKLIST_DIR}/full_words.csv").partition { |w| w.split(" ").length > 1 }
  GREEDY_WORDS = words_with_expansions(file_to_normalized_words("#{BLACKLIST_DIR}/greedy_words.txt")).uniq.freeze
  WHITELIST = file_to_normalized_words("#{BLACKLIST_DIR}/whitelist.csv").uniq.freeze

  # Raw rows of full_words.csv: [word, number].
  FULL_WORDS_CSV = CSV.parse(File.read("#{BLACKLIST_DIR}/full_words.csv"))

  # Returns true when full_words.csv has a row whose first column equals
  # +word+ (case-insensitive) and whose second column, as an integer, is
  # greater than or equal to +age+.
  #
  # NOTE: the original file placed this method under a bare `private`, which
  # has no effect on `def self.` methods. It has therefore always been
  # publicly callable and is kept public here for backward compatibility.
  def self.check_full_words_csv(word, age)
    needle = word.to_s.downcase
    threshold = age.to_i

    FULL_WORDS_CSV.any? do |row|
      entry = row[0]
      # Blank CSV lines parse to empty rows; skip them instead of raising.
      next false if entry.nil?

      row[1].to_i >= threshold && entry.downcase == needle
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wr_vocab_blacklist
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Greg Sherrid
|
8
|
+
- Gilles Ferone
|
9
|
+
- Kevin Schroeder
|
10
|
+
- Matt Schleifman
|
11
|
+
- Whooo's Reading by Learn2Earn
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain: []
|
15
|
+
date: 2022-03-15 00:00:00.000000000 Z
|
16
|
+
dependencies: []
|
17
|
+
description: Detect words and partial-words that shouldn't be used on Whooo's Reading
|
18
|
+
website
|
19
|
+
email:
|
20
|
+
- kevin@whooosreading.org
|
21
|
+
- gilles@whooosreading.org
|
22
|
+
- matt@whooosreading.org
|
23
|
+
executables: []
|
24
|
+
extensions: []
|
25
|
+
extra_rdoc_files: []
|
26
|
+
files:
|
27
|
+
- README.md
|
28
|
+
- lib/l2e_vocab_blacklist.rb
|
29
|
+
- lib/l2e_vocab_blacklist/blacklists/full_words.csv
|
30
|
+
- lib/l2e_vocab_blacklist/blacklists/greedy_words.txt
|
31
|
+
- lib/l2e_vocab_blacklist/blacklists/whitelist.csv
|
32
|
+
homepage: https://github.com/whooosreading/l2e_vocab_blacklist
|
33
|
+
licenses:
|
34
|
+
- All rights reserved, for now
|
35
|
+
metadata: {}
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options: []
|
38
|
+
require_paths:
|
39
|
+
- lib
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
requirements: []
|
51
|
+
rubygems_version: 3.2.3
|
52
|
+
signing_key:
|
53
|
+
specification_version: 4
|
54
|
+
summary: Whooo's Reading Vocab Blacklist
|
55
|
+
test_files: []
|