wr_vocab_blacklist 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 83cba9b3f2d5a661c2a4106ba948db0b8da5913e523a50b282495686ad06d744
4
+ data.tar.gz: d02403de8157e30b1cf4d868b9f274f536ea4b131c6aef2bddaf6b9a8520a872
5
+ SHA512:
6
+ metadata.gz: 158019d306ff7b712cd77a1ce5f2ef37644938e0a6a12ed53818a3e45b4b12c399997fa7f81099473c050d39676c47b17d466756092da29a52177e914a5d141c
7
+ data.tar.gz: 67934a1895f260bfd39b6b8b30cc32adc045576f0be1222d22669a5489941a83eae3ade1ac6b639a3cf0ed8cb813ccfc863d09c09a6e955a8849d995860a8d15
data/README.md ADDED
@@ -0,0 +1,24 @@
1
+ L2E Vocab Blacklist
2
+ ==================
3
+
4
+ Detect words and partial-words that shouldn't be used on Learn2Earn's website
5
+
6
+ To be expanded to allow for different classes of banned words so
7
+ administrators may loosen or tighten restrictions, or so we can be more
8
+ permissive with older students.
9
+
10
+ Use:
11
+ ```
12
+ gem 'l2e_vocab_blacklist'
13
+ ```
14
+
15
+ ### To update and publish the gem
16
+
17
+ 1. Merge pull requests, pull to local master
18
+ 2. Run tests: `bundle exec rspec spec/l2e_vocab_blacklist_spec.rb`
19
+ 3. Increase the gem version and date in `l2e_vocab_blacklist.gemspec`
20
+ 4. Run `gem build l2e_vocab_blacklist.gemspec`
21
+ 5. Remove the old `.gem` file
22
+ 6. Push the new gem with: `gem push l2e_vocab_blacklist-X.X.X.gem` substituting in the gem version
23
+ 1. You may need to sign in on the CLI to push the gem. Talk to Greg about this.
24
+ 7. Commit and push your changes
@@ -0,0 +1,282 @@
1
+ 50 Shades of Gray,15
2
+ 50 Shades of Grey,15
3
+ A-Hole,10
4
+ Anal,5
5
+ Anus,5
6
+ Apeshit,15
7
+ Ass,15
8
+ Asshole,10
9
+ B*tch,15
10
+ Badass,15
11
+ Bad-ass,15
12
+ Barbiturates,5
13
+ Bastard,10
14
+ Beaner,15
15
+ Beastiality,15
16
+ Bisexual,5
17
+ BJ,15
18
+ Blowjob,15
19
+ Boner,15
20
+ Boob,10
21
+ Boobs,10
22
+ Boobies,15
23
+ Brazzer,15
24
+ Breasts,5
25
+ Bugger,15
26
+ Bullshit,15
27
+ Bunghole,15
28
+ Busty,5
29
+ Butthole,15
30
+ C*ck,15
31
+ C*cksucker,15
32
+ C*nt,15
33
+ Cannabis,5
34
+ Cervix,5
35
+ Chink,15
36
+ Chino,15
37
+ Chlamydia,10
38
+ ChoMo,15
39
+ Chulo,5
40
+ Cleavage,5
41
+ Clitoris,15
42
+ Cocaine,5
43
+ Cock,15
44
+ Cockblock,15
45
+ Cocksucker,15
46
+ Cocksucking,15
47
+ Codeine,5
48
+ Condom,15
49
+ Coolie,15
50
+ Cornholing,15
51
+ Cr*p,15
52
+ Crap,10
53
+ Crip,10
54
+ Cum,15
55
+ Cumming,15
56
+ Cumshot,15
57
+ Cumslut,15
58
+ Cum-slut,15
59
+ Cunnilingus,15
60
+ Cunt,15
61
+ Damn,5
62
+ Daterape,10
63
+ Deep Throat,15
64
+ Deepthroat,15
65
+ Dental Dam,15
66
+ Dick,15
67
+ Dicking,15
68
+ Dildo,15
69
+ Dingleberry,15
70
+ Dipshit,15
71
+ DMT,15
72
+ Doggy Style,15
73
+ Doobie,15
74
+ Douche,15
75
+ Drag Queen,5
76
+ Dyke,15
77
+ Ejaculate,10
78
+ Ejaculation,10
79
+ Erotic,15
80
+ Erotica,15
81
+ Erection,15
82
+ F*ck,15
83
+ F*cker,15
84
+ F**k,15
85
+ F**ker,15
86
+ F*@k,15
87
+ F*@ker,15
88
+ F*#k,15
89
+ F*#ker,15
90
+ F*?k,15
91
+ F*?ker,15
92
+ F*ck, 15
93
+ F*cking, 15
94
+ Fu*k,15
95
+ Fag,15
96
+ Faggot,15
97
+ Fetal,5
98
+ Fetish,10
99
+ Fifty Shades of Gray,15
100
+ Fifty Shades of Grey,15
101
+ Fisting,15
102
+ Foreskin,10
103
+ g spot,15
104
+ G-spot,15
105
+ Gang Bang,15
106
+ Genital,10
107
+ Genitals,10
108
+ Gigolo,5
109
+ Gimp,5
110
+ Golden Shower,15
111
+ Gonorrhea,5
112
+ Gook,15
113
+ Grundle,15
114
+ Hallucinogen,5
115
+ Hand Job,15
116
+ Handjob,15
117
+ Hangover,10
118
+ Hard On,5
119
+ Harem,5
120
+ Harlot,10
121
+ Heroin,5
122
+ Heroina,5
123
+ Herpes,10
124
+ Hick,5
125
+ Hickey,5
126
+ Hickie,5
127
+ Hooker,10
128
+ Horny,15
129
+ Huff,10
130
+ Huffing,10
131
+ Hump,15
132
+ Humping,15
133
+ Hungover,10
134
+ Intercourse,5
135
+ Jackass,15
136
+ Jap,15
137
+ Jerk It,15
138
+ Jerk Off,15
139
+ Jerkoff,15
140
+ Jigga,15
141
+ Joint,10
142
+ Kickass,15
143
+ Kick-ass,15
144
+ Kunt,15
145
+ Kyke,15
146
+ Labia,10
147
+ Lesbo,15
148
+ LSD,10
149
+ Lube,15
150
+ Lubricant,10
151
+ Lubrication,5
152
+ Lust,5
153
+ Lustful,5
154
+ Marijuana,10
155
+ Masturbate,10
156
+ Masturbating,10
157
+ Masturbation,10
158
+ MDMA,15
159
+ Merk,15
160
+ Meth,10
161
+ Methamphetamine,10
162
+ Milf,15
163
+ Mofo,15
164
+ Money Shot,15
165
+ Moneyshot,15
166
+ Moron,5
167
+ Motherf*cker,15
168
+ Narcotic,5
169
+ Nigga,15
170
+ Nignog,15
171
+ Nips,15
172
+ Niptip,15
173
+ Nookie,15
174
+ Nooky,15
175
+ Nude,10
176
+ Opiates,5
177
+ Opium,5
178
+ Oral sex,15
179
+ Orgasm,15
180
+ Orgasmic,15
181
+ Orgy,10
182
+ Ovaries,5
183
+ Oxy,15
184
+ Oxycodone,10
185
+ P*ss,15
186
+ Panties,10
187
+ Penile,5
188
+ Penis,10
189
+ Playboy,10
190
+ Porn,15
191
+ Pornograph,15
192
+ Preggers,15
193
+ Promiscuity, 15
194
+ Prostitute,10
195
+ Prostitution,10
196
+ Pussy,15
197
+ Queef,15
198
+ Racy,10
199
+ Rape,5
200
+ Rapist,10
201
+ Raunchy,10
202
+ Rim Job,15
203
+ Rimjob,15
204
+ Rohypnol,10
205
+ Roofie,10
206
+ S&M,15
207
+ S/M,15
208
+ Salvia,15
209
+ Sangria,5
210
+ Schlong,15
211
+ Scrotum,15
212
+ Seduce,10
213
+ Seduced,10
214
+ Seducing,10
215
+ Seductress,10
216
+ Sex,10
217
+ Sexier,10
218
+ Sexiest,10
219
+ Sexing,10
220
+ Sexting,10
221
+ Sexual,10
222
+ Sexy,10
223
+ Sh*t,15
224
+ Shank,15
225
+ Shit,15
226
+ Shitter,15
227
+ Shitting,15
228
+ Shitty,15
229
+ Shmegma,15
230
+ Shrooms,15
231
+ Slut,15
232
+ Snorting,10
233
+ Sperm,10
234
+ Spermicide,10
235
+ Splooge,15
236
+ STD,10
237
+ Strip tease,15
238
+ Stripper,10
239
+ Sucker,10
240
+ Suicide,5
241
+ Syphilis,10
242
+ T*t,15
243
+ Testes,10
244
+ Testicle,10
245
+ Testicular,10
246
+ THC,15
247
+ Thong,10
248
+ Threesome,15
249
+ Tit,10
250
+ Tittie,15
251
+ Titty,15
252
+ Tranny,15
253
+ Turd,15
254
+ Twerk,15
255
+ Uncensored,10
256
+ Urethra,10
257
+ Urine,5
258
+ Uterus,5
259
+ Vag,15
260
+ Vagina,10
261
+ Vagina,10
262
+ Vamp,5
263
+ Viagra,15
264
+ Vicodin,10
265
+ Vulva,5
266
+ Wanker,15
267
+ Weiner,10
268
+ Wetback,15
269
+ Wet dream,10
270
+ Whack Off,15
271
+ Whisky,5
272
+ Whitie,15
273
+ Whity,15
274
+ Whore,10
275
+ Whorehouse,10
276
+ Whoring,10
277
+ Wiener,10
278
+ Wigger,15
279
+ X Rated,10
280
+ XX,10
281
+ XXX,10
282
+ Zipperhead,15
@@ -0,0 +1,4 @@
1
+ fuck
2
+ bitch
3
+ nigger
4
+ **
@@ -0,0 +1 @@
1
+ moby dick,5
@@ -0,0 +1,115 @@
1
+ require 'active_support/inflector'
2
+ require 'csv'
3
+
4
# Detects and censors banned vocabulary using the word lists bundled with the
# gem:
#
# * full_words.csv   - "word,level" rows. Single-word rows must equal a whole
#                      word of the text; multi-word rows are matched as
#                      substrings (PHRASES) regardless of level.
# * greedy_words.txt - words that are banned wherever they occur, even inside
#                      a longer word.
# * whitelist.csv    - phrases that excuse the full words they contain
#                      (only the first column is used).
#
# A full-word row applies when its level is greater than or equal to the
# +age+ argument (both compared via +to_i+).
class VocabBlacklist
  # Directory that holds the bundled word lists.
  BLACKLIST_DIR = File.join(File.dirname(__FILE__), 'l2e_vocab_blacklist/blacklists')

  # Matches every character that is ignored when comparing words: anything
  # other than digits, ASCII letters, "*" and the space character.
  CONSIDER_REGEX = /[^0-9a-z\* ]/i

  # Tells whether the text contains banned vocabulary.
  #
  # @param str [String, nil] text to check (nil is treated as empty text)
  # @param age [String, Integer] level threshold for full-word rows
  # @return [Boolean] true when a non-whitelisted full word, a phrase or a
  #   greedy word is found
  def self.blacklisted?(str, age = "0")
    text = str.to_s.downcase.strip
    whitelisted_phrases = whitelist_matches(text)

    # Full words are compared token by token (split on spaces and hyphens).
    banned_word = text.split(/[ -]/).any? do |token|
      word = token.gsub(CONSIDER_REGEX, "")
      check_full_words_csv(word, age) &&
        whitelisted_phrases.none? { |phrase| phrase.include?(word) }
    end

    # Phrases and greedy words are substring checks on the whole text and are
    # not affected by the whitelist.
    banned_word ||
      PHRASES.any? { |bad_phrase| text.include?(bad_phrase) } ||
      GREEDY_WORDS.any? { |greedy| text.include?(greedy) }
  end

  # Returns the whitelist phrases that occur in the normalized text.
  #
  # @param text [String, nil]
  # @return [Array<String>]
  def self.whitelist_matches(text)
    normalized = text.to_s.downcase.strip.gsub(CONSIDER_REGEX, "")
    WHITELIST.select { |whitelist_phrase| normalized.include?(whitelist_phrase) }
  end

  # Returns a copy of the text with banned vocabulary replaced.
  #
  # A one-character +replace_with+ is repeated to the length of what it
  # replaces; a longer +replace_with+ is inserted once per match.
  #
  # The argument itself is never modified, so frozen strings are accepted.
  # Words are escaped before being used as patterns because "*" is a legal
  # character in list entries and in normalized input.
  #
  # @param str [String, nil] text to censor (nil is treated as empty text)
  # @param age [String, Integer] level threshold for full-word rows
  # @param replace_with [String] mask character or replacement text
  # @return [String]
  def self.censor(str, age = "0", replace_with = "*")
    text = str.to_s

    PHRASES.each do |bad_phrase|
      pattern = Regexp.new(Regexp.escape(bad_phrase), Regexp::IGNORECASE)
      text = text.gsub(pattern) { mask_for(bad_phrase.length, replace_with) }
    end

    whitelisted_phrases = whitelist_matches(text)

    # The -1 limit keeps trailing empty fields, so trailing spaces and
    # hyphens of the input survive the split/join round trip.
    text.split(/ /, -1).map do |spaced_token|
      spaced_token.split(/-/, -1).map do |token|
        censor_token(token, whitelisted_phrases, age, replace_with)
      end.join("-")
    end.join(" ")
  end

  # Reads a list file and returns the normalized first column of every row.
  #
  # @param file [String] path of a CSV/plain word list
  # @return [Array<String>]
  def self.file_to_normalized_words(file)
    normalized_first_column(CSV.parse(File.read(file)))
  end

  # Returns every distinct word followed by its plural and singular forms
  # (String#pluralize / #singularize from active_support/inflector).
  #
  # @param words [Array<String>]
  # @return [Array<String>] may contain duplicates across different words
  def self.words_with_expansions(words)
    words.uniq.flat_map { |word| [word, word.pluralize, word.singularize] }
  end

  # Tells whether the word is a full-word entry whose level is at least +age+.
  #
  # Both the list entries and +word+ are compared in normalized form
  # (lower case, characters matched by CONSIDER_REGEX removed), so entries
  # written with punctuation such as "F*@k" or "S&M" match the normalized
  # words produced by blacklisted? and censor.
  #
  # NOTE: this method stays public. A bare `private` does not apply to
  # `def self.` methods, so it has always been callable from outside.
  #
  # @param word [String, nil]
  # @param age [String, Integer]
  # @return [Boolean]
  def self.check_full_words_csv(word, age)
    level = FULL_WORD_LEVELS[normalize_text(word)]
    !level.nil? && level >= age.to_i
  end

  # Lower-cases the text and drops every character matched by CONSIDER_REGEX.
  def self.normalize_text(text)
    text.to_s.downcase.gsub(CONSIDER_REGEX, "")
  end

  # Normalized first column of parsed CSV rows. Entries that are blank before
  # or after normalization are skipped: an empty entry would be a substring
  # of every text.
  def self.normalized_first_column(rows)
    rows.map { |row| normalize_text(row.first) }.reject { |word| word.strip.empty? }
  end

  # Replacement for a match of the given length.
  def self.mask_for(length, replace_with)
    replace_with.length == 1 ? replace_with * length : replace_with
  end

  # Case-insensitive pattern that matches the characters of a normalized word
  # in order, tolerating ignored characters between them, so the word is also
  # found in the raw token it was derived from (e.g. "sh.it").
  def self.word_pattern(word)
    ignored = "#{CONSIDER_REGEX.source}*"
    Regexp.new(word.chars.map { |char| Regexp.escape(char) }.join(ignored), Regexp::IGNORECASE)
  end

  # Censors one token (text between spaces/hyphens) and returns the result.
  def self.censor_token(token, whitelisted_phrases, age, replace_with)
    word = normalize_text(token)
    return token if whitelisted_phrases.any? { |phrase| phrase.include?(word) }

    # A greedy hit masks the entire token and takes precedence over the
    # full-word replacement.
    if GREEDY_WORDS.any? { |greedy| word.include?(greedy) }
      return mask_for(token.length, replace_with)
    end

    return token unless check_full_words_csv(word, age)

    token.gsub(word_pattern(word)) { mask_for(word.length, replace_with) }
  end

  private_class_method :normalize_text, :normalized_first_column, :mask_for,
                       :word_pattern, :censor_token

  # Raw rows of full_words.csv, parsed once and shared by the constants below.
  FULL_WORDS_CSV = CSV.parse(File.read("#{BLACKLIST_DIR}/full_words.csv")).freeze

  full_word_entries = normalized_first_column(FULL_WORDS_CSV)
  PHRASES = full_word_entries.select { |w| w.split(" ").length > 1 }.freeze
  FULL_WORDS = full_word_entries.reject { |w| w.split(" ").length > 1 }.freeze
  GREEDY_WORDS = words_with_expansions(file_to_normalized_words("#{BLACKLIST_DIR}/greedy_words.txt")).uniq.freeze
  WHITELIST = file_to_normalized_words("#{BLACKLIST_DIR}/whitelist.csv").uniq.freeze

  # Normalized entry => highest level found for it. A missing or
  # non-numeric level counts as 0 (to_i).
  FULL_WORD_LEVELS = FULL_WORDS_CSV.each_with_object({}) do |row, levels|
    entry = normalize_text(row[0])
    next if entry.strip.empty?

    level = row[1].to_i
    levels[entry] = level if levels[entry].nil? || level > levels[entry]
  end.freeze
end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wr_vocab_blacklist
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Greg Sherrid
8
+ - Gilles Ferone
9
+ - Kevin Schroeder
10
+ - Matt Schleifman
11
+ - Whooo's Reading by Learn2Earn
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+ date: 2022-03-15 00:00:00.000000000 Z
16
+ dependencies: []
17
+ description: Detect words and partial-words that shouldn't be used on Whooo's Reading
18
+ website
19
+ email:
20
+ - kevin@whooosreading.org
21
+ - gilles@whooosreading.org
22
+ - matt@whooosreading.org
23
+ executables: []
24
+ extensions: []
25
+ extra_rdoc_files: []
26
+ files:
27
+ - README.md
28
+ - lib/l2e_vocab_blacklist.rb
29
+ - lib/l2e_vocab_blacklist/blacklists/full_words.csv
30
+ - lib/l2e_vocab_blacklist/blacklists/greedy_words.txt
31
+ - lib/l2e_vocab_blacklist/blacklists/whitelist.csv
32
+ homepage: https://github.com/whooosreading/l2e_vocab_blacklist
33
+ licenses:
34
+ - All rights reserved, for now
35
+ metadata: {}
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ requirements: []
51
+ rubygems_version: 3.2.3
52
+ signing_key:
53
+ specification_version: 4
54
+ summary: Whooo's Reading Vocab Blacklist
55
+ test_files: []