zaarly-swearjar 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/Gemfile +3 -0
- data/README.rdoc +20 -0
- data/Rakefile +13 -0
- data/lib/config/en.yml +440 -0
- data/lib/swearjar/tester.rb +32 -0
- data/lib/swearjar/version.rb +3 -0
- data/lib/swearjar.rb +58 -0
- data/lib/zaarly-swearjar.rb +1 -0
- data/spec/spec.opts +7 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/swearjar_spec.rb +38 -0
- data/zaarly-swearjar.gemspec +37 -0
- metadata +112 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.rdoc
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
= Zaarly Swearjar
|
2
|
+
|
3
|
+
Simple profanity detection with content analysis.
|
4
|
+
|
5
|
+
== Installation
|
6
|
+
|
7
|
+
gem install zaarly-swearjar
|
8
|
+
|
9
|
+
== Usage
|
10
|
+
|
11
|
+
require 'swearjar'
|
12
|
+
|
13
|
+
Swearjar.default.profane?('jim henson has a massive hard on he is gonna use to fuck everybody')
|
14
|
+
<< true
|
15
|
+
|
16
|
+
Swearjar.default.scorecard('jim henson has a massive hard on he is gonna use to fuck everybody')
|
17
|
+
<< {'sexual' => 2}
|
18
|
+
|
19
|
+
Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody')
|
20
|
+
<< 'jim henson has a massive **** ** he is gonna use to **** everybody'
|
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: utf-8

require 'rake'
# Bundler must be loaded explicitly: without it the Bundler::GemHelper
# reference below raises NameError when rake is run outside `bundle exec`.
require 'bundler'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file under spec/ with full backtraces.
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rspec_opts = ['--backtrace']
end

# Running plain `rake` runs the specs.
task :default => :spec

# Registers Bundler's gem packaging tasks for the gemspec in this directory.
Bundler::GemHelper.install_tasks
|
data/lib/config/en.yml
ADDED
@@ -0,0 +1,440 @@
|
|
1
|
+
regex:
|
2
|
+
'hard ons?\b': ["sexual"]
|
3
|
+
'jerk off\b': ["sexual"]
|
4
|
+
'pissed off\b': ["inappropriate"]
|
5
|
+
'sand nigger': ["discriminatory"]
|
6
|
+
simple:
|
7
|
+
"anus": ["sexual"]
|
8
|
+
"arse": ["insult"]
|
9
|
+
"arsehole": ["insult"]
|
10
|
+
"ass": ["sexual", "insult"]
|
11
|
+
"ass-hat": ["insult"]
|
12
|
+
"ass-pirate": ["discriminatory"]
|
13
|
+
"assbag": ["insult"]
|
14
|
+
"assbandit": ["discriminatory"]
|
15
|
+
"assbanger": ["discriminatory"]
|
16
|
+
"assbite": ["insult"]
|
17
|
+
"assclown": ["sexual"]
|
18
|
+
"asscock": ["insult"]
|
19
|
+
"asscracker": ["sexual"]
|
20
|
+
"assface": ["sexual"]
|
21
|
+
"assfuck": ["sexual"]
|
22
|
+
"assfucker": ["discriminatory"]
|
23
|
+
"assgoblin": ["discriminatory"]
|
24
|
+
"asshat": ["sexual"]
|
25
|
+
"asshead": ["insult"]
|
26
|
+
"asshole": ["insult"]
|
27
|
+
"asshopper": ["discriminatory"]
|
28
|
+
"assjacker": ["discriminatory"]
|
29
|
+
"asslick": ["insult"]
|
30
|
+
"asslicker": ["insult"]
|
31
|
+
"assmonkey": ["insult"]
|
32
|
+
"assmunch": ["insult"]
|
33
|
+
"assmuncher": ["sexual"]
|
34
|
+
"assnigger": ["discriminatory"]
|
35
|
+
"asspirate": ["discriminatory"]
|
36
|
+
"assshit": ["insult"]
|
37
|
+
"assshole": ["sexual"]
|
38
|
+
"asssucker": ["insult"]
|
39
|
+
"asswad": ["sexual"]
|
40
|
+
"asswipe": ["sexual"]
|
41
|
+
"bampot": ["insult"]
|
42
|
+
"bastard": ["insult"]
|
43
|
+
"beaner": ["discriminatory"]
|
44
|
+
"beastial": ["sexual"]
|
45
|
+
"beastiality": ["sexual"]
|
46
|
+
"beastility": ["sexual"]
|
47
|
+
"bestial": ["sexual"]
|
48
|
+
"bestiality": ["sexual"]
|
49
|
+
"bitch": ["insult"]
|
50
|
+
"bitchass": ["insult"]
|
51
|
+
"bitcher": ["insult"]
|
52
|
+
"bitchin": ["inappropriate"]
|
53
|
+
"bitching": ["inappropriate"]
|
54
|
+
"bitchtit": ["discriminatory"]
|
55
|
+
"bitchy": ["insult"]
|
56
|
+
"blow job": ["sexual"]
|
57
|
+
"blowjob": ["sexual"]
|
58
|
+
"bollocks": ["sexual"]
|
59
|
+
"bollox": ["sexual"]
|
60
|
+
"boner": ["sexual"]
|
61
|
+
"brotherfucker": ["discriminatory"]
|
62
|
+
"bullshit": ["inappropriate"]
|
63
|
+
"bullshit": ["inappropriate"]
|
64
|
+
"bumblefuck": ["discriminatory"]
|
65
|
+
"butt plug": ["sexual"]
|
66
|
+
"butt-pirate": ["discriminatory"]
|
67
|
+
"buttfucka": ["discriminatory"]
|
68
|
+
"buttfucker": ["discriminatory"]
|
69
|
+
"camel toe": ["sexual"]
|
70
|
+
"carpetmuncher": ["discriminatory"]
|
71
|
+
"chinc": ["discriminatory"]
|
72
|
+
"chink": ["discriminatory"]
|
73
|
+
"choad": ["sexual"]
|
74
|
+
"chode": ["sexual"]
|
75
|
+
"clit": ["sexual"]
|
76
|
+
"clitface": ["insult"]
|
77
|
+
"clitfuck": ["sexual"]
|
78
|
+
"clusterfuck": ["inappropriate"]
|
79
|
+
"cock": ["sexual"]
|
80
|
+
"cockass": ["insult"]
|
81
|
+
"cockbite": ["insult"]
|
82
|
+
"cockburger": ["insult"]
|
83
|
+
"cockface": ["insult"]
|
84
|
+
"cockfucker": ["insult"]
|
85
|
+
"cockhead": ["insult"]
|
86
|
+
"cockjockey": ["discriminatory"]
|
87
|
+
"cockknoker": ["discriminatory"]
|
88
|
+
"cockmaster": ["discriminatory"]
|
89
|
+
"cockmongler": ["discriminatory"]
|
90
|
+
"cockmongruel": ["discriminatory"]
|
91
|
+
"cockmonkey": ["insult"]
|
92
|
+
"cockmuncher": ["discriminatory"]
|
93
|
+
"cocknose": ["insult"]
|
94
|
+
"cocknugget": ["insult"]
|
95
|
+
"cockshit": ["insult"]
|
96
|
+
"cocksmith": ["discriminatory"]
|
97
|
+
"cocksmoker": ["discriminatory"]
|
98
|
+
"cocksuck": ["sexual"]
|
99
|
+
"cocksucked": ["sexual"]
|
100
|
+
"cocksucker": ["discriminatory", "sexual"]
|
101
|
+
"cocksucking": ["sexual", "discriminatory"]
|
102
|
+
"cocksucks": ["sexual", "discriminatory"]
|
103
|
+
"coochie": ["sexual"]
|
104
|
+
"coochy": ["sexual"]
|
105
|
+
"coon": ["discriminatory"]
|
106
|
+
"cooter": ["sexual"]
|
107
|
+
"cracker": ["discriminatory"]
|
108
|
+
"cum": ["sexual"]
|
109
|
+
"cumbubble": ["insult"]
|
110
|
+
"cumdumpster": ["sexual"]
|
111
|
+
"cumguzzler": ["discriminatory"]
|
112
|
+
"cumjockey": ["discriminatory"]
|
113
|
+
"cummer": ["sexual"]
|
114
|
+
"cumming": ["sexual"]
|
115
|
+
"cumshot": ["sexual"]
|
116
|
+
"cumslut": ["sexual", "insult"]
|
117
|
+
"cumtart": ["insult"]
|
118
|
+
"cunillingus": ["sexual"]
|
119
|
+
"cunnie": ["sexual"]
|
120
|
+
"cunnilingus": ["sexual"]
|
121
|
+
"cunnilingus": ["sexual"]
|
122
|
+
"cunt": ["insult", "sexual"]
|
123
|
+
"cuntface": ["insult"]
|
124
|
+
"cunthole": ["sexual"]
|
125
|
+
"cuntlick": ["sexual"]
|
126
|
+
"cuntlicker": ["sexual", "discriminatory"]
|
127
|
+
"cuntlicking": ["sexual"]
|
128
|
+
"cuntrag": ["insult"]
|
129
|
+
"cuntslut": ["insult"]
|
130
|
+
"cyberfuc": ["sexual"]
|
131
|
+
"cyberfuck": ["sexual"]
|
132
|
+
"cyberfucked": ["sexual"]
|
133
|
+
"cyberfucker": ["sexual"]
|
134
|
+
"cyberfucking": ["sexual"]
|
135
|
+
"dago": ["discriminatory"]
|
136
|
+
"damn": ["inappropriate"]
|
137
|
+
"deggo": ["discriminatory"]
|
138
|
+
"dick": ["sexual", "insult"]
|
139
|
+
"dickbag": ["insult"]
|
140
|
+
"dickbeaters": ["sexual"]
|
141
|
+
"dickface": ["insult"]
|
142
|
+
"dickfuck": ["insult"]
|
143
|
+
"dickfucker": ["discriminatory"]
|
144
|
+
"dickhead": ["insult"]
|
145
|
+
"dickhole": ["sexual"]
|
146
|
+
"dickjuice": ["sexual"]
|
147
|
+
"dickmilk": ["sexual"]
|
148
|
+
"dickmonger": ["discriminatory"]
|
149
|
+
"dickslap": ["sexual"]
|
150
|
+
"dicksucker": ["discriminatory"]
|
151
|
+
"dickwad": ["insult"]
|
152
|
+
"dickweasel": ["insult"]
|
153
|
+
"dickweed": ["insult"]
|
154
|
+
"dickwod": ["insult"]
|
155
|
+
"dike": ["discriminatory"]
|
156
|
+
"dildo": ["sexual"]
|
157
|
+
"dink": ["insult", "sexual"]
|
158
|
+
"dipshit": ["insult"]
|
159
|
+
"doochbag": ["insult"]
|
160
|
+
"dookie": ["inappropriate"]
|
161
|
+
"douche": ["insult"]
|
162
|
+
"douche-fag": ["insult"]
|
163
|
+
"douchebag": ["insult"]
|
164
|
+
"douchewaffle": ["discriminatory"]
|
165
|
+
"dumass": ["insult"]
|
166
|
+
"dumb ass": ["insult"]
|
167
|
+
"dumbass": ["insult"]
|
168
|
+
"dumbfuck": ["insult"]
|
169
|
+
"dumbshit": ["insult"]
|
170
|
+
"dumshit": ["insult"]
|
171
|
+
"dyke": ["discriminatory"]
|
172
|
+
"ejaculate": ["sexual"]
|
173
|
+
"ejaculated": ["sexual"]
|
174
|
+
"ejaculates": ["sexual"]
|
175
|
+
"ejaculating": ["sexual"]
|
176
|
+
"ejaculation": ["sexual"]
|
177
|
+
"fag": ["discriminatory"]
|
178
|
+
"fagbag": ["discriminatory"]
|
179
|
+
"fagfucker": ["discriminatory"]
|
180
|
+
"fagging": ["discriminatory"]
|
181
|
+
"faggit": ["discriminatory"]
|
182
|
+
"faggot": ["discriminatory"]
|
183
|
+
"faggot": ["discriminatory"]
|
184
|
+
"faggotcock": ["discriminatory"]
|
185
|
+
"faggs": ["discriminatory"]
|
186
|
+
"fagot": ["discriminatory"]
|
187
|
+
"fags": ["discriminatory"]
|
188
|
+
"fagtard": ["discriminatory"]
|
189
|
+
"fart": ["inappropriate"]
|
190
|
+
"farted": ["inappropriate"]
|
191
|
+
"farting": ["inappropriate"]
|
192
|
+
"farty": ["inappropriate"]
|
193
|
+
"fatass": ["insult"]
|
194
|
+
"felatio": ["sexual"]
|
195
|
+
"fellatio": ["sexual"]
|
196
|
+
"fellatio": ["sexual"]
|
197
|
+
"feltch": ["sexual"]
|
198
|
+
"fingerfuck": ["sexual"]
|
199
|
+
"fingerfucked": ["sexual"]
|
200
|
+
"fingerfucker": ["sexual"]
|
201
|
+
"fingerfucking": ["sexual"]
|
202
|
+
"fingerfucks": ["sexual"]
|
203
|
+
"fistfuck": ["sexual"]
|
204
|
+
"fistfucked": ["sexual"]
|
205
|
+
"fistfucker": ["sexual"]
|
206
|
+
"fistfucking": ["sexual"]
|
207
|
+
"flamer": ["discriminatory"]
|
208
|
+
"fuck": ["sexual"]
|
209
|
+
"fuckass": ["insult"]
|
210
|
+
"fuckbag": ["insult"]
|
211
|
+
"fuckboy": ["insult"]
|
212
|
+
"fuckbrain": ["insult"]
|
213
|
+
"fuckbutt": ["sexual"]
|
214
|
+
"fucked": ["sexual"]
|
215
|
+
"fucker": ["sexual", "insult"]
|
216
|
+
"fuckersucker": ["insult"]
|
217
|
+
"fuckface": ["insult"]
|
218
|
+
"fuckhead": ["sexual"]
|
219
|
+
"fuckhole": ["insult"]
|
220
|
+
"fuckin": ["sexual"]
|
221
|
+
"fucking": ["sexual"]
|
222
|
+
"fuckme": ["sexual"]
|
223
|
+
"fucknut": ["insult"]
|
224
|
+
"fucknutt": ["insult"]
|
225
|
+
"fuckoff": ["insult"]
|
226
|
+
"fuckstick": ["sexual"]
|
227
|
+
"fucktard": ["insult"]
|
228
|
+
"fuckup": ["insult"]
|
229
|
+
"fuckwad": ["insult"]
|
230
|
+
"fuckwit": ["insult"]
|
231
|
+
"fuckwitt": ["insult"]
|
232
|
+
"fudgepacker": ["discriminatory"]
|
233
|
+
"fuk": ["sexual"]
|
234
|
+
"gangbang": ["sexual"]
|
235
|
+
"gangbanged": ["sexual"]
|
236
|
+
"gay": ["discriminatory"]
|
237
|
+
"gayass": ["sexual"]
|
238
|
+
"gaybob": ["discriminatory"]
|
239
|
+
"gaydo": ["discriminatory"]
|
240
|
+
"gayfuck": ["discriminatory"]
|
241
|
+
"gayfuckist": ["discriminatory"]
|
242
|
+
"gaylord": ["discriminatory"]
|
243
|
+
"gaysex": ["discriminatory"]
|
244
|
+
"gaytard": ["discriminatory"]
|
245
|
+
"gaywad": ["discriminatory"]
|
246
|
+
"goddamn": ["inappropriate", "blasphemy"]
|
247
|
+
"goddamn": ["inappropriate", "blasphemy"]
|
248
|
+
"goddamnit": ["inappropriate", "blasphemy"]
|
249
|
+
"gooch": ["sexual"]
|
250
|
+
"gook": ["discriminatory"]
|
251
|
+
"gringo": ["discriminatory"]
|
252
|
+
"guido": ["discriminatory"]
|
253
|
+
"handjob": ["sexual"]
|
254
|
+
"hardcoresex": ["sexual"]
|
255
|
+
"heeb": ["discriminatory"]
|
256
|
+
"hell": ["inappropriate"]
|
257
|
+
"hell": ["inappropriate"]
|
258
|
+
"ho": ["discriminatory"]
|
259
|
+
"hoe": ["discriminatory"]
|
260
|
+
"homo": ["discriminatory"]
|
261
|
+
"homodumbshit": ["insult"]
|
262
|
+
"honkey": ["discriminatory"]
|
263
|
+
"horniest": ["sexual"]
|
264
|
+
"horny": ["sexual"]
|
265
|
+
"hotsex": ["sexual"]
|
266
|
+
"humping": ["sexual"]
|
267
|
+
"jackass": ["insult"]
|
268
|
+
"jap": ["discriminatory"]
|
269
|
+
"jigaboo": ["discriminatory"]
|
270
|
+
"jism": ["sexual"]
|
271
|
+
"jiz": ["sexual"]
|
272
|
+
"jizm": ["sexual"]
|
273
|
+
"jizz": ["sexual"]
|
274
|
+
"jungle bunny": ["discriminatory"]
|
275
|
+
"junglebunny": ["discriminatory"]
|
276
|
+
"kike": ["discriminatory"]
|
277
|
+
"kock": ["sexual"]
|
278
|
+
"kondum": ["sexual"]
|
279
|
+
"kooch": ["sexual"]
|
280
|
+
"kootch": ["sexual"]
|
281
|
+
"kum": ["sexual"]
|
282
|
+
"kumer": ["sexual"]
|
283
|
+
"kummer": ["sexual"]
|
284
|
+
"kumming": ["sexual"]
|
285
|
+
"kums": ["sexual"]
|
286
|
+
"kunilingus": ["sexual"]
|
287
|
+
"kunt": ["sexual"]
|
288
|
+
"kyke": ["discriminatory"]
|
289
|
+
"lesbian": ["discriminatory"]
|
290
|
+
"lesbo": ["discriminatory"]
|
291
|
+
"lezzie": ["discriminatory"]
|
292
|
+
"lust": ["sexual"]
|
293
|
+
"lusting": ["sexual"]
|
294
|
+
"mcfagget": ["discriminatory"]
|
295
|
+
"mick": ["discriminatory"]
|
296
|
+
"minge": ["sexual"]
|
297
|
+
"mothafuck": ["sexual"]
|
298
|
+
"mothafucka": ["sexual"]
|
299
|
+
"mothafucka": ["sexual", "insult"]
|
300
|
+
"mothafuckaz": ["sexual"]
|
301
|
+
"mothafucked": ["sexual"]
|
302
|
+
"mothafucker": ["sexual", "insult"]
|
303
|
+
"mothafuckin": ["sexual"]
|
304
|
+
"mothafucking": ["sexual"]
|
305
|
+
"mothafucks": ["sexual"]
|
306
|
+
"motherfuck": ["sexual"]
|
307
|
+
"motherfucked": ["sexual"]
|
308
|
+
"motherfucker": ["sexual", "insult"]
|
309
|
+
"motherfuckin": ["sexual"]
|
310
|
+
"motherfucking": ["sexual"]
|
311
|
+
"muff": ["sexual"]
|
312
|
+
"muffdiver": ["discriminatory", "sexual"]
|
313
|
+
"munging": ["sexual"]
|
314
|
+
"negro": ["discriminatory"]
|
315
|
+
"nigga": ["discriminatory"]
|
316
|
+
"nigger": ["discriminatory"]
|
317
|
+
"niglet": ["discriminatory"]
|
318
|
+
"nut sack": ["sexual"]
|
319
|
+
"nutsack": ["sexual"]
|
320
|
+
"orgasim": ["sexual"]
|
321
|
+
"orgasm": ["sexual"]
|
322
|
+
"paki": ["discriminatory"]
|
323
|
+
"panooch": ["sexual"]
|
324
|
+
"pecker": ["sexual"]
|
325
|
+
"peckerhead": ["insult"]
|
326
|
+
"penis": ["sexual"]
|
327
|
+
"penisfucker": ["discriminatory"]
|
328
|
+
"penispuffer": ["discriminatory"]
|
329
|
+
"phonesex": ["sexual"]
|
330
|
+
"phuk": ["sexual"]
|
331
|
+
"phuked": ["sexual"]
|
332
|
+
"phuking": ["sexual"]
|
333
|
+
"phukked": ["sexual"]
|
334
|
+
"phukking": ["sexual"]
|
335
|
+
"phuks": ["sexual"]
|
336
|
+
"phuq": ["sexual"]
|
337
|
+
"pis": ["sexual"]
|
338
|
+
"pises": ["sexual"]
|
339
|
+
"pisin": ["sexual"]
|
340
|
+
"pising": ["sexual"]
|
341
|
+
"pisof": ["sexual"]
|
342
|
+
"piss": ["inappropriate"]
|
343
|
+
"pissed": ["inappropriate"]
|
344
|
+
"pisser": ["sexual"]
|
345
|
+
"pisses": ["sexual"]
|
346
|
+
"pissflaps": ["sexual"]
|
347
|
+
"pissin": ["sexual"]
|
348
|
+
"pissing": ["sexual"]
|
349
|
+
"pissoff": ["sexual"]
|
350
|
+
"polesmoker": ["discriminatory"]
|
351
|
+
"pollock": ["discriminatory"]
|
352
|
+
"poon": ["sexual"]
|
353
|
+
"poonani": ["sexual"]
|
354
|
+
"poonany": ["sexual"]
|
355
|
+
"poontang": ["sexual"]
|
356
|
+
"porch monkey": ["discriminatory"]
|
357
|
+
"porchmonkey": ["discriminatory"]
|
358
|
+
"porn": ["sexual"]
|
359
|
+
"porno": ["sexual"]
|
360
|
+
"pornography": ["sexual"]
|
361
|
+
"pornos": ["sexual"]
|
362
|
+
"prick": ["sexual"]
|
363
|
+
"punanny": ["sexual"]
|
364
|
+
"punta": ["insult"]
|
365
|
+
"pusies": ["sexual", "insult"]
|
366
|
+
"pussies": ["sexual", "insult"]
|
367
|
+
"pussy": ["sexual", "insult"]
|
368
|
+
"pussylicking": ["sexual"]
|
369
|
+
"pusy": ["sexual"]
|
370
|
+
"puto": ["insult"]
|
371
|
+
"queef": ["sexual"]
|
372
|
+
"queer": ["discriminatory"]
|
373
|
+
"queerbait": ["discriminatory"]
|
374
|
+
"queerhole": ["discriminatory"]
|
375
|
+
"renob": ["sexual"]
|
376
|
+
"rimjob": ["sexual"]
|
377
|
+
"ruski": ["discriminatory"]
|
378
|
+
"sandnigger": ["discriminatory"]
|
379
|
+
"schlong": ["sexual"]
|
380
|
+
"scrote": ["sexual"]
|
381
|
+
"shit": ["sexual", "inappropriate"]
|
382
|
+
"shitass": ["insult"]
|
383
|
+
"shitbag": ["insult"]
|
384
|
+
"shitbagger": ["insult"]
|
385
|
+
"shitbrain": ["insult"]
|
386
|
+
"shitbreath": ["insult"]
|
387
|
+
"shitcunt": ["insult"]
|
388
|
+
"shitdick": ["insult"]
|
389
|
+
"shited": ["sexual"]
|
390
|
+
"shitface": ["insult"]
|
391
|
+
"shitfaced": ["inappropriate", "insult"]
|
392
|
+
"shitfull": ["sexual"]
|
393
|
+
"shithead": ["insult"]
|
394
|
+
"shithole": ["insult"]
|
395
|
+
"shithouse": ["inappropriate"]
|
396
|
+
"shiting": ["sexual"]
|
397
|
+
"shitspitter": ["sexual"]
|
398
|
+
"shitstain": ["inappropriate", "insult"]
|
399
|
+
"shitted": ["sexual"]
|
400
|
+
"shitter": ["sexual"]
|
401
|
+
"shittiest": ["inappropriate"]
|
402
|
+
"shitting": ["inappropriate"]
|
403
|
+
"shitty": ["inappropriate"]
|
404
|
+
"shity": ["sexual"]
|
405
|
+
"shiz": ["inappropriate"]
|
406
|
+
"shiznit": ["inappropriate"]
|
407
|
+
"skank": ["insult"]
|
408
|
+
"skeet": ["sexual"]
|
409
|
+
"skullfuck": ["sexual"]
|
410
|
+
"slut": ["sexual"]
|
411
|
+
"slut": ["sexual", "discriminatory"]
|
412
|
+
"slutbag": ["discriminatory"]
|
413
|
+
"sluts": ["sexual"]
|
414
|
+
"smeg": ["inappropriate"]
|
415
|
+
"smut": ["sexual"]
|
416
|
+
"snatch": ["sexual"]
|
417
|
+
"spic": ["discriminatory"]
|
418
|
+
"spick": ["discriminatory"]
|
419
|
+
"splooge": ["sexual"]
|
420
|
+
"spunk": ["sexual"]
|
421
|
+
"tard": ["discriminatory"]
|
422
|
+
"testicle": ["sexual"]
|
423
|
+
"thundercunt": ["insult"]
|
424
|
+
"tit": ["sexual"]
|
425
|
+
"titfuck": ["sexual"]
|
426
|
+
"tittyfuck": ["sexual"]
|
427
|
+
"twat": ["sexual"]
|
428
|
+
"twatlips": ["insult"]
|
429
|
+
"twatwaffle": ["discriminatory"]
|
430
|
+
"unclefucker": ["discriminatory"]
|
431
|
+
"va-j-j": ["sexual"]
|
432
|
+
"vag": ["sexual"]
|
433
|
+
"vagina": ["sexual"]
|
434
|
+
"vjayjay": ["sexual"]
|
435
|
+
"wank": ["sexual"]
|
436
|
+
"wetback": ["discriminatory"]
|
437
|
+
"whore": ["insult"]
|
438
|
+
"whorebag": ["insult"]
|
439
|
+
"whoreface": ["insult"]
|
440
|
+
"wop": ["discriminatory"]
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'fuzzy_hash'
|
3
|
+
require 'bloomfilter'
|
4
|
+
|
5
|
+
class Swearjar
  # Minimal profanity matcher backed by a single FuzzyHash.
  #
  # Both sections of the YAML word list are loaded into the same FuzzyHash:
  # the 'regex' section as Regexp keys and the 'simple' section as plain
  # string keys. Each value is the list of categories for that entry.
  class Tester

    # @param config_file [String] path to a YAML word list with 'regex' and
    #   'simple' sections
    def initialize(config_file)
      # The path has to be passed through; calling YAML.load_file with no
      # argument raises ArgumentError.
      data = YAML.load_file(config_file)

      @tester = FuzzyHash.new

      data['regex'].each do |pattern, type|
        @tester[Regexp.new(pattern)] = type
      end

      data['simple'].each do |test, type|
        @tester[test] = type
      end
    end

    # Yields every word-like token (a run of ASCII letters and hyphens) in
    # +string+ to the block.
    #
    # The character class is spelled out because inside [...] the escape \b
    # means "backspace", not "word boundary".
    #
    # @param string [String] text to tokenize
    # @return whatever String#scan returns for the given block
    def scan(string, &block)
      string.scan(/\b[a-zA-Z-]+\b/, &block)
    end

    # Reports whether any single token of +string+ has an entry in the
    # word list. Tokens are downcased before lookup.
    #
    # NOTE(review): lookups are per token, so multi-word patterns from the
    # 'regex' section are not expected to match here - confirm against the
    # FuzzyHash lookup semantics before relying on this class for phrases.
    #
    # @param string [String] text to check
    # @return [Boolean] true on the first listed token, false otherwise
    def profane?(string)
      scan(string) { |word| return true if @tester[word.downcase] }
      false
    end

  end
end
|
data/lib/swearjar.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'fuzzy_hash'
|
3
|
+
|
4
|
+
# Profanity detector driven by a YAML word list.
#
# The word list has two sections:
# * 'regex'  - pattern source => categories, compiled and stored in a FuzzyHash
#   (exposed as #tester) and matched against the whole input string;
# * 'simple' - word => categories, stored in a plain Hash (exposed as #hash)
#   and matched against individual tokens.
#
# NOTE: the :hash reader shadows Object#hash. It is kept because it is part of
# the public interface, but instances should not be used as Hash keys.
class Swearjar

  # Tokenizer for candidate words: runs of ASCII letters and hyphens.
  WORD_PATTERN = /\b[a-zA-Z-]+\b/.freeze

  # @return [Swearjar] an instance loaded with the default ('en') word list
  def self.default
    from_language
  end

  # @param language [String] base name of a YAML file in the config directory
  #   next to this file
  # @return [Swearjar] an instance loaded from config/<language>.yml
  def self.from_language(language = 'en')
    new(File.join(File.dirname(__FILE__), 'config', "#{language}.yml"))
  end

  attr_reader :tester, :hash

  # @param file [String] path to the YAML word list
  def initialize(file)
    # An empty YAML document loads as a falsy value; treat it, and any
    # missing section, as an empty word list instead of crashing.
    data = YAML.load_file(file) || {}

    @tester = FuzzyHash.new
    @hash = {}

    (data['regex'] || {}).each do |pattern, type|
      @tester[Regexp.new(pattern)] = type
    end

    (data['simple'] || {}).each do |test, type|
      @hash[test] = type
    end
  end

  # Walks +string+ and yields (text, categories) pairs to the block.
  #
  # Every token matched by WORD_PATTERN is yielded together with its entry
  # from #hash, or nil when the token is not listed. Afterwards the whole
  # string is offered once to #tester and, if that returns a match, the
  # matched text and its categories are yielded as well.
  #
  # NOTE(review): tester.match_with_result is called once per string, so at
  # most one 'regex' hit is reported per call.
  #
  # @param string [String] text to inspect
  # @return [Enumerator] when called without a block
  def scan(string, &block)
    return enum_for(:scan, string) unless block

    string.scan(WORD_PATTERN) do |word|
      block.call(word, lookup(word))
    end
    if (match = tester.match_with_result(string))
      block.call(match.last, match.first)
    end
  end

  # @param string [String] text to check
  # @return [Boolean] true as soon as any token or pattern is listed
  def profane?(string)
    scan(string) { |_word, test| return true unless test.nil? }
    false
  end

  # Counts hits per category.
  #
  # @param string [String] text to score
  # @return [Hash] category => number of hits
  def scorecard(string)
    scorecard = {}
    scan(string) do |_word, test|
      next unless test

      # Array() tolerates a word-list entry whose value is a bare string
      # rather than a list of categories.
      Array(test).each { |type| scorecard[type] = scorecard.fetch(type, 0) + 1 }
    end
    scorecard
  end

  # Returns a copy of +string+ with every listed token masked.
  #
  # Without a block each non-whitespace character of a hit is replaced by
  # '*'. With a block, the block receives the offending text and its return
  # value is used as the replacement.
  #
  # Tokens are replaced where they occur, so a listed word is never masked
  # inside a longer, unlisted word (censoring "ass" leaves "class" alone).
  #
  # @param string [String] text to censor
  # @return [String] a new, censored string; +string+ is not modified
  def censor(string)
    mask = lambda { |word| block_given? ? yield(word) : word.gsub(/\S/, '*') }

    censored_string = string.gsub(WORD_PATTERN) do |word|
      lookup(word) ? mask.call(word) : word
    end

    if (match = tester.match_with_result(string))
      phrase = match.last
      replacement = mask.call(phrase)
      # Block form so backslashes in the replacement are taken literally.
      censored_string.gsub!(phrase) { replacement }
    end

    censored_string
  end

  private

  # Looks +word+ up in #hash case-insensitively, retrying with a trailing
  # "s" / "es" removed so simple plurals are recognised.
  def lookup(word)
    key = word.downcase
    hash[key] || hash[key.sub(/e?s\z/, '')]
  end

end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'swearjar'
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'swearjar'))
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Behavioural specs for Swearjar using the bundled English word list.
#
# The boolean expectations use be(true) / be(false) rather than the
# be_true / be_false matchers: those matchers were removed in RSpec 3, and
# the gemspec does not pin an RSpec version.
describe Swearjar do

  it "should detect dirty words" do
    Swearjar.default.profane?('fuck you jim henson').should be(true)
  end

  it "should detect dirty words regardless of case" do
    Swearjar.default.profane?('FuCk you jim henson').should be(true)
  end

  it "should detect simple dirty plurals" do
    Swearjar.default.profane?('jim henson had two dicks').should be(true)
    Swearjar.default.profane?('jim henson has two asses').should be(true)
  end

  it "should not detect non-dirty words" do
    Swearjar.default.profane?('i love you jim henson').should be(false)
  end

  it "should give us a scorecard" do
    Swearjar.default.scorecard('fuck you jim henson').should == {'sexual'=>1}
  end

  it "should detect multiword" do
    Swearjar.default.scorecard('jim henson has a hard on').should == {'sexual'=>1}
  end

  it "should detect multiword plurals" do
    Swearjar.default.scorecard('jim henson has a hard ons').should == {'sexual'=>1}
  end

  it "should censor a string" do
    Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody').should == 'jim henson has a massive **** ** he is gonna use to **** everybody'
  end

end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-

# Loads Swearjar::VERSION without requiring the whole library.
require File.join(File.dirname(__FILE__), 'lib', 'swearjar', 'version')

Gem::Specification.new do |s|
  # Ask git for the tracked files once and reuse the list below.
  tracked_files = `git ls-files`.split("\n")

  s.name = 'zaarly-swearjar'
  s.version = Swearjar::VERSION
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Zaarly, Inc.", "Joshua Hull"]
  s.summary = "Put another nickel in the swearjar. Simple profanity detection with content analysis"
  s.description = "#{s.summary}."
  s.email = %q{joshbuddy@gmail.com}
  s.extra_rdoc_files = ['README.rdoc']
  s.files = tracked_files
  s.homepage = %q{http://github.com/zaarly/swearjar}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.test_files = tracked_files.select { |f| f =~ /^spec/ }

  # dependencies
  s.add_runtime_dependency 'fuzzyhash', '~> 0.0.11'
  s.add_development_dependency 'bundler'
  s.add_development_dependency 'rake'
  s.add_development_dependency 'rspec'

  # Only set the specification version where the accessor exists.
  s.specification_version = 3 if s.respond_to? :specification_version
end
|
37
|
+
|
metadata
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: zaarly-swearjar
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Zaarly, Inc.
|
9
|
+
- Joshua Hull
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
date: 2011-11-09 00:00:00.000000000 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: fuzzyhash
|
17
|
+
requirement: &70302130398180 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ~>
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.0.11
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *70302130398180
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: bundler
|
28
|
+
requirement: &70302130396580 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *70302130396580
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: rake
|
39
|
+
requirement: &70302130395960 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
type: :development
|
46
|
+
prerelease: false
|
47
|
+
version_requirements: *70302130395960
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: rspec
|
50
|
+
requirement: &70302130395180 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
type: :development
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: *70302130395180
|
59
|
+
description: Put another nickel in the swearjar. Simple profanity detection with content
|
60
|
+
analysis.
|
61
|
+
email: joshbuddy@gmail.com
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files:
|
65
|
+
- README.rdoc
|
66
|
+
files:
|
67
|
+
- .gitignore
|
68
|
+
- Gemfile
|
69
|
+
- README.rdoc
|
70
|
+
- Rakefile
|
71
|
+
- lib/config/en.yml
|
72
|
+
- lib/swearjar.rb
|
73
|
+
- lib/swearjar/tester.rb
|
74
|
+
- lib/swearjar/version.rb
|
75
|
+
- lib/zaarly-swearjar.rb
|
76
|
+
- spec/spec.opts
|
77
|
+
- spec/spec_helper.rb
|
78
|
+
- spec/swearjar_spec.rb
|
79
|
+
- zaarly-swearjar.gemspec
|
80
|
+
homepage: http://github.com/zaarly/swearjar
|
81
|
+
licenses: []
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options:
|
84
|
+
- --charset=UTF-8
|
85
|
+
require_paths:
|
86
|
+
- lib
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
segments:
|
94
|
+
- 0
|
95
|
+
hash: -766704783701987807
|
96
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubyforge_project:
|
104
|
+
rubygems_version: 1.8.10
|
105
|
+
signing_key:
|
106
|
+
specification_version: 3
|
107
|
+
summary: Put another nickel in the swearjar. Simple profanity detection with content
|
108
|
+
analysis
|
109
|
+
test_files:
|
110
|
+
- spec/spec.opts
|
111
|
+
- spec/spec_helper.rb
|
112
|
+
- spec/swearjar_spec.rb
|