swearjar 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +6 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile +2 -2
- data/README.md +31 -0
- data/Rakefile +2 -15
- data/lib/config/en.yml +17 -124
- data/lib/swearjar.rb +59 -33
- data/lib/swearjar/version.rb +2 -2
- data/spec/data/swear.yml +1 -0
- data/spec/swearjar_spec.rb +54 -15
- data/swearjar.gemspec +4 -14
- metadata +41 -35
- data/README.rdoc +0 -28
- data/lib/swearjar/tester.rb +0 -32
- data/spec/spec.opts +0 -7
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cfbc2f8e57099d5a6b9a4c60961977e8bfb542d3
|
4
|
+
data.tar.gz: d176f8879508172566f2582bf29a5d0d6c47d27d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2980ee653f0b552d3e822b991041fd91d5e1ae494d9e2823109e67c927b0dbe53ae0e2192397bd5fed38684d78af07c4713f2271a2ebc93cd0538dfe07aa40be
|
7
|
+
data.tar.gz: 4116470e1e7cdb71c5958888833306e20196cbc6f11ddc7e4219a8ebecb7fe8b0375feb50ede7fcdd03635e588002a401de414ca73d64a8cc661a36100639fd2
|
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
source
|
1
|
+
source "https://rubygems.org"
|
2
2
|
|
3
|
-
gemspec
|
3
|
+
gemspec
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Swearjar
|
2
|
+
|
3
|
+
Simple profanity detection with content analysis.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
`gem install swearjar`
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
require "swearjar"
|
13
|
+
|
14
|
+
sj = Swearjar.default
|
15
|
+
|
16
|
+
sj.profane?("jim henson has a massive hard on he is gonna use to fuck everybody")
|
17
|
+
# => true
|
18
|
+
|
19
|
+
sj.scorecard("jim henson has a massive hard on he is gonna use to fuck everybody")
|
20
|
+
# => {:sexual => 2}
|
21
|
+
|
22
|
+
sj.censor("jim henson has a massive hard on he is gonna use to fuck everybody")
|
23
|
+
# => "jim henson has a massive **** ** he is gonna use to **** everybody"
|
24
|
+
```
|
25
|
+
|
26
|
+
To load from a custom config file, you can do the following:
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
# For an example see lib/config/en.yml
|
30
|
+
sj = Swearjar.new("my_swears.yml")
|
31
|
+
```
|
data/Rakefile
CHANGED
@@ -1,18 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require "rspec/core/rake_task"
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
3
5
|
task :default => :spec
|
4
|
-
|
5
|
-
require 'spec'
|
6
|
-
require 'spec/rake/spectask'
|
7
|
-
task :spec => 'spec:all'
|
8
|
-
namespace(:spec) do
|
9
|
-
Spec::Rake::SpecTask.new(:all) do |t|
|
10
|
-
t.spec_opts ||= []
|
11
|
-
t.spec_opts << "-rubygems"
|
12
|
-
t.spec_opts << "--options" << "spec/spec.opts"
|
13
|
-
t.spec_files = FileList['spec/**/*_spec.rb']
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
require 'bundler'
|
18
|
-
Bundler::GemHelper.install_tasks
|
data/lib/config/en.yml
CHANGED
@@ -1,8 +1,16 @@
|
|
1
|
+
---
|
1
2
|
regex:
|
2
3
|
'hard ons?\b': ["sexual"]
|
3
4
|
'jerk off\b': ["sexual"]
|
4
5
|
'pissed off\b': ["inappropriate"]
|
5
|
-
'
|
6
|
+
'\w*n[i1]gg[e3]r\w*': ["discriminatory"]
|
7
|
+
'\w*fuck\w*': ["sexual"]
|
8
|
+
'\w*b[i1]tch\w*': ["insult"]
|
9
|
+
'\w*ejaculat\w*': ["sexual"]
|
10
|
+
'\w*damn\w*': ["inappropriate", "blasphemy"]
|
11
|
+
'\w*f[a4]gg[o0]t\w*': ["discriminatory"]
|
12
|
+
'\w*wh[o0]r[e3]\w*': ["insult"]
|
13
|
+
'\w*p[e3]n[i1]s\w*': ["sexual"]
|
6
14
|
simple:
|
7
15
|
"anus": ["sexual"]
|
8
16
|
"arse": ["insult"]
|
@@ -18,8 +26,6 @@ simple:
|
|
18
26
|
"asscock": ["insult"]
|
19
27
|
"asscracker": ["sexual"]
|
20
28
|
"assface": ["sexual"]
|
21
|
-
"assfuck": ["sexual"]
|
22
|
-
"assfucker": ["discriminatory"]
|
23
29
|
"assgoblin": ["discriminatory"]
|
24
30
|
"asshat": ["sexual"]
|
25
31
|
"asshead": ["insult"]
|
@@ -31,9 +37,7 @@ simple:
|
|
31
37
|
"assmonkey": ["insult"]
|
32
38
|
"assmunch": ["insult"]
|
33
39
|
"assmuncher": ["sexual"]
|
34
|
-
"assnigger": ["discriminatory"]
|
35
40
|
"asspirate": ["discriminatory"]
|
36
|
-
"assshit": ["insult"]
|
37
41
|
"assshole": ["sexual"]
|
38
42
|
"asssucker": ["insult"]
|
39
43
|
"asswad": ["sexual"]
|
@@ -41,31 +45,17 @@ simple:
|
|
41
45
|
"bampot": ["insult"]
|
42
46
|
"bastard": ["insult"]
|
43
47
|
"beaner": ["discriminatory"]
|
44
|
-
"beastial": ["sexual"]
|
45
48
|
"beastiality": ["sexual"]
|
46
49
|
"beastility": ["sexual"]
|
47
|
-
"bestial": ["sexual"]
|
48
50
|
"bestiality": ["sexual"]
|
49
|
-
"bitch": ["insult"]
|
50
|
-
"bitchass": ["insult"]
|
51
|
-
"bitcher": ["insult"]
|
52
|
-
"bitchin": ["inappropriate"]
|
53
|
-
"bitching": ["inappropriate"]
|
54
|
-
"bitchtit": ["discriminatory"]
|
55
|
-
"bitchy": ["insult"]
|
56
51
|
"blow job": ["sexual"]
|
57
52
|
"blowjob": ["sexual"]
|
58
53
|
"bollocks": ["sexual"]
|
59
54
|
"bollox": ["sexual"]
|
60
55
|
"boner": ["sexual"]
|
61
|
-
"brotherfucker": ["discriminatory"]
|
62
56
|
"bullshit": ["inappropriate"]
|
63
|
-
"bullshit": ["inappropriate"]
|
64
|
-
"bumblefuck": ["discriminatory"]
|
65
57
|
"butt plug": ["sexual"]
|
66
58
|
"butt-pirate": ["discriminatory"]
|
67
|
-
"buttfucka": ["discriminatory"]
|
68
|
-
"buttfucker": ["discriminatory"]
|
69
59
|
"camel toe": ["sexual"]
|
70
60
|
"carpetmuncher": ["discriminatory"]
|
71
61
|
"chinc": ["discriminatory"]
|
@@ -74,14 +64,11 @@ simple:
|
|
74
64
|
"chode": ["sexual"]
|
75
65
|
"clit": ["sexual"]
|
76
66
|
"clitface": ["insult"]
|
77
|
-
"clitfuck": ["sexual"]
|
78
|
-
"clusterfuck": ["inappropriate"]
|
79
67
|
"cock": ["sexual"]
|
80
68
|
"cockass": "Jerk"
|
81
69
|
"cockbite": ["insult"]
|
82
70
|
"cockburger": ["insult"]
|
83
71
|
"cockface": ["insult"]
|
84
|
-
"cockfucker": ["insult"]
|
85
72
|
"cockhead": ["insult"]
|
86
73
|
"cockjockey": ["discriminatory"]
|
87
74
|
"cockknoker": ["discriminatory"]
|
@@ -95,16 +82,14 @@ simple:
|
|
95
82
|
"cockshit": ["insult"]
|
96
83
|
"cocksmith": ["discriminatory"]
|
97
84
|
"cocksmoker": ["discriminatory"]
|
98
|
-
"cocksuck": ["sexual"]
|
85
|
+
"cocksuck": ["sexual", "discriminatory"]
|
99
86
|
"cocksucked": ["sexual"]
|
100
87
|
"cocksucker": ["discriminatory", "sexual"]
|
101
88
|
"cocksucking": ["sexual", "discriminatory"]
|
102
|
-
"cocksucks": ["sexual", "discriminatory"]
|
103
89
|
"coochie": ["sexual"]
|
104
90
|
"coochy": ["sexual"]
|
105
91
|
"coon": ["discriminatory"]
|
106
92
|
"cooter": ["sexual"]
|
107
|
-
"cracker": ["discriminatory"]
|
108
93
|
"cum": ["sexual"]
|
109
94
|
"cumbubble": ["insult"]
|
110
95
|
"cumdumpster": ["sexual"]
|
@@ -118,7 +103,6 @@ simple:
|
|
118
103
|
"cunillingus": ["sexual"]
|
119
104
|
"cunnie": ["sexual"]
|
120
105
|
"cunnilingus": ["sexual"]
|
121
|
-
"cunnilingus": ["sexual"]
|
122
106
|
"cunt": ["insult", "sexual"]
|
123
107
|
"cuntface": ["insult"]
|
124
108
|
"cunthole": ["sexual"]
|
@@ -128,19 +112,11 @@ simple:
|
|
128
112
|
"cuntrag": ["insult"]
|
129
113
|
"cuntslut": ["insult"]
|
130
114
|
"cyberfuc": ["sexual"]
|
131
|
-
"
|
132
|
-
"cyberfucked": ["sexual"]
|
133
|
-
"cyberfucker": ["sexual"]
|
134
|
-
"cyberfucking": ["sexual"]
|
135
|
-
"dago": ["discriminatory"]
|
136
|
-
"damn": ["inappropriate"]
|
137
|
-
"deggo": ["discriminatory"]
|
115
|
+
"dammit": ["inappropriate", "blasphemy"]
|
138
116
|
"dick": ["sexual", "insult"]
|
139
117
|
"dickbag": ["insult"]
|
140
|
-
"
|
118
|
+
"dickbeater": ["sexual"]
|
141
119
|
"dickface": ["insult"]
|
142
|
-
"dickfuck": ["insult"]
|
143
|
-
"dickfucker": ["discriminatory"]
|
144
120
|
"dickhead": ["insult"]
|
145
121
|
"dickhole": ["sexual"]
|
146
122
|
"dickjuice": ["sexual"]
|
@@ -165,26 +141,15 @@ simple:
|
|
165
141
|
"dumass": ["insult"]
|
166
142
|
"dumb ass": ["insult"]
|
167
143
|
"dumbass": ["insult"]
|
168
|
-
"dumbfuck": ["insult"]
|
169
144
|
"dumbshit": ["insult"]
|
170
145
|
"dumshit": ["insult"]
|
171
146
|
"dyke": ["discriminatory"]
|
172
|
-
"ejaculate": ["sexual"]
|
173
|
-
"ejaculated": ["sexual"]
|
174
|
-
"ejaculates": ["sexual"]
|
175
|
-
"ejaculating": ["sexual"]
|
176
|
-
"ejaculation": ["sexual"]
|
177
147
|
"fag": ["discriminatory"]
|
178
148
|
"fagbag": ["discriminatory"]
|
179
|
-
"fagfucker": ["discriminatory"]
|
180
149
|
"fagging": ["discriminatory"]
|
181
150
|
"faggit": ["discriminatory"]
|
182
|
-
"
|
183
|
-
"faggot": ["discriminatory"]
|
184
|
-
"faggotcock": ["discriminatory"]
|
185
|
-
"faggs": ["discriminatory"]
|
151
|
+
"fagg": ["discriminatory"]
|
186
152
|
"fagot": ["discriminatory"]
|
187
|
-
"fags": ["discriminatory"]
|
188
153
|
"fagtard": ["discriminatory"]
|
189
154
|
"fart": ["inappropriate"]
|
190
155
|
"farted": ["inappropriate"]
|
@@ -193,42 +158,8 @@ simple:
|
|
193
158
|
"fatass": ["insult"]
|
194
159
|
"felatio": ["sexual"]
|
195
160
|
"fellatio": ["sexual"]
|
196
|
-
"fellatio": ["sexual"]
|
197
161
|
"feltch": ["sexual"]
|
198
|
-
"fingerfuck": ["sexual"]
|
199
|
-
"fingerfucked": ["sexual"]
|
200
|
-
"fingerfucker": ["sexual"]
|
201
|
-
"fingerfucking": ["sexual"]
|
202
|
-
"fingerfucks": ["sexual"]
|
203
|
-
"fistfuck": ["sexual"]
|
204
|
-
"fistfucked": ["sexual"]
|
205
|
-
"fistfucker": ["sexual"]
|
206
|
-
"fistfucking": ["sexual"]
|
207
162
|
"flamer": ["discriminatory"]
|
208
|
-
"fuck": ["sexual"]
|
209
|
-
"fuckass": ["insult"]
|
210
|
-
"fuckbag": ["insult"]
|
211
|
-
"fuckboy": ["insult"]
|
212
|
-
"fuckbrain": ["insult"]
|
213
|
-
"fuckbutt": ["sexual"]
|
214
|
-
"fucked": ["sexual"]
|
215
|
-
"fucker": ["sexual", "insult"]
|
216
|
-
"fuckersucker": ["insult"]
|
217
|
-
"fuckface": ["insult"]
|
218
|
-
"fuckhead": ["sexual"]
|
219
|
-
"fuckhole": ["insult"]
|
220
|
-
"fuckin": ["sexual"]
|
221
|
-
"fucking": ["sexual"]
|
222
|
-
"fuckme": ["sexual"]
|
223
|
-
"fucknut": ["insult"]
|
224
|
-
"fucknutt": ["insult"]
|
225
|
-
"fuckoff": ["insult"]
|
226
|
-
"fuckstick": ["sexual"]
|
227
|
-
"fucktard": ["insult"]
|
228
|
-
"fuckup": ["insult"]
|
229
|
-
"fuckwad": ["insult"]
|
230
|
-
"fuckwit": ["insult"]
|
231
|
-
"fuckwitt": ["insult"]
|
232
163
|
"fudgepacker": ["discriminatory"]
|
233
164
|
"fuk": ["sexual"]
|
234
165
|
"gangbang": ["sexual"]
|
@@ -237,15 +168,10 @@ simple:
|
|
237
168
|
"gayass": ["sexual"]
|
238
169
|
"gaybob": ["discriminatory"]
|
239
170
|
"gaydo": ["discriminatory"]
|
240
|
-
"gayfuck": ["discriminatory"]
|
241
|
-
"gayfuckist": ["discriminatory"]
|
242
171
|
"gaylord": ["discriminatory"]
|
243
172
|
"gaysex": ["discriminatory"]
|
244
173
|
"gaytard": ["discriminatory"]
|
245
174
|
"gaywad": ["discriminatory"]
|
246
|
-
"goddamn": ["inappropriate", "blasphemy"]
|
247
|
-
"goddamn": ["inappropriate", "blasphemy"]
|
248
|
-
"goddamnit": ["inappropriate", "blasphemy"]
|
249
175
|
"gooch": ["sexual"]
|
250
176
|
"gook": ["discriminatory"]
|
251
177
|
"gringo": ["discriminatory"]
|
@@ -254,12 +180,11 @@ simple:
|
|
254
180
|
"hardcoresex": ["sexual"]
|
255
181
|
"heeb": ["discriminatory"]
|
256
182
|
"hell": ["inappropriate"]
|
257
|
-
"hell": ["inappropriate"]
|
258
183
|
"ho": ["discriminatory"]
|
259
184
|
"hoe": ["discriminatory"]
|
260
185
|
"homo": ["discriminatory"]
|
261
|
-
"homodumbshit": ["insult"]
|
262
186
|
"honkey": ["discriminatory"]
|
187
|
+
"honky": ["discriminatory"]
|
263
188
|
"horniest": ["sexual"]
|
264
189
|
"horny": ["sexual"]
|
265
190
|
"hotsex": ["sexual"]
|
@@ -282,7 +207,6 @@ simple:
|
|
282
207
|
"kumer": ["sexual"]
|
283
208
|
"kummer": ["sexual"]
|
284
209
|
"kumming": ["sexual"]
|
285
|
-
"kums": ["sexual"]
|
286
210
|
"kunilingus": ["sexual"]
|
287
211
|
"kunt": ["sexual"]
|
288
212
|
"kyke": ["discriminatory"]
|
@@ -292,27 +216,11 @@ simple:
|
|
292
216
|
"lust": ["sexual"]
|
293
217
|
"lusting": ["sexual"]
|
294
218
|
"mcfagget": ["discriminatory"]
|
295
|
-
"mick": ["discriminatory"]
|
296
219
|
"minge": ["sexual"]
|
297
|
-
"mothafuck": ["sexual"]
|
298
|
-
"mothafucka": ["sexual", "insult"]
|
299
|
-
"mothafuckaz": ["sexual"]
|
300
|
-
"mothafucked": ["sexual"]
|
301
|
-
"mothafucker": ["sexual", "insult"]
|
302
|
-
"mothafuckin": ["sexual"]
|
303
|
-
"mothafucking": ["sexual"]
|
304
|
-
"mothafucks": ["sexual"]
|
305
|
-
"motherfuck": ["sexual"]
|
306
|
-
"motherfucked": ["sexual"]
|
307
|
-
"motherfucker": ["sexual", "insult"]
|
308
|
-
"motherfuckin": ["sexual"]
|
309
|
-
"motherfucking": ["sexual"]
|
310
220
|
"muff": ["sexual"]
|
311
221
|
"muffdiver": ["discriminatory", "sexual"]
|
312
|
-
"munging": ["sexual"]
|
313
222
|
"negro": ["discriminatory"]
|
314
223
|
"nigga": ["discriminatory"]
|
315
|
-
"nigger": ["discriminatory"]
|
316
224
|
"niglet": ["discriminatory"]
|
317
225
|
"nut sack": ["sexual"]
|
318
226
|
"nutsack": ["sexual"]
|
@@ -322,16 +230,12 @@ simple:
|
|
322
230
|
"panooch": ["sexual"]
|
323
231
|
"pecker": ["sexual"]
|
324
232
|
"peckerhead": ["insult"]
|
325
|
-
"penis": ["sexual"]
|
326
|
-
"penisfucker": ["discriminatory"]
|
327
|
-
"penispuffer": ["discriminatory"]
|
328
233
|
"phonesex": ["sexual"]
|
329
234
|
"phuk": ["sexual"]
|
330
235
|
"phuked": ["sexual"]
|
331
236
|
"phuking": ["sexual"]
|
332
237
|
"phukked": ["sexual"]
|
333
238
|
"phukking": ["sexual"]
|
334
|
-
"phuks": ["sexual"]
|
335
239
|
"phuq": ["sexual"]
|
336
240
|
"pis": ["sexual"]
|
337
241
|
"pises": ["sexual"]
|
@@ -341,8 +245,7 @@ simple:
|
|
341
245
|
"piss": ["inappropriate"]
|
342
246
|
"pissed": ["inappropriate"]
|
343
247
|
"pisser": ["sexual"]
|
344
|
-
"
|
345
|
-
"pissflaps": ["sexual"]
|
248
|
+
"pissflap": ["sexual"]
|
346
249
|
"pissin": ["sexual"]
|
347
250
|
"pissing": ["sexual"]
|
348
251
|
"pissoff": ["sexual"]
|
@@ -357,7 +260,6 @@ simple:
|
|
357
260
|
"porn": ["sexual"]
|
358
261
|
"porno": ["sexual"]
|
359
262
|
"pornography": ["sexual"]
|
360
|
-
"pornos": ["sexual"]
|
361
263
|
"prick": ["sexual"]
|
362
264
|
"punanny": ["sexual"]
|
363
265
|
"punta": ["insult"]
|
@@ -374,7 +276,6 @@ simple:
|
|
374
276
|
"renob": ["sexual"]
|
375
277
|
"rimjob": ["sexual"]
|
376
278
|
"ruski": ["discriminatory"]
|
377
|
-
"sandnigger": ["discriminatory"]
|
378
279
|
"schlong": ["sexual"]
|
379
280
|
"scrote": ["sexual"]
|
380
281
|
"shit": ["sexual", "inappropriate"]
|
@@ -405,11 +306,8 @@ simple:
|
|
405
306
|
"shiznit": ["inappropriate"]
|
406
307
|
"skank": ["insult"]
|
407
308
|
"skeet": ["sexual"]
|
408
|
-
"skullfuck": ["sexual"]
|
409
|
-
"slut": ["sexual"]
|
410
309
|
"slut": ["discriminatory"]
|
411
310
|
"slutbag": ["discriminatory"]
|
412
|
-
"sluts": ["sexual"]
|
413
311
|
"smeg": ["inappropriate"]
|
414
312
|
"smut": ["sexual"]
|
415
313
|
"snatch": ["sexual"]
|
@@ -421,19 +319,14 @@ simple:
|
|
421
319
|
"testicle": ["sexual"]
|
422
320
|
"thundercunt": ["insult"]
|
423
321
|
"tit": ["sexual"]
|
424
|
-
"titfuck": ["sexual"]
|
425
|
-
"tittyfuck": ["sexual"]
|
426
322
|
"twat": ["sexual"]
|
427
323
|
"twatlips": ["insult"]
|
428
324
|
"twatwaffle": ["discriminatory"]
|
429
|
-
"unclefucker": ["discriminatory"]
|
430
325
|
"va-j-j": ["sexual"]
|
431
326
|
"vag": ["sexual"]
|
432
327
|
"vagina": ["sexual"]
|
433
328
|
"vjayjay": ["sexual"]
|
434
329
|
"wank": ["sexual"]
|
435
330
|
"wetback": ["discriminatory"]
|
436
|
-
|
437
|
-
"
|
438
|
-
"whoreface": ["insult"]
|
439
|
-
"wop": ["discriminatory"]
|
331
|
+
emoji:
|
332
|
+
"1f595": ["insult"]
|
data/lib/swearjar.rb
CHANGED
@@ -1,63 +1,89 @@
|
|
1
1
|
require 'yaml'
|
2
|
-
require 'fuzzy_hash'
|
3
2
|
|
4
3
|
class Swearjar
|
5
|
-
|
6
4
|
def self.default
|
7
|
-
from_language
|
5
|
+
from_language('en')
|
8
6
|
end
|
9
7
|
|
10
|
-
def self.from_language(language
|
8
|
+
def self.from_language(language)
|
11
9
|
new(File.join(File.dirname(__FILE__), 'config', "#{language}.yml"))
|
12
10
|
end
|
13
11
|
|
14
|
-
attr_reader :tester, :hash
|
15
|
-
|
16
12
|
def initialize(file = nil)
|
17
|
-
@tester = FuzzyHash.new
|
18
13
|
@hash = {}
|
14
|
+
@regexs = {}
|
19
15
|
load_file(file) if file
|
20
16
|
end
|
21
17
|
|
18
|
+
def profane?(string)
|
19
|
+
string = string.to_s
|
20
|
+
scan(string) {|_word, test| return true if test }
|
21
|
+
false
|
22
|
+
end
|
23
|
+
|
24
|
+
def scorecard(string)
|
25
|
+
string = string.to_s
|
26
|
+
scorecard = {}
|
27
|
+
scan(string) do |_word, test|
|
28
|
+
next unless test
|
29
|
+
test.each do |type|
|
30
|
+
scorecard[type] = 0 unless scorecard.key?(type)
|
31
|
+
scorecard[type] += 1
|
32
|
+
end
|
33
|
+
end
|
34
|
+
scorecard
|
35
|
+
end
|
36
|
+
|
37
|
+
def censor(string)
|
38
|
+
censored_string = string.to_s.dup
|
39
|
+
scan(string) do |word, test|
|
40
|
+
next unless test
|
41
|
+
replacement = block_given? ? yield(word) : word.gsub(/\S/, '*')
|
42
|
+
censored_string.gsub!(word, replacement)
|
43
|
+
end
|
44
|
+
censored_string
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
22
49
|
def load_file(file)
|
23
50
|
data = YAML.load_file(file)
|
24
51
|
|
25
52
|
data['regex'].each do |pattern, type|
|
26
|
-
@
|
53
|
+
@regexs[Regexp.new(pattern, "i")] = type
|
27
54
|
end if data['regex']
|
28
55
|
|
29
56
|
data['simple'].each do |test, type|
|
30
57
|
@hash[test] = type
|
31
58
|
end if data['simple']
|
59
|
+
|
60
|
+
data['emoji'].each do |unicode, type|
|
61
|
+
char = [unicode.hex].pack("U")
|
62
|
+
@hash[char] = type
|
63
|
+
end if data['emoji']
|
32
64
|
end
|
33
65
|
|
66
|
+
WORD_REGEX = /\b[a-zA-Z-]+\b/
|
67
|
+
|
68
|
+
# https://github.com/franklsf95/ruby-emoji-regex
|
69
|
+
EMOJI_REGEX = /[\u{00A9}\u{00AE}\u{203C}\u{2049}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{2328}\u{23CF}\u{23E9}-\u{23F3}\u{23F8}-\u{23FA}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2604}\u{260E}\u{2611}\u{2614}-\u{2615}\u{2618}\u{261D}\u{2620}\u{2622}-\u{2623}\u{2626}\u{262A}\u{262E}-\u{262F}\u{2638}-\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2692}-\u{2694}\u{2696}-\u{2697}\u{2699}\u{269B}-\u{269C}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26B0}-\u{26B1}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26C8}\u{26CE}-\u{26CF}\u{26D1}\u{26D3}-\u{26D4}\u{26E9}-\u{26EA}\u{26F0}-\u{26F5}\u{26F7}-\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270D}\u{270F}\u{2712}\u{2714}\u{2716}\u{271D}\u{2721}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2763}-\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{27BF}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F321}\u{1F324}-\u{1F393}\u{1F396}-\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}-\u{1F3F0}\u{1F3F3}-\u{1F3F5}\u{1F3F7}-\u{1F4FD}\u{1F4FF}-\u{1F53D}\u{1F549}-\u{1F54E}\u{1F550}-\u{1F567}\u{1F56F}-\u{1F570}\u{1F573}-\u{1F579}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F590}\u{1F595}-\u{1F596}\u{1F5A5}\u{1F5A8}\u{1F5B1}-\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5EF}\u{1F5F3}\u{1F5FA}-\u{1F64F}\u{1F680}-\u{1F6C5}\u{1F6CB}-\u{1F6D0}\u{1F6E0}-\u{1F6E5}\u{1F6E9}\u{1F6EB}-\u{1F6EC}\u{1F6F0}\u{1F6F3}\u{1F910}-\u{1F918}\u{1F980}-\u{1F984}\u{1F9C0}]/
|
70
|
+
|
34
71
|
def scan(string, &block)
|
35
|
-
string
|
36
|
-
|
37
|
-
|
72
|
+
string.scan(WORD_REGEX) do |word|
|
73
|
+
block.call(word,
|
74
|
+
@hash[word.downcase] ||
|
75
|
+
@hash[word.downcase.gsub(/s\z/,'')] ||
|
76
|
+
@hash[word.downcase.gsub(/es\z/,'')])
|
38
77
|
end
|
39
|
-
if match = tester.match_with_result(string)
|
40
|
-
block.call(match.last, match.first)
|
41
|
-
end
|
42
|
-
end
|
43
78
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
return false
|
48
|
-
end
|
49
|
-
|
50
|
-
def scorecard(string)
|
51
|
-
string = string.to_s
|
52
|
-
scorecard = {}
|
53
|
-
scan(string) {|word, test| test.each { |type| scorecard.key?(type) ? scorecard[type] += 1 : scorecard[type] = 1} if test}
|
54
|
-
scorecard
|
55
|
-
end
|
79
|
+
string.scan(EMOJI_REGEX) do |emoji_char|
|
80
|
+
block.call(emoji_char, @hash[emoji_char])
|
81
|
+
end
|
56
82
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
83
|
+
@regexs.each do |regex, type|
|
84
|
+
string.scan(regex) do |word|
|
85
|
+
block.call(word, type)
|
86
|
+
end
|
87
|
+
end
|
61
88
|
end
|
62
|
-
|
63
|
-
end
|
89
|
+
end
|
data/lib/swearjar/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
class Swearjar
|
2
|
-
VERSION = '1.
|
3
|
-
end
|
2
|
+
VERSION = '1.1.0'
|
3
|
+
end
|
data/spec/data/swear.yml
CHANGED
data/spec/swearjar_spec.rb
CHANGED
@@ -1,48 +1,87 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
require 'spec_helper'
|
2
3
|
|
3
4
|
describe Swearjar do
|
4
|
-
|
5
5
|
it "should detect dirty words" do
|
6
|
-
Swearjar.default.profane?('
|
6
|
+
expect(Swearjar.default.profane?('jackass chan')).to be_truthy
|
7
7
|
end
|
8
8
|
|
9
9
|
it "should detect dirty words regardless of case" do
|
10
|
-
Swearjar.default.profane?('
|
10
|
+
expect(Swearjar.default.profane?('JACKASS CHAN')).to be_truthy
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should not detect non-dirty words" do
|
14
|
-
Swearjar.default.profane?('
|
14
|
+
expect(Swearjar.default.profane?('I love Jackie Chan movies')).to be_falsey
|
15
15
|
end
|
16
16
|
|
17
17
|
it "should give us a scorecard" do
|
18
|
-
Swearjar.default.scorecard('
|
18
|
+
expect(Swearjar.default.scorecard('honky jim henson')).to eq({'discriminatory'=>1})
|
19
19
|
end
|
20
20
|
|
21
21
|
it "should detect multiword" do
|
22
|
-
Swearjar.default.scorecard('jim henson has a hard on').
|
22
|
+
expect(Swearjar.default.scorecard('jim henson has a hard on')).to eq({'sexual'=>1})
|
23
23
|
end
|
24
24
|
|
25
25
|
it "should detect multiword plurals" do
|
26
|
-
Swearjar.default.scorecard('jim henson has a hard ons').
|
26
|
+
expect(Swearjar.default.scorecard('jim henson has a hard ons')).to eq({'sexual'=>1})
|
27
27
|
end
|
28
28
|
|
29
29
|
it "should detect simple dirty plurals" do
|
30
|
-
Swearjar.default.profane?('jim henson had two dicks').
|
31
|
-
Swearjar.default.profane?('jim henson has two asses').
|
30
|
+
expect(Swearjar.default.profane?('jim henson had two dicks')).to be_truthy
|
31
|
+
expect(Swearjar.default.profane?('jim henson has two asses')).to be_truthy
|
32
32
|
end
|
33
33
|
|
34
34
|
it "should censor a string" do
|
35
|
-
Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody').
|
35
|
+
expect(Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody')).to eq('jim henson has a massive **** ** he is gonna use to **** everybody')
|
36
36
|
end
|
37
37
|
|
38
38
|
it "should not do much when given a non-string" do
|
39
|
-
Swearjar.default.profane?(nil).
|
39
|
+
expect(Swearjar.default.profane?(nil)).to be_falsey
|
40
|
+
end
|
41
|
+
|
42
|
+
it "doesn't mark an empty string as profane" do
|
43
|
+
expect(Swearjar.default.profane?("")).to be_falsey
|
40
44
|
end
|
41
45
|
|
42
46
|
it "should allow you to load a new yaml file" do
|
43
|
-
sj = Swearjar.new
|
44
|
-
sj.
|
45
|
-
|
47
|
+
sj = Swearjar.new(File.expand_path('../data/swear.yml', __FILE__))
|
48
|
+
expect(sj.censor("Python is the best language!")).to eq("****** is the best language!")
|
49
|
+
end
|
50
|
+
|
51
|
+
it "detects multiple entries" do
|
52
|
+
expect(Swearjar.default.scorecard("cunts cunts cunts")).to eq({"insult" => 3, "sexual" => 3})
|
53
|
+
expect(Swearjar.default.scorecard("damn damnit dammit")).to eq({"inappropriate" => 3, "blasphemy" => 3})
|
54
|
+
end
|
55
|
+
|
56
|
+
it "detects plurals of words ending in 'e'" do
|
57
|
+
expect(Swearjar.default.profane?("asspirates")).to be_truthy
|
58
|
+
end
|
59
|
+
|
60
|
+
it "detects profane emojis" do
|
61
|
+
expect(Swearjar.default.profane?("🖕")).to be_truthy
|
62
|
+
end
|
63
|
+
|
64
|
+
it "detects profane emojis with skin tone" do
|
65
|
+
expect(Swearjar.default.profane?("🖕🏾")).to be_truthy
|
66
|
+
end
|
67
|
+
|
68
|
+
it "censors profane emojis" do
|
69
|
+
expect(Swearjar.default.censor("Fuck you🖕 🖕🖕")).to eq("**** you* **")
|
70
|
+
end
|
71
|
+
|
72
|
+
it "censors with regular expression matching" do
|
73
|
+
expect(Swearjar.default.censor("foonIgg3rbar foo nigger")).to eq("************ foo ******")
|
46
74
|
end
|
47
75
|
|
48
|
-
|
76
|
+
it "censors with a mix of normal and regular expression matches" do
|
77
|
+
expect(Swearjar.default.censor("fagfaggot faggotfag")).to eq("********* *********")
|
78
|
+
end
|
79
|
+
|
80
|
+
it "detects scorecard with regular expression matching" do
|
81
|
+
expect(Swearjar.default.scorecard("foonIgg3rbar foo nigger")).to eq({"discriminatory" => 2})
|
82
|
+
end
|
83
|
+
|
84
|
+
xit "doesn't substitute simple words when they occur later as substrings" do
|
85
|
+
expect(Swearjar.default.censor("anus janus")).to eq("**** janus")
|
86
|
+
end
|
87
|
+
end
|
data/swearjar.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.summary = "Put another nickel in the swearjar. Simple profanity detection with content analysis"
|
11
11
|
s.description = "#{s.summary}."
|
12
12
|
s.email = %q{joshbuddy@gmail.com}
|
13
|
-
s.extra_rdoc_files = ['README.
|
13
|
+
s.extra_rdoc_files = ['README.md']
|
14
14
|
s.files = `git ls-files`.split("\n")
|
15
15
|
s.homepage = %q{http://github.com/joshbuddy/swearjar}
|
16
16
|
s.rdoc_options = ["--charset=UTF-8"]
|
@@ -20,18 +20,8 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.rubyforge_project = 'swearjar'
|
21
21
|
|
22
22
|
# dependencies
|
23
|
-
s.
|
24
|
-
s.add_development_dependency '
|
25
|
-
s.add_development_dependency '
|
26
|
-
|
27
|
-
if s.respond_to? :specification_version then
|
28
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
29
|
-
s.specification_version = 3
|
30
|
-
|
31
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
32
|
-
else
|
33
|
-
end
|
34
|
-
else
|
35
|
-
end
|
23
|
+
s.add_development_dependency 'rake', '~> 10.5'
|
24
|
+
s.add_development_dependency 'rspec', '~> 3.4'
|
25
|
+
s.add_development_dependency 'pry', '~> 0.10'
|
36
26
|
end
|
37
27
|
|
metadata
CHANGED
@@ -1,99 +1,105 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swearjar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Joshua Hull
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2016-02-14 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement:
|
17
|
-
none: false
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
22
|
-
type: :
|
19
|
+
version: '10.5'
|
20
|
+
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '10.5'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
27
|
-
requirement:
|
28
|
-
none: false
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- - ~>
|
31
|
+
- - "~>"
|
31
32
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
33
|
+
version: '3.4'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.4'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
|
-
name:
|
38
|
-
requirement:
|
39
|
-
none: false
|
42
|
+
name: pry
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- - ~>
|
45
|
+
- - "~>"
|
42
46
|
- !ruby/object:Gem::Version
|
43
|
-
version:
|
47
|
+
version: '0.10'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.10'
|
47
55
|
description: Put another nickel in the swearjar. Simple profanity detection with content
|
48
56
|
analysis.
|
49
57
|
email: joshbuddy@gmail.com
|
50
58
|
executables: []
|
51
59
|
extensions: []
|
52
60
|
extra_rdoc_files:
|
53
|
-
- README.
|
61
|
+
- README.md
|
54
62
|
files:
|
55
|
-
- .gitignore
|
63
|
+
- ".gitignore"
|
64
|
+
- ".travis.yml"
|
65
|
+
- CHANGELOG.md
|
56
66
|
- Gemfile
|
57
|
-
- README.
|
67
|
+
- README.md
|
58
68
|
- Rakefile
|
59
69
|
- lib/config/en.yml
|
60
70
|
- lib/swearjar.rb
|
61
|
-
- lib/swearjar/tester.rb
|
62
71
|
- lib/swearjar/version.rb
|
63
72
|
- spec/data/swear.yml
|
64
|
-
- spec/spec.opts
|
65
73
|
- spec/spec_helper.rb
|
66
74
|
- spec/swearjar_spec.rb
|
67
75
|
- swearjar.gemspec
|
68
76
|
homepage: http://github.com/joshbuddy/swearjar
|
69
77
|
licenses: []
|
78
|
+
metadata: {}
|
70
79
|
post_install_message:
|
71
80
|
rdoc_options:
|
72
|
-
- --charset=UTF-8
|
81
|
+
- "--charset=UTF-8"
|
73
82
|
require_paths:
|
74
83
|
- lib
|
75
84
|
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
-
none: false
|
77
85
|
requirements:
|
78
|
-
- -
|
86
|
+
- - ">="
|
79
87
|
- !ruby/object:Gem::Version
|
80
88
|
version: '0'
|
81
89
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
-
none: false
|
83
90
|
requirements:
|
84
|
-
- -
|
91
|
+
- - ">="
|
85
92
|
- !ruby/object:Gem::Version
|
86
93
|
version: '0'
|
87
94
|
requirements: []
|
88
95
|
rubyforge_project: swearjar
|
89
|
-
rubygems_version:
|
96
|
+
rubygems_version: 2.2.3
|
90
97
|
signing_key:
|
91
|
-
specification_version:
|
98
|
+
specification_version: 4
|
92
99
|
summary: Put another nickel in the swearjar. Simple profanity detection with content
|
93
100
|
analysis
|
94
101
|
test_files:
|
95
102
|
- spec/data/swear.yml
|
96
|
-
- spec/spec.opts
|
97
103
|
- spec/spec_helper.rb
|
98
104
|
- spec/swearjar_spec.rb
|
99
105
|
has_rdoc:
|
data/README.rdoc
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
= Swearjar
|
2
|
-
|
3
|
-
Simple profanity detection with content analysis.
|
4
|
-
|
5
|
-
== Installation
|
6
|
-
|
7
|
-
gem install swearjar
|
8
|
-
|
9
|
-
== Usage
|
10
|
-
|
11
|
-
require 'swearjar'
|
12
|
-
|
13
|
-
Swearjar.default.profane?('jim henson has a massive hard on he is gonna use to fuck everybody')
|
14
|
-
<< true
|
15
|
-
|
16
|
-
Swearjar.default.scorecard('jim henson has a massive hard on he is gonna use to fuck everybody')
|
17
|
-
<< {:sexual => 2}
|
18
|
-
|
19
|
-
Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody')
|
20
|
-
<< 'jim henson has a massive **** ** he is gonna use to **** everybody'
|
21
|
-
|
22
|
-
To load from a custom yaml file, you can do the following
|
23
|
-
|
24
|
-
sj = Swearjar.new
|
25
|
-
sj.load_file('my_yaml.yml')
|
26
|
-
|
27
|
-
The YAML file can have two sections, `simple` and `regex`. For an example, see `lib/config/en.yml`.
|
28
|
-
|
data/lib/swearjar/tester.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
require 'yaml'
|
2
|
-
require 'fuzzy_hash'
|
3
|
-
require 'bloomfilter'
|
4
|
-
|
5
|
-
class Swearjar
|
6
|
-
class Tester
|
7
|
-
|
8
|
-
def initialize(config_file)
|
9
|
-
data = YAML.load_file
|
10
|
-
|
11
|
-
@tester = FuzzyHash.new
|
12
|
-
|
13
|
-
data['regex'].each do |pattern, type|
|
14
|
-
@tester[Regexp.new(pattern)] = type
|
15
|
-
end
|
16
|
-
|
17
|
-
data['simple'].each do |test, type|
|
18
|
-
@tester[test] = type
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
def scan(string, &block)
|
24
|
-
string.scan(/\b[\b]+\b/, &block)
|
25
|
-
end
|
26
|
-
|
27
|
-
def profane?(string)
|
28
|
-
scan(string) {|w| return true}
|
29
|
-
end
|
30
|
-
|
31
|
-
end
|
32
|
-
end
|