swearjar 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +6 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile +2 -2
- data/README.md +31 -0
- data/Rakefile +2 -15
- data/lib/config/en.yml +17 -124
- data/lib/swearjar.rb +59 -33
- data/lib/swearjar/version.rb +2 -2
- data/spec/data/swear.yml +1 -0
- data/spec/swearjar_spec.rb +54 -15
- data/swearjar.gemspec +4 -14
- metadata +41 -35
- data/README.rdoc +0 -28
- data/lib/swearjar/tester.rb +0 -32
- data/spec/spec.opts +0 -7
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cfbc2f8e57099d5a6b9a4c60961977e8bfb542d3
|
4
|
+
data.tar.gz: d176f8879508172566f2582bf29a5d0d6c47d27d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2980ee653f0b552d3e822b991041fd91d5e1ae494d9e2823109e67c927b0dbe53ae0e2192397bd5fed38684d78af07c4713f2271a2ebc93cd0538dfe07aa40be
|
7
|
+
data.tar.gz: 4116470e1e7cdb71c5958888833306e20196cbc6f11ddc7e4219a8ebecb7fe8b0375feb50ede7fcdd03635e588002a401de414ca73d64a8cc661a36100639fd2
|
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
source
|
1
|
+
source "https://rubygems.org"
|
2
2
|
|
3
|
-
gemspec
|
3
|
+
gemspec
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Swearjar
|
2
|
+
|
3
|
+
Simple profanity detection with content analysis.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
`gem install swearjar`
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
require "swearjar"
|
13
|
+
|
14
|
+
sj = Swearjar.default
|
15
|
+
|
16
|
+
sj.profane?("jim henson has a massive hard on he is gonna use to fuck everybody")
|
17
|
+
# => true
|
18
|
+
|
19
|
+
sj.scorecard("jim henson has a massive hard on he is gonna use to fuck everybody")
|
20
|
+
# => {:sexual => 2}
|
21
|
+
|
22
|
+
sj.censor("jim henson has a massive hard on he is gonna use to fuck everybody")
|
23
|
+
# => "jim henson has a massive **** ** he is gonna use to **** everybody"
|
24
|
+
```
|
25
|
+
|
26
|
+
To load from a custom config file, you can do the following:
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
# For an example see lib/config/en.yml
|
30
|
+
sj = Swearjar.new("my_swears.yml")
|
31
|
+
```
|
data/Rakefile
CHANGED
@@ -1,18 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require "rspec/core/rake_task"
|
4
|
+
RSpec::Core::RakeTask.new(:spec)
|
3
5
|
task :default => :spec
|
4
|
-
|
5
|
-
require 'spec'
|
6
|
-
require 'spec/rake/spectask'
|
7
|
-
task :spec => 'spec:all'
|
8
|
-
namespace(:spec) do
|
9
|
-
Spec::Rake::SpecTask.new(:all) do |t|
|
10
|
-
t.spec_opts ||= []
|
11
|
-
t.spec_opts << "-rubygems"
|
12
|
-
t.spec_opts << "--options" << "spec/spec.opts"
|
13
|
-
t.spec_files = FileList['spec/**/*_spec.rb']
|
14
|
-
end
|
15
|
-
end
|
16
|
-
|
17
|
-
require 'bundler'
|
18
|
-
Bundler::GemHelper.install_tasks
|
data/lib/config/en.yml
CHANGED
@@ -1,8 +1,16 @@
|
|
1
|
+
---
|
1
2
|
regex:
|
2
3
|
'hard ons?\b': ["sexual"]
|
3
4
|
'jerk off\b': ["sexual"]
|
4
5
|
'pissed off\b': ["inappropriate"]
|
5
|
-
'
|
6
|
+
'\w*n[i1]gg[e3]r\w*': ["discriminatory"]
|
7
|
+
'\w*fuck\w*': ["sexual"]
|
8
|
+
'\w*b[i1]tch\w*': ["insult"]
|
9
|
+
'\w*ejaculat\w*': ["sexual"]
|
10
|
+
'\w*damn\w*': ["inappropriate", "blasphemy"]
|
11
|
+
'\w*f[a4]gg[o0]t\w*': ["discriminatory"]
|
12
|
+
'\w*wh[o0]r[e3]\w*': ["insult"]
|
13
|
+
'\w*p[e3]n[i1]s\w*': ["sexual"]
|
6
14
|
simple:
|
7
15
|
"anus": ["sexual"]
|
8
16
|
"arse": ["insult"]
|
@@ -18,8 +26,6 @@ simple:
|
|
18
26
|
"asscock": ["insult"]
|
19
27
|
"asscracker": ["sexual"]
|
20
28
|
"assface": ["sexual"]
|
21
|
-
"assfuck": ["sexual"]
|
22
|
-
"assfucker": ["discriminatory"]
|
23
29
|
"assgoblin": ["discriminatory"]
|
24
30
|
"asshat": ["sexual"]
|
25
31
|
"asshead": ["insult"]
|
@@ -31,9 +37,7 @@ simple:
|
|
31
37
|
"assmonkey": ["insult"]
|
32
38
|
"assmunch": ["insult"]
|
33
39
|
"assmuncher": ["sexual"]
|
34
|
-
"assnigger": ["discriminatory"]
|
35
40
|
"asspirate": ["discriminatory"]
|
36
|
-
"assshit": ["insult"]
|
37
41
|
"assshole": ["sexual"]
|
38
42
|
"asssucker": ["insult"]
|
39
43
|
"asswad": ["sexual"]
|
@@ -41,31 +45,17 @@ simple:
|
|
41
45
|
"bampot": ["insult"]
|
42
46
|
"bastard": ["insult"]
|
43
47
|
"beaner": ["discriminatory"]
|
44
|
-
"beastial": ["sexual"]
|
45
48
|
"beastiality": ["sexual"]
|
46
49
|
"beastility": ["sexual"]
|
47
|
-
"bestial": ["sexual"]
|
48
50
|
"bestiality": ["sexual"]
|
49
|
-
"bitch": ["insult"]
|
50
|
-
"bitchass": ["insult"]
|
51
|
-
"bitcher": ["insult"]
|
52
|
-
"bitchin": ["inappropriate"]
|
53
|
-
"bitching": ["inappropriate"]
|
54
|
-
"bitchtit": ["discriminatory"]
|
55
|
-
"bitchy": ["insult"]
|
56
51
|
"blow job": ["sexual"]
|
57
52
|
"blowjob": ["sexual"]
|
58
53
|
"bollocks": ["sexual"]
|
59
54
|
"bollox": ["sexual"]
|
60
55
|
"boner": ["sexual"]
|
61
|
-
"brotherfucker": ["discriminatory"]
|
62
56
|
"bullshit": ["inappropriate"]
|
63
|
-
"bullshit": ["inappropriate"]
|
64
|
-
"bumblefuck": ["discriminatory"]
|
65
57
|
"butt plug": ["sexual"]
|
66
58
|
"butt-pirate": ["discriminatory"]
|
67
|
-
"buttfucka": ["discriminatory"]
|
68
|
-
"buttfucker": ["discriminatory"]
|
69
59
|
"camel toe": ["sexual"]
|
70
60
|
"carpetmuncher": ["discriminatory"]
|
71
61
|
"chinc": ["discriminatory"]
|
@@ -74,14 +64,11 @@ simple:
|
|
74
64
|
"chode": ["sexual"]
|
75
65
|
"clit": ["sexual"]
|
76
66
|
"clitface": ["insult"]
|
77
|
-
"clitfuck": ["sexual"]
|
78
|
-
"clusterfuck": ["inappropriate"]
|
79
67
|
"cock": ["sexual"]
|
80
68
|
"cockass": "Jerk"
|
81
69
|
"cockbite": ["insult"]
|
82
70
|
"cockburger": ["insult"]
|
83
71
|
"cockface": ["insult"]
|
84
|
-
"cockfucker": ["insult"]
|
85
72
|
"cockhead": ["insult"]
|
86
73
|
"cockjockey": ["discriminatory"]
|
87
74
|
"cockknoker": ["discriminatory"]
|
@@ -95,16 +82,14 @@ simple:
|
|
95
82
|
"cockshit": ["insult"]
|
96
83
|
"cocksmith": ["discriminatory"]
|
97
84
|
"cocksmoker": ["discriminatory"]
|
98
|
-
"cocksuck": ["sexual"]
|
85
|
+
"cocksuck": ["sexual", "discriminatory"]
|
99
86
|
"cocksucked": ["sexual"]
|
100
87
|
"cocksucker": ["discriminatory", "sexual"]
|
101
88
|
"cocksucking": ["sexual", "discriminatory"]
|
102
|
-
"cocksucks": ["sexual", "discriminatory"]
|
103
89
|
"coochie": ["sexual"]
|
104
90
|
"coochy": ["sexual"]
|
105
91
|
"coon": ["discriminatory"]
|
106
92
|
"cooter": ["sexual"]
|
107
|
-
"cracker": ["discriminatory"]
|
108
93
|
"cum": ["sexual"]
|
109
94
|
"cumbubble": ["insult"]
|
110
95
|
"cumdumpster": ["sexual"]
|
@@ -118,7 +103,6 @@ simple:
|
|
118
103
|
"cunillingus": ["sexual"]
|
119
104
|
"cunnie": ["sexual"]
|
120
105
|
"cunnilingus": ["sexual"]
|
121
|
-
"cunnilingus": ["sexual"]
|
122
106
|
"cunt": ["insult", "sexual"]
|
123
107
|
"cuntface": ["insult"]
|
124
108
|
"cunthole": ["sexual"]
|
@@ -128,19 +112,11 @@ simple:
|
|
128
112
|
"cuntrag": ["insult"]
|
129
113
|
"cuntslut": ["insult"]
|
130
114
|
"cyberfuc": ["sexual"]
|
131
|
-
"
|
132
|
-
"cyberfucked": ["sexual"]
|
133
|
-
"cyberfucker": ["sexual"]
|
134
|
-
"cyberfucking": ["sexual"]
|
135
|
-
"dago": ["discriminatory"]
|
136
|
-
"damn": ["inappropriate"]
|
137
|
-
"deggo": ["discriminatory"]
|
115
|
+
"dammit": ["inappropriate", "blasphemy"]
|
138
116
|
"dick": ["sexual", "insult"]
|
139
117
|
"dickbag": ["insult"]
|
140
|
-
"
|
118
|
+
"dickbeater": ["sexual"]
|
141
119
|
"dickface": ["insult"]
|
142
|
-
"dickfuck": ["insult"]
|
143
|
-
"dickfucker": ["discriminatory"]
|
144
120
|
"dickhead": ["insult"]
|
145
121
|
"dickhole": ["sexual"]
|
146
122
|
"dickjuice": ["sexual"]
|
@@ -165,26 +141,15 @@ simple:
|
|
165
141
|
"dumass": ["insult"]
|
166
142
|
"dumb ass": ["insult"]
|
167
143
|
"dumbass": ["insult"]
|
168
|
-
"dumbfuck": ["insult"]
|
169
144
|
"dumbshit": ["insult"]
|
170
145
|
"dumshit": ["insult"]
|
171
146
|
"dyke": ["discriminatory"]
|
172
|
-
"ejaculate": ["sexual"]
|
173
|
-
"ejaculated": ["sexual"]
|
174
|
-
"ejaculates": ["sexual"]
|
175
|
-
"ejaculating": ["sexual"]
|
176
|
-
"ejaculation": ["sexual"]
|
177
147
|
"fag": ["discriminatory"]
|
178
148
|
"fagbag": ["discriminatory"]
|
179
|
-
"fagfucker": ["discriminatory"]
|
180
149
|
"fagging": ["discriminatory"]
|
181
150
|
"faggit": ["discriminatory"]
|
182
|
-
"
|
183
|
-
"faggot": ["discriminatory"]
|
184
|
-
"faggotcock": ["discriminatory"]
|
185
|
-
"faggs": ["discriminatory"]
|
151
|
+
"fagg": ["discriminatory"]
|
186
152
|
"fagot": ["discriminatory"]
|
187
|
-
"fags": ["discriminatory"]
|
188
153
|
"fagtard": ["discriminatory"]
|
189
154
|
"fart": ["inappropriate"]
|
190
155
|
"farted": ["inappropriate"]
|
@@ -193,42 +158,8 @@ simple:
|
|
193
158
|
"fatass": ["insult"]
|
194
159
|
"felatio": ["sexual"]
|
195
160
|
"fellatio": ["sexual"]
|
196
|
-
"fellatio": ["sexual"]
|
197
161
|
"feltch": ["sexual"]
|
198
|
-
"fingerfuck": ["sexual"]
|
199
|
-
"fingerfucked": ["sexual"]
|
200
|
-
"fingerfucker": ["sexual"]
|
201
|
-
"fingerfucking": ["sexual"]
|
202
|
-
"fingerfucks": ["sexual"]
|
203
|
-
"fistfuck": ["sexual"]
|
204
|
-
"fistfucked": ["sexual"]
|
205
|
-
"fistfucker": ["sexual"]
|
206
|
-
"fistfucking": ["sexual"]
|
207
162
|
"flamer": ["discriminatory"]
|
208
|
-
"fuck": ["sexual"]
|
209
|
-
"fuckass": ["insult"]
|
210
|
-
"fuckbag": ["insult"]
|
211
|
-
"fuckboy": ["insult"]
|
212
|
-
"fuckbrain": ["insult"]
|
213
|
-
"fuckbutt": ["sexual"]
|
214
|
-
"fucked": ["sexual"]
|
215
|
-
"fucker": ["sexual", "insult"]
|
216
|
-
"fuckersucker": ["insult"]
|
217
|
-
"fuckface": ["insult"]
|
218
|
-
"fuckhead": ["sexual"]
|
219
|
-
"fuckhole": ["insult"]
|
220
|
-
"fuckin": ["sexual"]
|
221
|
-
"fucking": ["sexual"]
|
222
|
-
"fuckme": ["sexual"]
|
223
|
-
"fucknut": ["insult"]
|
224
|
-
"fucknutt": ["insult"]
|
225
|
-
"fuckoff": ["insult"]
|
226
|
-
"fuckstick": ["sexual"]
|
227
|
-
"fucktard": ["insult"]
|
228
|
-
"fuckup": ["insult"]
|
229
|
-
"fuckwad": ["insult"]
|
230
|
-
"fuckwit": ["insult"]
|
231
|
-
"fuckwitt": ["insult"]
|
232
163
|
"fudgepacker": ["discriminatory"]
|
233
164
|
"fuk": ["sexual"]
|
234
165
|
"gangbang": ["sexual"]
|
@@ -237,15 +168,10 @@ simple:
|
|
237
168
|
"gayass": ["sexual"]
|
238
169
|
"gaybob": ["discriminatory"]
|
239
170
|
"gaydo": ["discriminatory"]
|
240
|
-
"gayfuck": ["discriminatory"]
|
241
|
-
"gayfuckist": ["discriminatory"]
|
242
171
|
"gaylord": ["discriminatory"]
|
243
172
|
"gaysex": ["discriminatory"]
|
244
173
|
"gaytard": ["discriminatory"]
|
245
174
|
"gaywad": ["discriminatory"]
|
246
|
-
"goddamn": ["inappropriate", "blasphemy"]
|
247
|
-
"goddamn": ["inappropriate", "blasphemy"]
|
248
|
-
"goddamnit": ["inappropriate", "blasphemy"]
|
249
175
|
"gooch": ["sexual"]
|
250
176
|
"gook": ["discriminatory"]
|
251
177
|
"gringo": ["discriminatory"]
|
@@ -254,12 +180,11 @@ simple:
|
|
254
180
|
"hardcoresex": ["sexual"]
|
255
181
|
"heeb": ["discriminatory"]
|
256
182
|
"hell": ["inappropriate"]
|
257
|
-
"hell": ["inappropriate"]
|
258
183
|
"ho": ["discriminatory"]
|
259
184
|
"hoe": ["discriminatory"]
|
260
185
|
"homo": ["discriminatory"]
|
261
|
-
"homodumbshit": ["insult"]
|
262
186
|
"honkey": ["discriminatory"]
|
187
|
+
"honky": ["discriminatory"]
|
263
188
|
"horniest": ["sexual"]
|
264
189
|
"horny": ["sexual"]
|
265
190
|
"hotsex": ["sexual"]
|
@@ -282,7 +207,6 @@ simple:
|
|
282
207
|
"kumer": ["sexual"]
|
283
208
|
"kummer": ["sexual"]
|
284
209
|
"kumming": ["sexual"]
|
285
|
-
"kums": ["sexual"]
|
286
210
|
"kunilingus": ["sexual"]
|
287
211
|
"kunt": ["sexual"]
|
288
212
|
"kyke": ["discriminatory"]
|
@@ -292,27 +216,11 @@ simple:
|
|
292
216
|
"lust": ["sexual"]
|
293
217
|
"lusting": ["sexual"]
|
294
218
|
"mcfagget": ["discriminatory"]
|
295
|
-
"mick": ["discriminatory"]
|
296
219
|
"minge": ["sexual"]
|
297
|
-
"mothafuck": ["sexual"]
|
298
|
-
"mothafucka": ["sexual", "insult"]
|
299
|
-
"mothafuckaz": ["sexual"]
|
300
|
-
"mothafucked": ["sexual"]
|
301
|
-
"mothafucker": ["sexual", "insult"]
|
302
|
-
"mothafuckin": ["sexual"]
|
303
|
-
"mothafucking": ["sexual"]
|
304
|
-
"mothafucks": ["sexual"]
|
305
|
-
"motherfuck": ["sexual"]
|
306
|
-
"motherfucked": ["sexual"]
|
307
|
-
"motherfucker": ["sexual", "insult"]
|
308
|
-
"motherfuckin": ["sexual"]
|
309
|
-
"motherfucking": ["sexual"]
|
310
220
|
"muff": ["sexual"]
|
311
221
|
"muffdiver": ["discriminatory", "sexual"]
|
312
|
-
"munging": ["sexual"]
|
313
222
|
"negro": ["discriminatory"]
|
314
223
|
"nigga": ["discriminatory"]
|
315
|
-
"nigger": ["discriminatory"]
|
316
224
|
"niglet": ["discriminatory"]
|
317
225
|
"nut sack": ["sexual"]
|
318
226
|
"nutsack": ["sexual"]
|
@@ -322,16 +230,12 @@ simple:
|
|
322
230
|
"panooch": ["sexual"]
|
323
231
|
"pecker": ["sexual"]
|
324
232
|
"peckerhead": ["insult"]
|
325
|
-
"penis": ["sexual"]
|
326
|
-
"penisfucker": ["discriminatory"]
|
327
|
-
"penispuffer": ["discriminatory"]
|
328
233
|
"phonesex": ["sexual"]
|
329
234
|
"phuk": ["sexual"]
|
330
235
|
"phuked": ["sexual"]
|
331
236
|
"phuking": ["sexual"]
|
332
237
|
"phukked": ["sexual"]
|
333
238
|
"phukking": ["sexual"]
|
334
|
-
"phuks": ["sexual"]
|
335
239
|
"phuq": ["sexual"]
|
336
240
|
"pis": ["sexual"]
|
337
241
|
"pises": ["sexual"]
|
@@ -341,8 +245,7 @@ simple:
|
|
341
245
|
"piss": ["inappropriate"]
|
342
246
|
"pissed": ["inappropriate"]
|
343
247
|
"pisser": ["sexual"]
|
344
|
-
"
|
345
|
-
"pissflaps": ["sexual"]
|
248
|
+
"pissflap": ["sexual"]
|
346
249
|
"pissin": ["sexual"]
|
347
250
|
"pissing": ["sexual"]
|
348
251
|
"pissoff": ["sexual"]
|
@@ -357,7 +260,6 @@ simple:
|
|
357
260
|
"porn": ["sexual"]
|
358
261
|
"porno": ["sexual"]
|
359
262
|
"pornography": ["sexual"]
|
360
|
-
"pornos": ["sexual"]
|
361
263
|
"prick": ["sexual"]
|
362
264
|
"punanny": ["sexual"]
|
363
265
|
"punta": ["insult"]
|
@@ -374,7 +276,6 @@ simple:
|
|
374
276
|
"renob": ["sexual"]
|
375
277
|
"rimjob": ["sexual"]
|
376
278
|
"ruski": ["discriminatory"]
|
377
|
-
"sandnigger": ["discriminatory"]
|
378
279
|
"schlong": ["sexual"]
|
379
280
|
"scrote": ["sexual"]
|
380
281
|
"shit": ["sexual", "inappropriate"]
|
@@ -405,11 +306,8 @@ simple:
|
|
405
306
|
"shiznit": ["inappropriate"]
|
406
307
|
"skank": ["insult"]
|
407
308
|
"skeet": ["sexual"]
|
408
|
-
"skullfuck": ["sexual"]
|
409
|
-
"slut": ["sexual"]
|
410
309
|
"slut": ["discriminatory"]
|
411
310
|
"slutbag": ["discriminatory"]
|
412
|
-
"sluts": ["sexual"]
|
413
311
|
"smeg": ["inappropriate"]
|
414
312
|
"smut": ["sexual"]
|
415
313
|
"snatch": ["sexual"]
|
@@ -421,19 +319,14 @@ simple:
|
|
421
319
|
"testicle": ["sexual"]
|
422
320
|
"thundercunt": ["insult"]
|
423
321
|
"tit": ["sexual"]
|
424
|
-
"titfuck": ["sexual"]
|
425
|
-
"tittyfuck": ["sexual"]
|
426
322
|
"twat": ["sexual"]
|
427
323
|
"twatlips": ["insult"]
|
428
324
|
"twatwaffle": ["discriminatory"]
|
429
|
-
"unclefucker": ["discriminatory"]
|
430
325
|
"va-j-j": ["sexual"]
|
431
326
|
"vag": ["sexual"]
|
432
327
|
"vagina": ["sexual"]
|
433
328
|
"vjayjay": ["sexual"]
|
434
329
|
"wank": ["sexual"]
|
435
330
|
"wetback": ["discriminatory"]
|
436
|
-
|
437
|
-
"
|
438
|
-
"whoreface": ["insult"]
|
439
|
-
"wop": ["discriminatory"]
|
331
|
+
emoji:
|
332
|
+
"1f595": ["insult"]
|
data/lib/swearjar.rb
CHANGED
@@ -1,63 +1,89 @@
|
|
1
1
|
require 'yaml'
|
2
|
-
require 'fuzzy_hash'
|
3
2
|
|
4
3
|
class Swearjar
|
5
|
-
|
6
4
|
def self.default
|
7
|
-
from_language
|
5
|
+
from_language('en')
|
8
6
|
end
|
9
7
|
|
10
|
-
def self.from_language(language = 'en')
|
8
|
+
def self.from_language(language)
|
11
9
|
new(File.join(File.dirname(__FILE__), 'config', "#{language}.yml"))
|
12
10
|
end
|
13
11
|
|
14
|
-
attr_reader :tester, :hash
|
15
|
-
|
16
12
|
def initialize(file = nil)
|
17
|
-
@tester = FuzzyHash.new
|
18
13
|
@hash = {}
|
14
|
+
@regexs = {}
|
19
15
|
load_file(file) if file
|
20
16
|
end
|
21
17
|
|
18
|
+
def profane?(string)
|
19
|
+
string = string.to_s
|
20
|
+
scan(string) {|_word, test| return true if test }
|
21
|
+
false
|
22
|
+
end
|
23
|
+
|
24
|
+
def scorecard(string)
|
25
|
+
string = string.to_s
|
26
|
+
scorecard = {}
|
27
|
+
scan(string) do |_word, test|
|
28
|
+
next unless test
|
29
|
+
test.each do |type|
|
30
|
+
scorecard[type] = 0 unless scorecard.key?(type)
|
31
|
+
scorecard[type] += 1
|
32
|
+
end
|
33
|
+
end
|
34
|
+
scorecard
|
35
|
+
end
|
36
|
+
|
37
|
+
def censor(string)
|
38
|
+
censored_string = string.to_s.dup
|
39
|
+
scan(string) do |word, test|
|
40
|
+
next unless test
|
41
|
+
replacement = block_given? ? yield(word) : word.gsub(/\S/, '*')
|
42
|
+
censored_string.gsub!(word, replacement)
|
43
|
+
end
|
44
|
+
censored_string
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
22
49
|
def load_file(file)
|
23
50
|
data = YAML.load_file(file)
|
24
51
|
|
25
52
|
data['regex'].each do |pattern, type|
|
26
|
-
@tester[Regexp.new(pattern)] = type
|
53
|
+
@regexs[Regexp.new(pattern, "i")] = type
|
27
54
|
end if data['regex']
|
28
55
|
|
29
56
|
data['simple'].each do |test, type|
|
30
57
|
@hash[test] = type
|
31
58
|
end if data['simple']
|
59
|
+
|
60
|
+
data['emoji'].each do |unicode, type|
|
61
|
+
char = [unicode.hex].pack("U")
|
62
|
+
@hash[char] = type
|
63
|
+
end if data['emoji']
|
32
64
|
end
|
33
65
|
|
66
|
+
WORD_REGEX = /\b[a-zA-Z-]+\b/
|
67
|
+
|
68
|
+
# https://github.com/franklsf95/ruby-emoji-regex
|
69
|
+
EMOJI_REGEX = /[\u{00A9}\u{00AE}\u{203C}\u{2049}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{2328}\u{23CF}\u{23E9}-\u{23F3}\u{23F8}-\u{23FA}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2604}\u{260E}\u{2611}\u{2614}-\u{2615}\u{2618}\u{261D}\u{2620}\u{2622}-\u{2623}\u{2626}\u{262A}\u{262E}-\u{262F}\u{2638}-\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2692}-\u{2694}\u{2696}-\u{2697}\u{2699}\u{269B}-\u{269C}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26B0}-\u{26B1}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26C8}\u{26CE}-\u{26CF}\u{26D1}\u{26D3}-\u{26D4}\u{26E9}-\u{26EA}\u{26F0}-\u{26F5}\u{26F7}-\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270D}\u{270F}\u{2712}\u{2714}\u{2716}\u{271D}\u{2721}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2763}-\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{27BF}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F321}\u{1F324}-\u{1F393}\u{1F396}-\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}-\u{1F3F0}\u{1F3F3}-\u{1F3F5}\u{1F3F7}-\u{1F4FD}\u{1F4FF}-\u{1F53D}\u{1F549}-\u{1F54E}\u{1F550}-\u{1F567}\u{1F56F}-\u{1F570}\u{1F573}-\u{1F579}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F590}\u{1F595}-\u{1F596}\u{1F5A5}\u{1F5A8}\u{1F5B1}-\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5EF}\u{1F5F3}\u{1F5FA}-\u{1F64F}\u{1F680}-\u{1F6C5}\u{1F6CB}-\u{1F6D0}\u{1F6E0}-\u{1F6E5}\u{1F6E9}\u{1F6EB}-\u{1F6EC}\u{1F6F0}\u{1F6F3}\u{1F910}-\u{1F918}\u{1F980}-\u{1F984}\u{1F9C0}]/
|
70
|
+
|
34
71
|
def scan(string, &block)
|
35
|
-
string
|
36
|
-
|
37
|
-
|
72
|
+
string.scan(WORD_REGEX) do |word|
|
73
|
+
block.call(word,
|
74
|
+
@hash[word.downcase] ||
|
75
|
+
@hash[word.downcase.gsub(/s\z/,'')] ||
|
76
|
+
@hash[word.downcase.gsub(/es\z/,'')])
|
38
77
|
end
|
39
|
-
if match = tester.match_with_result(string)
|
40
|
-
block.call(match.last, match.first)
|
41
|
-
end
|
42
|
-
end
|
43
78
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
return false
|
48
|
-
end
|
49
|
-
|
50
|
-
def scorecard(string)
|
51
|
-
string = string.to_s
|
52
|
-
scorecard = {}
|
53
|
-
scan(string) {|word, test| test.each { |type| scorecard.key?(type) ? scorecard[type] += 1 : scorecard[type] = 1} if test}
|
54
|
-
scorecard
|
55
|
-
end
|
79
|
+
string.scan(EMOJI_REGEX) do |emoji_char|
|
80
|
+
block.call(emoji_char, @hash[emoji_char])
|
81
|
+
end
|
56
82
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
83
|
+
@regexs.each do |regex, type|
|
84
|
+
string.scan(regex) do |word|
|
85
|
+
block.call(word, type)
|
86
|
+
end
|
87
|
+
end
|
61
88
|
end
|
62
|
-
|
63
|
-
end
|
89
|
+
end
|
data/lib/swearjar/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
class Swearjar
|
2
|
-
VERSION = '1.0.0'
|
3
|
-
end
|
2
|
+
VERSION = '1.1.0'
|
3
|
+
end
|
data/spec/data/swear.yml
CHANGED
data/spec/swearjar_spec.rb
CHANGED
@@ -1,48 +1,87 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
require 'spec_helper'
|
2
3
|
|
3
4
|
describe Swearjar do
|
4
|
-
|
5
5
|
it "should detect dirty words" do
|
6
|
-
Swearjar.default.profane?('
|
6
|
+
expect(Swearjar.default.profane?('jackass chan')).to be_truthy
|
7
7
|
end
|
8
8
|
|
9
9
|
it "should detect dirty words regardless of case" do
|
10
|
-
Swearjar.default.profane?('
|
10
|
+
expect(Swearjar.default.profane?('JACKASS CHAN')).to be_truthy
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should not detect non-dirty words" do
|
14
|
-
Swearjar.default.profane?('
|
14
|
+
expect(Swearjar.default.profane?('I love Jackie Chan movies')).to be_falsey
|
15
15
|
end
|
16
16
|
|
17
17
|
it "should give us a scorecard" do
|
18
|
-
Swearjar.default.scorecard('
|
18
|
+
expect(Swearjar.default.scorecard('honky jim henson')).to eq({'discriminatory'=>1})
|
19
19
|
end
|
20
20
|
|
21
21
|
it "should detect multiword" do
|
22
|
-
Swearjar.default.scorecard('jim henson has a hard on').
|
22
|
+
expect(Swearjar.default.scorecard('jim henson has a hard on')).to eq({'sexual'=>1})
|
23
23
|
end
|
24
24
|
|
25
25
|
it "should detect multiword plurals" do
|
26
|
-
Swearjar.default.scorecard('jim henson has a hard ons').
|
26
|
+
expect(Swearjar.default.scorecard('jim henson has a hard ons')).to eq({'sexual'=>1})
|
27
27
|
end
|
28
28
|
|
29
29
|
it "should detect simple dirty plurals" do
|
30
|
-
Swearjar.default.profane?('jim henson had two dicks').
|
31
|
-
Swearjar.default.profane?('jim henson has two asses').
|
30
|
+
expect(Swearjar.default.profane?('jim henson had two dicks')).to be_truthy
|
31
|
+
expect(Swearjar.default.profane?('jim henson has two asses')).to be_truthy
|
32
32
|
end
|
33
33
|
|
34
34
|
it "should censor a string" do
|
35
|
-
Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody').
|
35
|
+
expect(Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody')).to eq('jim henson has a massive **** ** he is gonna use to **** everybody')
|
36
36
|
end
|
37
37
|
|
38
38
|
it "should not do much when given a non-string" do
|
39
|
-
Swearjar.default.profane?(nil).
|
39
|
+
expect(Swearjar.default.profane?(nil)).to be_falsey
|
40
|
+
end
|
41
|
+
|
42
|
+
it "doesn't mark an empty string as profane" do
|
43
|
+
expect(Swearjar.default.profane?("")).to be_falsey
|
40
44
|
end
|
41
45
|
|
42
46
|
it "should allow you to load a new yaml file" do
|
43
|
-
sj = Swearjar.new
|
44
|
-
sj.
|
45
|
-
|
47
|
+
sj = Swearjar.new(File.expand_path('../data/swear.yml', __FILE__))
|
48
|
+
expect(sj.censor("Python is the best language!")).to eq("****** is the best language!")
|
49
|
+
end
|
50
|
+
|
51
|
+
it "detects multiple entries" do
|
52
|
+
expect(Swearjar.default.scorecard("cunts cunts cunts")).to eq({"insult" => 3, "sexual" => 3})
|
53
|
+
expect(Swearjar.default.scorecard("damn damnit dammit")).to eq({"inappropriate" => 3, "blasphemy" => 3})
|
54
|
+
end
|
55
|
+
|
56
|
+
it "detects plurals of words ending in 'e'" do
|
57
|
+
expect(Swearjar.default.profane?("asspirates")).to be_truthy
|
58
|
+
end
|
59
|
+
|
60
|
+
it "detects profane emojis" do
|
61
|
+
expect(Swearjar.default.profane?("🖕")).to be_truthy
|
62
|
+
end
|
63
|
+
|
64
|
+
it "detects profane emojis with skin tone" do
|
65
|
+
expect(Swearjar.default.profane?("🖕🏾")).to be_truthy
|
66
|
+
end
|
67
|
+
|
68
|
+
it "censors profane emojis" do
|
69
|
+
expect(Swearjar.default.censor("Fuck you🖕 🖕🖕")).to eq("**** you* **")
|
70
|
+
end
|
71
|
+
|
72
|
+
it "censors with regular expression matching" do
|
73
|
+
expect(Swearjar.default.censor("foonIgg3rbar foo nigger")).to eq("************ foo ******")
|
46
74
|
end
|
47
75
|
|
48
|
-
|
76
|
+
it "censors with a mix of normal and regular expression matches" do
|
77
|
+
expect(Swearjar.default.censor("fagfaggot faggotfag")).to eq("********* *********")
|
78
|
+
end
|
79
|
+
|
80
|
+
it "detects scorecard with regular expression matching" do
|
81
|
+
expect(Swearjar.default.scorecard("foonIgg3rbar foo nigger")).to eq({"discriminatory" => 2})
|
82
|
+
end
|
83
|
+
|
84
|
+
xit "doesn't substitute simple words when they occur later as substrings" do
|
85
|
+
expect(Swearjar.default.censor("anus janus")).to eq("**** janus")
|
86
|
+
end
|
87
|
+
end
|
data/swearjar.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.summary = "Put another nickel in the swearjar. Simple profanity detection with content analysis"
|
11
11
|
s.description = "#{s.summary}."
|
12
12
|
s.email = %q{joshbuddy@gmail.com}
|
13
|
-
s.extra_rdoc_files = ['README.rdoc']
|
13
|
+
s.extra_rdoc_files = ['README.md']
|
14
14
|
s.files = `git ls-files`.split("\n")
|
15
15
|
s.homepage = %q{http://github.com/joshbuddy/swearjar}
|
16
16
|
s.rdoc_options = ["--charset=UTF-8"]
|
@@ -20,18 +20,8 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.rubyforge_project = 'swearjar'
|
21
21
|
|
22
22
|
# dependencies
|
23
|
-
s.
|
24
|
-
s.add_development_dependency '
|
25
|
-
s.add_development_dependency '
|
26
|
-
|
27
|
-
if s.respond_to? :specification_version then
|
28
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
29
|
-
s.specification_version = 3
|
30
|
-
|
31
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
32
|
-
else
|
33
|
-
end
|
34
|
-
else
|
35
|
-
end
|
23
|
+
s.add_development_dependency 'rake', '~> 10.5'
|
24
|
+
s.add_development_dependency 'rspec', '~> 3.4'
|
25
|
+
s.add_development_dependency 'pry', '~> 0.10'
|
36
26
|
end
|
37
27
|
|
metadata
CHANGED
@@ -1,99 +1,105 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swearjar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Joshua Hull
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2016-02-14 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement:
|
17
|
-
none: false
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- - ~>
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
22
|
-
type: :runtime
|
19
|
+
version: '10.5'
|
20
|
+
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '10.5'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
27
|
-
requirement:
|
28
|
-
none: false
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- - ~>
|
31
|
+
- - "~>"
|
31
32
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
33
|
+
version: '3.4'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.4'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
|
-
name:
|
38
|
-
requirement:
|
39
|
-
none: false
|
42
|
+
name: pry
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- - ~>
|
45
|
+
- - "~>"
|
42
46
|
- !ruby/object:Gem::Version
|
43
|
-
version:
|
47
|
+
version: '0.10'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.10'
|
47
55
|
description: Put another nickel in the swearjar. Simple profanity detection with content
|
48
56
|
analysis.
|
49
57
|
email: joshbuddy@gmail.com
|
50
58
|
executables: []
|
51
59
|
extensions: []
|
52
60
|
extra_rdoc_files:
|
53
|
-
- README.rdoc
|
61
|
+
- README.md
|
54
62
|
files:
|
55
|
-
- .gitignore
|
63
|
+
- ".gitignore"
|
64
|
+
- ".travis.yml"
|
65
|
+
- CHANGELOG.md
|
56
66
|
- Gemfile
|
57
|
-
- README.rdoc
|
67
|
+
- README.md
|
58
68
|
- Rakefile
|
59
69
|
- lib/config/en.yml
|
60
70
|
- lib/swearjar.rb
|
61
|
-
- lib/swearjar/tester.rb
|
62
71
|
- lib/swearjar/version.rb
|
63
72
|
- spec/data/swear.yml
|
64
|
-
- spec/spec.opts
|
65
73
|
- spec/spec_helper.rb
|
66
74
|
- spec/swearjar_spec.rb
|
67
75
|
- swearjar.gemspec
|
68
76
|
homepage: http://github.com/joshbuddy/swearjar
|
69
77
|
licenses: []
|
78
|
+
metadata: {}
|
70
79
|
post_install_message:
|
71
80
|
rdoc_options:
|
72
|
-
- --charset=UTF-8
|
81
|
+
- "--charset=UTF-8"
|
73
82
|
require_paths:
|
74
83
|
- lib
|
75
84
|
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
-
none: false
|
77
85
|
requirements:
|
78
|
-
- -
|
86
|
+
- - ">="
|
79
87
|
- !ruby/object:Gem::Version
|
80
88
|
version: '0'
|
81
89
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
-
none: false
|
83
90
|
requirements:
|
84
|
-
- -
|
91
|
+
- - ">="
|
85
92
|
- !ruby/object:Gem::Version
|
86
93
|
version: '0'
|
87
94
|
requirements: []
|
88
95
|
rubyforge_project: swearjar
|
89
|
-
rubygems_version:
|
96
|
+
rubygems_version: 2.2.3
|
90
97
|
signing_key:
|
91
|
-
specification_version: 3
|
98
|
+
specification_version: 4
|
92
99
|
summary: Put another nickel in the swearjar. Simple profanity detection with content
|
93
100
|
analysis
|
94
101
|
test_files:
|
95
102
|
- spec/data/swear.yml
|
96
|
-
- spec/spec.opts
|
97
103
|
- spec/spec_helper.rb
|
98
104
|
- spec/swearjar_spec.rb
|
99
105
|
has_rdoc:
|
data/README.rdoc
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
= Swearjar
|
2
|
-
|
3
|
-
Simple profanity detection with content analysis.
|
4
|
-
|
5
|
-
== Installation
|
6
|
-
|
7
|
-
gem install swearjar
|
8
|
-
|
9
|
-
== Usage
|
10
|
-
|
11
|
-
require 'swearjar'
|
12
|
-
|
13
|
-
Swearjar.default.profane?('jim henson has a massive hard on he is gonna use to fuck everybody')
|
14
|
-
<< true
|
15
|
-
|
16
|
-
Swearjar.default.scorecard('jim henson has a massive hard on he is gonna use to fuck everybody')
|
17
|
-
<< {:sexual => 2}
|
18
|
-
|
19
|
-
Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody')
|
20
|
-
<< 'jim henson has a massive **** ** he is gonna use to **** everybody'
|
21
|
-
|
22
|
-
To load from a custom yaml file, you can do the following
|
23
|
-
|
24
|
-
sj = Swearjar.new
|
25
|
-
sj.load_file('my_yaml.yml')
|
26
|
-
|
27
|
-
The YAML file can have two sections, `simple` and `regex`. For an example, see `lib/config/en.yml`.
|
28
|
-
|
data/lib/swearjar/tester.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
require 'yaml'
|
2
|
-
require 'fuzzy_hash'
|
3
|
-
require 'bloomfilter'
|
4
|
-
|
5
|
-
class Swearjar
|
6
|
-
class Tester
|
7
|
-
|
8
|
-
def initialize(config_file)
|
9
|
-
data = YAML.load_file
|
10
|
-
|
11
|
-
@tester = FuzzyHash.new
|
12
|
-
|
13
|
-
data['regex'].each do |pattern, type|
|
14
|
-
@tester[Regexp.new(pattern)] = type
|
15
|
-
end
|
16
|
-
|
17
|
-
data['simple'].each do |test, type|
|
18
|
-
@tester[test] = type
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
def scan(string, &block)
|
24
|
-
string.scan(/\b[\b]+\b/, &block)
|
25
|
-
end
|
26
|
-
|
27
|
-
def profane?(string)
|
28
|
-
scan(string) {|w| return true}
|
29
|
-
end
|
30
|
-
|
31
|
-
end
|
32
|
-
end
|