swearjar 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cfbc2f8e57099d5a6b9a4c60961977e8bfb542d3
4
+ data.tar.gz: d176f8879508172566f2582bf29a5d0d6c47d27d
5
+ SHA512:
6
+ metadata.gz: 2980ee653f0b552d3e822b991041fd91d5e1ae494d9e2823109e67c927b0dbe53ae0e2192397bd5fed38684d78af07c4713f2271a2ebc93cd0538dfe07aa40be
7
+ data.tar.gz: 4116470e1e7cdb71c5958888833306e20196cbc6f11ddc7e4219a8ebecb7fe8b0375feb50ede7fcdd03635e588002a401de414ca73d64a8cc661a36100639fd2
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ rvm:
2
+ - ruby-1.9.3
3
+ - ruby-2.0.0
4
+ - ruby-2.1
5
+ - jruby
6
+ sudo: false
data/CHANGELOG.md ADDED
@@ -0,0 +1,8 @@
1
+ # Changelog
2
+
3
+ ### v1.1.0 (2016-02-13)
4
+ - Add emoji support
5
+ - Censor middle finger emoji by default
6
+
7
+ ### v1.0.0 (2012-04-02)
8
+ - Detect plurals
data/Gemfile CHANGED
@@ -1,3 +1,3 @@
1
- source :rubygems
1
+ source "https://rubygems.org"
2
2
 
3
- gemspec
3
+ gemspec
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # Swearjar
2
+
3
+ Simple profanity detection with content analysis.
4
+
5
+ ## Installation
6
+
7
+ `gem install swearjar`
8
+
9
+ ## Usage
10
+
11
+ ```ruby
12
+ require "swearjar"
13
+
14
+ sj = Swearjar.default
15
+
16
+ sj.profane?("jim henson has a massive hard on he is gonna use to fuck everybody")
17
+ # => true
18
+
19
+ sj.scorecard("jim henson has a massive hard on he is gonna use to fuck everybody")
20
+ # => {"sexual" => 2}
21
+
22
+ sj.censor("jim henson has a massive hard on he is gonna use to fuck everybody")
23
+ # => "jim henson has a massive **** ** he is gonna use to **** everybody"
24
+ ```
25
+
26
+ To load from a custom config file, you can do the following:
27
+
28
+ ```ruby
29
+ # For an example see lib/config/en.yml
30
+ sj = Swearjar.new("my_swears.yml")
31
+ ```
data/Rakefile CHANGED
@@ -1,18 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
+ require "rspec/core/rake_task"
4
+ RSpec::Core::RakeTask.new(:spec)
3
5
  task :default => :spec
4
-
5
- require 'spec'
6
- require 'spec/rake/spectask'
7
- task :spec => 'spec:all'
8
- namespace(:spec) do
9
- Spec::Rake::SpecTask.new(:all) do |t|
10
- t.spec_opts ||= []
11
- t.spec_opts << "-rubygems"
12
- t.spec_opts << "--options" << "spec/spec.opts"
13
- t.spec_files = FileList['spec/**/*_spec.rb']
14
- end
15
- end
16
-
17
- require 'bundler'
18
- Bundler::GemHelper.install_tasks
data/lib/config/en.yml CHANGED
@@ -1,8 +1,16 @@
1
+ ---
1
2
  regex:
2
3
  'hard ons?\b': ["sexual"]
3
4
  'jerk off\b': ["sexual"]
4
5
  'pissed off\b': ["inappropriate"]
5
- 'sand nigger': ["discriminatory"]
6
+ '\w*n[i1]gg[e3]r\w*': ["discriminatory"]
7
+ '\w*fuck\w*': ["sexual"]
8
+ '\w*b[i1]tch\w*': ["insult"]
9
+ '\w*ejaculat\w*': ["sexual"]
10
+ '\w*damn\w*': ["inappropriate", "blasphemy"]
11
+ '\w*f[a4]gg[o0]t\w*': ["discriminatory"]
12
+ '\w*wh[o0]r[e3]\w*': ["insult"]
13
+ '\w*p[e3]n[i1]s\w*': ["sexual"]
6
14
  simple:
7
15
  "anus": ["sexual"]
8
16
  "arse": ["insult"]
@@ -18,8 +26,6 @@ simple:
18
26
  "asscock": ["insult"]
19
27
  "asscracker": ["sexual"]
20
28
  "assface": ["sexual"]
21
- "assfuck": ["sexual"]
22
- "assfucker": ["discriminatory"]
23
29
  "assgoblin": ["discriminatory"]
24
30
  "asshat": ["sexual"]
25
31
  "asshead": ["insult"]
@@ -31,9 +37,7 @@ simple:
31
37
  "assmonkey": ["insult"]
32
38
  "assmunch": ["insult"]
33
39
  "assmuncher": ["sexual"]
34
- "assnigger": ["discriminatory"]
35
40
  "asspirate": ["discriminatory"]
36
- "assshit": ["insult"]
37
41
  "assshole": ["sexual"]
38
42
  "asssucker": ["insult"]
39
43
  "asswad": ["sexual"]
@@ -41,31 +45,17 @@ simple:
41
45
  "bampot": ["insult"]
42
46
  "bastard": ["insult"]
43
47
  "beaner": ["discriminatory"]
44
- "beastial": ["sexual"]
45
48
  "beastiality": ["sexual"]
46
49
  "beastility": ["sexual"]
47
- "bestial": ["sexual"]
48
50
  "bestiality": ["sexual"]
49
- "bitch": ["insult"]
50
- "bitchass": ["insult"]
51
- "bitcher": ["insult"]
52
- "bitchin": ["inappropriate"]
53
- "bitching": ["inappropriate"]
54
- "bitchtit": ["discriminatory"]
55
- "bitchy": ["insult"]
56
51
  "blow job": ["sexual"]
57
52
  "blowjob": ["sexual"]
58
53
  "bollocks": ["sexual"]
59
54
  "bollox": ["sexual"]
60
55
  "boner": ["sexual"]
61
- "brotherfucker": ["discriminatory"]
62
56
  "bullshit": ["inappropriate"]
63
- "bullshit": ["inappropriate"]
64
- "bumblefuck": ["discriminatory"]
65
57
  "butt plug": ["sexual"]
66
58
  "butt-pirate": ["discriminatory"]
67
- "buttfucka": ["discriminatory"]
68
- "buttfucker": ["discriminatory"]
69
59
  "camel toe": ["sexual"]
70
60
  "carpetmuncher": ["discriminatory"]
71
61
  "chinc": ["discriminatory"]
@@ -74,14 +64,11 @@ simple:
74
64
  "chode": ["sexual"]
75
65
  "clit": ["sexual"]
76
66
  "clitface": ["insult"]
77
- "clitfuck": ["sexual"]
78
- "clusterfuck": ["inappropriate"]
79
67
  "cock": ["sexual"]
80
68
  "cockass": "Jerk"
81
69
  "cockbite": ["insult"]
82
70
  "cockburger": ["insult"]
83
71
  "cockface": ["insult"]
84
- "cockfucker": ["insult"]
85
72
  "cockhead": ["insult"]
86
73
  "cockjockey": ["discriminatory"]
87
74
  "cockknoker": ["discriminatory"]
@@ -95,16 +82,14 @@ simple:
95
82
  "cockshit": ["insult"]
96
83
  "cocksmith": ["discriminatory"]
97
84
  "cocksmoker": ["discriminatory"]
98
- "cocksuck": ["sexual"]
85
+ "cocksuck": ["sexual", "discriminatory"]
99
86
  "cocksucked": ["sexual"]
100
87
  "cocksucker": ["discriminatory", "sexual"]
101
88
  "cocksucking": ["sexual", "discriminatory"]
102
- "cocksucks": ["sexual", "discriminatory"]
103
89
  "coochie": ["sexual"]
104
90
  "coochy": ["sexual"]
105
91
  "coon": ["discriminatory"]
106
92
  "cooter": ["sexual"]
107
- "cracker": ["discriminatory"]
108
93
  "cum": ["sexual"]
109
94
  "cumbubble": ["insult"]
110
95
  "cumdumpster": ["sexual"]
@@ -118,7 +103,6 @@ simple:
118
103
  "cunillingus": ["sexual"]
119
104
  "cunnie": ["sexual"]
120
105
  "cunnilingus": ["sexual"]
121
- "cunnilingus": ["sexual"]
122
106
  "cunt": ["insult", "sexual"]
123
107
  "cuntface": ["insult"]
124
108
  "cunthole": ["sexual"]
@@ -128,19 +112,11 @@ simple:
128
112
  "cuntrag": ["insult"]
129
113
  "cuntslut": ["insult"]
130
114
  "cyberfuc": ["sexual"]
131
- "cyberfuck": ["sexual"]
132
- "cyberfucked": ["sexual"]
133
- "cyberfucker": ["sexual"]
134
- "cyberfucking": ["sexual"]
135
- "dago": ["discriminatory"]
136
- "damn": ["inappropriate"]
137
- "deggo": ["discriminatory"]
115
+ "dammit": ["inappropriate", "blasphemy"]
138
116
  "dick": ["sexual", "insult"]
139
117
  "dickbag": ["insult"]
140
- "dickbeaters": ["sexual"]
118
+ "dickbeater": ["sexual"]
141
119
  "dickface": ["insult"]
142
- "dickfuck": ["insult"]
143
- "dickfucker": ["discriminatory"]
144
120
  "dickhead": ["insult"]
145
121
  "dickhole": ["sexual"]
146
122
  "dickjuice": ["sexual"]
@@ -165,26 +141,15 @@ simple:
165
141
  "dumass": ["insult"]
166
142
  "dumb ass": ["insult"]
167
143
  "dumbass": ["insult"]
168
- "dumbfuck": ["insult"]
169
144
  "dumbshit": ["insult"]
170
145
  "dumshit": ["insult"]
171
146
  "dyke": ["discriminatory"]
172
- "ejaculate": ["sexual"]
173
- "ejaculated": ["sexual"]
174
- "ejaculates": ["sexual"]
175
- "ejaculating": ["sexual"]
176
- "ejaculation": ["sexual"]
177
147
  "fag": ["discriminatory"]
178
148
  "fagbag": ["discriminatory"]
179
- "fagfucker": ["discriminatory"]
180
149
  "fagging": ["discriminatory"]
181
150
  "faggit": ["discriminatory"]
182
- "faggot": ["discriminatory"]
183
- "faggot": ["discriminatory"]
184
- "faggotcock": ["discriminatory"]
185
- "faggs": ["discriminatory"]
151
+ "fagg": ["discriminatory"]
186
152
  "fagot": ["discriminatory"]
187
- "fags": ["discriminatory"]
188
153
  "fagtard": ["discriminatory"]
189
154
  "fart": ["inappropriate"]
190
155
  "farted": ["inappropriate"]
@@ -193,42 +158,8 @@ simple:
193
158
  "fatass": ["insult"]
194
159
  "felatio": ["sexual"]
195
160
  "fellatio": ["sexual"]
196
- "fellatio": ["sexual"]
197
161
  "feltch": ["sexual"]
198
- "fingerfuck": ["sexual"]
199
- "fingerfucked": ["sexual"]
200
- "fingerfucker": ["sexual"]
201
- "fingerfucking": ["sexual"]
202
- "fingerfucks": ["sexual"]
203
- "fistfuck": ["sexual"]
204
- "fistfucked": ["sexual"]
205
- "fistfucker": ["sexual"]
206
- "fistfucking": ["sexual"]
207
162
  "flamer": ["discriminatory"]
208
- "fuck": ["sexual"]
209
- "fuckass": ["insult"]
210
- "fuckbag": ["insult"]
211
- "fuckboy": ["insult"]
212
- "fuckbrain": ["insult"]
213
- "fuckbutt": ["sexual"]
214
- "fucked": ["sexual"]
215
- "fucker": ["sexual", "insult"]
216
- "fuckersucker": ["insult"]
217
- "fuckface": ["insult"]
218
- "fuckhead": ["sexual"]
219
- "fuckhole": ["insult"]
220
- "fuckin": ["sexual"]
221
- "fucking": ["sexual"]
222
- "fuckme": ["sexual"]
223
- "fucknut": ["insult"]
224
- "fucknutt": ["insult"]
225
- "fuckoff": ["insult"]
226
- "fuckstick": ["sexual"]
227
- "fucktard": ["insult"]
228
- "fuckup": ["insult"]
229
- "fuckwad": ["insult"]
230
- "fuckwit": ["insult"]
231
- "fuckwitt": ["insult"]
232
163
  "fudgepacker": ["discriminatory"]
233
164
  "fuk": ["sexual"]
234
165
  "gangbang": ["sexual"]
@@ -237,15 +168,10 @@ simple:
237
168
  "gayass": ["sexual"]
238
169
  "gaybob": ["discriminatory"]
239
170
  "gaydo": ["discriminatory"]
240
- "gayfuck": ["discriminatory"]
241
- "gayfuckist": ["discriminatory"]
242
171
  "gaylord": ["discriminatory"]
243
172
  "gaysex": ["discriminatory"]
244
173
  "gaytard": ["discriminatory"]
245
174
  "gaywad": ["discriminatory"]
246
- "goddamn": ["inappropriate", "blasphemy"]
247
- "goddamn": ["inappropriate", "blasphemy"]
248
- "goddamnit": ["inappropriate", "blasphemy"]
249
175
  "gooch": ["sexual"]
250
176
  "gook": ["discriminatory"]
251
177
  "gringo": ["discriminatory"]
@@ -254,12 +180,11 @@ simple:
254
180
  "hardcoresex": ["sexual"]
255
181
  "heeb": ["discriminatory"]
256
182
  "hell": ["inappropriate"]
257
- "hell": ["inappropriate"]
258
183
  "ho": ["discriminatory"]
259
184
  "hoe": ["discriminatory"]
260
185
  "homo": ["discriminatory"]
261
- "homodumbshit": ["insult"]
262
186
  "honkey": ["discriminatory"]
187
+ "honky": ["discriminatory"]
263
188
  "horniest": ["sexual"]
264
189
  "horny": ["sexual"]
265
190
  "hotsex": ["sexual"]
@@ -282,7 +207,6 @@ simple:
282
207
  "kumer": ["sexual"]
283
208
  "kummer": ["sexual"]
284
209
  "kumming": ["sexual"]
285
- "kums": ["sexual"]
286
210
  "kunilingus": ["sexual"]
287
211
  "kunt": ["sexual"]
288
212
  "kyke": ["discriminatory"]
@@ -292,27 +216,11 @@ simple:
292
216
  "lust": ["sexual"]
293
217
  "lusting": ["sexual"]
294
218
  "mcfagget": ["discriminatory"]
295
- "mick": ["discriminatory"]
296
219
  "minge": ["sexual"]
297
- "mothafuck": ["sexual"]
298
- "mothafucka": ["sexual", "insult"]
299
- "mothafuckaz": ["sexual"]
300
- "mothafucked": ["sexual"]
301
- "mothafucker": ["sexual", "insult"]
302
- "mothafuckin": ["sexual"]
303
- "mothafucking": ["sexual"]
304
- "mothafucks": ["sexual"]
305
- "motherfuck": ["sexual"]
306
- "motherfucked": ["sexual"]
307
- "motherfucker": ["sexual", "insult"]
308
- "motherfuckin": ["sexual"]
309
- "motherfucking": ["sexual"]
310
220
  "muff": ["sexual"]
311
221
  "muffdiver": ["discriminatory", "sexual"]
312
- "munging": ["sexual"]
313
222
  "negro": ["discriminatory"]
314
223
  "nigga": ["discriminatory"]
315
- "nigger": ["discriminatory"]
316
224
  "niglet": ["discriminatory"]
317
225
  "nut sack": ["sexual"]
318
226
  "nutsack": ["sexual"]
@@ -322,16 +230,12 @@ simple:
322
230
  "panooch": ["sexual"]
323
231
  "pecker": ["sexual"]
324
232
  "peckerhead": ["insult"]
325
- "penis": ["sexual"]
326
- "penisfucker": ["discriminatory"]
327
- "penispuffer": ["discriminatory"]
328
233
  "phonesex": ["sexual"]
329
234
  "phuk": ["sexual"]
330
235
  "phuked": ["sexual"]
331
236
  "phuking": ["sexual"]
332
237
  "phukked": ["sexual"]
333
238
  "phukking": ["sexual"]
334
- "phuks": ["sexual"]
335
239
  "phuq": ["sexual"]
336
240
  "pis": ["sexual"]
337
241
  "pises": ["sexual"]
@@ -341,8 +245,7 @@ simple:
341
245
  "piss": ["inappropriate"]
342
246
  "pissed": ["inappropriate"]
343
247
  "pisser": ["sexual"]
344
- "pisses": ["sexual"]
345
- "pissflaps": ["sexual"]
248
+ "pissflap": ["sexual"]
346
249
  "pissin": ["sexual"]
347
250
  "pissing": ["sexual"]
348
251
  "pissoff": ["sexual"]
@@ -357,7 +260,6 @@ simple:
357
260
  "porn": ["sexual"]
358
261
  "porno": ["sexual"]
359
262
  "pornography": ["sexual"]
360
- "pornos": ["sexual"]
361
263
  "prick": ["sexual"]
362
264
  "punanny": ["sexual"]
363
265
  "punta": ["insult"]
@@ -374,7 +276,6 @@ simple:
374
276
  "renob": ["sexual"]
375
277
  "rimjob": ["sexual"]
376
278
  "ruski": ["discriminatory"]
377
- "sandnigger": ["discriminatory"]
378
279
  "schlong": ["sexual"]
379
280
  "scrote": ["sexual"]
380
281
  "shit": ["sexual", "inappropriate"]
@@ -405,11 +306,8 @@ simple:
405
306
  "shiznit": ["inappropriate"]
406
307
  "skank": ["insult"]
407
308
  "skeet": ["sexual"]
408
- "skullfuck": ["sexual"]
409
- "slut": ["sexual"]
410
309
  "slut": ["discriminatory"]
411
310
  "slutbag": ["discriminatory"]
412
- "sluts": ["sexual"]
413
311
  "smeg": ["inappropriate"]
414
312
  "smut": ["sexual"]
415
313
  "snatch": ["sexual"]
@@ -421,19 +319,14 @@ simple:
421
319
  "testicle": ["sexual"]
422
320
  "thundercunt": ["insult"]
423
321
  "tit": ["sexual"]
424
- "titfuck": ["sexual"]
425
- "tittyfuck": ["sexual"]
426
322
  "twat": ["sexual"]
427
323
  "twatlips": ["insult"]
428
324
  "twatwaffle": ["discriminatory"]
429
- "unclefucker": ["discriminatory"]
430
325
  "va-j-j": ["sexual"]
431
326
  "vag": ["sexual"]
432
327
  "vagina": ["sexual"]
433
328
  "vjayjay": ["sexual"]
434
329
  "wank": ["sexual"]
435
330
  "wetback": ["discriminatory"]
436
- "whore": ["insult"]
437
- "whorebag": ["insult"]
438
- "whoreface": ["insult"]
439
- "wop": ["discriminatory"]
331
+ emoji:
332
+ "1f595": ["insult"]
data/lib/swearjar.rb CHANGED
@@ -1,63 +1,89 @@
1
1
  require 'yaml'
2
- require 'fuzzy_hash'
3
2
 
4
3
  class Swearjar
5
-
6
4
  def self.default
7
- from_language
5
+ from_language('en')
8
6
  end
9
7
 
10
- def self.from_language(language = 'en')
8
+ def self.from_language(language)
11
9
  new(File.join(File.dirname(__FILE__), 'config', "#{language}.yml"))
12
10
  end
13
11
 
14
- attr_reader :tester, :hash
15
-
16
12
  def initialize(file = nil)
17
- @tester = FuzzyHash.new
18
13
  @hash = {}
14
+ @regexs = {}
19
15
  load_file(file) if file
20
16
  end
21
17
 
18
+ def profane?(string)
19
+ string = string.to_s
20
+ scan(string) {|_word, test| return true if test }
21
+ false
22
+ end
23
+
24
+ def scorecard(string)
25
+ string = string.to_s
26
+ scorecard = {}
27
+ scan(string) do |_word, test|
28
+ next unless test
29
+ test.each do |type|
30
+ scorecard[type] = 0 unless scorecard.key?(type)
31
+ scorecard[type] += 1
32
+ end
33
+ end
34
+ scorecard
35
+ end
36
+
37
+ def censor(string)
38
+ censored_string = string.to_s.dup
39
+ scan(string) do |word, test|
40
+ next unless test
41
+ replacement = block_given? ? yield(word) : word.gsub(/\S/, '*')
42
+ censored_string.gsub!(word, replacement)
43
+ end
44
+ censored_string
45
+ end
46
+
47
+ private
48
+
22
49
  def load_file(file)
23
50
  data = YAML.load_file(file)
24
51
 
25
52
  data['regex'].each do |pattern, type|
26
- @tester[Regexp.new(pattern)] = type
53
+ @regexs[Regexp.new(pattern, "i")] = type
27
54
  end if data['regex']
28
55
 
29
56
  data['simple'].each do |test, type|
30
57
  @hash[test] = type
31
58
  end if data['simple']
59
+
60
+ data['emoji'].each do |unicode, type|
61
+ char = [unicode.hex].pack("U")
62
+ @hash[char] = type
63
+ end if data['emoji']
32
64
  end
33
65
 
66
+ WORD_REGEX = /\b[a-zA-Z-]+\b/
67
+
68
+ # https://github.com/franklsf95/ruby-emoji-regex
69
+ EMOJI_REGEX = /[\u{00A9}\u{00AE}\u{203C}\u{2049}\u{2122}\u{2139}\u{2194}-\u{2199}\u{21A9}-\u{21AA}\u{231A}-\u{231B}\u{2328}\u{23CF}\u{23E9}-\u{23F3}\u{23F8}-\u{23FA}\u{24C2}\u{25AA}-\u{25AB}\u{25B6}\u{25C0}\u{25FB}-\u{25FE}\u{2600}-\u{2604}\u{260E}\u{2611}\u{2614}-\u{2615}\u{2618}\u{261D}\u{2620}\u{2622}-\u{2623}\u{2626}\u{262A}\u{262E}-\u{262F}\u{2638}-\u{263A}\u{2648}-\u{2653}\u{2660}\u{2663}\u{2665}-\u{2666}\u{2668}\u{267B}\u{267F}\u{2692}-\u{2694}\u{2696}-\u{2697}\u{2699}\u{269B}-\u{269C}\u{26A0}-\u{26A1}\u{26AA}-\u{26AB}\u{26B0}-\u{26B1}\u{26BD}-\u{26BE}\u{26C4}-\u{26C5}\u{26C8}\u{26CE}-\u{26CF}\u{26D1}\u{26D3}-\u{26D4}\u{26E9}-\u{26EA}\u{26F0}-\u{26F5}\u{26F7}-\u{26FA}\u{26FD}\u{2702}\u{2705}\u{2708}-\u{270D}\u{270F}\u{2712}\u{2714}\u{2716}\u{271D}\u{2721}\u{2728}\u{2733}-\u{2734}\u{2744}\u{2747}\u{274C}\u{274E}\u{2753}-\u{2755}\u{2757}\u{2763}-\u{2764}\u{2795}-\u{2797}\u{27A1}\u{27B0}\u{27BF}\u{2934}-\u{2935}\u{2B05}-\u{2B07}\u{2B1B}-\u{2B1C}\u{2B50}\u{2B55}\u{3030}\u{303D}\u{3297}\u{3299}\u{1F004}\u{1F0CF}\u{1F170}-\u{1F171}\u{1F17E}-\u{1F17F}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F201}-\u{1F202}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F23A}\u{1F250}-\u{1F251}\u{1F300}-\u{1F321}\u{1F324}-\u{1F393}\u{1F396}-\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}-\u{1F3F0}\u{1F3F3}-\u{1F3F5}\u{1F3F7}-\u{1F4FD}\u{1F4FF}-\u{1F53D}\u{1F549}-\u{1F54E}\u{1F550}-\u{1F567}\u{1F56F}-\u{1F570}\u{1F573}-\u{1F579}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F590}\u{1F595}-\u{1F596}\u{1F5A5}\u{1F5A8}\u{1F5B1}-\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5EF}\u{1F5F3}\u{1F5FA}-\u{1F64F}\u{1F680}-\u{1F6C5}\u{1F6CB}-\u{1F6D0}\u{1F6E0}-\u{1F6E5}\u{1F6E9}\u{1F6EB}-\u{1F6EC}\u{1F6F0}\u{1F6F3}\u{1F910}-\u{1F918}\u{1F980}-\u{1F984}\u{1F9C0}]/
70
+
34
71
  def scan(string, &block)
35
- string = string.to_s
36
- string.scan(/\b[a-zA-Z-]+\b/) do |word|
37
- block.call(word, hash[word.downcase] || hash[word.downcase.gsub(/e?s$/,'')] )
72
+ string.scan(WORD_REGEX) do |word|
73
+ block.call(word,
74
+ @hash[word.downcase] ||
75
+ @hash[word.downcase.gsub(/s\z/,'')] ||
76
+ @hash[word.downcase.gsub(/es\z/,'')])
38
77
  end
39
- if match = tester.match_with_result(string)
40
- block.call(match.last, match.first)
41
- end
42
- end
43
78
 
44
- def profane?(string)
45
- string = string.to_s
46
- scan(string) {|word, test| return true if !test.nil?}
47
- return false
48
- end
49
-
50
- def scorecard(string)
51
- string = string.to_s
52
- scorecard = {}
53
- scan(string) {|word, test| test.each { |type| scorecard.key?(type) ? scorecard[type] += 1 : scorecard[type] = 1} if test}
54
- scorecard
55
- end
79
+ string.scan(EMOJI_REGEX) do |emoji_char|
80
+ block.call(emoji_char, @hash[emoji_char])
81
+ end
56
82
 
57
- def censor(string)
58
- censored_string = string.to_s.dup
59
- scan(string) {|word, test| censored_string.gsub!(word, block_given? ? yield(word) : word.gsub(/\S/, '*')) if test}
60
- censored_string
83
+ @regexs.each do |regex, type|
84
+ string.scan(regex) do |word|
85
+ block.call(word, type)
86
+ end
87
+ end
61
88
  end
62
-
63
- end
89
+ end
@@ -1,3 +1,3 @@
1
1
  class Swearjar
2
- VERSION = '1.0.0'
3
- end
2
+ VERSION = '1.1.0'
3
+ end
data/spec/data/swear.yml CHANGED
@@ -1,2 +1,3 @@
1
+ ---
1
2
  simple:
2
3
  "python": ["sexual"]
@@ -1,48 +1,87 @@
1
+ # encoding: UTF-8
1
2
  require 'spec_helper'
2
3
 
3
4
  describe Swearjar do
4
-
5
5
  it "should detect dirty words" do
6
- Swearjar.default.profane?('fuck you jim henson').should be_true
6
+ expect(Swearjar.default.profane?('jackass chan')).to be_truthy
7
7
  end
8
8
 
9
9
  it "should detect dirty words regardless of case" do
10
- Swearjar.default.profane?('FuCk you jim henson').should be_true
10
+ expect(Swearjar.default.profane?('JACKASS CHAN')).to be_truthy
11
11
  end
12
12
 
13
13
  it "should not detect non-dirty words" do
14
- Swearjar.default.profane?('i love you jim henson').should be_false
14
+ expect(Swearjar.default.profane?('I love Jackie Chan movies')).to be_falsey
15
15
  end
16
16
 
17
17
  it "should give us a scorecard" do
18
- Swearjar.default.scorecard('fuck you jim henson').should == {'sexual'=>1}
18
+ expect(Swearjar.default.scorecard('honky jim henson')).to eq({'discriminatory'=>1})
19
19
  end
20
20
 
21
21
  it "should detect multiword" do
22
- Swearjar.default.scorecard('jim henson has a hard on').should == {'sexual'=>1}
22
+ expect(Swearjar.default.scorecard('jim henson has a hard on')).to eq({'sexual'=>1})
23
23
  end
24
24
 
25
25
  it "should detect multiword plurals" do
26
- Swearjar.default.scorecard('jim henson has a hard ons').should == {'sexual'=>1}
26
+ expect(Swearjar.default.scorecard('jim henson has a hard ons')).to eq({'sexual'=>1})
27
27
  end
28
28
 
29
29
  it "should detect simple dirty plurals" do
30
- Swearjar.default.profane?('jim henson had two dicks').should be_true
31
- Swearjar.default.profane?('jim henson has two asses').should be_true
30
+ expect(Swearjar.default.profane?('jim henson had two dicks')).to be_truthy
31
+ expect(Swearjar.default.profane?('jim henson has two asses')).to be_truthy
32
32
  end
33
33
 
34
34
  it "should censor a string" do
35
- Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody').should == 'jim henson has a massive **** ** he is gonna use to **** everybody'
35
+ expect(Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody')).to eq('jim henson has a massive **** ** he is gonna use to **** everybody')
36
36
  end
37
37
 
38
38
  it "should not do much when given a non-string" do
39
- Swearjar.default.profane?(nil).should be_false
39
+ expect(Swearjar.default.profane?(nil)).to be_falsey
40
+ end
41
+
42
+ it "doesn't mark an empty string as profane" do
43
+ expect(Swearjar.default.profane?("")).to be_falsey
40
44
  end
41
45
 
42
46
  it "should allow you to load a new yaml file" do
43
- sj = Swearjar.new
44
- sj.load_file(File.expand_path('../data/swear.yml', __FILE__))
45
- sj.censor("Python is the best language!").should == "****** is the best language!"
47
+ sj = Swearjar.new(File.expand_path('../data/swear.yml', __FILE__))
48
+ expect(sj.censor("Python is the best language!")).to eq("****** is the best language!")
49
+ end
50
+
51
+ it "detects multiple entries" do
52
+ expect(Swearjar.default.scorecard("cunts cunts cunts")).to eq({"insult" => 3, "sexual" => 3})
53
+ expect(Swearjar.default.scorecard("damn damnit dammit")).to eq({"inappropriate" => 3, "blasphemy" => 3})
54
+ end
55
+
56
+ it "detects plurals of words ending in 'e'" do
57
+ expect(Swearjar.default.profane?("asspirates")).to be_truthy
58
+ end
59
+
60
+ it "detects profane emojis" do
61
+ expect(Swearjar.default.profane?("🖕")).to be_truthy
62
+ end
63
+
64
+ it "detects profane emojis with skin tone" do
65
+ expect(Swearjar.default.profane?("🖕🏾")).to be_truthy
66
+ end
67
+
68
+ it "censors profane emojis" do
69
+ expect(Swearjar.default.censor("Fuck you🖕 🖕🖕")).to eq("**** you* **")
70
+ end
71
+
72
+ it "censors with regular expression matching" do
73
+ expect(Swearjar.default.censor("foonIgg3rbar foo nigger")).to eq("************ foo ******")
46
74
  end
47
75
 
48
- end
76
+ it "censors with a mix of normal and regular expression matches" do
77
+ expect(Swearjar.default.censor("fagfaggot faggotfag")).to eq("********* *********")
78
+ end
79
+
80
+ it "detects scorecard with regular expression matching" do
81
+ expect(Swearjar.default.scorecard("foonIgg3rbar foo nigger")).to eq({"discriminatory" => 2})
82
+ end
83
+
84
+ xit "doesn't substitute simple words when they occur later as substrings" do
85
+ expect(Swearjar.default.censor("anus janus")).to eq("**** janus")
86
+ end
87
+ end
data/swearjar.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.summary = "Put another nickel in the swearjar. Simple profanity detection with content analysis"
11
11
  s.description = "#{s.summary}."
12
12
  s.email = %q{joshbuddy@gmail.com}
13
- s.extra_rdoc_files = ['README.rdoc']
13
+ s.extra_rdoc_files = ['README.md']
14
14
  s.files = `git ls-files`.split("\n")
15
15
  s.homepage = %q{http://github.com/joshbuddy/swearjar}
16
16
  s.rdoc_options = ["--charset=UTF-8"]
@@ -20,18 +20,8 @@ Gem::Specification.new do |s|
20
20
  s.rubyforge_project = 'swearjar'
21
21
 
22
22
  # dependencies
23
- s.add_runtime_dependency 'fuzzyhash', '~> 0.0.11'
24
- s.add_development_dependency 'rake', '~> 0.8.7'
25
- s.add_development_dependency 'rspec', '~> 1.3.0'
26
-
27
- if s.respond_to? :specification_version then
28
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
29
- s.specification_version = 3
30
-
31
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
32
- else
33
- end
34
- else
35
- end
23
+ s.add_development_dependency 'rake', '~> 10.5'
24
+ s.add_development_dependency 'rspec', '~> 3.4'
25
+ s.add_development_dependency 'pry', '~> 0.10'
36
26
  end
37
27
 
metadata CHANGED
@@ -1,99 +1,105 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swearjar
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
5
- prerelease:
4
+ version: 1.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Joshua Hull
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-04-03 00:00:00.000000000 Z
11
+ date: 2016-02-14 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
- name: fuzzyhash
16
- requirement: &70251065583700 !ruby/object:Gem::Requirement
17
- none: false
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
- - - ~>
17
+ - - "~>"
20
18
  - !ruby/object:Gem::Version
21
- version: 0.0.11
22
- type: :runtime
19
+ version: '10.5'
20
+ type: :development
23
21
  prerelease: false
24
- version_requirements: *70251065583700
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '10.5'
25
27
  - !ruby/object:Gem::Dependency
26
- name: rake
27
- requirement: &70251065583240 !ruby/object:Gem::Requirement
28
- none: false
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
- - - ~>
31
+ - - "~>"
31
32
  - !ruby/object:Gem::Version
32
- version: 0.8.7
33
+ version: '3.4'
33
34
  type: :development
34
35
  prerelease: false
35
- version_requirements: *70251065583240
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.4'
36
41
  - !ruby/object:Gem::Dependency
37
- name: rspec
38
- requirement: &70251065582780 !ruby/object:Gem::Requirement
39
- none: false
42
+ name: pry
43
+ requirement: !ruby/object:Gem::Requirement
40
44
  requirements:
41
- - - ~>
45
+ - - "~>"
42
46
  - !ruby/object:Gem::Version
43
- version: 1.3.0
47
+ version: '0.10'
44
48
  type: :development
45
49
  prerelease: false
46
- version_requirements: *70251065582780
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.10'
47
55
  description: Put another nickel in the swearjar. Simple profanity detection with content
48
56
  analysis.
49
57
  email: joshbuddy@gmail.com
50
58
  executables: []
51
59
  extensions: []
52
60
  extra_rdoc_files:
53
- - README.rdoc
61
+ - README.md
54
62
  files:
55
- - .gitignore
63
+ - ".gitignore"
64
+ - ".travis.yml"
65
+ - CHANGELOG.md
56
66
  - Gemfile
57
- - README.rdoc
67
+ - README.md
58
68
  - Rakefile
59
69
  - lib/config/en.yml
60
70
  - lib/swearjar.rb
61
- - lib/swearjar/tester.rb
62
71
  - lib/swearjar/version.rb
63
72
  - spec/data/swear.yml
64
- - spec/spec.opts
65
73
  - spec/spec_helper.rb
66
74
  - spec/swearjar_spec.rb
67
75
  - swearjar.gemspec
68
76
  homepage: http://github.com/joshbuddy/swearjar
69
77
  licenses: []
78
+ metadata: {}
70
79
  post_install_message:
71
80
  rdoc_options:
72
- - --charset=UTF-8
81
+ - "--charset=UTF-8"
73
82
  require_paths:
74
83
  - lib
75
84
  required_ruby_version: !ruby/object:Gem::Requirement
76
- none: false
77
85
  requirements:
78
- - - ! '>='
86
+ - - ">="
79
87
  - !ruby/object:Gem::Version
80
88
  version: '0'
81
89
  required_rubygems_version: !ruby/object:Gem::Requirement
82
- none: false
83
90
  requirements:
84
- - - ! '>='
91
+ - - ">="
85
92
  - !ruby/object:Gem::Version
86
93
  version: '0'
87
94
  requirements: []
88
95
  rubyforge_project: swearjar
89
- rubygems_version: 1.8.11
96
+ rubygems_version: 2.2.3
90
97
  signing_key:
91
- specification_version: 3
98
+ specification_version: 4
92
99
  summary: Put another nickel in the swearjar. Simple profanity detection with content
93
100
  analysis
94
101
  test_files:
95
102
  - spec/data/swear.yml
96
- - spec/spec.opts
97
103
  - spec/spec_helper.rb
98
104
  - spec/swearjar_spec.rb
99
105
  has_rdoc:
data/README.rdoc DELETED
@@ -1,28 +0,0 @@
1
- = Swearjar
2
-
3
- Simple profanity detection with content analysis.
4
-
5
- == Installation
6
-
7
- gem install swearjar
8
-
9
- == Usage
10
-
11
- require 'swearjar'
12
-
13
- Swearjar.default.profane?('jim henson has a massive hard on he is gonna use to fuck everybody')
14
- << true
15
-
16
- Swearjar.default.scorecard('jim henson has a massive hard on he is gonna use to fuck everybody')
17
- << {:sexual => 2}
18
-
19
- Swearjar.default.censor('jim henson has a massive hard on he is gonna use to fuck everybody')
20
- << 'jim henson has a massive **** ** he is gonna use to **** everybody'
21
-
22
- To load from a custom yaml file, you can do the following
23
-
24
- sj = Swearjar.new
25
- sj.load_file('my_yaml.yml')
26
-
27
- The YAML file can have two sections, `simple` and `regex`. For an example, see `lib/config/en.yml`.
28
-
@@ -1,32 +0,0 @@
1
- require 'yaml'
2
- require 'fuzzy_hash'
3
- require 'bloomfilter'
4
-
5
- class Swearjar
6
- class Tester
7
-
8
- def initialize(config_file)
9
- data = YAML.load_file
10
-
11
- @tester = FuzzyHash.new
12
-
13
- data['regex'].each do |pattern, type|
14
- @tester[Regexp.new(pattern)] = type
15
- end
16
-
17
- data['simple'].each do |test, type|
18
- @tester[test] = type
19
- end
20
-
21
- end
22
-
23
- def scan(string, &block)
24
- string.scan(/\b[\b]+\b/, &block)
25
- end
26
-
27
- def profane?(string)
28
- scan(string) {|w| return true}
29
- end
30
-
31
- end
32
- end
data/spec/spec.opts DELETED
@@ -1,7 +0,0 @@
1
- --colour
2
- --format
3
- specdoc
4
- --loadby
5
- mtime
6
- --reverse
7
- --backtrace