cf-swearjar 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Gemfile +3 -0
- data/README.rdoc +28 -0
- data/Rakefile +18 -0
- data/cf-swearjar.gemspec +37 -0
- data/lib/config/en.yml +1521 -0
- data/lib/swearjar/tester.rb +32 -0
- data/lib/swearjar/version.rb +3 -0
- data/lib/swearjar.rb +63 -0
- data/spec/data/swear.yml +2 -0
- data/spec/spec.opts +7 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/swearjar_spec.rb +48 -0
- metadata +113 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'fuzzy_hash'
|
3
|
+
require 'bloomfilter'
|
4
|
+
|
5
|
+
class Swearjar
  # Stand-alone profanity tester that keeps every entry of a YAML word list
  # (both plain words and regular expressions) in a single FuzzyHash.
  #
  # The word list may contain two optional top-level sections:
  #   'regex'  - maps a regular-expression source string to its value
  #   'simple' - maps a plain word to its value
  class Tester

    # Loads the word list found at +config_file+.
    #
    # config_file - path to a YAML file laid out as described above. An empty
    #               file, or a file missing either section, yields a tester
    #               that simply has fewer (or no) entries.
    def initialize(config_file)
      # YAML.load_file returns a falsy value for an empty document.
      data = YAML.load_file(config_file) || {}

      @tester = FuzzyHash.new

      (data['regex'] || {}).each do |pattern, type|
        @tester[Regexp.new(pattern)] = type
      end

      (data['simple'] || {}).each do |test, type|
        @tester[test] = type
      end
    end

    # Splits +string+ into words (runs of ASCII letters and hyphens).
    #
    # With a block, each word is yielded in turn; without one, the words are
    # returned as an Array. Non-string input is coerced with #to_s.
    def scan(string, &block)
      string.to_s.scan(/\b[a-zA-Z-]+\b/, &block)
    end

    # Returns true when at least one word of +string+ has an entry in the
    # word list, false otherwise.
    #
    # NOTE(review): relies on FuzzyHash#[] resolving both exact keys and
    # Regexp keys - confirm against the fuzzy_hash gem in use.
    def profane?(string)
      scan(string) { |word| return true if @tester[word.downcase] }
      false
    end

  end
end
|
data/lib/swearjar.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'fuzzy_hash'
|
3
|
+
|
4
|
+
# Profanity detector backed by a YAML word list.
#
# A word list may contain two optional top-level sections:
#   'simple' - maps a lower-case word to its categories; looked up per word,
#              with a fallback that strips a trailing "s" / "es"
#   'regex'  - maps a regular-expression source to its categories; matched
#              against the whole input string
class Swearjar

  # Pattern used to split input into candidate words.
  WORD_PATTERN = /\b[a-zA-Z-]+\b/

  # Returns an instance loaded with the default ('en') word list.
  def self.default
    from_language
  end

  # Returns an instance loaded from config/<language>.yml next to this file.
  def self.from_language(language = 'en')
    new(File.join(File.dirname(__FILE__), 'config', "#{language}.yml"))
  end

  # tester - FuzzyHash holding the regular-expression entries.
  # hash   - Hash holding the plain-word entries.
  #
  # NOTE: the :hash reader is part of the published interface, but it
  # replaces Object#hash, so instances cannot safely be used as Hash keys.
  attr_reader :tester, :hash

  # file - optional path to a YAML word list to load immediately.
  def initialize(file = nil)
    @tester = FuzzyHash.new
    @hash = {}
    load_file(file) if file
  end

  # Merges the word list stored in the YAML file +file+ into this instance.
  # Either section may be absent, and an empty file is accepted.
  def load_file(file)
    # YAML.load_file returns a falsy value for an empty document.
    data = YAML.load_file(file) || {}

    (data['regex'] || {}).each do |pattern, type|
      @tester[Regexp.new(pattern)] = type
    end

    (data['simple'] || {}).each do |test, type|
      @hash[test] = type
    end
  end

  # Walks +string+ and calls the block with (text, categories) for every word
  # (categories is nil for words that are not in the list) and then once more
  # for the regular-expression match, if there is one.
  #
  # Non-string input is coerced with #to_s. Without a block an Enumerator
  # over the same pairs is returned.
  def scan(string, &block)
    return enum_for(:scan, string) unless block

    string = string.to_s
    string.scan(WORD_PATTERN) do |word|
      block.call(word, lookup(word))
    end

    # match_with_result is used here as returning [categories, matched_text].
    if (match = tester.match_with_result(string))
      block.call(match.last, match.first)
    end
  end

  # Returns true when +string+ contains at least one listed word or matches a
  # listed regular expression, false otherwise.
  def profane?(string)
    scan(string) { |_word, test| return true if test }
    false
  end

  # Returns a Hash mapping each category to the number of hits found in
  # +string+. A category value that is a single scalar is counted as one
  # category.
  def scorecard(string)
    scorecard = {}
    scan(string) do |_word, test|
      Array(test).each do |type|
        scorecard[type] = scorecard.fetch(type, 0) + 1
      end
    end
    scorecard
  end

  # Returns a copy of +string+ with every hit replaced. By default each
  # non-whitespace character of a hit becomes '*'; when a block is given it
  # is called with the offending text and its return value is used instead.
  #
  # Listed words are replaced token by token, so a listed word that merely
  # occurs inside a longer, clean word is left alone. Replacements are
  # inserted literally (backslash sequences are not expanded).
  def censor(string)
    text = string.to_s

    censored_string = text.gsub(WORD_PATTERN) do |word|
      if lookup(word)
        block_given? ? yield(word) : mask(word)
      else
        word
      end
    end

    if (match = tester.match_with_result(text)) && match.first
      phrase = match.last
      replacement = block_given? ? yield(phrase) : mask(phrase)
      censored_string.gsub!(phrase) { replacement }
    end

    censored_string
  end

  private

  # Categories for +word+: exact lower-case lookup first, then the word with
  # a trailing "s" / "es" removed. Returns nil when neither is listed.
  def lookup(word)
    key = word.downcase
    hash[key] || hash[key.sub(/e?s\z/, '')]
  end

  # Replaces every non-whitespace character of +text+ with '*'.
  def mask(text)
    text.gsub(/\S/, '*')
  end

end
|
data/spec/data/swear.yml
ADDED
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'swearjar'))
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Swearjar do

  # Builds a fresh detector from the default word list on every call.
  def jar
    Swearjar.default
  end

  # --- detection -----------------------------------------------------------

  it "should detect dirty words" do
    verdict = jar.profane?('fuck you jim henson')
    verdict.should be_true
  end

  it "should detect dirty words regardless of case" do
    verdict = jar.profane?('FuCk you jim henson')
    verdict.should be_true
  end

  it "should not detect non-dirty words" do
    verdict = jar.profane?('i love you jim henson')
    verdict.should be_false
  end

  # --- scorecards ----------------------------------------------------------

  it "should give us a scorecard" do
    tally = jar.scorecard('fuck you jim henson')
    tally.should == {'sexual'=>1}
  end

  it "should detect multiword" do
    tally = jar.scorecard('jim henson has a hard on')
    tally.should == {'sexual'=>1}
  end

  it "should detect multiword plurals" do
    tally = jar.scorecard('jim henson has a hard ons')
    tally.should == {'sexual'=>1}
  end

  it "should detect simple dirty plurals" do
    [
      'jim henson had two dicks',
      'jim henson has two asses'
    ].each do |sentence|
      jar.profane?(sentence).should be_true
    end
  end

  # --- censoring -----------------------------------------------------------

  it "should censor a string" do
    dirty = 'jim henson has a massive hard on he is gonna use to fuck everybody'
    clean = 'jim henson has a massive **** ** he is gonna use to **** everybody'
    jar.censor(dirty).should == clean
  end

  # --- odd input and custom word lists ---------------------------------------

  it "should not do much when given a non-string" do
    jar.profane?(nil).should be_false
  end

  it "should allow you to load a new yaml file" do
    custom_list = File.expand_path('../data/swear.yml', __FILE__)
    sj = Swearjar.new
    sj.load_file(custom_list)
    sj.censor("Python is the best language!").should == "****** is the best language!"
  end

end
|
metadata
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cf-swearjar
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Joshua Hull
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-06-08 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: fuzzyhash
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.0.11
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.0.11
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.8.7
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.8.7
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ~>
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.3.0
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.3.0
|
62
|
+
description: Put another nickel in the swearjar. Simple profanity detection with content
|
63
|
+
analysis.
|
64
|
+
email: joshbuddy@gmail.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files:
|
68
|
+
- README.rdoc
|
69
|
+
files:
|
70
|
+
- .gitignore
|
71
|
+
- Gemfile
|
72
|
+
- README.rdoc
|
73
|
+
- Rakefile
|
74
|
+
- cf-swearjar.gemspec
|
75
|
+
- lib/config/en.yml
|
76
|
+
- lib/swearjar.rb
|
77
|
+
- lib/swearjar/tester.rb
|
78
|
+
- lib/swearjar/version.rb
|
79
|
+
- spec/data/swear.yml
|
80
|
+
- spec/spec.opts
|
81
|
+
- spec/spec_helper.rb
|
82
|
+
- spec/swearjar_spec.rb
|
83
|
+
homepage: http://github.com/joshbuddy/swearjar
|
84
|
+
licenses: []
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options:
|
87
|
+
- --charset=UTF-8
|
88
|
+
require_paths:
|
89
|
+
- lib
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ! '>='
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
requirements: []
|
103
|
+
rubyforge_project: swearjar
|
104
|
+
rubygems_version: 1.8.24
|
105
|
+
signing_key:
|
106
|
+
specification_version: 3
|
107
|
+
summary: Put another nickel in the swearjar. Simple profanity detection with content
|
108
|
+
analysis
|
109
|
+
test_files:
|
110
|
+
- spec/data/swear.yml
|
111
|
+
- spec/spec.opts
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
- spec/swearjar_spec.rb
|