mr_clean 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +81 -0
- data/bench/bench.rb +22 -0
- data/bench/mr_clean.rb +5 -0
- data/bench/obscenity.rb +6 -0
- data/bench/parallel.rb +40 -0
- data/bench/profanity_filter.rb +8 -0
- data/bench/samples.yml +4 -0
- data/config/bad_words.yml +1060 -0
- data/lib/mr_clean/config.rb +12 -0
- data/lib/mr_clean/filter.rb +17 -0
- data/lib/mr_clean/profanity.rb +11 -0
- data/lib/mr_clean/version.rb +1 -1
- data/lib/mr_clean.rb +4 -0
- data/mr_clean.gemspec +13 -3
- data/spec/mr_clean/config_spec.rb +9 -0
- data/spec/mr_clean/filter_spec.rb +15 -0
- data/spec/mr_clean/profanity_spec.rb +25 -0
- metadata +128 -13
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module MrClean
|
4
|
+
module Config
|
5
|
+
module_function
|
6
|
+
|
7
|
+
def hashify(file)
|
8
|
+
list = YAML::load(File.open(File.expand_path("../../config/#{file}", File.dirname(__FILE__))))
|
9
|
+
list.inject({}) { |memo, word| memo[word] = true; memo }
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module MrClean
|
2
|
+
module Filter
|
3
|
+
module_function
|
4
|
+
|
5
|
+
NON_WORDS = /(?!\s|-)\W|\d|_/
|
6
|
+
ONE_SPACE_STRING = ' '
|
7
|
+
EMPTY_STRING = ''
|
8
|
+
|
9
|
+
def clean(text)
|
10
|
+
text.downcase.gsub(NON_WORDS, EMPTY_STRING)
|
11
|
+
end
|
12
|
+
|
13
|
+
def slice(text)
|
14
|
+
clean(text).split(ONE_SPACE_STRING)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/mr_clean/version.rb
CHANGED
data/lib/mr_clean.rb
CHANGED
data/mr_clean.gemspec
CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |s|
|
|
7
7
|
s.name = 'mr_clean'
|
8
8
|
s.version = MrClean::VERSION
|
9
9
|
s.platform = Gem::Platform::RUBY
|
10
|
-
s.authors = 'CrunchBase'
|
10
|
+
s.authors = 'CrunchBase Engineering Team, Wagner Camarao'
|
11
11
|
s.email = 'engineering@crunchbase.com'
|
12
12
|
s.summary = 'A plain ruby library for filtering profanity, spam and such based on black listed words'
|
13
13
|
|
@@ -16,6 +16,16 @@ Gem::Specification.new do |s|
|
|
16
16
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
17
17
|
s.require_paths = ['lib']
|
18
18
|
|
19
|
-
|
20
|
-
s.add_development_dependency '
|
19
|
+
#bench
|
20
|
+
s.add_development_dependency 'activesupport', '~> 4.0.0'
|
21
|
+
s.add_development_dependency 'obscenity', '~> 1.0.2'
|
22
|
+
s.add_development_dependency 'parallel', '~> 0.8.0'
|
23
|
+
s.add_development_dependency 'profanity_filter', '~> 0.1.1'
|
24
|
+
|
25
|
+
#test
|
26
|
+
s.add_development_dependency 'debugger', '~> 1.6.1'
|
27
|
+
s.add_development_dependency 'guard-rspec', '~> 3.0.3'
|
28
|
+
s.add_development_dependency 'pry-rails', '~> 0.3.2'
|
29
|
+
s.add_development_dependency 'rake', '~> 10.1.0'
|
30
|
+
s.add_development_dependency 'rspec', '~> 2.14.1'
|
21
31
|
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe MrClean::Config do
|
4
|
+
it 'loads a given config file out into a hash map' do
|
5
|
+
list = YAML::load(File.open(File.expand_path('../../config/bad_words.yml', File.dirname(__FILE__))))
|
6
|
+
hash = list.inject({}) { |memo, word| memo[word] = true; memo }
|
7
|
+
MrClean::Config.hashify('bad_words.yml').should == hash
|
8
|
+
end
|
9
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe MrClean::Filter do
|
4
|
+
describe 'clean' do
|
5
|
+
it 'cleans a string keeping only letters and dashes, downcased' do
|
6
|
+
MrClean::Filter.clean('!@\'_foo"123-bar{[(.)]};').should == 'foo-bar'
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
describe 'slice' do
|
11
|
+
it 'cleans and slices a string into an array of words' do
|
12
|
+
MrClean::Filter.slice('foo! bar?').should == %w(foo bar)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe MrClean::Profanity do
|
4
|
+
describe 'config' do
|
5
|
+
it 'builds a constant hash of bad words' do
|
6
|
+
list = YAML::load(File.open(File.expand_path('../../config/bad_words.yml', File.dirname(__FILE__))))
|
7
|
+
hash = list.inject({}) { |memo, word| memo[word] = true; memo }
|
8
|
+
MrClean::Profanity::BAD_WORDS.should == hash
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
describe 'filter' do
|
13
|
+
it 'knows whether a sentence is profane' do
|
14
|
+
MrClean::Profanity.profane?('Funky shit').should be_true
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'knows whether a sentence is not profane' do
|
18
|
+
MrClean::Profanity.profane?('Hello there').should be_false
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'considers a nil value to be not profane' do
|
22
|
+
MrClean::Profanity.profane?(nil).should be_false
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
metadata
CHANGED
@@ -1,43 +1,141 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mr_clean
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- CrunchBase
|
7
|
+
- CrunchBase Engineering Team, Wagner Camarao
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 4.0.0
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 4.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: obscenity
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.0.2
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.0.2
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: parallel
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.8.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.8.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: profanity_filter
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.1.1
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.1.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: debugger
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 1.6.1
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.6.1
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: guard-rspec
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ~>
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 3.0.3
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ~>
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 3.0.3
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: pry-rails
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 0.3.2
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ~>
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.3.2
|
13
111
|
- !ruby/object:Gem::Dependency
|
14
112
|
name: rake
|
15
113
|
requirement: !ruby/object:Gem::Requirement
|
16
114
|
requirements:
|
17
|
-
- -
|
115
|
+
- - ~>
|
18
116
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
117
|
+
version: 10.1.0
|
20
118
|
type: :development
|
21
119
|
prerelease: false
|
22
120
|
version_requirements: !ruby/object:Gem::Requirement
|
23
121
|
requirements:
|
24
|
-
- -
|
122
|
+
- - ~>
|
25
123
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
124
|
+
version: 10.1.0
|
27
125
|
- !ruby/object:Gem::Dependency
|
28
126
|
name: rspec
|
29
127
|
requirement: !ruby/object:Gem::Requirement
|
30
128
|
requirements:
|
31
|
-
- -
|
129
|
+
- - ~>
|
32
130
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
131
|
+
version: 2.14.1
|
34
132
|
type: :development
|
35
133
|
prerelease: false
|
36
134
|
version_requirements: !ruby/object:Gem::Requirement
|
37
135
|
requirements:
|
38
|
-
- -
|
136
|
+
- - ~>
|
39
137
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
138
|
+
version: 2.14.1
|
41
139
|
description:
|
42
140
|
email: engineering@crunchbase.com
|
43
141
|
executables: []
|
@@ -46,12 +144,27 @@ extra_rdoc_files: []
|
|
46
144
|
files:
|
47
145
|
- .gitignore
|
48
146
|
- .rspec
|
147
|
+
- Gemfile
|
148
|
+
- Gemfile.lock
|
49
149
|
- Guardfile
|
50
150
|
- README.md
|
51
151
|
- Rakefile
|
152
|
+
- bench/bench.rb
|
153
|
+
- bench/mr_clean.rb
|
154
|
+
- bench/obscenity.rb
|
155
|
+
- bench/parallel.rb
|
156
|
+
- bench/profanity_filter.rb
|
157
|
+
- bench/samples.yml
|
158
|
+
- config/bad_words.yml
|
52
159
|
- lib/mr_clean.rb
|
160
|
+
- lib/mr_clean/config.rb
|
161
|
+
- lib/mr_clean/filter.rb
|
162
|
+
- lib/mr_clean/profanity.rb
|
53
163
|
- lib/mr_clean/version.rb
|
54
164
|
- mr_clean.gemspec
|
165
|
+
- spec/mr_clean/config_spec.rb
|
166
|
+
- spec/mr_clean/filter_spec.rb
|
167
|
+
- spec/mr_clean/profanity_spec.rb
|
55
168
|
- spec/spec_helper.rb
|
56
169
|
homepage:
|
57
170
|
licenses: []
|
@@ -72,11 +185,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
72
185
|
version: '0'
|
73
186
|
requirements: []
|
74
187
|
rubyforge_project:
|
75
|
-
rubygems_version: 2.
|
188
|
+
rubygems_version: 2.2.2
|
76
189
|
signing_key:
|
77
190
|
specification_version: 4
|
78
191
|
summary: A plain ruby library for filtering profanity, spam and such based on black
|
79
192
|
listed words
|
80
193
|
test_files:
|
194
|
+
- spec/mr_clean/config_spec.rb
|
195
|
+
- spec/mr_clean/filter_spec.rb
|
196
|
+
- spec/mr_clean/profanity_spec.rb
|
81
197
|
- spec/spec_helper.rb
|
82
|
-
has_rdoc:
|