mr_clean 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +81 -0
- data/bench/bench.rb +22 -0
- data/bench/mr_clean.rb +5 -0
- data/bench/obscenity.rb +6 -0
- data/bench/parallel.rb +40 -0
- data/bench/profanity_filter.rb +8 -0
- data/bench/samples.yml +4 -0
- data/config/bad_words.yml +1060 -0
- data/lib/mr_clean/config.rb +12 -0
- data/lib/mr_clean/filter.rb +17 -0
- data/lib/mr_clean/profanity.rb +11 -0
- data/lib/mr_clean/version.rb +1 -1
- data/lib/mr_clean.rb +4 -0
- data/mr_clean.gemspec +13 -3
- data/spec/mr_clean/config_spec.rb +9 -0
- data/spec/mr_clean/filter_spec.rb +15 -0
- data/spec/mr_clean/profanity_spec.rb +25 -0
- metadata +128 -13
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'yaml'
|
|
2
|
+
|
|
3
|
+
module MrClean
  # Loads word-list files shipped in the gem's config directory.
  module Config
    module_function

    # Reads a YAML word list and turns it into a lookup hash.
    #
    # The path is built by going two directories up from the directory that
    # contains this source file and then into "config/<file>".
    #
    # @param file [String] file name inside the config directory,
    #   e.g. "bad_words.yml"
    # @return [Hash] every entry of the YAML list mapped to +true+; an empty
    #   hash when the YAML document is empty
    # @raise [Errno::ENOENT] when the file does not exist
    def hashify(file)
      path = File.expand_path("../../config/#{file}", File.dirname(__FILE__))
      # YAML.load_file opens and closes the file itself, so no handle is left
      # open. An empty document parses to false/nil, hence the fallback.
      words = YAML.load_file(path) || []
      words.each_with_object({}) { |word, lookup| lookup[word] = true }
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module MrClean
  # Text normalisation helpers used before looking words up in a word list.
  module Filter
    module_function

    # Characters stripped by +clean+: any non-word character that is not
    # whitespace or a hyphen, plus digits and underscores.
    NON_WORDS = /(?!\s|-)\W|\d|_/.freeze
    # Passing a single space to String#split splits on runs of whitespace.
    ONE_SPACE_STRING = ' '.freeze
    EMPTY_STRING = ''.freeze

    # Downcases the text and removes everything matched by NON_WORDS.
    #
    # @param text [#to_s, nil] the text to normalise; nil is treated as an
    #   empty string
    # @return [String] the downcased text with NON_WORDS matches removed
    def clean(text)
      text.to_s.downcase.gsub(NON_WORDS, EMPTY_STRING)
    end

    # Cleans the text and splits it into words.
    #
    # @param text [#to_s, nil] the text to split; nil yields an empty array
    # @return [Array<String>] the cleaned, whitespace-separated words
    def slice(text)
      clean(text).split(ONE_SPACE_STRING)
    end
  end
end
|
data/lib/mr_clean/version.rb
CHANGED
data/lib/mr_clean.rb
CHANGED
data/mr_clean.gemspec
CHANGED
|
@@ -7,7 +7,7 @@ Gem::Specification.new do |s|
|
|
|
7
7
|
s.name = 'mr_clean'
|
|
8
8
|
s.version = MrClean::VERSION
|
|
9
9
|
s.platform = Gem::Platform::RUBY
|
|
10
|
-
s.authors = 'CrunchBase'
|
|
10
|
+
s.authors = 'CrunchBase Engineering Team, Wagner Camarao'
|
|
11
11
|
s.email = 'engineering@crunchbase.com'
|
|
12
12
|
s.summary = 'A plain ruby library for filtering profanity, spam and such based on black listed words'
|
|
13
13
|
|
|
@@ -16,6 +16,16 @@ Gem::Specification.new do |s|
|
|
|
16
16
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
|
17
17
|
s.require_paths = ['lib']
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
s.add_development_dependency '
|
|
19
|
+
#bench
|
|
20
|
+
s.add_development_dependency 'activesupport', '~> 4.0.0'
|
|
21
|
+
s.add_development_dependency 'obscenity', '~> 1.0.2'
|
|
22
|
+
s.add_development_dependency 'parallel', '~> 0.8.0'
|
|
23
|
+
s.add_development_dependency 'profanity_filter', '~> 0.1.1'
|
|
24
|
+
|
|
25
|
+
#test
|
|
26
|
+
s.add_development_dependency 'debugger', '~> 1.6.1'
|
|
27
|
+
s.add_development_dependency 'guard-rspec', '~> 3.0.3'
|
|
28
|
+
s.add_development_dependency 'pry-rails', '~> 0.3.2'
|
|
29
|
+
s.add_development_dependency 'rake', '~> 10.1.0'
|
|
30
|
+
s.add_development_dependency 'rspec', '~> 2.14.1'
|
|
21
31
|
end
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe MrClean::Config do
  it 'loads a given config file out into a hash map' do
    # Build the expected hash straight from the YAML file so the assertion
    # does not depend on the code under test. YAML.load_file closes the file
    # after reading it.
    path = File.expand_path('../../config/bad_words.yml', File.dirname(__FILE__))
    expected = YAML.load_file(path).each_with_object({}) { |word, memo| memo[word] = true }

    MrClean::Config.hashify('bad_words.yml').should == expected
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe MrClean::Filter do
  describe 'clean' do
    it 'cleans a string keeping only letters and dashes, downcased' do
      # Punctuation, quotes, digits and underscores should all be dropped.
      noisy = '!@\'_foo"123-bar{[(.)]};'
      cleaned = MrClean::Filter.clean(noisy)

      cleaned.should eq('foo-bar')
    end
  end

  describe 'slice' do
    it 'cleans and slices a string into an array of words' do
      words = MrClean::Filter.slice('foo! bar?')

      words.should eq(['foo', 'bar'])
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe MrClean::Profanity do
  describe 'config' do
    it 'builds a constant hash of bad words' do
      # Build the expected hash straight from the YAML file. YAML.load_file
      # closes the file after reading it.
      path = File.expand_path('../../config/bad_words.yml', File.dirname(__FILE__))
      expected = YAML.load_file(path).each_with_object({}) { |word, memo| memo[word] = true }

      MrClean::Profanity::BAD_WORDS.should == expected
    end
  end

  describe 'filter' do
    it 'knows whether a sentence is profane' do
      MrClean::Profanity.profane?('Funky shit').should be_true
    end

    it 'knows whether a sentence is not profane' do
      MrClean::Profanity.profane?('Hello there').should be_false
    end

    it 'considers a nil value to be not profane' do
      MrClean::Profanity.profane?(nil).should be_false
    end
  end
end
|
metadata
CHANGED
|
@@ -1,43 +1,141 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mr_clean
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
|
-
- CrunchBase
|
|
7
|
+
- CrunchBase Engineering Team, Wagner Camarao
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2014-10-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: activesupport
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ~>
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 4.0.0
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ~>
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: 4.0.0
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: obscenity
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ~>
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: 1.0.2
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ~>
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: 1.0.2
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: parallel
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ~>
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: 0.8.0
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ~>
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: 0.8.0
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: profanity_filter
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ~>
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: 0.1.1
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ~>
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: 0.1.1
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: debugger
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ~>
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: 1.6.1
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ~>
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: 1.6.1
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: guard-rspec
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - ~>
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: 3.0.3
|
|
90
|
+
type: :development
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - ~>
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: 3.0.3
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: pry-rails
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - ~>
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: 0.3.2
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - ~>
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: 0.3.2
|
|
13
111
|
- !ruby/object:Gem::Dependency
|
|
14
112
|
name: rake
|
|
15
113
|
requirement: !ruby/object:Gem::Requirement
|
|
16
114
|
requirements:
|
|
17
|
-
- -
|
|
115
|
+
- - ~>
|
|
18
116
|
- !ruby/object:Gem::Version
|
|
19
|
-
version:
|
|
117
|
+
version: 10.1.0
|
|
20
118
|
type: :development
|
|
21
119
|
prerelease: false
|
|
22
120
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
121
|
requirements:
|
|
24
|
-
- -
|
|
122
|
+
- - ~>
|
|
25
123
|
- !ruby/object:Gem::Version
|
|
26
|
-
version:
|
|
124
|
+
version: 10.1.0
|
|
27
125
|
- !ruby/object:Gem::Dependency
|
|
28
126
|
name: rspec
|
|
29
127
|
requirement: !ruby/object:Gem::Requirement
|
|
30
128
|
requirements:
|
|
31
|
-
- -
|
|
129
|
+
- - ~>
|
|
32
130
|
- !ruby/object:Gem::Version
|
|
33
|
-
version:
|
|
131
|
+
version: 2.14.1
|
|
34
132
|
type: :development
|
|
35
133
|
prerelease: false
|
|
36
134
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
135
|
requirements:
|
|
38
|
-
- -
|
|
136
|
+
- - ~>
|
|
39
137
|
- !ruby/object:Gem::Version
|
|
40
|
-
version:
|
|
138
|
+
version: 2.14.1
|
|
41
139
|
description:
|
|
42
140
|
email: engineering@crunchbase.com
|
|
43
141
|
executables: []
|
|
@@ -46,12 +144,27 @@ extra_rdoc_files: []
|
|
|
46
144
|
files:
|
|
47
145
|
- .gitignore
|
|
48
146
|
- .rspec
|
|
147
|
+
- Gemfile
|
|
148
|
+
- Gemfile.lock
|
|
49
149
|
- Guardfile
|
|
50
150
|
- README.md
|
|
51
151
|
- Rakefile
|
|
152
|
+
- bench/bench.rb
|
|
153
|
+
- bench/mr_clean.rb
|
|
154
|
+
- bench/obscenity.rb
|
|
155
|
+
- bench/parallel.rb
|
|
156
|
+
- bench/profanity_filter.rb
|
|
157
|
+
- bench/samples.yml
|
|
158
|
+
- config/bad_words.yml
|
|
52
159
|
- lib/mr_clean.rb
|
|
160
|
+
- lib/mr_clean/config.rb
|
|
161
|
+
- lib/mr_clean/filter.rb
|
|
162
|
+
- lib/mr_clean/profanity.rb
|
|
53
163
|
- lib/mr_clean/version.rb
|
|
54
164
|
- mr_clean.gemspec
|
|
165
|
+
- spec/mr_clean/config_spec.rb
|
|
166
|
+
- spec/mr_clean/filter_spec.rb
|
|
167
|
+
- spec/mr_clean/profanity_spec.rb
|
|
55
168
|
- spec/spec_helper.rb
|
|
56
169
|
homepage:
|
|
57
170
|
licenses: []
|
|
@@ -72,11 +185,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
72
185
|
version: '0'
|
|
73
186
|
requirements: []
|
|
74
187
|
rubyforge_project:
|
|
75
|
-
rubygems_version: 2.
|
|
188
|
+
rubygems_version: 2.2.2
|
|
76
189
|
signing_key:
|
|
77
190
|
specification_version: 4
|
|
78
191
|
summary: A plain ruby library for filtering profanity, spam and such based on black
|
|
79
192
|
listed words
|
|
80
193
|
test_files:
|
|
194
|
+
- spec/mr_clean/config_spec.rb
|
|
195
|
+
- spec/mr_clean/filter_spec.rb
|
|
196
|
+
- spec/mr_clean/profanity_spec.rb
|
|
81
197
|
- spec/spec_helper.rb
|
|
82
|
-
has_rdoc:
|