bad_word_detector 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +56 -0
- data/Rakefile +10 -0
- data/bad_word_detector.gemspec +33 -0
- data/bin/bad_word_detector +13 -0
- data/lib/bad_word_detector.rb +200 -0
- data/lib/bad_word_detector/bad_word.rb +30 -0
- data/lib/bad_word_detector/prefix_tree.rb +56 -0
- data/lib/bad_word_detector/rule.rb +35 -0
- data/lib/bad_word_detector/state.rb +41 -0
- data/lib/bad_word_detector/version.rb +3 -0
- data/lib/bad_word_detector/whitelist.rb +33 -0
- data/lib/conf/library.yaml +155 -0
- data/lib/conf/rules.yaml +59 -0
- data/lib/conf/whitelist.yaml +236921 -0
- data/lib/conf/words.yaml +459 -0
- data/test/rules.yaml +63 -0
- data/test/test.rb +26 -0
- data/test/test_bad_word_detector.rb +72 -0
- metadata +110 -0
data/test/rules.yaml
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
1:
|
2
|
+
-
|
3
|
+
symbol: f
|
4
|
+
t:
|
5
|
+
-
|
6
|
+
symbol: f
|
7
|
+
"!":
|
8
|
+
-
|
9
|
+
symbol: i
|
10
|
+
ph:
|
11
|
+
-
|
12
|
+
symbol: f
|
13
|
+
-
|
14
|
+
symbol: h
|
15
|
+
weight: 1
|
16
|
+
pf:
|
17
|
+
-
|
18
|
+
symbol: f
|
19
|
+
weight: 1
|
20
|
+
ck:
|
21
|
+
-
|
22
|
+
symbol: k
|
23
|
+
-
|
24
|
+
symbol: c
|
25
|
+
c:
|
26
|
+
-
|
27
|
+
symbol: ck
|
28
|
+
k:
|
29
|
+
-
|
30
|
+
symbol: ck
|
31
|
+
u:
|
32
|
+
-
|
33
|
+
symbol: v
|
34
|
+
weight: 1
|
35
|
+
|
36
|
+
"|_|":
|
37
|
+
-
|
38
|
+
symbol: u
|
39
|
+
|
40
|
+
v:
|
41
|
+
-
|
42
|
+
symbol: u
|
43
|
+
|
44
|
+
".":
|
45
|
+
-
|
46
|
+
symbol: ""
|
47
|
+
weight: 1
|
48
|
+
" ":
|
49
|
+
-
|
50
|
+
symbol: ""
|
51
|
+
weight: 1
|
52
|
+
"_":
|
53
|
+
-
|
54
|
+
symbol: ""
|
55
|
+
weight: 1
|
56
|
+
"#":
|
57
|
+
-
|
58
|
+
symbol: "s"
|
59
|
+
-
|
60
|
+
symbol: "f"
|
61
|
+
"$":
|
62
|
+
-
|
63
|
+
symbol: "s"
|
data/test/test.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# Ad-hoc timing script: puts ../lib (relative to this file) on the load path,
# runs one BadWordDetector#find call on a short sample and prints the elapsed
# wall-clock time followed by the inspected result.
$LOAD_PATH.push("#{File.expand_path(File.dirname(__FILE__))}/../lib")

require 'bad_word_detector'

# Built with no arguments, so the detector uses whatever it loads by default.
finder = BadWordDetector.new

# Longer sample text, disabled; uncomment to time a paragraph-sized input.
#string = 'Using faster code in your Ruby application is always ideal. To discover which is faster, benchmarking is necessary step. It measures the time it takes to execute code and compares it to other a##hole code that accomplishes the same task'
string = "tuck"

started_at = Time.now
result = finder.find(string)
elapsed = Time.now - started_at

puts "Time to find: #{elapsed}"
puts result.inspect

# Optional ruby-prof run of the same lookup (disabled). When enabled it writes
# a call-stack report to report.html.
#
#require 'ruby-prof'
#
## Profile the code
#RubyProf.start
#
#finder.find string
#
#result = RubyProf.stop
#
## Print a flat profile to text
#printer = RubyProf::CallStackPrinter.new(result)
##printer = RubyProf::FlatPrinter.new(result)
#printer.print(File.open("report.html", "w"))
|
@@ -0,0 +1,72 @@
|
|
1
|
+
$LOAD_PATH << "#{File.expand_path(File.dirname(__FILE__))}/../lib"
|
2
|
+
|
3
|
+
require 'bad_word_detector'
|
4
|
+
require "yaml"
|
5
|
+
require "test/unit"
|
6
|
+
|
7
|
+
# Exercises BadWordDetector#find through the detector supplied by
# Finder.finder: plain words, distorted spellings, separator padding and
# whitelist hits.
class TestBadWordDetector < Test::Unit::TestCase
  # Detector under test; Finder.finder memoizes it, so all tests share one.
  def finder
    Finder.finder
  end

  # A bare word is reported as-is at index 0.
  def test_word
    assert_found "fuck",
                 :text => "fuck",
                 :word => "fuck",
                 :source => "fuck",
                 :index => 0
  end

  # A word inside a sentence is reported with its offset in the source.
  def test_word_in_text
    assert_found "What the fuck is going on?",
                 :text => "fuck",
                 :word => "fuck",
                 :source => "What the fuck is going on?",
                 :index => 9
  end

  # Substitute symbols are mapped back to the word; +text+ keeps the raw input.
  def test_with_distortion
    assert_found '#|_|ck',
                 :text => "#|_|ck",
                 :word => "fuck",
                 :source => "#|_|ck",
                 :index => 0
  end

  # Same as above, but with the distorted word embedded in a sentence.
  def test_with_distortion_in_text
    assert_found 'What the #uk is going on?',
                 :word => "fuck",
                 :text => '#uk',
                 :source => 'What the #uk is going on?',
                 :index => 9
  end

  # This symbol run is expected to produce no match at all.
  def test_distortion
    assert_nil finder.find('What the #$@# is going on?')
  end

  # Padding characters between letters stay part of the reported +text+.
  def test_spaces
    assert_found 'What the f_..__u_..__c_..__k is going on?',
                 :text => "f_..__u_..__c_..__k",
                 :word => "fuck",
                 :source => "What the f_..__u_..__c_..__k is going on?",
                 :index => 9
  end

  # NOTE(review): the second argument to #find presumably asks for whitelisted
  # matches to be returned as well -- confirm against BadWordDetector#find.
  def test_false_positive
    match = finder.find("tuck", true)
    assert_not_nil match
    assert_equal true, match.white?
  end

  # A whitelisted hit inside a sentence still names the word it resembles.
  def test_false_positive_in_text
    match = finder.find("Thing as is!", true)
    assert_not_nil match
    assert_equal true, match.white?
    assert_equal "ass", match.word
  end

  private

  # Looks +source+ up with the shared detector, requires a non-nil result and
  # then compares each attribute named in +expected+ (in the order given)
  # with its expected value.
  def assert_found(source, expected)
    match = finder.find(source)
    assert_not_nil match
    expected.each do |attribute, value|
      assert_equal value, match.public_send(attribute)
    end
  end
end
|
67
|
+
|
68
|
+
# Holder for the detector shared by the test case above.
class Finder
  class << self
    # Returns the shared BadWordDetector, creating it on first use from the
    # rules.yaml file that sits in the same directory as this file.
    def finder
      @finder ||= begin
        rules_path = File.dirname(__FILE__) + "/rules.yaml"
        BadWordDetector.new(YAML.load_file(rules_path))
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bad_word_detector
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Fedotov Daniil
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-02-26 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: yard
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: redcarpet
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
description: ! "\n Detects #uck F|_|__C_K and other variations of hidden swear
|
47
|
+
words in text.\n Usage:\n ```\n finder = BadWordDetector.new\n finder.find(\"What
|
48
|
+
the #uck\")\n it will return BadWord object\n ```\n Transformation
|
49
|
+
rules is defined in form: {\"#\" => {\"symbol\"=>\"f\", \"weight\" => 2}} (where
|
50
|
+
weight is optional)\n in file conf/rules.yaml \n List of swear words is located
|
51
|
+
in conf/library.yaml\n Whitelist of english words in conf/whitelist.yaml\n You
|
52
|
+
can also set own rules:\n finder = BadWordDetector.new rules, library, whitelist\n
|
53
|
+
\ "
|
54
|
+
email:
|
55
|
+
- fedotov.danil@gmail.com
|
56
|
+
executables:
|
57
|
+
- bad_word_detector
|
58
|
+
extensions: []
|
59
|
+
extra_rdoc_files: []
|
60
|
+
files:
|
61
|
+
- .gitignore
|
62
|
+
- Gemfile
|
63
|
+
- LICENSE
|
64
|
+
- README.md
|
65
|
+
- Rakefile
|
66
|
+
- bad_word_detector.gemspec
|
67
|
+
- bin/bad_word_detector
|
68
|
+
- lib/bad_word_detector.rb
|
69
|
+
- lib/bad_word_detector/bad_word.rb
|
70
|
+
- lib/bad_word_detector/prefix_tree.rb
|
71
|
+
- lib/bad_word_detector/rule.rb
|
72
|
+
- lib/bad_word_detector/state.rb
|
73
|
+
- lib/bad_word_detector/version.rb
|
74
|
+
- lib/bad_word_detector/whitelist.rb
|
75
|
+
- lib/conf/library.yaml
|
76
|
+
- lib/conf/rules.yaml
|
77
|
+
- lib/conf/whitelist.yaml
|
78
|
+
- lib/conf/words.yaml
|
79
|
+
- test/rules.yaml
|
80
|
+
- test/test.rb
|
81
|
+
- test/test_bad_word_detector.rb
|
82
|
+
homepage: https://github.com/hairyhum/bad-words.ruby
|
83
|
+
licenses: []
|
84
|
+
post_install_message:
|
85
|
+
rdoc_options: []
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ! '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
requirements: []
|
101
|
+
rubyforge_project:
|
102
|
+
rubygems_version: 1.8.25
|
103
|
+
signing_key:
|
104
|
+
specification_version: 3
|
105
|
+
summary: Swear word detector
|
106
|
+
test_files:
|
107
|
+
- test/rules.yaml
|
108
|
+
- test/test.rb
|
109
|
+
- test/test_bad_word_detector.rb
|
110
|
+
has_rdoc:
|