word_filter 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 218ff49c9187c5a6a3baa6837d930ca128d3406d
4
+ data.tar.gz: 37bbb6429d7fcdb56dd3d2c8ec17eb8bf7ca5f12
5
+ SHA512:
6
+ metadata.gz: 09d96996dbf4a356a54bce7892214a6661b4effdfaefec95424e5cd6da5cb93df95266cafe1478f4d3085489c7f4cc49fc0c39b5947abc9a7fad3b29c959f412
7
+ data.tar.gz: b08d8f7a70ae5e1a819db5ac9882a1dc1d7ab31c69ac2638285c97034a897adb971f51d7edd73d152b69de93483342ca978dcb7795adb55e0b6ca2ad648079dd
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in word_filter.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Huascar Oña
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # WordFilter
2
+
3
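+ A bad word filter for input text. It classifies a string as safe to post or as containing an email address, a URL, a street address, a phone number or a bad word.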
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'word_filter'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install word_filter
18
+
19
+ ## Usage
20
+
21
+ # Example usage of the filter
22
+ # testFilter = WordFilter.new
23
+ # testFilter.filterInit("lib/assets/dictionary.txt", "lib/assets/badwords.txt")
24
+ # testFilter.filterLevel = WordFilter::SWAPPABLE_AND_REPEATED_VOWELS_INCLUDING_NONE
25
+ # input = "I went to school and some beeeestard stole my lunch"
26
+ # puts testFilter.filterString(input)
27
+
28
+ ## Contributing
29
+
30
+ 1. Fork it
31
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
32
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
33
+ 4. Push to the branch (`git push origin my-new-feature`)
34
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,180 @@
1
+ require "word_filter/version"
2
+
3
# Text filter that classifies a message as safe to post or as containing
# contact details (email, URL, street address, phone number) or a bad word.
#
# The module is a mixin: include it in a class of your own, or call
# WordFilter.new to obtain a ready-made filter object.
#
#   filter = WordFilter.new
#   filter.filterInit("lib/assets/dictionary.txt", "lib/assets/badwords.txt")
#   filter.filterLevel = WordFilter::SWAPPABLE_AND_REPEATED_VOWELS
#   filter.filterString("some text") # => 0 when the text is safe
module WordFilter
  # All patterns below are only ever applied to downcased input.
  @@emailRegex = /[a-zA-Z0-9._%+-]+@[a-z0-9.-]+\.[a-zA-Z]{2,4}/

  # A single digit or a spelled-out number from zero to twenty.
  @@alphaNumericDigit = /(zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|\d)/

  # The composite patterns are written as regexp literals on purpose: inside a
  # double-quoted string Ruby turns "\b" into a backspace and "\s" into a
  # space, and drops the backslash of "\W", "\d" and "\.", which silently
  # produces a pattern that never matches real text.
  digit = @@alphaNumericDigit.source
  @@digitsRegex = /\b(\s*#{digit})+\b/
  @@streetNameRegex = /\b(\s*#{digit})+\s([a-z\d]+\.?\s*){1,5}\b(avenue|ave|street|st|court|ct|circle|boulevard|blvd|lane|ln|trail|tr|loop|lp|route|rt|drive|dr|road|rd|terrace|tr|way|wy|highway|hiway|hw)\b/
  # Seven digits (or number words), optionally separated by non-word chars.
  @@phoneNumber = /(#{digit}\W*?){3}(#{digit}\W*?){4}\b/
  @@urlRegex = /(?:http|https):\/\/[a-z0-9]+(?:[\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(?:(?::[0-9]{1,5})?\/[^\s]*)?/

  # Filter levels: how aggressively vowel tricks ("baaad", "bed" for "bad")
  # are matched after all exact checks have passed.
  NONE = 0
  REPEATED_VOWELS = 1
  SWAPPABLE_VOWELS = 2
  SWAPPABLE_AND_REPEATED_VOWELS = 3
  SWAPPABLE_AND_REPEATED_VOWELS_INCLUDING_NONE = 4

  # Returns a standalone filter object, so the module can be used without
  # writing a host class first.
  def self.new
    @standaloneClass ||= Class.new { include WordFilter }
    @standaloneClass.new
  end

  # Starts with vowel-trick matching switched off.
  def initialize
    @filterLevel = NONE
  end

  # One of the filter level constants defined above.
  attr_accessor :filterLevel

  # Loads the word lists and pre-builds every regexp used by filterString.
  # Must be called before filterString.
  #
  # dictionaryFile - path to a whitespace-separated list of harmless words.
  # badwordslist   - path to a file with one bad word (or pattern) per line.
  def filterInit(dictionaryFile, badwordslist)
    # Hash keys give constant-time lookups in stripGoodWords.
    @goodWords = loadDictionary(dictionaryFile).each_with_object({}) do |word, table|
      table[word] = true
    end

    # The original Java class requires three more word lists; fixed
    # placeholder patterns stand in for the dating and deviant lists here.
    @datingWordsRegex = /dating/
    @deviantWordsRegex = /deviant/
    @badWordsRegex = loadBadwords(badwordslist)

    # Derive the vowel-trick variants by rewriting every vowel of the pattern.
    badSource = @badWordsRegex.source
    vowels = /([aeiou])/
    @vowelSwappedAndRepeatedRegex = Regexp.new(badSource.gsub(vowels, "[aeiou]+"))
    @vowelSwappedAndRepeatedRegexIncludingEmpty = Regexp.new(badSource.gsub(vowels, "[aeiou]*"))
    @vowelRepeatedRegex = Regexp.new(badSource.gsub(vowels, "\\1+"))
    @vowelSwappedRegex = Regexp.new(badSource.gsub(vowels, "[aeiou]"))
  end

  # Reads a dictionary file and returns its whitespace-separated words as an
  # Array of Strings. File.read closes the file once it has been read.
  def loadDictionary(path)
    File.read(path).split
  end

  # Reads a bad-word file (one entry per line, "\r" ignored) and returns a
  # Regexp that matches any of the entries. Entries are used as regexp
  # source, not escaped.
  #
  # Blank lines are skipped: an empty alternative would match every string.
  # For the same reason an empty list yields a pattern that never matches.
  def loadBadwords(path)
    words = File.read(path).delete("\r").split("\n").reject { |word| word.strip.empty? }
    return Regexp.new("(?!)") if words.empty?

    Regexp.new("(" + words.join("|") + ")")
  end

  # Classifies input and returns an Integer code:
  #
  #   0: string is safe to post
  #   1: string contains an email address
  #   2: string contains a URL
  #   3: string contains a street address
  #   4: string contains a phone number
  #   5: string contains a dating word
  #   6: string contains a deviant word
  #   7: string contains a bad word verbatim
  #   9: string contains a disguised bad word (spaced out, special
  #      characters, or vowel tricks allowed by filterLevel)
  #
  # A -1 code ("an exception occurred, do not post") was listed by the
  # original comment but is never returned here; errors are raised instead.
  # nil input is treated like an empty string.
  def filterString(input)
    input = input.to_s.strip.downcase
    return 0 if input.empty?

    # Contact details are looked for in the raw text, before punctuation is
    # stripped.
    return 1 if @@emailRegex =~ input
    return 2 if @@urlRegex =~ input
    return 3 if @@streetNameRegex =~ input
    return 4 if @@phoneNumber =~ input

    # Normalise whitespace and punctuation so words can be compared with the
    # dictionary.
    workingCopy = input.gsub(/\s+/, " ")
    workingCopy = workingCopy.gsub(/["',.;:?-]/, " ")
    workingCopy = workingCopy.gsub(/!+\s/, " ")
    workingCopy = workingCopy.gsub(/!+\z/, " ")
    # Drops "r u" sequences (presumably the chat shorthand for "are you").
    workingCopy = workingCopy.gsub(/\br\su/, " ")

    cleanVersion = stripGoodWords(workingCopy)
    return 0 if cleanVersion.nil? || cleanVersion.empty?

    return 5 if @datingWordsRegex =~ cleanVersion
    return 6 if @deviantWordsRegex =~ cleanVersion
    return 7 if @badWordsRegex =~ cleanVersion

    # From here on, try various combinations of bad word tricks.

    # Compress the string (catches "b a d"), then check it again.
    return 9 if @badWordsRegex =~ cleanVersion.gsub(/[ \t\n\f\r]/, "")

    # Zap everything that is not a letter, then check it again.
    return 9 if @badWordsRegex =~ cleanVersion.gsub(/[^a-z]/, "")

    # Replace special characters with their letter equivalents. The vertical
    # characters (!1|) are ambiguous, so they are tried both as "i" and "l".
    specialCharsReplaced_i = cleanVersion.tr("@683!1|0$+", "abbeiiiost")
    return 9 if @badWordsRegex =~ specialCharsReplaced_i

    specialCharsReplaced_l = cleanVersion.tr("@683!1|0$+", "abbelllost")
    return 9 if @badWordsRegex =~ specialCharsReplaced_l

    # Finally apply the vowel-trick pattern selected by the filter level.
    vowelRegex =
      case @filterLevel
      when REPEATED_VOWELS then @vowelRepeatedRegex
      when SWAPPABLE_VOWELS then @vowelSwappedRegex
      when SWAPPABLE_AND_REPEATED_VOWELS then @vowelSwappedAndRepeatedRegex
      when SWAPPABLE_AND_REPEATED_VOWELS_INCLUDING_NONE then @vowelSwappedAndRepeatedRegexIncludingEmpty
      end
    # NONE (or an unknown level): nothing more to check.
    return 0 if vowelRegex.nil?
    return 9 if vowelRegex =~ specialCharsReplaced_i || vowelRegex =~ specialCharsReplaced_l

    0
  end

  # Returns input with every word found in the dictionary removed; the
  # remaining words are joined with single spaces.
  def stripGoodWords(input)
    input.split(" ").reject { |word| @goodWords.include?(word) }.join(" ")
  end
end
@@ -0,0 +1,3 @@
1
# Version namespace for the word_filter gem.
module WordFilter
  # Release number; the gemspec reads it as WordFilter::VERSION.
  VERSION = '0.0.2'
end
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for word_filter.

# Put the gem's own lib directory on the load path so that the version file
# can be required before the gem is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) if !$LOAD_PATH.include?(lib_dir)
require 'word_filter/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name        = 'word_filter'
  gem.version     = WordFilter::VERSION
  gem.authors     = ['Huascar Oña']
  gem.email       = ['huascarking@hotmail.com']
  gem.description = 'A bad word filter for the input text.'
  gem.summary     = 'A word filter gem'
  gem.homepage    = ''
  gem.license     = 'MIT'

  # Packaged files: everything tracked by git; executables and test files
  # are picked out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Development-only dependencies
  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: word_filter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Huascar Oña
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-05-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: A bad word filter for the input text.
42
+ email:
43
+ - huascarking@hotmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - Gemfile
50
+ - LICENSE.txt
51
+ - README.md
52
+ - Rakefile
53
+ - lib/word_filter.rb
54
+ - lib/word_filter/version.rb
55
+ - word_filter.gemspec
56
+ homepage: ''
57
+ licenses:
58
+ - MIT
59
+ metadata: {}
60
+ post_install_message:
61
+ rdoc_options: []
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - '>='
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ requirements: []
75
+ rubyforge_project:
76
+ rubygems_version: 2.0.0.rc.2
77
+ signing_key:
78
+ specification_version: 4
79
+ summary: A word filter gem
80
+ test_files: []