word_filter 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a81dac029f338386b201365215420886d300afcb
4
- data.tar.gz: e108e5ef8d32656eb688d5e3690e1af0fbedbea4
3
+ metadata.gz: 06e6a40537fb1cfad2ee83e8a598c8826bd40fda
4
+ data.tar.gz: 14efc8f39dc0a84df54e2195799ea65214134d32
5
5
  SHA512:
6
- metadata.gz: 3a909da71e2df9ddf2acf718510c7f223712fdbc130b4129f867f4b3840eeb928992b06ed1dc8584837624204f859e72d6f1e70fd1b0b928466274db63d88340
7
- data.tar.gz: 1eeeaeba7351963169e980e3bd8aa269191ff6a37bced95ef28aa01f3a5a14b7aba24eab78e6ce115adb9e48c02b4e2b706ed9b8ea1da203ba7bcb1cb8994b94
6
+ metadata.gz: 921118f7c180ba70f95b4df31297b14351f1777a0dcfca5edaa4d2b7e77be84045880f0edc5b303c3db3059a32e07f098f6eba1d79ee607bbeb5a0fe7224ac62
7
+ data.tar.gz: 6d79bc70cb2d79ea499faa05533dfeccabbecafde561db16b4045085b0828d54b2ab902164a0bff8678bb519acc5e0b1921ea90b7d9dabea28fec60d4eb28df7
@@ -1,3 +1,3 @@
1
1
  module WordFilter
2
- VERSION = "0.0.8"
2
+ VERSION = "0.0.9"
3
3
  end
data/lib/word_filter.rb CHANGED
@@ -1,4 +1,183 @@
1
1
  require "word_filter/version"
2
2
 
3
3
  module WordFilter
4
class Filter
  # Detection patterns shared by every Filter instance. They stay class
  # variables so the names used by the original code remain available.
  #
  # NOTE: the composite patterns are written as regexp literals on purpose.
  # Inside a double-quoted Ruby string "\b" is a backspace and "\s" is a
  # space, so assembling them from such strings silently yields patterns
  # that can never match ordinary text.

  # An e-mail address such as "name@example.com".
  @@emailRegex = /[a-zA-Z0-9._%+-]+@[a-z0-9.-]+\.[a-zA-Z]{2,4}/
  # One "digit": a decimal digit or an English number word (zero..twenty).
  @@alphaNumericDigit = /(zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|\d)/
  # A run of one or more digits / number words.
  @@digitsRegex = /\b(\s*#{@@alphaNumericDigit.source})+\b/
  # A house number, one to five name words and a street-type suffix.
  @@streetNameRegex = /\b(\s*#{@@alphaNumericDigit.source})+\s([a-z\d]+\.?\s*){1,5}\b(avenue|ave|street|st|court|ct|circle|boulevard|blvd|lane|ln|trail|tr|loop|lp|route|rt|drive|dr|road|rd|terrace|tr|way|wy|highway|hiway|hw)\b/
  # Seven digits / number words (3 + 4), optionally separated by non-word characters.
  @@phoneNumber = /((#{@@alphaNumericDigit.source})\W*?){3}((#{@@alphaNumericDigit.source})\W*?){4}\b/
  # An http:// or https:// URL.
  @@urlRegex = /(?:http|https):\/\/[a-z0-9]+(?:[\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(?:(?::[0-9]{1,5})?\/[^\s]*)?/

  # Filter levels: which vowel tricks are tried after the plain bad-word checks.
  NONE = 0
  REPEATED_VOWELS = 1
  SWAPPABLE_VOWELS = 2
  SWAPPABLE_AND_REPEATED_VOWELS = 3
  SWAPPABLE_AND_REPEATED_VOWELS_INCLUDING_NONE = 4

  # One of the filter level constants above; defaults to NONE.
  attr_accessor :filterLevel

  def initialize()
    @filterLevel = NONE
  end

  # Loads the word lists and derives the vowel-variation patterns.
  # Must be called before filterString.
  #
  # @param dictionaryFile [String] path to a whitespace-separated list of good words
  # @param badwordslist [String] path to a file with one bad word per line
  def filterInit(dictionaryFile, badwordslist)
    # Keep the good words as hash keys so stripGoodWords does a constant-time
    # lookup per word instead of scanning the whole dictionary array.
    @goodWords = loadDictionary(dictionaryFile).each_with_object({}) { |word, seen| seen[word] = true }

    # The original Java class requires three other word lists; fixed
    # placeholder patterns are used here instead.
    @datingWordsRegex = /dating/
    @deviantWordsRegex = /deviant/
    @badWordsRegex = loadBadwords(badwordslist)

    vowels = /([aeiou])/
    badSource = @badWordsRegex.source
    @vowelSwappedAndRepeatedRegex = Regexp.new(badSource.gsub(vowels, "[aeiou]+"))
    @vowelSwappedAndRepeatedRegexIncludingEmpty = Regexp.new(badSource.gsub(vowels, "[aeiou]*"))
    # "\\1+" keeps the captured vowel and lets it repeat.
    @vowelRepeatedRegex = Regexp.new(badSource.gsub(vowels, "\\1+"))
    @vowelSwappedRegex = Regexp.new(badSource.gsub(vowels, "[aeiou]"))
  end

  # Reads every whitespace-separated word of the file at +path+.
  #
  # @param path [String] dictionary file path
  # @return [Array<String>] the words in file order
  def loadDictionary(path)
    # File.read closes the file for us; the previous File.open(...).each_line
    # left the handle open until garbage collection.
    File.read(path).split
  end

  # Builds one alternation regexp from the file at +path+ (one word per line).
  # Entries are inserted verbatim, so regexp metacharacters in them are
  # interpreted as such.
  #
  # @param path [String] bad-word list path
  # @return [Regexp] "(word1|word2|...)", or a never-matching pattern when
  #   the list holds no words
  def loadBadwords(path)
    # Blank lines must be dropped: an empty alternative matches every string
    # and would flag all text as bad.
    words = File.read(path).gsub("\r", '').split("\n").reject { |word| word.strip.empty? }
    return /(?!)/ if words.empty?

    Regexp.new('(' + words.join('|') + ')')
  end

  # Classifies +input+.
  #
  # Return codes:
  #    0: string is safe to post
  #    1: string contains an email address
  #    2: string contains a URL
  #    3: string contains a street address
  #    4: string contains a phone number
  #    5: string contains a dating word
  #    6: string contains a deviant word
  #    7: string contains a bad word as written
  #    9: string contains a bad word after de-obfuscation (spacing, special
  #       characters or, depending on filterLevel, vowel tricks)
  # The -1 code of the original Java implementation is not produced here;
  # errors are raised as ordinary exceptions.
  #
  # @param input [String] text to check
  # @return [Integer] one of the codes above
  def filterString(input)
    input = input.strip.downcase
    return 0 if input.empty?

    return 1 if @@emailRegex =~ input
    return 2 if @@urlRegex =~ input
    return 3 if @@streetNameRegex =~ input
    return 4 if @@phoneNumber =~ input

    cleanVersion = stripGoodWords(normalizeText(input))
    return 0 if cleanVersion.nil? || cleanVersion.empty?

    return 5 if @datingWordsRegex =~ cleanVersion
    return 6 if @deviantWordsRegex =~ cleanVersion
    return 7 if @badWordsRegex =~ cleanVersion

    # Let's try various combinations of bad word tricks.

    # Compress the string (drop all whitespace), then check it again.
    return 9 if @badWordsRegex =~ cleanVersion.gsub(/[ \t\n\f\r]/, "")

    # Zap everything that is not a letter and check it again.
    return 9 if @badWordsRegex =~ cleanVersion.gsub(/[^a-z]/, "")

    # Replace certain special characters with their letter equivalents.
    # This variant maps the vertical non-letter chars (!1|) to i ...
    specialCharsReplaced_i = cleanVersion.tr("@683!1|0$+", "abbeiiiost")
    return 9 if @badWordsRegex =~ specialCharsReplaced_i

    # ... and this one maps them to l.
    specialCharsReplaced_l = cleanVersion.tr("@683!1|0$+", "abbelllost")
    return 9 if @badWordsRegex =~ specialCharsReplaced_l

    levelRegex = vowelRegexForLevel
    return 0 if levelRegex.nil?
    return 9 if levelRegex =~ specialCharsReplaced_i || levelRegex =~ specialCharsReplaced_l

    # Nothing matched at the configured level: the text is safe. (Falling off
    # the end here used to return nil instead of 0.)
    0
  end

  # Removes every word of +input+ that is in the good-word dictionary.
  #
  # @param input [String] whitespace-separated text
  # @return [String] the remaining words joined by single spaces
  def stripGoodWords(input)
    input.split(" ").reject { |word| @goodWords.include?(word) }.join(" ")
  end

  private

  # Collapses whitespace and blanks out punctuation, trailing exclamation
  # marks and the chat shorthand "r u" before the word-level checks.
  def normalizeText(text)
    text.gsub(/\s+/, " ")
        .gsub(/["',.;:?-]/, " ")
        .gsub(/!+\s/, " ")
        .gsub(/!+\z/, " ")
        .gsub(/\br\su/, " ")
  end

  # Returns the vowel-variation pattern for the current filterLevel, or nil
  # when the level is NONE or not one of the known constants.
  def vowelRegexForLevel
    case @filterLevel
    when REPEATED_VOWELS
      @vowelRepeatedRegex
    when SWAPPABLE_VOWELS
      @vowelSwappedRegex
    when SWAPPABLE_AND_REPEATED_VOWELS
      @vowelSwappedAndRepeatedRegex
    when SWAPPABLE_AND_REPEATED_VOWELS_INCLUDING_NONE
      @vowelSwappedAndRepeatedRegexIncludingEmpty
    end
  end
end
4
183
  end
data/word_filter.gemspec CHANGED
@@ -6,7 +6,7 @@ require 'word_filter/version'
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "word_filter"
8
8
  spec.version = WordFilter::VERSION
9
- spec.authors = ["Huascar Oña"]
9
+ spec.authors = ["Huascar Ona"]
10
10
  spec.email = ["huascarking@hotmail.com"]
11
11
  spec.description = %q{A bad word filter for the input text.}
12
12
  spec.summary = %q{A word filter gem}
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
- - Huascar Oña
7
+ - Huascar Ona
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []