word_filter 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a81dac029f338386b201365215420886d300afcb
4
- data.tar.gz: e108e5ef8d32656eb688d5e3690e1af0fbedbea4
3
+ metadata.gz: 06e6a40537fb1cfad2ee83e8a598c8826bd40fda
4
+ data.tar.gz: 14efc8f39dc0a84df54e2195799ea65214134d32
5
5
  SHA512:
6
- metadata.gz: 3a909da71e2df9ddf2acf718510c7f223712fdbc130b4129f867f4b3840eeb928992b06ed1dc8584837624204f859e72d6f1e70fd1b0b928466274db63d88340
7
- data.tar.gz: 1eeeaeba7351963169e980e3bd8aa269191ff6a37bced95ef28aa01f3a5a14b7aba24eab78e6ce115adb9e48c02b4e2b706ed9b8ea1da203ba7bcb1cb8994b94
6
+ metadata.gz: 921118f7c180ba70f95b4df31297b14351f1777a0dcfca5edaa4d2b7e77be84045880f0edc5b303c3db3059a32e07f098f6eba1d79ee607bbeb5a0fe7224ac62
7
+ data.tar.gz: 6d79bc70cb2d79ea499faa05533dfeccabbecafde561db16b4045085b0828d54b2ab902164a0bff8678bb519acc5e0b1921ea90b7d9dabea28fec60d4eb28df7
@@ -1,3 +1,3 @@
1
1
  module WordFilter
2
- VERSION = "0.0.8"
2
+ VERSION = "0.0.9"
3
3
  end
data/lib/word_filter.rb CHANGED
@@ -1,4 +1,183 @@
1
1
  require "word_filter/version"
2
2
 
3
3
  module WordFilter
4
# Screens free-form text for contact details (e-mail, URL, street address,
# phone number) and for words from a configurable bad-word list.
#
# Usage: create an instance, call #filterInit with the two word-list files,
# optionally set #filterLevel, then call #filterString for each input.
class Filter
  # NOTE: the patterns below are regexp literals on purpose. Inside a
  # double-quoted String "\b" is a backspace, "\s" a space and "\d" a plain
  # "d", so building them from such strings yields patterns that never match.

  # An e-mail address (input is lower-cased before matching).
  EMAIL_REGEX = /[a-zA-Z0-9._%+-]+@[a-z0-9.-]+\.[a-zA-Z]{2,4}/

  # A single digit, or a number from zero to twenty spelled out.
  ALPHA_NUMERIC_DIGIT = /(zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|\d)/

  digit = ALPHA_NUMERIC_DIGIT.source

  # A run of digits / number words. Not used by this class itself.
  DIGITS_REGEX = /\b(\s*#{digit})+\b/

  # A house number, one to five name tokens, then a street-type word.
  STREET_NAME_REGEX = /\b(\s*#{digit})+\s([a-z\d]+\.?\s*){1,5}\b(avenue|ave|street|st|court|ct|circle|boulevard|blvd|lane|ln|trail|tr|loop|lp|route|rt|drive|dr|road|rd|terrace|tr|way|wy|highway|hiway|hw)\b/

  # Seven digits / number words, optionally separated by non-word characters.
  PHONE_NUMBER_REGEX = /((#{digit})\W*?){3}((#{digit})\W*?){4}\b/

  # An http(s) URL.
  URL_REGEX = /(?:http|https):\/\/[a-z0-9]+(?:[\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(?:(?::[0-9]{1,5})?\/[^\s]*)?/

  # A pattern that matches nothing; used when the bad-word list is empty.
  NEVER_MATCH = /(?!)/

  # Values for #filterLevel: which vowel-based disguises are also checked.
  NONE = 0
  REPEATED_VOWELS = 1
  SWAPPABLE_VOWELS = 2
  SWAPPABLE_AND_REPEATED_VOWELS = 3
  SWAPPABLE_AND_REPEATED_VOWELS_INCLUDING_NONE = 4

  # One of the level constants above; defaults to NONE.
  attr_accessor :filterLevel

  def initialize
    @filterLevel = NONE
  end

  # Loads the word lists and derives the vowel-variant patterns.
  # Must be called before #filterString or #stripGoodWords.
  #
  # dictionaryFile - path to a file of whitespace-separated harmless words.
  # badwordslist   - path to a file with one bad word (regexp fragment) per line.
  def filterInit(dictionaryFile, badwordslist)
    # Hash keys give constant-time membership tests in #stripGoodWords.
    @goodWords = {}
    loadDictionary(dictionaryFile).each { |word| @goodWords[word] = true }

    # The original Java class requires three more word lists; the dating and
    # deviant lists are stubbed with fixed patterns here.
    @datingWordsRegex = /dating/
    @deviantWordsRegex = /deviant/
    @badWordsRegex = loadBadwords(badwordslist)

    vowels = /([aeiou])/
    source = @badWordsRegex.source
    @vowelSwappedAndRepeatedRegex = Regexp.new(source.gsub(vowels, "[aeiou]+"))
    @vowelSwappedAndRepeatedRegexIncludingEmpty = Regexp.new(source.gsub(vowels, "[aeiou]*"))
    # "\\1+" re-inserts the matched vowel followed by a "+" quantifier.
    @vowelRepeatedRegex = Regexp.new(source.gsub(vowels, "\\1+"))
    @vowelSwappedRegex = Regexp.new(source.gsub(vowels, "[aeiou]"))
  end

  # Reads every whitespace-separated word from the file at +path+.
  # Returns an Array of Strings in file order.
  def loadDictionary(path)
    # File.foreach closes the file once iteration finishes.
    File.foreach(path).flat_map { |line| line.split(" ") }
  end

  # Builds one alternation Regexp from the file at +path+ (one entry per
  # line, "\r" removed). Entries are used as regexp fragments, unescaped.
  # Blank lines are skipped because an empty alternative would match every
  # string; an empty list yields a pattern that matches nothing.
  def loadBadwords(path)
    words = File.read(path).delete("\r").split("\n").map { |w| w.strip }
    words.reject! { |w| w.empty? }
    return NEVER_MATCH if words.empty?

    Regexp.new("(" + words.join("|") + ")")
  end

  # Classifies +input+ and returns an Integer code:
  #    0: string is safe to post
  #    1: string contains an email address
  #    2: string contains a URL
  #    3: string contains a street address
  #    4: string contains a phone number
  #    5: string contains a dating word
  #    6: string contains a deviant word
  #    7: string contains a bad word as written
  #    9: string contains a disguised bad word (spaced out, padded with
  #       symbols, symbol-for-letter swaps, or the vowel tricks enabled by
  #       #filterLevel)
  # The header of the original port also lists -1 ("an exception occurred,
  # do not post"); this method does not rescue, so errors propagate instead.
  def filterString(input)
    input = input.strip.downcase
    return 0 if input.empty?

    return 1 if EMAIL_REGEX =~ input
    return 2 if URL_REGEX =~ input
    return 3 if STREET_NAME_REGEX =~ input
    return 4 if PHONE_NUMBER_REGEX =~ input

    # Normalise whitespace and punctuation before the word-level checks.
    workingCopy = input.gsub(/\s+/, " ")
    workingCopy.gsub!(/["',.;:?-]/, " ")
    workingCopy.gsub!(/!+\s/, " ")
    workingCopy.gsub!(/!+\z/, " ")
    workingCopy.gsub!(/\br\su/, " ")

    cleanVersion = stripGoodWords(workingCopy)
    return 0 if cleanVersion.empty?

    return 5 if @datingWordsRegex =~ cleanVersion
    return 6 if @deviantWordsRegex =~ cleanVersion
    return 7 if @badWordsRegex =~ cleanVersion

    # Compress the string (drop whitespace), then check it again.
    return 9 if @badWordsRegex =~ cleanVersion.gsub(/[ \t\n\f\r]/, "")

    # Zap everything that is not a letter, then check it again.
    return 9 if @badWordsRegex =~ cleanVersion.gsub(/[^a-z]/, "")

    # Replace look-alike characters with letters. The vertical characters
    # (! 1 |) are tried both as "i" and as "l".
    specialCharsReplaced_i = cleanVersion.tr("@683!1|0$+", "abbeiiiost")
    return 9 if @badWordsRegex =~ specialCharsReplaced_i

    specialCharsReplaced_l = cleanVersion.tr("@683!1|0$+", "abbelllost")
    return 9 if @badWordsRegex =~ specialCharsReplaced_l

    vowelRegex = vowelTrickRegex
    if vowelRegex && (vowelRegex =~ specialCharsReplaced_i || vowelRegex =~ specialCharsReplaced_l)
      return 9
    end

    # Nothing matched at any level: the string is safe.
    0
  end

  # Returns +input+ with every word found in the dictionary removed; the
  # remaining words are joined by single spaces ("" when none remain).
  def stripGoodWords(input)
    input.split(" ").reject { |word| @goodWords.include?(word) }.join(" ")
  end

  private

  # The vowel-variant pattern selected by @filterLevel, or nil for NONE and
  # for unrecognised levels.
  def vowelTrickRegex
    case @filterLevel
    when REPEATED_VOWELS then @vowelRepeatedRegex
    when SWAPPABLE_VOWELS then @vowelSwappedRegex
    when SWAPPABLE_AND_REPEATED_VOWELS then @vowelSwappedAndRepeatedRegex
    when SWAPPABLE_AND_REPEATED_VOWELS_INCLUDING_NONE then @vowelSwappedAndRepeatedRegexIncludingEmpty
    end
  end
end
4
183
  end
data/word_filter.gemspec CHANGED
@@ -6,7 +6,7 @@ require 'word_filter/version'
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "word_filter"
8
8
  spec.version = WordFilter::VERSION
9
- spec.authors = ["Huascar Oña"]
9
+ spec.authors = ["Huascar Ona"]
10
10
  spec.email = ["huascarking@hotmail.com"]
11
11
  spec.description = %q{A bad word filter for the input text.}
12
12
  spec.summary = %q{A word filter gem}
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
- - Huascar Oña
7
+ - Huascar Ona
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []