word_filter 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 74a627f37b73f79c444ab06cf7d1c26c388dc24e
4
- data.tar.gz: 7932280b60acab39f5cc30d7ab3ce93c0313cedf
3
+ metadata.gz: a81dac029f338386b201365215420886d300afcb
4
+ data.tar.gz: e108e5ef8d32656eb688d5e3690e1af0fbedbea4
5
5
  SHA512:
6
- metadata.gz: 91102171d04402a6d8e7210e2302f77622ed9919a29b95fdee8401095cadb53307b57dd07e0277bdc731e89a2e0e59510328b94d50d0ce7b3a99d6f206ec2333
7
- data.tar.gz: 08a72912640cf7c8c6bd8df6d8295bb8c7b450b5b34f57f67348bce9bea5979e5238bdce8bfc38e12c29983ab385eb58433e4dda489dafd2ce198c4e96614fce
6
+ metadata.gz: 3a909da71e2df9ddf2acf718510c7f223712fdbc130b4129f867f4b3840eeb928992b06ed1dc8584837624204f859e72d6f1e70fd1b0b928466274db63d88340
7
+ data.tar.gz: 1eeeaeba7351963169e980e3bd8aa269191ff6a37bced95ef28aa01f3a5a14b7aba24eab78e6ce115adb9e48c02b4e2b706ed9b8ea1da203ba7bcb1cb8994b94
@@ -1,3 +1,3 @@
1
- module WordFilterVersion
2
- VERSION = "0.0.7"
1
+ module WordFilter
2
+ VERSION = "0.0.8"
3
3
  end
data/lib/word_filter.rb CHANGED
@@ -1,183 +1,4 @@
1
1
  require "word_filter/version"
2
2
 
3
3
  module WordFilter
4
- class Filter
5
-
6
- @@emailRegex = /[a-zA-Z0-9._%+-]+@[a-z0-9.-]+\\.[a-zA-Z]{2,4}/
7
- @@alphaNumericDigit = /(zero|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|\d)/;
8
- @@digitsRegex = Regexp.new("\b(\s*" + @@alphaNumericDigit.source + ")+\b")
9
- @@streetNameRegex = Regexp.new("\b(\s*" + @@alphaNumericDigit.source + ")+\s([a-z\d]+\.?\s*){1,5}\b(avenue|ave|street|st|court|ct|circle|boulevard|blvd|lane|ln|trail|tr|loop|lp|route|rt|drive|dr|road|rd|terrace|tr|way|wy|highway|hiway|hw)\b")
10
- @@phoneNumber = Regexp.new("((" + @@alphaNumericDigit.source + ")\W*?){3}((" + @@alphaNumericDigit.source + ")\W*?){4}\b")
11
- @@urlRegex = /(?:http|https):\/\/[a-z0-9]+(?:[\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(?:(?::[0-9]{1,5})?\/[^\s]*)?/
12
-
13
- NONE = 0;
14
- REPEATED_VOWELS = 1;
15
- SWAPPABLE_VOWELS = 2;
16
- SWAPPABLE_AND_REPEATED_VOWELS = 3;
17
- SWAPPABLE_AND_REPEATED_VOWELS_INCLUDING_NONE = 4;
18
-
19
-
20
-
21
- def initialize()
22
- @filterLevel = NONE
23
- end
24
-
25
- attr_accessor :filterLevel
26
-
27
- def filterInit(dictionaryFile, badwordslist)
28
- @goodWords = loadDictionary(dictionaryFile)
29
- #The original java class requiere other three word's list
30
- @datingWordsRegex = /dating/
31
- @deviantWordsRegex = /deviant/
32
- @badWordsRegex = loadBadwords(badwordslist)
33
-
34
- vowels = /([aeiou])/
35
- @vowelSwappedAndRepeatedRegex = Regexp.new(@badWordsRegex.source.gsub(vowels, "[aeiou]+"))
36
- @vowelSwappedAndRepeatedRegexIncludingEmpty = Regexp.new(@badWordsRegex.source.gsub(vowels, "[aeiou]*"))
37
- @vowelRepeatedRegex = Regexp.new(@badWordsRegex.source.gsub(vowels, "\\1+"))
38
- @vowelSwappedRegex = Regexp.new(@badWordsRegex.source.gsub(vowels, "[aeiou]"))
39
-
40
- end
41
-
42
-
43
- def loadDictionary(path)
44
- words = []
45
- File.open(path, "r").each_line do |line|
46
- splitted = line.split(" ")
47
- splitted.each do |w|
48
- words << w
49
- end
50
- end
51
- return words
52
- end
53
-
54
- def loadBadwords(path)
55
- words = File.read(path).gsub("\r", '').split("\n")
56
- regex = words.join('|')
57
- regex = '(' + regex + ')'
58
- regex = Regexp.new(regex)
59
- return regex
60
- end
61
-
62
- def filterString(input)
63
-
64
- # Output:
65
- # -1: An exception occured while trying to check the string, do not post
66
- # 0: string is safe to post
67
- # 1: string contains an email address
68
- # 2: string contains a URL
69
- # 3: string contains a street address
70
- # 4: string contains a phone number
71
- # 5: string contains a dating word
72
- # 6: string contains a deviant word
73
- # 9: string contains any other bad word
74
-
75
- input = input.strip.downcase
76
- workingCopy = input
77
-
78
- if input == ""
79
- return 0
80
- end
81
-
82
- if @@emailRegex.match(input)
83
- return 1
84
- end
85
-
86
- if @@urlRegex.match(input)
87
- return 2
88
- end
89
-
90
- if @@streetNameRegex.match(input)
91
- return 3
92
- end
93
-
94
- if @@phoneNumber.match(input)
95
- return 4
96
- end
97
-
98
- workingCopy.gsub("\s+", " ")
99
- workingCopy.gsub!(/["',.;:?-]/, " ")
100
- workingCopy.gsub!(/!+\s/, " ")
101
- workingCopy.gsub!(/!+\z/, " ")
102
- workingCopy.gsub!(/\br\su/, " ")
103
-
104
- cleanVersion = stripGoodWords(workingCopy)
105
-
106
- if cleanVersion == nil or cleanVersion.length == 0
107
- return 0
108
- end
109
-
110
- if @datingWordsRegex.match(cleanVersion)
111
- return 5
112
- end
113
-
114
- if @deviantWordsRegex.match(cleanVersion)
115
- return 6
116
- end
117
-
118
- if @badWordsRegex.match(cleanVersion)
119
- return 7
120
- end
121
-
122
- #let's try various combinations of bad word tricks
123
- currentVersion = cleanVersion
124
-
125
- #compress the string then check it again
126
- if @badWordsRegex.match(currentVersion.gsub("[ \t\n\f\r]", ""))
127
- return 9
128
- end
129
-
130
- #zap special characters and check it again
131
- if @badWordsRegex.match(currentVersion.gsub("[^a-z]", ""))
132
- return 9
133
- end
134
-
135
- #replace certain special characters with their letter equivalents
136
- #NOTE: This one maps vertical non-letter chars (!1|) to i
137
- specialCharsReplaced_i = currentVersion.tr("@683!1|0$+","abbeiiiost")
138
- if @badWordsRegex.match(specialCharsReplaced_i)
139
- return 9
140
- end
141
-
142
- #replace certain special characters with their letter equivalents
143
- #NOTE: This one maps vertical non-letter chars (!1|) to l
144
- specialCharsReplaced_l = currentVersion.tr("@683!1|0$+","abbelllost")
145
- if @badWordsRegex.match(specialCharsReplaced_l)
146
- return 9
147
- end
148
-
149
- case @filterLevel
150
- when NONE
151
- return 0
152
- when REPEATED_VOWELS
153
- if @vowelRepeatedRegex.match(specialCharsReplaced_i) or @vowelRepeatedRegex.match(specialCharsReplaced_l)
154
- return 9
155
- end
156
- when SWAPPABLE_VOWELS
157
- if @vowelSwappedRegex.match(specialCharsReplaced_i) or @vowelSwappedRegex.match(specialCharsReplaced_l)
158
- return 9
159
- end
160
- when SWAPPABLE_AND_REPEATED_VOWELS
161
- if @vowelSwappedAndRepeatedRegex.match(specialCharsReplaced_i) or @vowelSwappedAndRepeatedRegex.match(specialCharsReplaced_l)
162
- return 9
163
- end
164
- when SWAPPABLE_AND_REPEATED_VOWELS_INCLUDING_NONE
165
- if @vowelSwappedAndRepeatedRegexIncludingEmpty.match(specialCharsReplaced_i) or @vowelSwappedAndRepeatedRegexIncludingEmpty.match(specialCharsReplaced_l)
166
- return 9
167
- end
168
- end
169
-
170
- end
171
-
172
- def stripGoodWords(input)
173
- result = []
174
- input = input.split(" ")
175
- input.each do |w|
176
- if not @goodWords.include? w
177
- result << w
178
- end
179
- end
180
- return result.join(" ")
181
- end
182
- end
183
4
  end
data/word_filter.gemspec CHANGED
@@ -5,7 +5,7 @@ require 'word_filter/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "word_filter"
8
- spec.version = WordFilterVersion::VERSION
8
+ spec.version = WordFilter::VERSION
9
9
  spec.authors = ["Huascar Oña"]
10
10
  spec.email = ["huascarking@hotmail.com"]
11
11
  spec.description = %q{A bad word filter for the input text.}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Huascar Oña