language_filter 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +98 -0
- data/Rakefile +11 -0
- data/config/exceptionlists/hate.txt +0 -0
- data/config/exceptionlists/mccormick.txt +0 -0
- data/config/exceptionlists/profanity.txt +1 -0
- data/config/exceptionlists/sex.txt +5 -0
- data/config/exceptionlists/violence.txt +5 -0
- data/config/matchlists/hate.txt +7 -0
- data/config/matchlists/mccormick.txt +342 -0
- data/config/matchlists/profanity.txt +10 -0
- data/config/{filters → matchlists}/sex.txt +13 -13
- data/config/{filters → matchlists}/violence.txt +4 -4
- data/lib/language_filter.rb +278 -166
- data/lib/language_filter/version.rb +2 -2
- data/test/lib/language_filter/methods_test.rb +66 -0
- data/test/lib/language_filter/version_test.rb +9 -0
- data/test/lists/simpsons-5000.txt +1 -0
- data/test/lists/wiktionary-50000.txt +1 -0
- data/test/test_helper.rb +111 -0
- metadata +23 -7
- data/config/filters/hate.txt +0 -6
- data/config/filters/profanity.txt +0 -10
@@ -1,21 +1,20 @@
|
|
1
|
-
|
1
|
+
sex\w*
|
2
2
|
blow ?job\w*
|
3
3
|
fellat\w*
|
4
4
|
felch\w*
|
5
|
-
\w*
|
5
|
+
\w*fuck\w*
|
6
6
|
wank\w*
|
7
|
-
|
7
|
+
cocks?
|
8
8
|
cock suck\w*
|
9
9
|
poll ?smok\w*
|
10
|
-
|
11
|
-
dick ?suck\w*
|
10
|
+
dicks?
|
12
11
|
fudge ?pack\w*
|
13
12
|
rim ?job\w*
|
14
13
|
knob ?gobbl\w*
|
15
|
-
anal
|
14
|
+
anal
|
16
15
|
rectums?
|
17
|
-
|
18
|
-
|
16
|
+
ass+
|
17
|
+
as*hole\w*
|
19
18
|
ballsacks?
|
20
19
|
scrotums?
|
21
20
|
bollocks
|
@@ -26,12 +25,12 @@ knobends?
|
|
26
25
|
manhoods?
|
27
26
|
wieners?
|
28
27
|
breasts?
|
29
|
-
tit
|
28
|
+
tit(t(ie|y))?s?
|
30
29
|
boob\w*
|
31
30
|
honkers?
|
32
31
|
cleavages?
|
33
32
|
vagina\w*
|
34
|
-
puss
|
33
|
+
puss(y|ies|ee)
|
35
34
|
muffs?
|
36
35
|
cunt\w*
|
37
36
|
twats?
|
@@ -45,12 +44,13 @@ homos?
|
|
45
44
|
sluts?
|
46
45
|
whor\w*
|
47
46
|
skank\w*
|
48
|
-
g+
|
47
|
+
g+h?[ae]ys?
|
49
48
|
dykes?
|
50
|
-
\w*
|
51
|
-
|
49
|
+
fag\w*
|
50
|
+
cumm?(ing|er)
|
52
51
|
jizz\w*
|
53
52
|
pubes?
|
53
|
+
puberty
|
54
54
|
pubic
|
55
55
|
smegma
|
56
56
|
boy ?butter
|
@@ -1,4 +1,4 @@
|
|
1
|
-
stab
|
1
|
+
stab(ing|ed|s|ber)?
|
2
2
|
kill\w*
|
3
3
|
beat ?up
|
4
4
|
beat the \w+ out of
|
@@ -6,8 +6,8 @@ beat the \w+ out of
|
|
6
6
|
fuck ?\w* up
|
7
7
|
murder\w*
|
8
8
|
genocide
|
9
|
-
shoot
|
10
|
-
shot
|
9
|
+
shoot (him|her|it|me|us|them)
|
10
|
+
shot (him|her|it|me|us|them)
|
11
11
|
gun\w*
|
12
12
|
phasers?
|
13
|
-
death
|
13
|
+
death( ray)?
|
data/lib/language_filter.rb
CHANGED
@@ -1,172 +1,284 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
require 'pathname'
|
2
|
-
require 'yaml'
|
3
4
|
require 'language_filter/error'
|
4
5
|
require 'language_filter/version'
|
5
6
|
|
6
7
|
module LanguageFilter
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
8
|
+
class Filter
  # Detects and masks words described by a list of regular-expression
  # fragments (the "matchlist"), skipping hits that sit inside a phrase from
  # the "exceptionlist".
  #
  # Lists may be given as an Array of pattern strings, a String/Pathname path
  # to a file with one pattern per line, or one of the packaged list symbols.

  attr_accessor :creative_letters
  attr_reader :matchlist, :exceptionlist, :replacement, :creative_matchlist

  CREATIVE_BEG_REGEX = '(?<=\\s|\\A|_|\\-|\\.)'
  CREATIVE_END_REGEX = '(?=\\b|\\s|\\z|_|\\-|\\.)'

  DEFAULT_EXCEPTIONLIST = [].freeze
  DEFAULT_MATCHLIST = File.dirname(__FILE__) + "/../config/matchlists/profanity.txt"
  DEFAULT_REPLACEMENT = :stars
  DEFAULT_CREATIVE_LETTERS = false

  # Symbols that map to a packaged "<name>.txt" list file.
  PACKAGED_LISTS = [:hate, :profanity, :sex, :violence].freeze

  # Replacement styles accepted by validate_replacement.
  KNOWN_REPLACEMENTS = [:default, :garbled, :vowels, :stars, :nonconsonants].freeze

  # Finds the "(" that opens a capturing group so it can be rewritten as a
  # non-capturing "(?:"; String#scan yields capture arrays instead of the
  # matched text when a pattern contains capturing groups.
  CAPTURE_GROUP_OPENER = /(?<=[^\\]|\A)\((?=[^(\?\:)])/

  # Regex fragment substituted for each plain letter when building the
  # creative matchlist. Letters listed together share one fragment.
  CREATIVE_LETTER_PATTERNS = {
    %w[a]   => '(?:(?:a|@|4|\\^|/\\\\|/\\-\\\\|aye?)+)',
    %w[b]   => '(?:(?:b|i3|l3|13|\\|3|/3|\\\\3|3|8|6|\\u00df|p\\>|\\|\\:|[^a-z]bee+[^a-z])+)',
    %w[c k] => '(?:(?:c|\\u00a9|\\u00a2|\\(|\\[|[^a-z]cee+[^a-z]|[^a-z]see+[^a-z]|k|x|[\\|\\[\\]\\)\\(li1\\!\\u00a1][\\<\\{\\(]|[^a-z][ck]ay+[^a-z])+)',
    %w[d]   => '(?:(?:d|\\)|\\|\\)|\\[\\)|\\?|\\|\\>|\\|o|[^a-z]dee+[^a-z])+)',
    %w[e]   => '(?:(?:e|3|\\&|\\u20ac|\\u00eb|\\[\\-)+)',
    %w[f]   => '(?:(?:f|ph|\\u0192|[\\|\\}\\{\\\\/\\(\\)\\[\\]1il\\!][\\=\\#]|[^a-z]ef+[^a-z])+)',
    %w[g]   => '(?:(?:g|6|9|\\&|c\\-|\\(_\\+|[^a-z]gee+[^a-z])+)',
    %w[h]   => '(?:(?:h|\\#|[\\|\\}\\{\\\\/\\(\\)\\[\\]]\\-?[\\|\\}\\{\\\\/\\(\\)\\[\\]])+)',
    %w[i l] => '(?:(?:i|l|1|\\!|\\u00a1|\\||\\]|\\[|\\\\|/|[^a-z]eye[^a-z]|\\u00a3|[\\|li1\\!\\u00a1\\[\\]\\(\\)\\{\\}]_|\\u00ac|[^a-z]el+[^a-z]))',
    %w[j]   => '(?:(?:j|\\]|\\u00bf|_\\||_/|\\</|\\(/|[^a-z]jay+[^a-z])+)',
    %w[m]   => '(?:(?:m|[\\|\\(\\)/](?:\\\\/|v|\\|)[\\|\\(\\)\\\\]|\\^\\^|[^a-z]em+[^a-z])+)',
    %w[n]   => '(?:(?:n|[\\|/\\[\\]\\<\\>]\\\\[\\|/\\[\\]\\<\\>]|/v|\\^/|[^a-z]en+[^a-z])+)',
    %w[o]   => '(?:(?:o|0|\\(\\)|\\[\\]|\\u00b0|[^a-z]oh+[^a-z])+)',
    %w[p]   => '(?:(?:p|\\u00b6|[\\|li1\\[\\]\\!\\u00a1/\\\\][\\*o\\u00b0\\"\\>7\\^]|[^a-z]pee+[^a-z])+)',
    %w[q]   => '(?:(?:q|9|(?:0|\\(\\)|\\[\\])_|\\(_\\,\\)|\\<\\||[^a-z][ck]ue*|qu?eue*[^a-z])+)',
    %w[r]   => '(?:(?:r|[/1\\|li]?[2\\^\\?z]|\\u00ae|[^a-z]ar+[^a-z])+)',
    %w[s z] => '(?:(?:s|\\$|5|\\u00a7|[^a-z]es+[^a-z]|z|2|7_|\\~/_|\\>_|\\%|[^a-z]zee+[^a-z])+)',
    %w[t]   => '(?:(?:t|7|\\+|\\u2020|\\-\\|\\-|\\\'\\]\\[\\\')+)',
    %w[u v] => '(?:(?:u|v|\\u00b5|[\\|\\(\\)\\[\\]\\{\\}]_[\\|\\(\\)\\[\\]\\{\\}]|\\L\\||\\/|[^a-z]you[^a-z]|[^a-z]yoo+[^a-z]|[^a-z]vee+[^a-z]))',
    %w[w]   => '(?:(?:w|vv|\\\\/\\\\/|\\\\\\|/|\\\\\\\\\\\'|\\\'//|\\\\\\^/|\\(n\\)|[^a-z]do?u+b+l+e*[^a-z]?(?:u+|you|yoo+)[^a-z])+)',
    %w[x]   => '(?:(?:x|\\>\\<|\\%|\\*|\\}\\{|\\)\\(|[^a-z]e[ck]+s+[^a-z]|[^a-z]ex+[^a-z])+)',
    %w[y]   => '(?:(?:y|\\u00a5|j|\\\'/|[^a-z]wh?(?:y+|ie+)[^a-z])+)'
  }.each_with_object({}) { |(letters, pattern), table|
    letters.each { |letter| table[letter] = pattern }
  }.freeze

  # Builds a filter.
  #
  # options[:matchlist]        - Array, String/Pathname path, or Symbol
  #                              (:default or a packaged list); defaults to
  #                              the file at DEFAULT_MATCHLIST.
  # options[:exceptionlist]    - same formats; when omitted and the matchlist
  #                              is a Symbol, the same-named file in the
  #                              "exceptionlists" folder is loaded.
  # options[:replacement]      - one of KNOWN_REPLACEMENTS.
  # options[:creative_letters] - truthy to match look-alike spellings.
  #
  # Raises the LanguageFilter error classes used by the validators below.
  def initialize(options={})
    @creative_letters = options[:creative_letters] || DEFAULT_CREATIVE_LETTERS

    requested_matchlist = options[:matchlist]
    validate_list_content(requested_matchlist) if requested_matchlist
    # :default (and a missing option) both mean the default list file; passing
    # :default straight to set_list_content would yield an empty list.
    @matchlist = set_list_content(resolve_default(requested_matchlist || :default, DEFAULT_MATCHLIST))
    @creative_matchlist = @matchlist.map { |list_item| use_creative_letters(list_item) }

    requested_exceptions = options[:exceptionlist]
    @exceptionlist =
      if requested_exceptions
        validate_list_content(requested_exceptions)
        set_list_content(resolve_default(requested_exceptions, DEFAULT_EXCEPTIONLIST))
      elsif requested_matchlist.is_a?(Symbol)
        set_list_content(requested_matchlist, folder: "exceptionlists")
      else
        set_list_content(DEFAULT_EXCEPTIONLIST)
      end

    # Same normalization as replacement=, so :default means the same thing
    # whether it is passed here or assigned later.
    @replacement = resolve_default(options[:replacement] || DEFAULT_REPLACEMENT, DEFAULT_REPLACEMENT)
    validate_replacement
  end

  # SETTERS

  # Replaces the matchlist and rebuilds the creative matchlist. When content
  # is a Symbol and no exceptions are set yet, the same-named exception list
  # is loaded as well.
  def matchlist=(content)
    validate_list_content(content)
    @matchlist = set_list_content(resolve_default(content, DEFAULT_MATCHLIST))
    if content.is_a?(Symbol) && @exceptionlist.empty?
      @exceptionlist = set_list_content(content, folder: "exceptionlists")
    end
    @creative_matchlist = @matchlist.map { |list_item| use_creative_letters(list_item) }
  end

  # Replaces the exceptionlist; :default restores DEFAULT_EXCEPTIONLIST.
  def exceptionlist=(content)
    validate_list_content(content)
    @exceptionlist = set_list_content(resolve_default(content, DEFAULT_EXCEPTIONLIST))
  end

  # Sets the replacement style; :default selects DEFAULT_REPLACEMENT.
  # Raises LanguageFilter::UnknownReplacement for an unknown style.
  def replacement=(value)
    @replacement = resolve_default(value, DEFAULT_REPLACEMENT)
    validate_replacement
  end

  # LANGUAGE

  # Returns true when text contains at least one matchlist hit that is not
  # covered by an exception. Text shorter than three characters never matches.
  def match?(text)
    subject = text.to_s
    return false unless subject.size >= 3
    each_unprotected_match(subject) { |_word| return true }
    false
  end

  # Returns the unique matched substrings of text that are not covered by an
  # exception, in order of discovery.
  def matched(text)
    words = []
    subject = text.to_s
    return words unless subject.size >= 3
    each_unprotected_match(subject) { |word| words << word }
    words.uniq
  end

  # Masks every unprotected hit according to the replacement style and
  # returns the result. A mutable String is modified in place (as before);
  # a frozen String is copied first instead of raising.
  def sanitize(text)
    return text unless text.to_s.size >= 3
    text = text.dup if text.is_a?(String) && text.frozen?
    active_matchlist.each do |list_item|
      search_from = 0
      text.gsub!(compile_pattern(list_item)) do
        hit = Regexp.last_match
        first = hit.begin(0)
        last = hit.end(0) - 1
        shielded = protected_by_exceptionlist?(first, last, text, search_from)
        search_from = last + 1
        shielded ? hit[0] : replace(hit[0])
      end
    end
    text
  end

  private

  # VALIDATIONS

  # Returns true when content is an acceptable list source, otherwise raises
  # one of the LanguageFilter content errors.
  def validate_list_content(content)
    case content
    when Array
      content.all? { |c| c.is_a?(String) } ||
        raise(LanguageFilter::EmptyContentList, "List content array must contain only strings.")
    when String
      # File.exists? was removed in Ruby 3.2.
      File.exist?(content) ||
        raise(LanguageFilter::UnkownContentFile, "List content file \"#{content}\" can't be found.")
    when Pathname
      content.exist? ||
        raise(LanguageFilter::UnkownContentFile, "List content file \"#{content}\" can't be found.")
    when Symbol
      return true if content == :default || PACKAGED_LISTS.include?(content)
      raise(LanguageFilter::UnkownContent, "The only accepted symbols are :default, :hate, :profanity, :sex, and :violence.")
    else
      raise LanguageFilter::UnkownContent, "The list content can be either an Array, Pathname, or String path to a file."
    end
  end

  # Raises LanguageFilter::UnknownReplacement unless @replacement is known.
  def validate_replacement
    return if KNOWN_REPLACEMENTS.include?(@replacement)
    raise LanguageFilter::UnknownReplacement, "This is not a known replacement type."
  end

  # HELPERS

  # Maps the :default placeholder to the given fallback; anything else is
  # returned untouched.
  def resolve_default(value, fallback)
    value == :default ? fallback : value
  end

  # Turns a list source into an Array of pattern strings. Packaged symbols
  # are read from options[:folder] (default "matchlists"); unknown input
  # yields an empty list.
  def set_list_content(list,options={})
    case list
    when Symbol
      return [] unless PACKAGED_LISTS.include?(list)
      load_list File.dirname(__FILE__) + "/../config/#{options[:folder] || "matchlists"}/#{list}.txt"
    when Array then list.map { |list_item| list_item.gsub(CAPTURE_GROUP_OPENER, '(?:') }
    when String, Pathname then load_list list.to_s
    else []
    end
  end

  # Reads one pattern per line. Line endings (LF or CRLF) are stripped and
  # blank lines are skipped: an empty pattern would match at every word
  # boundary, and an empty exception would never let the exception search
  # advance.
  def load_list(filepath)
    File.readlines(filepath)
        .map { |line| line.chomp.gsub(CAPTURE_GROUP_OPENER, '(?:') }
        .reject(&:empty?)
  end

  # Expands each unescaped letter of a pattern into its look-alike fragment.
  # A character directly after a backslash is copied verbatim, except that
  # "\w" becomes "\S" so symbol spellings are covered too.
  def use_creative_letters(text)
    previous = ""
    text.each_char.map do |char|
      piece =
        if previous != '\\'
          CREATIVE_LETTER_PATTERNS.fetch(char.downcase, char)
        elsif char.downcase == 'w'
          'S'
        else
          char
        end
      previous = char
      piece
    end.join
  end

  # The list that matching should use for the current creative_letters mode.
  def active_matchlist
    @creative_letters ? @creative_matchlist : @matchlist
  end

  # Wraps a list item in the active boundary fragments, case-insensitively.
  def compile_pattern(list_item)
    Regexp.new("#{beg_regex}#{list_item}#{end_regex}", Regexp::IGNORECASE)
  end

  # Yields the full text of every matchlist hit in text that is not covered
  # by an exception. Patterns are compiled one at a time so callers that stop
  # early (match?) do not pay for the rest of the list.
  def each_unprotected_match(text)
    active_matchlist.each do |list_item|
      search_from = 0
      text.scan(compile_pattern(list_item)) do
        hit = Regexp.last_match
        first = hit.begin(0)
        last = hit.end(0) - 1
        # hit[0] is always the whole match, even if a capture group survived.
        yield hit[0] unless protected_by_exceptionlist?(first, last, text, search_from)
        search_from = last + 1
      end
    end
  end

  # Returns true when some exception phrase, found at or after start_at,
  # spans the whole match (starts at or before match_start and ends at or
  # after match_end).
  def protected_by_exceptionlist?(match_start,match_end,text,start_at)
    @exceptionlist.any? do |list_item|
      pattern = Regexp.new("\\b#{list_item}\\b", Regexp::IGNORECASE)
      position = start_at
      covered = false
      # Searching the full text from an offset keeps \b context intact.
      while (found = pattern.match(text, position))
        exception_start = found.begin(0)
        break if exception_start > match_start
        exception_end = found.end(0) - 1
        if exception_end >= match_end
          covered = true
          break
        end
        # Always move forward, even past a zero-width exception match.
        position = [exception_end, exception_start].max + 1
      end
      covered
    end
  end

  # This was moved to private because users should just use sanitize for any content
  def replace(word)
    case @replacement
    when :vowels then word.gsub(/[aeiou]/i, '*')
    when :stars then '*' * word.size
    when :nonconsonants then word.gsub(/[^bcdfghjklmnpqrstvwxyz]/i, '*')
    when :default, :garbled then '$@!#%'
    else raise LanguageFilter::UnknownReplacement, "#{@replacement} is not a known replacement type."
    end
  end

  # Leading boundary fragment for the current mode.
  def beg_regex
    @creative_letters ? CREATIVE_BEG_REGEX : '\\b'
  end

  # Trailing boundary fragment for the current mode.
  def end_regex
    @creative_letters ? CREATIVE_END_REGEX : '\\b'
  end
end
|
172
284
|
end
|