language_filter 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +98 -0
- data/Rakefile +11 -0
- data/config/exceptionlists/hate.txt +0 -0
- data/config/exceptionlists/mccormick.txt +0 -0
- data/config/exceptionlists/profanity.txt +1 -0
- data/config/exceptionlists/sex.txt +5 -0
- data/config/exceptionlists/violence.txt +5 -0
- data/config/matchlists/hate.txt +7 -0
- data/config/matchlists/mccormick.txt +342 -0
- data/config/matchlists/profanity.txt +10 -0
- data/config/{filters → matchlists}/sex.txt +13 -13
- data/config/{filters → matchlists}/violence.txt +4 -4
- data/lib/language_filter.rb +278 -166
- data/lib/language_filter/version.rb +2 -2
- data/test/lib/language_filter/methods_test.rb +66 -0
- data/test/lib/language_filter/version_test.rb +9 -0
- data/test/lists/simpsons-5000.txt +1 -0
- data/test/lists/wiktionary-50000.txt +1 -0
- data/test/test_helper.rb +111 -0
- metadata +23 -7
- data/config/filters/hate.txt +0 -6
- data/config/filters/profanity.txt +0 -10
@@ -1,21 +1,20 @@
|
|
1
|
-
|
1
|
+
sex\w*
|
2
2
|
blow ?job\w*
|
3
3
|
fellat\w*
|
4
4
|
felch\w*
|
5
|
-
\w*
|
5
|
+
\w*fuck\w*
|
6
6
|
wank\w*
|
7
|
-
|
7
|
+
cocks?
|
8
8
|
cock suck\w*
|
9
9
|
poll ?smok\w*
|
10
|
-
|
11
|
-
dick ?suck\w*
|
10
|
+
dicks?
|
12
11
|
fudge ?pack\w*
|
13
12
|
rim ?job\w*
|
14
13
|
knob ?gobbl\w*
|
15
|
-
anal
|
14
|
+
anal
|
16
15
|
rectums?
|
17
|
-
|
18
|
-
|
16
|
+
ass+
|
17
|
+
as*hole\w*
|
19
18
|
ballsacks?
|
20
19
|
scrotums?
|
21
20
|
bollocks
|
@@ -26,12 +25,12 @@ knobends?
|
|
26
25
|
manhoods?
|
27
26
|
wieners?
|
28
27
|
breasts?
|
29
|
-
tit
|
28
|
+
tit(t(ie|y))?s?
|
30
29
|
boob\w*
|
31
30
|
honkers?
|
32
31
|
cleavages?
|
33
32
|
vagina\w*
|
34
|
-
puss
|
33
|
+
puss(y|ies|ee)
|
35
34
|
muffs?
|
36
35
|
cunt\w*
|
37
36
|
twats?
|
@@ -45,12 +44,13 @@ homos?
|
|
45
44
|
sluts?
|
46
45
|
whor\w*
|
47
46
|
skank\w*
|
48
|
-
g+
|
47
|
+
g+h?[ae]ys?
|
49
48
|
dykes?
|
50
|
-
\w*
|
51
|
-
|
49
|
+
fag\w*
|
50
|
+
cumm?(ing|er)
|
52
51
|
jizz\w*
|
53
52
|
pubes?
|
53
|
+
puberty
|
54
54
|
pubic
|
55
55
|
smegma
|
56
56
|
boy ?butter
|
@@ -1,4 +1,4 @@
|
|
1
|
-
stab
|
1
|
+
stab(ing|ed|s|ber)?
|
2
2
|
kill\w*
|
3
3
|
beat ?up
|
4
4
|
beat the \w+ out of
|
@@ -6,8 +6,8 @@ beat the \w+ out of
|
|
6
6
|
fuck ?\w* up
|
7
7
|
murder\w*
|
8
8
|
genocide
|
9
|
-
shoot
|
10
|
-
shot
|
9
|
+
shoot (him|her|it|me|us|them)
|
10
|
+
shot (him|her|it|me|us|them)
|
11
11
|
gun\w*
|
12
12
|
phasers?
|
13
|
-
death
|
13
|
+
death( ray)?
|
data/lib/language_filter.rb
CHANGED
@@ -1,172 +1,284 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
require 'pathname'
|
2
|
-
require 'yaml'
|
3
4
|
require 'language_filter/error'
|
4
5
|
require 'language_filter/version'
|
5
6
|
|
6
7
|
module LanguageFilter
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
8
|
+
# Detects and masks unwanted words in text.
#
# A filter holds a "matchlist" (regex source strings describing words to
# catch) and an "exceptionlist" (regex source strings describing phrases
# that shield a match they fully contain). Lists may be supplied as an Array
# of Strings, a String/Pathname pointing at a file with one pattern per
# line, or one of the Symbols :default, :hate, :profanity, :sex, :violence.
#
# Options accepted by #initialize (all optional):
#   :matchlist        - list content, see above (default: bundled profanity list)
#   :exceptionlist    - list content, see above (default: empty; when the
#                       matchlist is a Symbol, the exception list of the same name)
#   :replacement      - :stars (default), :vowels, :nonconsonants, :garbled,
#                       or :default (alias for :stars)
#   :creative_letters - truthy to also catch look-alike spellings ("f00")
class Filter
  # Only creative_letters gets a generated writer; the other writers are
  # hand-written below, so generating them here would just be redefined.
  attr_accessor :creative_letters
  attr_reader :matchlist, :exceptionlist, :replacement, :creative_matchlist

  # Word-boundary replacements used in creative mode, where a "word" may be
  # built out of punctuation and plain \b is therefore not reliable.
  CREATIVE_BEG_REGEX = '(?<=\\s|\\A|_|\\-|\\.)'.freeze
  CREATIVE_END_REGEX = '(?=\\b|\\s|\\z|_|\\-|\\.)'.freeze

  DEFAULT_EXCEPTIONLIST = [].freeze
  DEFAULT_MATCHLIST = (File.dirname(__FILE__) + "/../config/matchlists/profanity.txt").freeze
  DEFAULT_REPLACEMENT = :stars
  DEFAULT_CREATIVE_LETTERS = false

  # Symbols that name a bundled list file (config/<folder>/<name>.txt).
  BUILTIN_LISTS = [:hate, :profanity, :sex, :violence].freeze

  # Finds an unescaped "(" that opens a capturing group so it can be turned
  # into a non-capturing "(?:" group.
  CAPTURE_GROUP_OPENER = /(?<=[^\\]|\A)\((?=[^(\?\:)])/

  # Regex fragments substituted for each plain letter when building the
  # creative matchlist. Letters that look or sound alike share a fragment.
  CREATIVE_C_K = '(?:(?:c|\\u00a9|\\u00a2|\\(|\\[|[^a-z]cee+[^a-z]|[^a-z]see+[^a-z]|k|x|[\\|\\[\\]\\)\\(li1\\!\\u00a1][\\<\\{\\(]|[^a-z][ck]ay+[^a-z])+)'.freeze
  CREATIVE_I_L = '(?:(?:i|l|1|\\!|\\u00a1|\\||\\]|\\[|\\\\|/|[^a-z]eye[^a-z]|\\u00a3|[\\|li1\\!\\u00a1\\[\\]\\(\\)\\{\\}]_|\\u00ac|[^a-z]el+[^a-z]))'.freeze
  CREATIVE_S_Z = '(?:(?:s|\\$|5|\\u00a7|[^a-z]es+[^a-z]|z|2|7_|\\~/_|\\>_|\\%|[^a-z]zee+[^a-z])+)'.freeze
  CREATIVE_U_V = '(?:(?:u|v|\\u00b5|[\\|\\(\\)\\[\\]\\{\\}]_[\\|\\(\\)\\[\\]\\{\\}]|\\L\\||\\/|[^a-z]you[^a-z]|[^a-z]yoo+[^a-z]|[^a-z]vee+[^a-z]))'.freeze

  CREATIVE_SUBSTITUTIONS = {
    'a' => '(?:(?:a|@|4|\\^|/\\\\|/\\-\\\\|aye?)+)',
    'b' => '(?:(?:b|i3|l3|13|\\|3|/3|\\\\3|3|8|6|\\u00df|p\\>|\\|\\:|[^a-z]bee+[^a-z])+)',
    'c' => CREATIVE_C_K,
    'k' => CREATIVE_C_K,
    'd' => '(?:(?:d|\\)|\\|\\)|\\[\\)|\\?|\\|\\>|\\|o|[^a-z]dee+[^a-z])+)',
    'e' => '(?:(?:e|3|\\&|\\u20ac|\\u00eb|\\[\\-)+)',
    'f' => '(?:(?:f|ph|\\u0192|[\\|\\}\\{\\\\/\\(\\)\\[\\]1il\\!][\\=\\#]|[^a-z]ef+[^a-z])+)',
    'g' => '(?:(?:g|6|9|\\&|c\\-|\\(_\\+|[^a-z]gee+[^a-z])+)',
    'h' => '(?:(?:h|\\#|[\\|\\}\\{\\\\/\\(\\)\\[\\]]\\-?[\\|\\}\\{\\\\/\\(\\)\\[\\]])+)',
    'i' => CREATIVE_I_L,
    'l' => CREATIVE_I_L,
    'j' => '(?:(?:j|\\]|\\u00bf|_\\||_/|\\</|\\(/|[^a-z]jay+[^a-z])+)',
    'm' => '(?:(?:m|[\\|\\(\\)/](?:\\\\/|v|\\|)[\\|\\(\\)\\\\]|\\^\\^|[^a-z]em+[^a-z])+)',
    'n' => '(?:(?:n|[\\|/\\[\\]\\<\\>]\\\\[\\|/\\[\\]\\<\\>]|/v|\\^/|[^a-z]en+[^a-z])+)',
    'o' => '(?:(?:o|0|\\(\\)|\\[\\]|\\u00b0|[^a-z]oh+[^a-z])+)',
    'p' => '(?:(?:p|\\u00b6|[\\|li1\\[\\]\\!\\u00a1/\\\\][\\*o\\u00b0\\"\\>7\\^]|[^a-z]pee+[^a-z])+)',
    'q' => '(?:(?:q|9|(?:0|\\(\\)|\\[\\])_|\\(_\\,\\)|\\<\\||[^a-z][ck]ue*|qu?eue*[^a-z])+)',
    'r' => '(?:(?:r|[/1\\|li]?[2\\^\\?z]|\\u00ae|[^a-z]ar+[^a-z])+)',
    's' => CREATIVE_S_Z,
    'z' => CREATIVE_S_Z,
    't' => '(?:(?:t|7|\\+|\\u2020|\\-\\|\\-|\\\'\\]\\[\\\')+)',
    'u' => CREATIVE_U_V,
    'v' => CREATIVE_U_V,
    'w' => '(?:(?:w|vv|\\\\/\\\\/|\\\\\\|/|\\\\\\\\\\\'|\\\'//|\\\\\\^/|\\(n\\)|[^a-z]do?u+b+l+e*[^a-z]?(?:u+|you|yoo+)[^a-z])+)',
    'x' => '(?:(?:x|\\>\\<|\\%|\\*|\\}\\{|\\)\\(|[^a-z]e[ck]+s+[^a-z]|[^a-z]ex+[^a-z])+)',
    'y' => '(?:(?:y|\\u00a5|j|\\\'/|[^a-z]wh?(?:y+|ie+)[^a-z])+)'
  }.freeze

  # Builds a filter from the options described on the class.
  #
  # Raises LanguageFilter::UnkownContent, LanguageFilter::UnkownContentFile
  # or LanguageFilter::EmptyContentList for unusable list content, and
  # LanguageFilter::UnknownReplacement for an unknown replacement type.
  def initialize(options={})
    @creative_letters = options[:creative_letters] || DEFAULT_CREATIVE_LETTERS

    requested_matchlist = options[:matchlist]
    validate_list_content(requested_matchlist) if requested_matchlist
    # :default is resolved by set_list_content, so an explicit
    # `matchlist: :default` behaves exactly like omitting the option.
    @matchlist = set_list_content(requested_matchlist || :default)
    @creative_matchlist = @matchlist.map { |list_item| use_creative_letters(list_item) }

    requested_exceptionlist = options[:exceptionlist]
    @exceptionlist =
      if requested_exceptionlist
        validate_list_content(requested_exceptionlist)
        set_list_content(requested_exceptionlist, folder: "exceptionlists")
      elsif requested_matchlist.is_a?(Symbol)
        # A bundled matchlist brings its bundled exception list along.
        set_list_content(requested_matchlist, folder: "exceptionlists")
      else
        set_list_content(DEFAULT_EXCEPTIONLIST)
      end

    requested_replacement = options[:replacement] || DEFAULT_REPLACEMENT
    requested_replacement = DEFAULT_REPLACEMENT if requested_replacement == :default
    validate_replacement(requested_replacement)
    @replacement = requested_replacement
  end

  # SETTERS

  # Replaces the matchlist (same content types as the :matchlist option).
  # When given a Symbol and no exception list is set yet, the exception
  # list of the same name is loaded as well.
  def matchlist=(content)
    validate_list_content(content)
    @matchlist = set_list_content(content)
    if content.is_a?(Symbol) && @exceptionlist.empty?
      @exceptionlist = set_list_content(content, folder: "exceptionlists")
    end
    @creative_matchlist = @matchlist.map { |list_item| use_creative_letters(list_item) }
  end

  # Replaces the exception list (same content types as the :exceptionlist
  # option). Symbols are looked up in the exceptionlists folder.
  def exceptionlist=(content)
    validate_list_content(content)
    @exceptionlist = set_list_content(content, folder: "exceptionlists")
  end

  # Sets the replacement style; :default selects DEFAULT_REPLACEMENT.
  # The current style is left untouched when the new value is rejected.
  def replacement=(value)
    requested = value == :default ? DEFAULT_REPLACEMENT : value
    validate_replacement(requested)
    @replacement = requested
  end

  # LANGUAGE

  # Returns true when text contains at least one matchlist hit that is not
  # shielded by the exception list. Texts shorter than 3 characters (and
  # nil) never match.
  def match?(text)
    return false unless text.to_s.size >= 3
    each_unprotected_match(text) { return true }
    false
  end

  # Returns the distinct unshielded matchlist hits found in text, in the
  # order the matchlist produces them.
  def matched(text)
    words = []
    return words unless text.to_s.size >= 3
    each_unprotected_match(text) { |word| words << word }
    words.uniq
  end

  # Masks every unshielded matchlist hit in text according to the
  # replacement style and returns the result.
  #
  # An unfrozen String is modified in place and returned (as before); a
  # frozen String is left alone and a sanitized copy is returned instead.
  # Texts shorter than 3 characters are returned unchanged.
  def sanitize(text)
    return text unless text.to_s.size >= 3
    text = text.dup if text.frozen?
    guarded = !@exceptionlist.empty?
    chosen_matchlist.each do |list_item|
      start_at = 0
      # While gsub! runs, `text` still holds the pre-substitution content,
      # so the offsets taken from the match data stay valid for the
      # exception lookup.
      text.gsub!(pattern_for(list_item)) do |match|
        found = Regexp.last_match
        match_start = found.begin(0)
        match_end = found.end(0) - 1
        shielded = guarded && protected_by_exceptionlist?(match_start, match_end, text, start_at)
        start_at = match_end + 1
        shielded ? match : replace(match)
      end
    end
    text
  end

  private

  # VALIDATIONS

  # Raises unless content is a usable list description: an Array of
  # Strings, an existing file (String or Pathname), or a known Symbol.
  def validate_list_content(content)
    case content
    when Array
      content.all? { |entry| entry.is_a?(String) } ||
        raise(LanguageFilter::EmptyContentList, "List content array must contain only strings.")
    when String
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      File.exist?(content) ||
        raise(LanguageFilter::UnkownContentFile, "List content file \"#{content}\" can't be found.")
    when Pathname
      content.exist? ||
        raise(LanguageFilter::UnkownContentFile, "List content file \"#{content}\" can't be found.")
    when Symbol
      return true if content == :default || BUILTIN_LISTS.include?(content)
      raise(LanguageFilter::UnkownContent, "The only accepted symbols are :default, :hate, :profanity, :sex, and :violence.")
    else
      raise LanguageFilter::UnkownContent, "The list content can be either an Array, Pathname, or String path to a file."
    end
  end

  # Raises LanguageFilter::UnknownReplacement unless value (the current
  # replacement by default) is a known replacement style.
  def validate_replacement(value = @replacement)
    case value
    when :default, :garbled, :vowels, :stars, :nonconsonants then true
    else raise LanguageFilter::UnknownReplacement, "This is not a known replacement type."
    end
  end

  # HELPERS

  # Turns a list description into an Array of regex source strings.
  # options[:folder] selects the bundled folder used for Symbols
  # ("matchlists" by default, or "exceptionlists").
  def set_list_content(list, options={})
    folder = options[:folder] || "matchlists"
    case list
    when :default
      if folder == "exceptionlists"
        normalize_patterns(DEFAULT_EXCEPTIONLIST)
      else
        load_list(DEFAULT_MATCHLIST)
      end
    when *BUILTIN_LISTS
      load_list File.dirname(__FILE__) + "/../config/#{folder}/#{list}.txt"
    when Array then normalize_patterns(list)
    when String, Pathname then load_list list.to_s
    else []
    end
  end

  # Reads one pattern per line from filepath. File.readlines is used so a
  # path can never be interpreted as a command the way IO.readlines allows
  # on older Rubies; chomp also strips the "\r" of CRLF files.
  def load_list(filepath)
    normalize_patterns(File.readlines(filepath).map(&:chomp))
  end

  # Drops empty patterns (an empty pattern would match at every word
  # boundary) and rewrites capturing groups as non-capturing ones.
  def normalize_patterns(patterns)
    patterns.reject(&:empty?).map { |pattern| pattern.gsub(CAPTURE_GROUP_OPENER, '(?:') }
  end

  # Expands every unescaped letter of a pattern into a fragment that also
  # accepts look-alike spellings. A character right after a backslash is
  # kept as is, except that "\w" becomes "\S" - presumably so wildcard
  # tails also swallow the symbols used as letter substitutes.
  def use_creative_letters(text)
    previous = ""
    text.each_char.map do |char|
      escaped = previous == '\\'
      previous = char
      if escaped
        char.downcase == 'w' ? 'S' : char
      else
        CREATIVE_SUBSTITUTIONS.fetch(char.downcase, char)
      end
    end.join
  end

  # Yields every matchlist hit in text that is not shielded by the
  # exception list. Offsets come straight from the match data instead of
  # re-searching a slice of the text, which could land on a different
  # occurrence because slicing changes what \b and \A see.
  def each_unprotected_match(text)
    guarded = !@exceptionlist.empty?
    chosen_matchlist.each do |list_item|
      start_at = 0
      text.scan(pattern_for(list_item)) do
        found = Regexp.last_match
        match_start = found.begin(0)
        match_end = found.end(0) - 1
        shielded = guarded && protected_by_exceptionlist?(match_start, match_end, text, start_at)
        start_at = match_end + 1
        yield found[0] unless shielded
      end
    end
  end

  # True when some exception phrase found at or after start_at begins at or
  # before match_start and ends at or after match_end (inclusive offsets).
  def protected_by_exceptionlist?(match_start,match_end,text,start_at)
    @exceptionlist.any? do |list_item|
      exception = %r"\b#{list_item}\b"i
      search_from = start_at
      covered = false
      # String#index with an offset keeps the full text as context for \b.
      while (exception_start = text.index(exception, search_from))
        break if exception_start > match_start
        exception_end = exception_start + Regexp.last_match[0].size - 1
        if exception_end >= match_end
          covered = true
          break
        end
        # Always move forward, even after a zero-width exception match.
        search_from = [exception_end + 1, exception_start + 1].max
      end
      covered
    end
  end

  # This was moved to private because users should just use sanitize for any content
  #
  # Returns the masked form of word for the current replacement style.
  def replace(word)
    case @replacement
    when :vowels then word.gsub(/[aeiou]/i, '*')
    when :stars, :default then '*' * word.size
    when :nonconsonants then word.gsub(/[^bcdfghjklmnpqrstvwxyz]/i, '*')
    when :garbled then '$@!#%'
    else raise LanguageFilter::UnknownReplacement, "#{@replacement} is not a known replacement type."
    end
  end

  # The list searched by match?, matched and sanitize. Uses truthiness so it
  # always agrees with beg_regex and end_regex.
  def chosen_matchlist
    @creative_letters ? @creative_matchlist : @matchlist
  end

  # Compiles one list item into a case-insensitive regex framed by the
  # boundary fragments of the current mode.
  def pattern_for(list_item)
    %r"#{beg_regex}#{list_item}#{end_regex}"i
  end

  # Regex source that must precede a match.
  def beg_regex
    @creative_letters ? CREATIVE_BEG_REGEX : '\\b'
  end

  # Regex source that must follow a match.
  def end_regex
    @creative_letters ? CREATIVE_END_REGEX : '\\b'
  end
end
|
172
284
|
end
|