gluttonberg-core 2.5.5 → 2.5.6
Sign up to get free protection for your applications and to get access to all the features.
- data/app/assets/javascripts/gb_application.js +34 -0
- data/app/assets/stylesheets/gb_admin-override.sass +17 -1
- data/app/controllers/gluttonberg/admin/asset_library/assets_controller.rb +4 -2
- data/app/controllers/gluttonberg/admin/content/articles_controller.rb +22 -22
- data/app/controllers/gluttonberg/admin/content/comments_controller.rb +80 -23
- data/app/controllers/gluttonberg/admin/main_controller.rb +1 -1
- data/app/controllers/gluttonberg/public/comments_controller.rb +9 -8
- data/app/helpers/gluttonberg/admin.rb +3 -3
- data/app/helpers/gluttonberg/asset_library.rb +14 -11
- data/app/models/gluttonberg/asset.rb +8 -6
- data/app/models/gluttonberg/comment.rb +57 -3
- data/app/models/gluttonberg/comment_subscription.rb +2 -0
- data/app/models/gluttonberg/setting.rb +11 -5
- data/app/views/gluttonberg/admin/asset_library/assets/_browser_root.html.haml +20 -3
- data/app/views/gluttonberg/admin/asset_library/assets/search.json.haml +1 -1
- data/app/views/gluttonberg/admin/content/comments/index.html.haml +36 -10
- data/app/views/gluttonberg/admin/content/main/_sidebar.html.haml +6 -3
- data/app/views/gluttonberg/admin/main/index.html.haml +4 -4
- data/app/views/gluttonberg/admin/settings/generic_settings/index.html.haml +11 -7
- data/config/routes.rb +10 -0
- data/db/migrate/20130201025800_spam_flag_for_comments.rb +6 -0
- data/lib/engine.rb +3 -2
- data/lib/generators/gluttonberg/installer/installer_generator.rb +33 -32
- data/lib/gluttonberg/content/block.rb +2 -0
- data/lib/gluttonberg/content/clean_html.rb +16 -14
- data/lib/gluttonberg/content/despamilator/conf/unusual_characters.txt +6674 -0
- data/lib/gluttonberg/content/despamilator/filter/emails.rb +49 -0
- data/lib/gluttonberg/content/despamilator/filter/gtubs_test_filter.rb +25 -0
- data/lib/gluttonberg/content/despamilator/filter/html_tags.rb +134 -0
- data/lib/gluttonberg/content/despamilator/filter/ip_address_url.rb +27 -0
- data/lib/gluttonberg/content/despamilator/filter/long_words.rb +29 -0
- data/lib/gluttonberg/content/despamilator/filter/mixed_case.rb +25 -0
- data/lib/gluttonberg/content/despamilator/filter/naughty_words.rb +80 -0
- data/lib/gluttonberg/content/despamilator/filter/no_vowels.rb +28 -0
- data/lib/gluttonberg/content/despamilator/filter/numbers_and_words.rb +55 -0
- data/lib/gluttonberg/content/despamilator/filter/obfuscated_urls.rb +45 -0
- data/lib/gluttonberg/content/despamilator/filter/prices.rb +23 -0
- data/lib/gluttonberg/content/despamilator/filter/script_tag.rb +25 -0
- data/lib/gluttonberg/content/despamilator/filter/shouting.rb +38 -0
- data/lib/gluttonberg/content/despamilator/filter/spammy_tlds.rb +26 -0
- data/lib/gluttonberg/content/despamilator/filter/square_brackets.rb +27 -0
- data/lib/gluttonberg/content/despamilator/filter/trailing_number.rb +25 -0
- data/lib/gluttonberg/content/despamilator/filter/unusual_characters.rb +51 -0
- data/lib/gluttonberg/content/despamilator/filter/urls.rb +45 -0
- data/lib/gluttonberg/content/despamilator/filter/very_long_domain_name.rb +31 -0
- data/lib/gluttonberg/content/despamilator/filter/weird_punctuation.rb +48 -0
- data/lib/gluttonberg/content/despamilator/filter.rb +57 -0
- data/lib/gluttonberg/content/despamilator/subject/text.rb +36 -0
- data/lib/gluttonberg/content/despamilator/subject.rb +34 -0
- data/lib/gluttonberg/content/despamilator/version.rb +7 -0
- data/lib/gluttonberg/content/despamilator.rb +79 -0
- data/lib/gluttonberg/content.rb +12 -11
- data/lib/gluttonberg/library/attachment_mixin.rb +52 -269
- data/lib/gluttonberg/library/config/image_sizes.rb +61 -0
- data/lib/gluttonberg/library/config.rb +10 -0
- data/lib/gluttonberg/library/processor/audio.rb +42 -0
- data/lib/gluttonberg/library/processor/image.rb +134 -0
- data/lib/gluttonberg/library/processor.rb +11 -0
- data/lib/gluttonberg/library/storage/filesystem.rb +76 -0
- data/lib/gluttonberg/library/storage/s3.rb +196 -0
- data/lib/gluttonberg/library/storage.rb +11 -0
- data/lib/gluttonberg/library.rb +87 -86
- data/lib/gluttonberg/record_history.rb +14 -15
- data/lib/gluttonberg/tasks/asset.rake +25 -3
- data/lib/gluttonberg/version.rb +1 -1
- metadata +53 -2
@@ -0,0 +1,49 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter

      # Despamilator filter that scores email addresses found in a subject.
      #
      # Two Gluttonberg settings, read on every call to #parse, control it:
      # * "comment_email_as_spam" - when "Yes", a text that consists solely of
      #   one email address (after stripping) is registered with score 1.0.
      # * "comment_number_of_emails_allowed" - when it converts to a positive
      #   integer N, each whitespace-separated token that is an email address
      #   is registered with score 1.0 / N.
      class Emails < Despamilator::Filter

        def name
          'Emails'
        end

        def description
          'Detects each emails in a string'
        end

        # Registers email matches on +subject+ according to the settings
        # described on the class.
        #
        # subject - object exposing #text and #register_match!.
        def parse subject
          # Built once per filter instance. The pattern is anchored with \A / \z,
          # so it only matches when the *entire* candidate string is an address.
          @email_regex ||= begin
            email_name_regex  = '[A-Z0-9_\.%\+\-\']+'
            domain_head_regex = '(?:[A-Z0-9\-]+\.)+'
            domain_tld_regex  = '(?:[A-Z]{2,4}|museum|travel)'
            /\A#{email_name_regex}@#{domain_head_regex}#{domain_tld_regex}\z/i
          end

          comment_email_as_spam = Gluttonberg::Setting.get_setting("comment_email_as_spam")
          if comment_email_as_spam == "Yes"
            text = subject.text.strip
            subject.register_match!({
              :score => 1.0, :filter => self
            }) if @email_regex.match(text)
          end

          comment_number_of_emails_allowed = Gluttonberg::Setting.get_setting("comment_number_of_emails_allowed")
          if !comment_number_of_emails_allowed.blank? && comment_number_of_emails_allowed.to_i > 0
            comment_number_of_emails_allowed = comment_number_of_emails_allowed.to_i
            # Tokenise on whitespace so every word is tested on its own. The
            # previous pattern (/%s/) split on the literal characters "%s",
            # which left the text in one piece and defeated per-word matching.
            subject.text.split(/\s+/).each do |word|
              subject.register_match!({
                :score => (1.0/comment_number_of_emails_allowed), :filter => self
              }) if @email_regex.match(word)
            end
          end

        end

      end

    end
  end #Content
end #Gluttonberg
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that recognises the special Despamilator test string.
      class GtubsTestFilter < Despamilator::Filter
        def name
          'GTubs Test Filter'
        end

        def description
          'Detects the special test string (Despamilator.gtubs_test_string) and assigns a big score.'
        end

        # Registers a match scoring 100 when the subject text is exactly
        # Despamilator.gtubs_test_string; does nothing otherwise.
        def parse(subject)
          return unless subject.text == Despamilator.gtubs_test_string

          subject.register_match!({ score: 100, filter: self })
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,134 @@
|
|
1
|
+
module Gluttonberg
  module Content

    module DespamilatorFilter
      # Filter that scores occurrences of known HTML tags in the subject text.
      class HtmlTags < Despamilator::Filter
        # For every tag in #html_tags, counts opening forms ("<tag" followed by
        # a non-word character) and self-closing forms ("tag/>" preceded by a
        # non-word character) in the downcased text. When either count is
        # positive, registers one match scoring 0.6 times the larger count.
        def parse(subject)
          lowered = subject.text.downcase

          html_tags.each do |tag|
            opening = lowered.count(/<\s*#{tag}\W/)
            closing = lowered.count(/\W#{tag}\s*\/>/)
            next unless opening > 0 || closing > 0

            subject.register_match!({ score: 0.6 * [opening, closing].max, filter: self })
          end
        end

        def name
          'HTML tags'
        end

        def description
          'Detects HTML tags in text'
        end

        # Tag names to look for. They must stay lowercase because #parse
        # matches them against downcased text.
        def html_tags
          %w[
            !-- !doctype a abbr acronym address applet area
            b base basefont bdo big blockquote body br button
            caption center cite code col colgroup
            dd del dfn dir div dl dt
            em
            fieldset font form frame frameset
            h1 h2 h3 h4 h5 h6 head hr html
            i iframe img input ins isindex
            kbd
            label legend li link
            map menu meta
            noframes noscript
            object ol optgroup option
            p param pre
            q
            s samp select small span strike strong style sub sup
            table tbody td textarea tfoot th thead title tr tt
            u ul
            var
            xmp
          ]
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that flags http:// URLs whose host is a dotted numeric address.
      class IPAddressURL < Despamilator::Filter
        def name
          'IP Address URL'
        end

        def description
          'Detects IP address URLs'
        end

        # Registers a single match scoring 0.5 when the downcased text contains
        # at least one "http://<digits>.<digits>.<digits>.<digits>" sequence.
        def parse(subject)
          ip_url_count = subject.text.downcase.count(/http:\/\/\d+\.\d+\.\d+\.\d+/)
          return unless ip_url_count > 0

          subject.register_match!({ score: 0.5, filter: self })
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that scores unusually long words.
      class LongWords < Despamilator::Filter
        def name
          'Long Words'
        end

        def description
          'Detects long and unbroken strings'
        end

        # Walks the words of the subject text (URIs removed first) and
        # registers a match scoring 0.1 for each word longer than 20 characters.
        def parse(subject)
          subject.text.without_uris.words.each do |word|
            next unless word.length > 20

            subject.register_match!({ score: 0.1, filter: self })
          end
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Gluttonberg
  module Content
    module DespamilatorFilter
      # Filter that scores lower/upper case transitions inside the text.
      class MixedCase < Despamilator::Filter
        def name
          'Mixed Case String'
        end

        def description
          'Detects mixed case strings.'
        end

        # Applies the two patterns in order to the URI-free text via
        # remove_and_count!, sums the counts, and registers one match scoring
        # 0.1 per occurrence when the total is positive. The order matters
        # because the first pattern's removals happen before the second runs.
        def parse(subject)
          text = subject.text.without_uris

          total = [/[a-z][A-Z]/, /[a-z][A-Z][a-z]/].inject(0) do |sum, pattern|
            sum + text.remove_and_count!(pattern)
          end
          return unless total > 0

          subject.register_match!({ score: 0.1 * total, filter: self })
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter

      # Filter that scores text containing words from a built-in list and from
      # the comma-separated "comment_blacklist" Gluttonberg setting.
      class NaughtyWords < Despamilator::Filter

        def name
          'Naughty Words'
        end

        def description
          'Detects cheeky words'
        end

        # Registers matches on +subject+ (an object exposing #text and
        # #register_match!): 0.1 for every built-in word and 1.0 for every
        # blacklist entry found as a whole word (optionally followed by "s")
        # in the downcased text.
        def parse subject
          text = subject.text.downcase

          naughty_words.each do |word|
            subject.register_match!({:score => 0.1, :filter => self}) if text =~ /\b#{word}s?\b/
          end

          blacklisted_words.each do |word|
            # Entries come from a setting, so escape them: an unescaped "(" or
            # "+" would otherwise raise RegexpError or change the pattern.
            subject.register_match!({:score => 1.0, :filter => self}) if text =~ /\b#{Regexp.escape(word)}s?\b/
          end
        end

        # Scores a plain string without registering anything.
        #
        # subject - the text to inspect; it is downcased directly.
        #
        # Returns a Float: 0.0 for blank input, otherwise 0.1 per built-in word
        # found as a whole word plus 1.0 per blacklist entry contained in the
        # text as a substring.
        def local_parse subject
          local_score = 0.0
          unless subject.blank?
            text = subject.downcase

            naughty_words.each do |word|
              local_score += 0.1 if text =~ /\b#{word}s?\b/
            end

            blacklisted_words.each do |word|
              local_score += 1.0 if text.include?(word)
            end
          end
          local_score
        end

        # Built-in list of words, all lowercase.
        def naughty_words
          %w{
            underage
            penis
            viagra
            bondage
            cunt
            fuck
            shit
            dick
            tits
            nude
            dicks
            shemale
            dildo
            porn
            cock
            pussy
            clit
            preteen
            lolita
          }
        end

        private

        # Reads the "comment_blacklist" setting and returns its entries
        # stripped and downcased. Empty entries (from "a,,b" or a trailing
        # comma) are dropped, because an empty word matches almost any text.
        def blacklisted_words
          setting = Gluttonberg::Setting.get_setting("comment_blacklist")
          return [] if setting.blank?

          setting.split(",").map { |word| word.strip.downcase }.reject { |word| word.empty? }
        end

      end
    end
  end #Content
end #Gluttonberg
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that scores whitespace-separated tokens made up only of the
      # letters listed in NO_VOWELS_REGEX.
      class NoVowels < Despamilator::Filter
        NO_VOWELS_REGEX = /^[b-df-hj-np-tv-xzB-DF-HJ-NP-TV-XZ]+$/

        def name
          'No Vowels'
        end

        def description
          'Detects things that are all letters but no vowels and separated by spaces'
        end

        # Registers one match whose score is (number of matching tokens)
        # squared, divided by 100; nothing is registered when no token matches.
        def parse(subject)
          vowelless = subject.text.split(/\s+/).grep(NO_VOWELS_REGEX)
          return if vowelless.empty?

          subject.register_match!({ score: (vowelless.length ** 2).to_f / 100, filter: self })
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that scores digits glued to word characters.
      class NumbersAndWords < Despamilator::Filter
        # Runs three patterns in order over the tidied text. Every scan result
        # registers a match scoring 0.1, and its string form is removed from
        # the working text (first occurrence only) before the next pattern runs.
        def parse(subject)
          text = tidy_text(subject)
          patterns = [/\w\d+/, /\d+\w/, /\d+($|\b)/]

          patterns.each do |pattern|
            text.scan(pattern).each do |found|
              # to_s because scan yields arrays for patterns with groups
              fragment = found.to_s
              text.sub!(fragment, '') unless fragment.empty?
              subject.register_match!({ score: 0.1, filter: self })
            end
          end
        end

        def name
          'Numbers next to words'
        end

        def description
          'Detects unusual number/word combinations'
        end

        private

        # Returns the subject text without URIs, downcased, and with the
        # "good numbers" stripped: h1-h6, stand-alone numbers, numbers with a
        # comma or dot separator, and ordinals (st/nd/rd/th).
        def tidy_text(subject)
          text = subject.text.without_uris
          text.downcase!

          [
            /h[1-6]/,
            /(^|\b)\d+($|\b)/,
            /(^|\b)\d+(,|\.)\d+($|\b)/,
            /(^|\b)\d+(st|nd|rd|th)($|\b)/
          ].each { |good_number| text.gsub!(good_number, '') }

          text
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Gluttonberg
  module Content
    module DespamilatorFilter
      # Filter that scores URLs disguised with extra whitespace.
      class ObfuscatedURLs < Despamilator::Filter
        def name
          'Obfuscated URLs'
        end

        def description
          'Finds lame attempts at obfuscating urls.'
        end

        # Adds up the hits from both private detectors on the URI-free,
        # downcased text and registers one match scoring 0.4 per hit.
        def parse(subject)
          text = subject.text.without_uris.downcase
          hits = find_space_separated_parts(text) + find_space_separated_characters(text)
          return unless hits > 0

          # written as (4.0 * n) / 10 because of an issue seen with ruby 1.9.2
          # multiplying floats by 3 (?!)
          subject.register_match!({ score: (4.0 * hits) / 10, filter: self })
        end

        private

        # Counts "www <word> com" sequences separated by whitespace.
        def find_space_separated_parts(text)
          text.count(/www\s+\w+\s+com/)
        end

        # Splits the text wherever two lowercase letters are adjacent, removes
        # all whitespace from each piece, and counts the pieces that then look
        # like "<5+ word chars>.<2-3 word chars>".
        def find_space_separated_characters(text)
          text.split(/[a-z][a-z]/).count do |candidate|
            candidate.gsub(/\s+/, '') =~ /\w{5,}\.\w{2,3}/
          end
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Gluttonberg
  module Content
    module DespamilatorFilter
      # Filter that scores dollar amounts in the text.
      class Prices < Despamilator::Filter
        def name
          'Prices'
        end

        def description
          'Detects prices in text.'
        end

        # Counts "$" followed by optional whitespace and digits, and registers
        # one match scoring 0.075 per occurrence when any are found.
        def parse(subject)
          found = subject.text.count(/\$\s*\d+/)
          return unless found > 0

          subject.register_match!({ score: 0.075 * found, filter: self })
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that flags opening or closing script tags.
      class ScriptTag < Despamilator::Filter
        # Registers one match scoring 1 when the downcased text contains
        # "<script" or "</script" followed by ">" or whitespace.
        def parse(subject)
          return unless subject.text.downcase.match(/<\/?script(>|\s+|\n|\r)/)

          subject.register_match!({ score: 1, filter: self })
        end

        def name
          'Script tag'
        end

        def description
          'Searches for variations for the HTML script tag'
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that scores the proportion of upper-case runs in the text.
      class Shouting < Despamilator::Filter
        def name
          'Shouting'
        end

        def description
          'Detects and scores shouting (all caps)'
        end

        # Strips HTML tags, ignores texts shorter than 20 characters, then
        # compares the total length of runs of two or more capitals with the
        # number of lower-case letters. When any such run exists, registers a
        # match scoring (upper / (upper + lower)) * 0.5.
        def parse(subject)
          plain = subject.text.gsub(/<\/?[^>]*>/, "")
          return if plain.length < 20

          upper = plain.scan(/[A-Z][A-Z]+/).join.length
          lower = plain.count(/[a-z]/)
          return unless upper > 0

          ratio = upper.to_f / (upper + lower)
          subject.register_match!({ score: ratio * 0.5, filter: self })
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that scores host names ending in .info, .biz or .xxx.
      class SpammyTLDs < Despamilator::Filter
        def name
          'Spammy TLDs'
        end

        def description
          'Detects TLDs that are more commonly associated with spam.'
        end

        # Counts names of at least five word characters followed by one of the
        # listed TLDs and registers one match scoring 0.05 per occurrence.
        def parse(subject)
          tld_hits = subject.text.count(/\w{5,}\.(info|biz|xxx)\b/)
          return unless tld_hits > 0

          subject.register_match!({ score: 0.05 * tld_hits, filter: self })
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that scores every square bracket in the text.
      class SquareBrackets < Despamilator::Filter
        def name
          'Square Brackets'
        end

        def description
          'Detects each square bracket in a string'
        end

        # Registers a separate match scoring 0.05 for each "[" or "]" found.
        def parse(subject)
          brackets = subject.text.downcase.scan(/(\[|\])/)
          brackets.each do |_bracket|
            subject.register_match!({ score: 0.05, filter: self })
          end
        end
      end
    end
  end # Content
end # Gluttonberg
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter
      # Filter that flags text ending in a stand-alone number.
      class TrailingNumber < Despamilator::Filter
        def name
          'Trailing Number'
        end

        def description
          'Detects a trailing cache busting number'
        end

        # Registers one match scoring 0.1 when the URI-free text has a number
        # (optionally followed by whitespace) at the end of a line.
        def parse(subject)
          return unless subject.text.without_uris =~ /\b\d+\s*$/

          subject.register_match!({ score: 0.1, filter: self })
        end
      end
    end
  end # Content
end # Gluttonberg
|