despamilator 2.1 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +17 -2
- data/README.rdoc +17 -18
- data/lib/despamilator.rb +2 -2
- data/lib/despamilator/filter/obfuscated_urls.rb +1 -1
- data/lib/despamilator/filter/spammy_tlds.rb +1 -1
- data/lib/despamilator/version.rb +1 -1
- metadata +5 -5
data/History.txt
CHANGED
@@ -67,6 +67,21 @@
|
|
67
67
|
|
68
68
|
* Minor bug fix in shouting filter.
|
69
69
|
|
70
|
-
===
|
70
|
+
=== 2.1 2011-09-11
|
71
71
|
|
72
|
-
*
|
72
|
+
* Significant improvement in filtering accuracy. Added...
|
73
|
+
* Suspect punctuation detection.
|
74
|
+
* URL obfuscation detection.
|
75
|
+
* Price detection (dollars only).
|
76
|
+
* Long domain name detection.
|
77
|
+
* Spammy TLD detection.
|
78
|
+
|
79
|
+
* Added URL negation for many filters to reduce false positives.
|
80
|
+
|
81
|
+
* Deprecated the "matched_by" method. Replaced by "matches".
|
82
|
+
|
83
|
+
* Internal refactor to make filters stateless.
|
84
|
+
|
85
|
+
=== 2.1.1 2011-09-24
|
86
|
+
|
87
|
+
* Fixed syntax error for Ruby 1.8.
|
data/README.rdoc
CHANGED
@@ -23,11 +23,11 @@ Using Despamilator:
|
|
23
23
|
dspam = Despamilator.new('some text with an <h2> tag qthhg')
|
24
24
|
|
25
25
|
dspam.score #=> the total score for this string (1 is considered high)
|
26
|
-
dspam.
|
27
|
-
first_match = dspam.
|
28
|
-
first_match.name #=> some string with the name of the filter
|
29
|
-
first_match.description #=> some string to describe
|
30
|
-
first_match
|
26
|
+
dspam.matches #=> array of matching filters
|
27
|
+
first_match = dspam.matches.first #=> first matching filter
|
28
|
+
first_match[:filter].name #=> some string with the name of the filter
|
29
|
+
first_match[:filter].description #=> some string to describe the filter
|
30
|
+
first_match[:score] #=> the individual score assigned by this matching filter
|
31
31
|
|
32
32
|
== FILTERING:
|
33
33
|
|
@@ -49,15 +49,15 @@ They should always supply the following methods:
|
|
49
49
|
|
50
50
|
* name #=> the name of your filter.
|
51
51
|
* description #=> what your filter will look for.
|
52
|
-
* parse(
|
52
|
+
* parse(subject) #=> the method that will be called when parsing. A copy of the message is passed in.
|
53
53
|
|
54
|
-
|
54
|
+
The subject of the detection (including text) is passed into the "parse" method. It provides
|
55
|
+
the following methods...
|
55
56
|
|
56
|
-
*
|
57
|
-
*
|
58
|
-
* score #=> the current score assigned to the text
|
57
|
+
* text #=> The text of the message. This is immutable but you can alter a duplicate (using "dup").
|
58
|
+
* register_match! #=> Method that receives the instance of the filter and the assigned score if matched.
|
59
59
|
|
60
|
-
Take a look at the "
|
60
|
+
Take a look at the "prices" code and tests in "spec/filters/prices.rb".
|
61
61
|
|
62
62
|
==== Example Filter:
|
63
63
|
|
@@ -68,7 +68,7 @@ lib/despamilator/filter/detect_letter_a.rb:
|
|
68
68
|
|
69
69
|
module DespamilatorFilter
|
70
70
|
|
71
|
-
class DetectLetterA < Despamilator::
|
71
|
+
class DetectLetterA < Despamilator::Filter
|
72
72
|
|
73
73
|
def name
|
74
74
|
'Detecting the letter A'
|
@@ -78,10 +78,10 @@ lib/despamilator/filter/detect_letter_a.rb:
|
|
78
78
|
'Detects the letter "a" in a string for no reason other than a demo'
|
79
79
|
end
|
80
80
|
|
81
|
-
def parse
|
82
|
-
if text.downcase.scan(/a/)
|
83
|
-
|
84
|
-
|
81
|
+
def parse subject
|
82
|
+
if subject.text.downcase.scan(/a/)
|
83
|
+
# add 0.1 to the score of the text
|
84
|
+
subject.register_match!({:score => 0.1, :filter => self})
|
85
85
|
end
|
86
86
|
end
|
87
87
|
end
|
@@ -90,8 +90,7 @@ As previously stated, ensure you put a spec test together as well!
|
|
90
90
|
|
91
91
|
== REQUIREMENTS:
|
92
92
|
|
93
|
-
*
|
94
|
-
* rspec
|
93
|
+
* domainatrix
|
95
94
|
|
96
95
|
|
97
96
|
== INSTALL:
|
data/lib/despamilator.rb
CHANGED
@@ -15,8 +15,8 @@ require 'ostruct'
|
|
15
15
|
#
|
16
16
|
# dspam = Despamilator.new('some text with an <h2> tag qthhg')
|
17
17
|
#
|
18
|
-
# dspam.score #=> the total score for this string (1 is normally my threshold)
|
19
|
-
# dspam.
|
18
|
+
# dspam.score #=> the total score for this string (1 is normally my threshold).
|
19
|
+
# dspam.matches #=> array of hashes containing matching filters and their score.
|
20
20
|
|
21
21
|
class Despamilator
|
22
22
|
|
@@ -15,7 +15,7 @@ module DespamilatorFilter
|
|
15
15
|
count += find_space_separated_characters text
|
16
16
|
|
17
17
|
# weird maths below is due to some issue with ruby 1.9.2 multiplying floats by 3 (?!)
|
18
|
-
subject.register_match!({score
|
18
|
+
subject.register_match!({:score => (4.0 * count) / 10, :filter => self}) if count > 0
|
19
19
|
end
|
20
20
|
|
21
21
|
private
|
@@ -13,7 +13,7 @@ module DespamilatorFilter
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def parse subject
|
16
|
-
matches = subject.text.count(/\w{5,}\.(info|biz)\b/)
|
16
|
+
matches = subject.text.count(/\w{5,}\.(info|biz|xxx)\b/)
|
17
17
|
subject.register_match!({:score => 0.05 * matches, :filter => self}) if matches > 0
|
18
18
|
end
|
19
19
|
|
data/lib/despamilator/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: despamilator
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-09-
|
12
|
+
date: 2011-09-24 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: domainatrix
|
16
|
-
requirement: &
|
16
|
+
requirement: &70225887914340 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70225887914340
|
25
25
|
description: ! 'Despamilator is a plugin based spam detector designed for use on your
|
26
26
|
web forms borne out of two annoyances: Spam being submitted in my web forms and
|
27
27
|
CAPTCHAS being intrusive. Despamilator will apply some commonly used heuristics
|
@@ -74,7 +74,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
74
74
|
requirements:
|
75
75
|
- - ! '>='
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: 1.3.
|
77
|
+
version: 1.3.5
|
78
78
|
requirements: []
|
79
79
|
rubyforge_project: despamilator
|
80
80
|
rubygems_version: 1.8.6
|