phisher 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +65 -0
- data/LICENSE.txt +20 -0
- data/README.md +58 -0
- data/Rakefile +50 -0
- data/lib/phisher.rb +0 -0
- data/lib/phisher/algo.rb +30 -0
- data/lib/phisher/knn.rb +121 -0
- data/lib/phisher/phisher.rb +58 -0
- data/lib/phisher/url_list.rb +35 -0
- data/lib/phisher/url_parser.rb +11 -0
- data/test/algo_spec.rb +23 -0
- data/test/helpers.rb +25 -0
- data/test/knn_spec.rb +119 -0
- data/test/phisher_spec.rb +66 -0
- data/test/url_list_spec.rb +51 -0
- data/test/url_parser_spec.rb +30 -0
- metadata +133 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 7b3ca61c8c2aa9607ca972bc974fa0044575d6be
|
|
4
|
+
data.tar.gz: 642053a9264be718552610d16e2dae5c1d563a07
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 0942ccf9178a39d551579db63640c129a131fc6af1635a07b36e8795f42dba00d22643781b0f937cdd25d759a953fbc984be56569c61a08640e5767594da80bd
|
|
7
|
+
data.tar.gz: ac440a37f5409e1c896ff3ee4649fcdfccef7c7117136d7e28534df6c030008ea39c751c72d0efd834bc2ccf62ef2f0286ccfc19f5af30354b6adb30f0371677
|
data/.document
ADDED
data/Gemfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
source "http://rubygems.org"
|
|
2
|
+
# Add dependencies required to use your gem here.
|
|
3
|
+
# Example:
|
|
4
|
+
# gem "activesupport", ">= 2.3.5"
|
|
5
|
+
|
|
6
|
+
# Add dependencies to develop your gem here.
|
|
7
|
+
# Include everything needed to run rake, tests, features, etc.
|
|
8
|
+
group :development do
|
|
9
|
+
gem "minitest", ">= 0"
|
|
10
|
+
gem "rdoc", "~> 3.12"
|
|
11
|
+
gem "bundler", "~> 1.0"
|
|
12
|
+
gem "jeweler", "~> 2.0.1"
|
|
13
|
+
gem "simplecov", ">= 0"
|
|
14
|
+
end
|
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
GEM
|
|
2
|
+
remote: http://rubygems.org/
|
|
3
|
+
specs:
|
|
4
|
+
addressable (2.3.6)
|
|
5
|
+
builder (3.2.2)
|
|
6
|
+
descendants_tracker (0.0.4)
|
|
7
|
+
thread_safe (~> 0.3, >= 0.3.1)
|
|
8
|
+
docile (1.1.5)
|
|
9
|
+
faraday (0.9.0)
|
|
10
|
+
multipart-post (>= 1.2, < 3)
|
|
11
|
+
git (1.2.8)
|
|
12
|
+
github_api (0.12.2)
|
|
13
|
+
addressable (~> 2.3)
|
|
14
|
+
descendants_tracker (~> 0.0.4)
|
|
15
|
+
faraday (~> 0.8, < 0.10)
|
|
16
|
+
hashie (>= 3.3)
|
|
17
|
+
multi_json (>= 1.7.5, < 2.0)
|
|
18
|
+
nokogiri (~> 1.6.3)
|
|
19
|
+
oauth2
|
|
20
|
+
hashie (3.3.1)
|
|
21
|
+
highline (1.6.21)
|
|
22
|
+
jeweler (2.0.1)
|
|
23
|
+
builder
|
|
24
|
+
bundler (>= 1.0)
|
|
25
|
+
git (>= 1.2.5)
|
|
26
|
+
github_api
|
|
27
|
+
highline (>= 1.6.15)
|
|
28
|
+
nokogiri (>= 1.5.10)
|
|
29
|
+
rake
|
|
30
|
+
rdoc
|
|
31
|
+
json (1.8.1)
|
|
32
|
+
jwt (1.0.0)
|
|
33
|
+
mini_portile (0.6.0)
|
|
34
|
+
minitest (5.4.2)
|
|
35
|
+
multi_json (1.10.1)
|
|
36
|
+
multi_xml (0.5.5)
|
|
37
|
+
multipart-post (2.0.0)
|
|
38
|
+
nokogiri (1.6.3.1)
|
|
39
|
+
mini_portile (= 0.6.0)
|
|
40
|
+
oauth2 (1.0.0)
|
|
41
|
+
faraday (>= 0.8, < 0.10)
|
|
42
|
+
jwt (~> 1.0)
|
|
43
|
+
multi_json (~> 1.3)
|
|
44
|
+
multi_xml (~> 0.5)
|
|
45
|
+
rack (~> 1.2)
|
|
46
|
+
rack (1.5.2)
|
|
47
|
+
rake (10.3.2)
|
|
48
|
+
rdoc (3.12.2)
|
|
49
|
+
json (~> 1.4)
|
|
50
|
+
simplecov (0.9.1)
|
|
51
|
+
docile (~> 1.1.0)
|
|
52
|
+
multi_json (~> 1.0)
|
|
53
|
+
simplecov-html (~> 0.8.0)
|
|
54
|
+
simplecov-html (0.8.0)
|
|
55
|
+
thread_safe (0.3.4)
|
|
56
|
+
|
|
57
|
+
PLATFORMS
|
|
58
|
+
ruby
|
|
59
|
+
|
|
60
|
+
DEPENDENCIES
|
|
61
|
+
bundler (~> 1.0)
|
|
62
|
+
jeweler (~> 2.0.1)
|
|
63
|
+
minitest
|
|
64
|
+
rdoc (~> 3.12)
|
|
65
|
+
simplecov
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Copyright (c) 2014 fhur
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be
|
|
12
|
+
included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
phisher
|
|
2
|
+
-------
|
|
3
|
+
|
|
4
|
+
Phishing detection gem
|
|
5
|
+
|
|
6
|
+
## Usage
|
|
7
|
+
|
|
8
|
+
To install `phisher`, simply run
|
|
9
|
+
|
|
10
|
+
```ruby
|
|
11
|
+
gem install phisher
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Phishy links
|
|
15
|
+
To check if a url is phishy simply call
|
|
16
|
+
```ruby
|
|
17
|
+
phisher.verify("www.google.com") # => 0 (whitelisted url: minimum risk)
|
|
18
|
+
phisher.verify("faizbook.nz") # => 1 (blacklisted url: maximum risk)
|
|
19
|
+
phisher.verify("someunknownsite.com") # => weighted risk computed by the registered algorithms
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Black and White lists
|
|
23
|
+
|
|
24
|
+
Phisher can be trained to reject any url in a blacklist and verify any
|
|
25
|
+
url in a whitelist.
|
|
26
|
+
|
|
27
|
+
```ruby
|
|
28
|
+
phisher = Phisher.new
|
|
29
|
+
|
|
30
|
+
# A list of urls, can optionally contain wildcards, e.g. myblog.blogger.com/*
|
|
31
|
+
phisher.blacklist = blacklisted_links
|
|
32
|
+
|
|
33
|
+
# A list of whitelisted domains. Can also contain wildcards
|
|
34
|
+
phisher.whitelist = whitelisted_domains
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Training a Phisher
|
|
39
|
+
|
|
40
|
+
Black/White lists are great but they do have drawbacks, namely they are
|
|
41
|
+
not easily extensible. To improve Phisher you should train it to detect
|
|
42
|
+
phishing.
|
|
43
|
+
|
|
44
|
+
## Contributing to phisher
|
|
45
|
+
|
|
46
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
|
47
|
+
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
|
48
|
+
* Fork the project.
|
|
49
|
+
* Start a feature/bugfix branch.
|
|
50
|
+
* Commit and push until you are happy with your contribution.
|
|
51
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
|
52
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
|
53
|
+
|
|
54
|
+
## Copyright
|
|
55
|
+
|
|
56
|
+
Copyright (c) 2014 fhur. See LICENSE.txt for
|
|
57
|
+
further details.
|
|
58
|
+
|
data/Rakefile
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'rubygems'
|
|
4
|
+
require 'bundler'
|
|
5
|
+
begin
|
|
6
|
+
Bundler.setup(:default, :development)
|
|
7
|
+
rescue Bundler::BundlerError => e
|
|
8
|
+
$stderr.puts e.message
|
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
|
10
|
+
exit e.status_code
|
|
11
|
+
end
|
|
12
|
+
require 'rake'
|
|
13
|
+
|
|
14
|
+
require 'jeweler'
|
|
15
|
+
Jeweler::Tasks.new do |gem|
|
|
16
|
+
# gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
|
|
17
|
+
gem.name = "phisher"
|
|
18
|
+
gem.homepage = "http://github.com/fhur/phisher"
|
|
19
|
+
gem.license = "MIT"
|
|
20
|
+
gem.summary = %Q{Simple and extensible phishing detection gem}
|
|
21
|
+
gem.description = %Q{Simple and extensible phishing detection gem}
|
|
22
|
+
gem.email = "fernandohur@gmail.com"
|
|
23
|
+
gem.authors = ["fhur"]
|
|
24
|
+
# dependencies defined in Gemfile
|
|
25
|
+
end
|
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
|
27
|
+
|
|
28
|
+
desc "Run all minitest tests"
|
|
29
|
+
task :test do
|
|
30
|
+
Dir.glob("./test/**_spec.rb").each { |file| require file }
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
desc "Code coverage detail"
|
|
34
|
+
task :coverage do
|
|
35
|
+
require 'simplecov'
|
|
36
|
+
SimpleCov.start
|
|
37
|
+
Rake::Task['test'].execute
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
task :default => :test
|
|
41
|
+
|
|
42
|
+
require 'rdoc/task'
|
|
43
|
+
Rake::RDocTask.new do |rdoc|
|
|
44
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
|
45
|
+
|
|
46
|
+
rdoc.rdoc_dir = 'rdoc'
|
|
47
|
+
rdoc.title = "phisher #{version}"
|
|
48
|
+
rdoc.rdoc_files.include('README*')
|
|
49
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
50
|
+
end
|
data/lib/phisher.rb
ADDED
|
File without changes
|
data/lib/phisher/algo.rb
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Abstract base for phishing detection algorithms.
#
# An Algo only stores the weight it was built with; scoring is left to
# subclasses, which must provide their own #risk implementation.
class Algo

  # The weight handed to the constructor.
  attr_reader :weight

  # Builds an algorithm carrying the given weight.
  #
  # Arguments:
  #   {float} weight the weight assigned to this algorithm.
  #
  def initialize(weight)
    @weight = weight
  end

  # Scores how risky a url is.
  #
  # The score is expected to lie in [0..1]: 0 means the url is completely
  # safe and 1 means it is completely phishy.
  #
  # Arguments:
  #   {string} url The url whose risk will be calculated
  #
  # Returns:
  #   A float in [0..1] indicating the risk of the given url
  #
  # Raises:
  #   RuntimeError always, in this base class; subclasses override it.
  #
  def risk(url)
    raise RuntimeError, 'Subclasses must override risk(url) method'
  end

end
|
data/lib/phisher/knn.rb
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Knn : K-Nearest-Neighbor
|
|
3
|
+
#
|
|
4
|
+
# the KNN algorithm is very simple
|
|
5
|
+
# Given a set of labeled training data, <x,f(x)> , a new input will
|
|
6
|
+
# be compared with each x to determine the distance. After this the class
|
|
7
|
+
# of the k-closest distances will be chosen
|
|
8
|
+
#
|
|
9
|
+
#
|
|
10
|
+
# Usage Example:
|
|
11
|
+
#
|
|
12
|
+
# knn = Knn.new
|
|
13
|
+
#
|
|
14
|
+
# print "training Knn... "
|
|
15
|
+
# 10.times do |i|
|
|
16
|
+
# klazz = 0
|
|
17
|
+
# klazz = 1 if i >= 5
|
|
18
|
+
# knn.train([i],klazz)
|
|
19
|
+
# end
|
|
20
|
+
#
|
|
21
|
+
# puts "[done]"
|
|
22
|
+
# knn.training_set.each_with_index {|point,index| p "point #{index}: #{point}"}
|
|
23
|
+
#
|
|
24
|
+
# puts "Classifying a few inputs"
|
|
25
|
+
# 20.times do |i|
|
|
26
|
+
# test = i.to_f/2
|
|
27
|
+
# print "#{test} =>"
|
|
28
|
+
# puts knn.classify([test], 3)
|
|
29
|
+
# end
|
|
30
|
+
#
|
|
31
|
+
|
|
32
|
+
# K-Nearest-Neighbor classifier over numeric arrays.
#
# Training points are stored as TrainingPoint objects (data + label).
# Classification measures the distance from the input to every training
# point and returns the most frequent label among the k closest ones.
class Knn

  # The list of TrainingPoint objects added through #train.
  attr_reader :training_set
  # Lambda computing the euclidean distance between two numeric arrays.
  attr_reader :default_distance

  def initialize()
    @training_set = []
    @default_distance = lambda do |array1, array2|
      squares = array1.zip(array2).map { |a, b| (a - b)**2 }
      # The explicit 0 seed makes the distance between two empty arrays 0
      # instead of failing on Math.sqrt(nil).
      Math.sqrt(squares.reduce(0, :+))
    end
  end

  # Returns the class closest to the data point for a
  # given K
  #
  # Arguments:
  #   {Array} data an array
  #   {integer} k the number of nearest neighbors to consider
  #   {block} distance an optional block in case you want
  #                    to provide a custom distance function. It is
  #                    called with (training_data, data).
  #
  # Returns:
  #   The class that the data array should belong to, or nil when the
  #   training set is empty.
  #
  def classify(data, k, &distance)
    distance ||= @default_distance

    # Keep the insertion index next to each distance so that equidistant
    # neighbors are ordered by training order. Labels are never compared,
    # so labels without an ordering (true/false, mixed types) are safe.
    neighbors = @training_set.each_with_index.map do |training_point, index|
      [distance.call(training_point.data, data), index, training_point.label]
    end
    nearest_neighbors = neighbors.sort_by { |dist, index, _label| [dist, index] }.first(k)

    most_frequent(get_class_frequencies(nearest_neighbors.map(&:last)))
  end

  # Adds a labeled array to the training set.
  #
  # Arguments:
  #   {Array} data the array that will be labeled
  #   {symbol} label an identifier for the label
  #
  # Returns:
  #   The training set (an Array of TrainingPoint), with the new point last
  def train(data, label)
    @training_set.push(TrainingPoint.new(data, label))
  end

  private

  # Given an array where each element is a class label
  # this method returns a hash of label => number of occurrences
  def get_class_frequencies(class_array)
    class_array.each_with_object(Hash.new(0)) { |clazz, freqs| freqs[clazz] += 1 }
  end

  # Given a map of class => frequency(class)
  # This method returns the class with the highest
  # frequency, or nil for an empty map.
  # If more than one class has the highest frequency the one that
  # appears last in the map wins.
  def most_frequent(class_frequencies)
    most_frequent_class = nil
    highest_frequency = -1
    class_frequencies.each do |clazz, freq|
      next if freq < highest_frequency
      most_frequent_class = clazz
      # Record the new maximum; without this every class would "win" and
      # the last one in the map would always be returned.
      highest_frequency = freq
    end
    most_frequent_class
  end

end
|
|
120
|
+
|
|
121
|
+
# A labeled sample stored by Knn: +data+ is the array and +label+ its class.
# Assigned directly rather than subclassing an anonymous Struct, so loading
# this file twice does not raise a superclass mismatch.
TrainingPoint = Struct.new(:data, :label)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
require 'phisher/url_list'
|
|
2
|
+
|
|
3
|
+
# Scores urls by combining a blacklist, a whitelist and a set of weighted
# phishing detection algorithms.
class Phisher

  # Blacklist built from the urls given to the constructor.
  attr_reader :blacklist
  # Whitelist built from the urls given to the constructor.
  attr_reader :whitelist
  # The algorithms given to the constructor.
  attr_reader :algos

  # Initializes a Phisher with a whitelist, blacklist and set of phishing detection algorithms
  #
  # Arguments:
  #   {Array} blacklist an array of blacklisted urls as strings
  #   {Array} whitelist an array of whitelisted urls as strings
  #   {Array} algos     an array of Algo subclass instances (each must respond to risk(url) and weight)
  #
  def initialize(blacklist: [], whitelist: [], algos: [])
    @blacklist = Blacklist.new blacklist
    @whitelist = Whitelist.new whitelist
    @algos = algos
  end

  # Calculates the risk for a given url by testing the url against a blacklist, whitelist and a
  # set of phishing detection algorithms.
  #
  # This method follows the following rules:
  # 1. If the url is blacklisted then 1 is immediately returned
  # 2. If the url is whitelisted then 0 will be returned
  # 3. If neither (1.) nor (2.) occur then the url is tested against the list of algorithms provided
  #    to the Phisher. The result is the sum over all algorithms of the algorithm's weight
  #    multiplied by the algorithm's risk score. With no algorithms registered the sum is 0.0.
  #
  # Arguments:
  #   {String} url a url to test for safety
  #
  # Returns:
  #   A number indicating the risk associated with the url where 0 is the minimum
  #   amount of risk and 1 is the max risk. The result only stays within [0..1] if the
  #   weights sum to at most 1 and every risk score is within [0..1].
  #
  def verify(url)
    # Blacklisted urls carry the maximum risk; the blacklist wins over the whitelist.
    return 1 if @blacklist.include? url

    # Whitelisted urls carry the minimum risk.
    return 0 if @whitelist.include? url

    # Neither black nor white listed: combine the weight-adjusted risk
    # reported by each registered phishing detection algorithm.
    weight_adjusted_risk = @algos.map { |algo| algo.risk(url) * algo.weight }

    # reduce(:+) yields nil for an empty list; report 0.0 instead so callers
    # always get a number back.
    weight_adjusted_risk.reduce(:+) || 0.0
  end

end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require 'phisher/url_parser'
|
|
2
|
+
|
|
3
|
+
# Base class for a blacklist and whitelist
|
|
4
|
+
# Base class for a blacklist and whitelist.
#
# Every url given to the list is converted with #parse (provided by
# UrlParser) and stored; membership is tested by matching a url against
# each stored pattern.
class UrlList
  include UrlParser

  # The parsed patterns, in insertion order.
  attr_reader :list

  # Arguments:
  #   {Array} list urls as strings; each one is run through parse
  def initialize(list=[])
    @list = list.map { |url| parse(url) }
  end

  # Returns true if any stored pattern matches the given url, false otherwise.
  def include?(url)
    @list.any? { |regex| regex.match(url) }
  end

  # Parses the url and appends the resulting pattern to the list.
  #
  # Returns self so calls can be chained (list << a << b). Returning the
  # internal array here would let a chained call push an unparsed string.
  def << (url)
    @list.push(parse(url))
    self
  end

  # Number of patterns stored in the list.
  def size
    @list.size
  end

end
|
|
32
|
+
|
|
33
|
+
# UrlList holding the blacklisted url patterns; adds no behavior of its own.
class Blacklist < UrlList
end
|
|
34
|
+
|
|
35
|
+
# UrlList holding the whitelisted url patterns; adds no behavior of its own.
class Whitelist < UrlList
end
|
data/test/algo_spec.rb
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require 'minitest/autorun'
|
|
2
|
+
require 'phisher/algo'
|
|
3
|
+
|
|
4
|
+
describe Algo do
|
|
5
|
+
|
|
6
|
+
describe 'constructor' do
|
|
7
|
+
it 'should set the weight of the Algo instance' do
|
|
8
|
+
weight = 0.5
|
|
9
|
+
algo = Algo.new weight
|
|
10
|
+
algo.weight.must_equal weight
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
describe 'risk(url)' do
|
|
16
|
+
it 'should raise an error' do
|
|
17
|
+
algo = Algo.new 1
|
|
18
|
+
assert_raises RuntimeError do
|
|
19
|
+
algo.risk('http://google.com')
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
data/test/helpers.rb
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Shared assertions for the spec files.
module TestHelpers

  # Asserts that the size of a list increments by the
  # given amount after the given block is executed.
  #
  # The list must be mutated in place (push, <<, concat ...). Rebinding a
  # local variable, e.g. `list += [1,2]`, creates a new array and leaves
  # the one passed in untouched, so the assertion would fail.
  #
  # Example
  #   list = []
  #   assert_list_inc list, 2 do
  #     list.push(1, 2)
  #   end
  #
  # Should pass because the list is incremented by 2.
  #
  # Arguments:
  #   {Array} init_list the array whose size is observed
  #   {integer} amount the amount that is expected to be
  #                    incremented. Defaults to 1.
  #   {block} an arbitrary block of code, called without arguments
  def assert_list_inc(init_list, amount=1, &block)
    # Only the size is needed afterwards, so there is no need to copy the list.
    size_before = init_list.size
    yield
    init_list.size.must_equal(size_before + amount)
  end

end
|
data/test/knn_spec.rb
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
require 'phisher/knn'
|
|
2
|
+
require 'minitest/autorun'
|
|
3
|
+
require_relative './helpers'
|
|
4
|
+
|
|
5
|
+
describe Knn do
|
|
6
|
+
include TestHelpers
|
|
7
|
+
|
|
8
|
+
before :each do
|
|
9
|
+
@knn = Knn.new
|
|
10
|
+
@rand_list = lambda do |size|
|
|
11
|
+
Array.new(size + 1) { rand }
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
describe 'default_distance' do
|
|
16
|
+
|
|
17
|
+
before :each do
|
|
18
|
+
@dist = @knn.default_distance
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it 'should be greater than or equal to 0' do
|
|
22
|
+
20.times do |i|
|
|
23
|
+
d = @dist.(@rand_list.(i), @rand_list.(i))
|
|
24
|
+
(d >= 0).must_equal true
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it 'should return 0 when two points are equal' do
|
|
29
|
+
20.times do |i|
|
|
30
|
+
point = @rand_list.(i)
|
|
31
|
+
@dist.call(point, point).must_equal 0
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# symmetric: d(x,y) == d(y,x)
|
|
37
|
+
it 'should be symmetric' do
|
|
38
|
+
20.times do |i|
|
|
39
|
+
point_a = @rand_list.(i)
|
|
40
|
+
point_b = @rand_list.(i)
|
|
41
|
+
@dist.call(point_a, point_b).must_equal(
|
|
42
|
+
@dist.call(point_b, point_a)
|
|
43
|
+
)
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Triangle inequality: d(x,z) ≤ d(x,y) + d(y,z)
|
|
48
|
+
it 'should satisfy the triangle inequality' do
|
|
49
|
+
20.times do |i|
|
|
50
|
+
x = @rand_list.(i)
|
|
51
|
+
y = @rand_list.(i)
|
|
52
|
+
z = @rand_list.(i)
|
|
53
|
+
dist_sum = @dist.(x,y) + @dist.(y,z)
|
|
54
|
+
(@dist.(x,z) <= dist_sum).must_equal true
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
describe 'train' do
|
|
60
|
+
|
|
61
|
+
it 'should increase the size of the training set' do
|
|
62
|
+
1.upto(20) do |i|
|
|
63
|
+
assert_list_inc @knn.training_set, i do
|
|
64
|
+
i.times do
|
|
65
|
+
data = @rand_list.(i)
|
|
66
|
+
label = rand.to_s.to_sym
|
|
67
|
+
result = @knn.train(data, label)
|
|
68
|
+
point = result.last
|
|
69
|
+
point.data.must_equal data
|
|
70
|
+
point.label.must_equal label
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
describe 'classify' do
|
|
78
|
+
|
|
79
|
+
#
|
|
80
|
+
# Initialize the following scenario
|
|
81
|
+
#
|
|
82
|
+
# 5 . . . C C
|
|
83
|
+
# 4 . . . . C
|
|
84
|
+
# 3 . A . . .
|
|
85
|
+
# 2 A B B . .
|
|
86
|
+
# 1 A A A . .
|
|
87
|
+
# 0 1 2 3 4 5
|
|
88
|
+
#
|
|
89
|
+
# Dot means there is nothing at that location A,B,C
|
|
90
|
+
# means that there is a point label with A,B,C at the
|
|
91
|
+
# given coordinates
|
|
92
|
+
#
|
|
93
|
+
before :each do
|
|
94
|
+
[[1,1], [2,1], [3,1], [1,2], [2,3]].each do |point|
|
|
95
|
+
@knn.train(point, :a)
|
|
96
|
+
end
|
|
97
|
+
[[2,2], [3,2]].each do |point|
|
|
98
|
+
@knn.train(point, :b)
|
|
99
|
+
end
|
|
100
|
+
[[5,4],[4,5],[5,5]].each do |point|
|
|
101
|
+
@knn.train(point, :c)
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it 'should classify new points' do
|
|
106
|
+
|
|
107
|
+
@knn.classify([2,4], 1).must_equal :a
|
|
108
|
+
@knn.classify([6,6], 2).must_equal :c
|
|
109
|
+
@knn.classify([7,7], 3).must_equal :c
|
|
110
|
+
@knn.classify([7,7], 3).must_equal :c
|
|
111
|
+
|
|
112
|
+
@knn.classify([2,2], 1).must_equal :b
|
|
113
|
+
@knn.classify([2,2], 2).must_equal :a
|
|
114
|
+
@knn.classify([2,2], 3).must_equal :a
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
require 'minitest/autorun'
|
|
2
|
+
require 'phisher/phisher'
|
|
3
|
+
require 'phisher/algo'
|
|
4
|
+
|
|
5
|
+
# Test double for Algo: reports a fixed, preconfigured risk for every url.
class MockAlgo < Algo

  # Arguments:
  #   risk   the value #risk will return for any url
  #   weight the weight forwarded to Algo's constructor
  def initialize(risk, weight)
    super(weight)
    @risk = risk
  end

  # Returns the risk given at construction time; the url is ignored.
  def risk(url)
    @risk
  end

end
|
|
16
|
+
|
|
17
|
+
describe Phisher do
|
|
18
|
+
|
|
19
|
+
before :each do
|
|
20
|
+
@whitelist = ['*.foo.com','*.bar.bz','baz.nl/*','qux.org']
|
|
21
|
+
@blacklist = ['lee.com','facebook.com','gmail.nl', 'gmail.nl/*']
|
|
22
|
+
@algos = [ MockAlgo.new(0.5, 0.2), MockAlgo.new(0.1, 0.5), MockAlgo.new(0.9,0.3)]
|
|
23
|
+
@phisher = Phisher.new whitelist: @whitelist, blacklist: @blacklist, algos: @algos
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
describe 'initialize' do
|
|
28
|
+
it 'should initialize the blacklist' do
|
|
29
|
+
phisher = Phisher.new whitelist: @whitelist
|
|
30
|
+
phisher.whitelist.size.must_equal @whitelist.size
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it 'should initialize the white list' do
|
|
34
|
+
phisher = Phisher.new blacklist: @blacklist
|
|
35
|
+
phisher.blacklist.size.must_equal @blacklist.size
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
it 'should initialize the algo list' do
|
|
39
|
+
phisher = Phisher.new algos: @algos
|
|
40
|
+
phisher.algos.must_equal @algos
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
describe 'verify' do
|
|
45
|
+
|
|
46
|
+
it 'should return 1 for a blacklisted url' do
|
|
47
|
+
['lee.com', 'facebook.com', 'gmail.nl', 'gmail.nl/some', 'gmail.nl/site'].each do |url|
|
|
48
|
+
@phisher.verify(url).must_equal 1
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it 'should return 0 for a whitelisted url' do
|
|
53
|
+
['asd.foo.com', 'boo.bar.bz', 'baz.nl/', 'baz.nl/my-site', 'qux.org'].each do |url|
|
|
54
|
+
@phisher.verify(url).must_equal 0
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
it 'should return the weighted average of the algorithms result and their weight' do
|
|
59
|
+
risk = @phisher.verify('someohtersite.com')
|
|
60
|
+
risk.must_equal 0.5*0.2 + 0.1*0.5 + 0.9*0.3
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
require 'minitest/autorun'
|
|
2
|
+
require 'phisher/url_list'
|
|
3
|
+
|
|
4
|
+
describe UrlList do
|
|
5
|
+
|
|
6
|
+
describe 'constructor' do
|
|
7
|
+
it 'should initialize the UrlList with the given wildcard urls ' do
|
|
8
|
+
bl = UrlList.new ["a.com", "*.a.b", "foo.org/*"]
|
|
9
|
+
bl.list.size.must_equal 3
|
|
10
|
+
end
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
describe 'include?' do
|
|
14
|
+
|
|
15
|
+
before :each do
|
|
16
|
+
@bl = UrlList.new ['foo.org', 'fee.com', 'a.b.c/*', '*.bar.net']
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'should return true when url is part of blacklist' do
|
|
20
|
+
includes = ['foo.org','fee.com', 'a.b.c/', 'a.bar.net'].map do |url|
|
|
21
|
+
@bl.include? url
|
|
22
|
+
end
|
|
23
|
+
result = includes.reduce(true) { |curr, acum| acum and curr }
|
|
24
|
+
result.must_equal true
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
it 'should return false when url is part of blacklist' do
|
|
28
|
+
includes = ['foo.orgo','a.fee.com', 'a.bee.c/', 'bar.netbiz'].map do |url|
|
|
29
|
+
@bl.include? url
|
|
30
|
+
end
|
|
31
|
+
result = includes.reduce(false) { |curr, acum| acum or curr }
|
|
32
|
+
result.must_equal false
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
describe "<<" do
|
|
37
|
+
|
|
38
|
+
it "should increase the size of a UrlList" do
|
|
39
|
+
|
|
40
|
+
@list = UrlList.new
|
|
41
|
+
urls = ['facebook.com','google.com','dropbox.com','github.com/*']
|
|
42
|
+
urls.each do |url|
|
|
43
|
+
@list << url
|
|
44
|
+
end
|
|
45
|
+
@list.size.must_equal urls.size
|
|
46
|
+
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require 'minitest/autorun'
|
|
2
|
+
require 'phisher/url_parser'
|
|
3
|
+
|
|
4
|
+
describe UrlParser do
|
|
5
|
+
|
|
6
|
+
include UrlParser
|
|
7
|
+
|
|
8
|
+
describe "parse" do
|
|
9
|
+
|
|
10
|
+
it "should convert * wildcards to .* regex" do
|
|
11
|
+
|
|
12
|
+
parse("*.google.com").must_equal(/\A.*\.google\.com\z/)
|
|
13
|
+
parse("*.google.*").must_equal(/\A.*\.google\..*\z/)
|
|
14
|
+
parse("*").must_equal(/\A.*\z/)
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it "should match all urls matching wildcard" do
|
|
18
|
+
parse("*.google.com").match("foo.google.com").wont_be_nil
|
|
19
|
+
parse("google.*").match("google.com/page").wont_be_nil
|
|
20
|
+
parse("*.google.com/*").match("foo.google.com/").wont_be_nil
|
|
21
|
+
parse("google.com/*/foo").match("google.com/a/b/c/foo").wont_be_nil
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
it "should not match urls that don't match wildcard" do
|
|
25
|
+
parse("google.com/*").match("foo.google.com/site").must_be_nil
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: phisher
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- fhur
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2014-11-30 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: minitest
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rdoc
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '3.12'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '3.12'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: bundler
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '1.0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '1.0'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: jeweler
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: 2.0.1
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: 2.0.1
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: simplecov
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ">="
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0'
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ">="
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '0'
|
|
83
|
+
description: Simple and extensible phishing detection gem
|
|
84
|
+
email: fernandohur@gmail.com
|
|
85
|
+
executables: []
|
|
86
|
+
extensions: []
|
|
87
|
+
extra_rdoc_files:
|
|
88
|
+
- LICENSE.txt
|
|
89
|
+
- README.md
|
|
90
|
+
files:
|
|
91
|
+
- ".document"
|
|
92
|
+
- Gemfile
|
|
93
|
+
- Gemfile.lock
|
|
94
|
+
- LICENSE.txt
|
|
95
|
+
- README.md
|
|
96
|
+
- Rakefile
|
|
97
|
+
- lib/phisher.rb
|
|
98
|
+
- lib/phisher/algo.rb
|
|
99
|
+
- lib/phisher/knn.rb
|
|
100
|
+
- lib/phisher/phisher.rb
|
|
101
|
+
- lib/phisher/url_list.rb
|
|
102
|
+
- lib/phisher/url_parser.rb
|
|
103
|
+
- test/algo_spec.rb
|
|
104
|
+
- test/helpers.rb
|
|
105
|
+
- test/knn_spec.rb
|
|
106
|
+
- test/phisher_spec.rb
|
|
107
|
+
- test/url_list_spec.rb
|
|
108
|
+
- test/url_parser_spec.rb
|
|
109
|
+
homepage: http://github.com/fhur/phisher
|
|
110
|
+
licenses:
|
|
111
|
+
- MIT
|
|
112
|
+
metadata: {}
|
|
113
|
+
post_install_message:
|
|
114
|
+
rdoc_options: []
|
|
115
|
+
require_paths:
|
|
116
|
+
- lib
|
|
117
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
118
|
+
requirements:
|
|
119
|
+
- - ">="
|
|
120
|
+
- !ruby/object:Gem::Version
|
|
121
|
+
version: '0'
|
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
123
|
+
requirements:
|
|
124
|
+
- - ">="
|
|
125
|
+
- !ruby/object:Gem::Version
|
|
126
|
+
version: '0'
|
|
127
|
+
requirements: []
|
|
128
|
+
rubyforge_project:
|
|
129
|
+
rubygems_version: 2.2.2
|
|
130
|
+
signing_key:
|
|
131
|
+
specification_version: 4
|
|
132
|
+
summary: Simple and extensible phishing detection gem
|
|
133
|
+
test_files: []
|