phisher_phinder 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.env.example +3 -0
- data/.gitignore +18 -0
- data/.rspec +3 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +6 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +93 -0
- data/LICENSE +21 -0
- data/LICENSE.txt +21 -0
- data/README.md +38 -0
- data/Rakefile +38 -0
- data/bin/console +20 -0
- data/bin/setup +8 -0
- data/db/migrations/0001_create_geo_ip_cache.rb +35 -0
- data/lib/phisher_phinder.rb +28 -0
- data/lib/phisher_phinder/body_hyperlink.rb +47 -0
- data/lib/phisher_phinder/cached_geoip_client.rb +95 -0
- data/lib/phisher_phinder/expanded_data_processor.rb +61 -0
- data/lib/phisher_phinder/extended_ip.rb +16 -0
- data/lib/phisher_phinder/extended_ip_factory.rb +51 -0
- data/lib/phisher_phinder/geoip_ip_data.rb +6 -0
- data/lib/phisher_phinder/mail.rb +50 -0
- data/lib/phisher_phinder/mail_parser.rb +111 -0
- data/lib/phisher_phinder/mail_parser/body_parser.rb +94 -0
- data/lib/phisher_phinder/mail_parser/header_value_parser.rb +24 -0
- data/lib/phisher_phinder/mail_parser/received_headers/by_parser.rb +45 -0
- data/lib/phisher_phinder/mail_parser/received_headers/classifier.rb +27 -0
- data/lib/phisher_phinder/mail_parser/received_headers/for_parser.rb +23 -0
- data/lib/phisher_phinder/mail_parser/received_headers/from_parser.rb +40 -0
- data/lib/phisher_phinder/mail_parser/received_headers/parser.rb +74 -0
- data/lib/phisher_phinder/mail_parser/received_headers/starttls_parser.rb +24 -0
- data/lib/phisher_phinder/mail_parser/received_headers/timestamp_parser.rb +32 -0
- data/lib/phisher_phinder/simple_ip.rb +15 -0
- data/lib/phisher_phinder/version.rb +3 -0
- data/phisher_phinder.gemspec +32 -0
- metadata +112 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
module PhisherPhinder
  # Augments a parsed mail with the content found behind each of its
  # hyperlinks.
  class ExpandedDataProcessor
    # @param mail [#hypertext_links] the parsed mail to expand
    # @return [Hash] +:linked_content+ holds one lookup result per hyperlink
    #   (in link order) and +:mail+ holds the mail that was passed in
    def process(mail)
      lookups = mail.hypertext_links.map { |hyperlink| lookup_content(hyperlink) }

      {linked_content: lookups, mail: mail}
    end

    private

    # Describes one hyperlink. Links that do not support retrieval are
    # reported with +content_requested: false+; all others are fetched.
    def lookup_content(link)
      skeleton = {
        href: link.href,
        link_text: link.text,
        content_requested: true,
        response: nil,
        error: nil
      }

      return skeleton.merge(content_requested: false) unless link.supports_retrieval?

      # Loaded lazily so net/http is only pulled in when a link is fetched.
      require 'net/http'

      retrieve(link, skeleton)
    end

    # Performs the HTTP request for +link+ and merges either the response
    # summary or the details of the raised error into +skeleton+.
    def retrieve(link, skeleton)
      reply = Net::HTTP.get_response(link.href)
      summary = reply.is_a?(Net::HTTPOK) ? response_with_body(reply) : response_status_only(reply)

      skeleton.merge(response: summary)
    rescue => failure
      skeleton.merge(error: {class: failure.class, message: failure.message})
    end

    # Summary of a successful response: numeric status, raw body and every
    # http(s) URL found in the body text.
    def response_with_body(response)
      {
        status: response.code.to_i,
        body: response.body,
        links_within_body: response.body.scan(/https?:\/\/[a-z0-9\/._?=,&#!*~();:@+$%\[\]-]+/i)
      }
    end

    # Summary of any other response: only the numeric status is kept.
    def response_status_only(response)
      {status: response.code.to_i, body: nil, links_within_body: []}
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  # Value object pairing an IP address with the GeoIP data looked up for it.
  class ExtendedIp
    attr_reader :ip_address, :geoip_ip_data

    # @param ip_address [Object] the address (ExtendedIpFactory passes an IPAddr)
    # @param geoip_ip_data [Object, nil] the GeoIP lookup result; ExtendedIpFactory
    #   passes nil when the lookup does not find the address
    def initialize(ip_address:, geoip_ip_data:)
      @ip_address = ip_address
      @geoip_ip_data = geoip_ip_data
    end

    # Attribute-wise equality.
    #
    # Objects that do not expose both readers (nil, plain strings, ...) are
    # simply "not equal" instead of raising NoMethodError, so a value can be
    # compared safely against the raw strings other parsers may return.
    #
    # @param other [Object]
    # @return [Boolean]
    def ==(other)
      return false unless other.respond_to?(:ip_address) && other.respond_to?(:geoip_ip_data)

      ip_address == other.ip_address && geoip_ip_data == other.geoip_ip_data
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'ipaddr'
|
3
|
+
|
4
|
+
module PhisherPhinder
  # Builds IP value objects from strings: a SimpleIp for loopback/private
  # addresses and an ExtendedIp (with GeoIP data attached) for everything else.
  class ExtendedIpFactory
    # Private IPv4 ranges (10/8, 172.16/12, 192.168/16). Built once instead of
    # on every call to #build.
    PRIVATE_IPV4_RANGES = [
      IPAddr.new('10.0.0.0/8'),
      IPAddr.new('172.16.0.0/12'),
      IPAddr.new('192.168.0.0/16')
    ].freeze

    # @param geoip_client [#lookup] client queried with the original IP string
    def initialize(geoip_client:)
      @geoip_client = geoip_client
    end

    # @param ip_string [String] candidate IP address
    # @return [SimpleIp, ExtendedIp, nil] nil when +ip_string+ cannot be turned
    #   into an IP address (host names, empty strings, nil, ...)
    def build(ip_string)
      ip = IPAddr.new(ip_string)

      if non_public_ip?(ip)
        SimpleIp.new(ip_address: ip)
      else
        ExtendedIp.new(ip_address: ip, geoip_ip_data: geoip_data(ip_string))
      end
    rescue IPAddr::Error
      # IPAddr::Error is the parent of InvalidAddressError and also covers
      # AddressFamilyError, which IPAddr raises for non-String input such as nil.
      nil
    end

    private

    # True for loopback addresses and addresses inside a private IPv4 range.
    def non_public_ip?(ip)
      ip.loopback? || PRIVATE_IPV4_RANGES.any? { |range| range.include?(ip) }
    end

    # GeoIP data for the address, or nil when the client does not know it.
    def geoip_data(ip_string)
      @geoip_client.lookup(ip_string)
    rescue MaxMind::GeoIP2::AddressNotFoundError
      nil
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  # A parsed email: the raw text, its raw header/body halves, and the
  # structured headers, tracing headers and body derived from them.
  class Mail
    attr_reader :original_email, :original_headers, :original_body, :headers, :tracing_headers, :body

    def initialize(
      original_email:, original_headers:, original_body:, headers:, tracing_headers:, body:
    )
      @original_email = original_email
      @original_headers = original_headers
      @original_body = original_body
      @headers = headers
      @tracing_headers = tracing_headers
      @body = body
    end

    # Unique, lower-cased addresses named in the Reply-To header(s).
    #
    # @return [Array<String>] empty when the mail has no Reply-To header
    def reply_to_addresses
      reply_to_values.
        flat_map { |value_string| value_string.split(',') }.
        map { |email_address_string| extract_email_address(email_address_string) }.
        uniq
    end

    # Every <a> element of the HTML body that carries an href attribute.
    #
    # @return [Array<BodyHyperlink>]
    def hypertext_links
      body_as_html.
        xpath('//a').
        select { |el| el.attributes['href'] }.
        map { |el| BodyHyperlink.new(el.attributes['href'].value, el.text) }
    end

    private

    # Raw Reply-To header strings, normalised to a flat array.
    #
    # Accepts the array-of-strings form as well as the {data:, sequence:}
    # entries (single or in an array) that MailParser::Parser stores for each
    # header, and tolerates the header being absent.
    def reply_to_values
      raw = @headers[:reply_to]
      entries = raw.is_a?(Array) ? raw : [raw]

      entries.compact.map { |entry| entry.is_a?(Hash) ? entry[:data] : entry }.compact
    end

    def body_as_html
      # Loaded lazily so nokogiri is only needed when links are requested.
      require 'nokogiri'

      Nokogiri::HTML(body[:html])
    end

    # Returns the address inside <...> when present, otherwise the whole
    # string (this also covers an unterminated "<"), lower-cased and stripped.
    def extract_email_address(email_address_string)
      bracketed = email_address_string[/<([^>]+)>/, 1]

      (bracketed || email_address_string).downcase.strip
    end
  end
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative('mail_parser/body_parser')
|
3
|
+
require_relative('mail_parser/header_value_parser')
|
4
|
+
|
5
|
+
module PhisherPhinder
  module MailParser
    # Turns the raw text of an email into a PhisherPhinder::Mail.
    class Parser
      # Header names whose values feed the tracing (Received) data.
      TRACING_HEADER_NAMES = %i[received x_received].freeze

      # @param enriched_ip_factory [#build] handed to the by/from parsers of
      #   each Received header
      # @param line_ending_type [String] 'dos' selects CRLF line endings, any
      #   other value selects LF
      def initialize(enriched_ip_factory, line_ending_type)
        @line_end = line_ending_type == 'dos' ? "\r\n" : "\n"
        @enriched_ip_factory = enriched_ip_factory
      end

      # @param contents [String] complete raw email
      # @return [Mail]
      def parse(contents)
        original_headers, original_body = separate(contents)
        headers = extract_headers(original_headers)
        Mail.new(
          original_email: contents,
          original_headers: original_headers,
          original_body: original_body,
          headers: headers,
          tracing_headers: generate_tracing_headers(headers),
          body: parse_body(original_body, headers)
        )
      end

      private

      # Splits at the first blank line into [header text, body text]; either
      # element may be missing (nil) when the input has no such part.
      def separate(contents)
        contents.split("#{@line_end}#{@line_end}", 2)
      end

      def extract_headers(headers)
        # to_s: empty input yields no header section at all (nil).
        parse_headers(unfold_headers(headers.to_s).split(@line_end))
      end

      # Joins folded (continuation) header lines onto the line they continue.
      def unfold_headers(headers)
        headers.gsub(/#{@line_end}[\s\t]+/, ' ')
      end

      # Builds a hash keyed by normalised header name. A header that occurs
      # once maps to a single {data:, sequence:} entry; repeated headers map
      # to an array of entries in order of appearance. +sequence+ counts down
      # from the first line, so the first header line has the highest value.
      def parse_headers(headers_array)
        total = headers_array.length

        headers_array.each_with_index.each_with_object({}) do |(header_string, index), parsed|
          name, value = header_string.split(':', 2)
          # A line without a colon is not a "name: value" field; skip it
          # rather than handing nil to the value parser.
          next if value.nil?

          key = convert_header_name(name)
          entry = enrich_header_value(value, total - index - 1)

          parsed[key] =
            if !parsed.key?(key)
              entry
            elsif parsed[key].is_a?(Array)
              parsed[key] << entry
            else
              [parsed[key], entry]
            end
        end
      end

      def convert_header_name(header)
        header.gsub(/-/, '_').downcase.to_sym
      end

      def enrich_header_value(value, sequence)
        {data: HeaderValueParser.new.parse(value), sequence: sequence}
      end

      # Collects all Received / X-Received entries, orders them by descending
      # sequence and parses each one.
      def generate_tracing_headers(headers)
        received_header_values = headers.values_at(*TRACING_HEADER_NAMES).flatten.compact

        {
          received: restore_sequence(received_header_values).map { |v| parse_received_header(v[:data]) }
        }
      end

      def parse_received_header(value)
        parser = MailParser::ReceivedHeaders::Parser.new(
          by_parser: MailParser::ReceivedHeaders::ByParser.new(@enriched_ip_factory),
          for_parser: MailParser::ReceivedHeaders::ForParser.new,
          from_parser: MailParser::ReceivedHeaders::FromParser.new(@enriched_ip_factory),
          starttls_parser: MailParser::ReceivedHeaders::StarttlsParser.new,
          timestamp_parser: MailParser::ReceivedHeaders::TimestampParser.new,
          classifier: MailParser::ReceivedHeaders::Classifier.new
        )
        parser.parse(value)
      end

      # Highest sequence first.
      def restore_sequence(values)
        values.sort_by { |value| -value[:sequence] }
      end

      def parse_body(original_body, headers)
        MailParser::BodyParser.new(@line_end).parse(
          body_contents: original_body,
          content_type: headers.dig(:content_type, :data),
          content_transfer_encoding: headers.dig(:content_transfer_encoding, :data)
        )
      end

      # Returns the decoded text only when +text+ round-trips through Base64.
      # NOTE(review): nothing in this class calls this method, and it relies
      # on Base64 having been loaded elsewhere - confirm whether it is still
      # needed.
      def valid_base64_decoded(text)
        if Base64.strict_encode64(Base64.decode64(text)) == text.gsub(/#{@line_end}/, '')
          Base64.decode64(text)
        end
      end
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  module MailParser
    # Splits a mail body into its plain-text and HTML representations.
    class BodyParser
      # @param line_end [String] line terminator used by the mail ("\n" or "\r\n")
      def initialize(line_end)
        @line_end = line_end
      end

      # @param body_contents [String, nil] raw body text
      # @param content_type [String, nil] value of the Content-Type header
      # @param content_transfer_encoding [String, nil] value of the
      #   Content-Transfer-Encoding header
      # @return [Hash] with +:text+ and +:html+ keys. multipart/alternative
      #   bodies yield strings for both (possibly empty); an HTML body yields
      #   +text: nil+; anything else is returned untouched as +:text+ with
      #   +html: nil+.
      def parse(body_contents:, content_type:, content_transfer_encoding:)
        if multipart_alternative?(content_type)
          parse_multipart_alternative(content_type, body_contents)
        elsif html?(content_type)
          {text: nil, html: decode_body(body_contents, content_transfer_encoding)}
        else
          {text: body_contents, html: nil}
        end
      end

      private

      # Media types are case-insensitive, so compare without regard to case.
      def html?(content_type)
        media_type = content_type.to_s.split(';').first.to_s.strip

        media_type.casecmp('text/html').zero?
      end

      # Only Base64 bodies are decoded; every other transfer encoding
      # (7bit, 8bit, quoted-printable, ...) is returned as received instead of
      # being pushed through the Base64 decoder.
      def decode_body(body_contents, content_transfer_encoding)
        return body_contents unless base64?(content_transfer_encoding)

        decode_base64(body_contents)
      end

      def base64?(content_transfer_encoding)
        content_transfer_encoding.to_s.strip.casecmp('base64').zero?
      end

      # The 'm' unpack directive is the lenient Base64 decoder that
      # Base64.decode64 wraps; calling it directly means no code path depends
      # on the base64 library having been required first.
      def decode_base64(text)
        text.to_s.unpack('m').first
      end

      def multipart_alternative?(content_type)
        content_type =~ /\Amultipart\/alternative/i
      end

      # Concatenates the contents of all HTML parts into +:html+ and of all
      # other parts into +:text+. A body whose parts cannot be located (no
      # boundary parameter, or the boundary never occurs) yields two empty
      # strings.
      def parse_multipart_alternative(content_type, contents)
        blocks = split_into_blocks(contents.to_s, extract_boundary(content_type))

        categorise_blocks(blocks).inject({html: '', text: ''}) do |memo, block|
          key = block[:html] ? :html : :text
          memo.merge(key => memo[key] + block[:contents])
        end
      end

      # Reads the boundary parameter wherever it appears in the header value.
      # Handles quoted values and values containing "=".
      def extract_boundary(content_type)
        match = content_type.match(/boundary\s*=\s*(?:"([^"]+)"|([^\s;]+))/i)

        match && (match[1] || match[2])
      end

      def split_into_blocks(contents, boundary)
        return [] unless boundary

        raw_blocks = contents.split("--#{boundary}#{@line_end}")
        strip_epilogue(strip_prologue(raw_blocks), "--#{boundary}--")
      end

      # Everything before the first boundary line is preamble, not a part.
      def strip_prologue(blocks)
        blocks.drop(1)
      end

      # Cuts the closing boundary (and anything after it) off the last part.
      def strip_epilogue(blocks, end_boundary)
        return blocks if blocks.empty?

        blocks[0..-2] << blocks[-1].split(end_boundary).first.to_s
      end

      # For each part: consume its header lines up to the first blank line
      # (or the end of the part), note whether it is HTML and/or Base64
      # encoded, and return the remaining lines as its contents.
      def categorise_blocks(blocks)
        blocks.map do |block|
          lines = block.split(@line_end)
          html = false
          base64_encoded = false

          until lines.empty?
            line = lines.shift.strip
            break if line.empty?

            if line =~ /\AContent-Type:\s*text\/html/i
              html = true
            elsif line =~ /\AContent-Transfer-Encoding:\s*base64/i
              base64_encoded = true
            end
          end

          contents = if base64_encoded
            lines.map { |l| decode_base64(l) }.join
          else
            lines.join(@line_end)
          end

          {html: html, contents: contents}
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  module MailParser
    # Cleans up a raw header value, decoding UTF-8 / Base64 encoded words
    # ("=?UTF-8?B?...?=").
    class HeaderValueParser
      # One encoded word. Charset and encoding tokens are matched without
      # regard to case; capture 1 is the Base64 payload. The closing "?=" is
      # optional so unterminated words are still decoded.
      ENCODED_WORD = /=\?UTF-8\?B\?([^?\s]*)(?:\?=)?/i.freeze

      # Whitespace that merely separates two adjacent encoded words.
      ENCODED_WORD_GAP = /\?=\s+(?==\?UTF-8\?B\?)/i.freeze

      # @param raw_value [String] header value as found in the mail
      # @return [String] the stripped value, with every UTF-8/Base64 encoded
      #   word replaced by its decoded text. Text outside encoded words is
      #   kept as is; whitespace between two adjacent encoded words is dropped
      #   so that a value split across several words is joined back together.
      def parse(raw_value)
        value = raw_value.strip
        return value unless value =~ ENCODED_WORD

        value.
          gsub(ENCODED_WORD_GAP, '?=').
          gsub(ENCODED_WORD) { parse_utf8_base64(Regexp.last_match(1)) }
      end

      private

      # Decodes a Base64 payload and tags the bytes as UTF-8. The 'm' unpack
      # directive is the lenient decoder that Base64.decode64 wraps, so the
      # base64 library does not need to be loaded.
      def parse_utf8_base64(payload)
        payload.unpack('m').first.force_encoding('UTF-8')
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  module MailParser
    module ReceivedHeaders
      # Parses the "by ..." component of a Received header.
      class ByParser
        # Known layouts of the component, tried in order; the first match
        # wins. Built once instead of on every call to #parse.
        PATTERNS = [
          /by\s(?<recipient>\S+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s(?<additional>.+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sID\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)/
        ].freeze

        # @param extended_ip_factory [#build] returns an IP object for
        #   recipients that are IP addresses, nil otherwise
        def initialize(extended_ip_factory)
          @extended_ip_factory = extended_ip_factory
        end

        # @param component [String, nil] the "by ..." part of a Received header
        # @return [Hash] +:recipient+, +:protocol+, +:id+ and
        #   +:recipient_additional+. All four are nil when the component is
        #   missing or matches none of the known layouts (the latter used to
        #   raise NoMethodError on nil).
        def parse(component)
          matches = component && first_match(component)
          return empty_result unless matches

          {
            recipient: enrich_recipient(matches[:recipient]),
            protocol: capture(matches, 'protocol'),
            id: capture(matches, 'id'),
            recipient_additional: capture(matches, 'additional')
          }
        end

        private

        def empty_result
          {recipient: nil, protocol: nil, id: nil, recipient_additional: nil}
        end

        # MatchData of the first pattern that matches, or nil.
        def first_match(component)
          PATTERNS.each do |pattern|
            match = component.match(pattern)
            return match if match
          end

          nil
        end

        # Value of a named group, or nil when the matching pattern does not
        # define that group (MatchData#[] raises for unknown names).
        def capture(matches, name)
          matches.names.include?(name) ? matches[name] : nil
        end

        # The factory's object when it can build one for the recipient,
        # otherwise the recipient string itself.
        def enrich_recipient(recipient)
          @extended_ip_factory.build(recipient) || recipient
        end
      end
    end
  end
end
|