phisher_phinder 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.env.example +3 -0
- data/.gitignore +18 -0
- data/.rspec +3 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +6 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +93 -0
- data/LICENSE +21 -0
- data/LICENSE.txt +21 -0
- data/README.md +38 -0
- data/Rakefile +38 -0
- data/bin/console +20 -0
- data/bin/setup +8 -0
- data/db/migrations/0001_create_geo_ip_cache.rb +35 -0
- data/lib/phisher_phinder.rb +28 -0
- data/lib/phisher_phinder/body_hyperlink.rb +47 -0
- data/lib/phisher_phinder/cached_geoip_client.rb +95 -0
- data/lib/phisher_phinder/expanded_data_processor.rb +61 -0
- data/lib/phisher_phinder/extended_ip.rb +16 -0
- data/lib/phisher_phinder/extended_ip_factory.rb +51 -0
- data/lib/phisher_phinder/geoip_ip_data.rb +6 -0
- data/lib/phisher_phinder/mail.rb +50 -0
- data/lib/phisher_phinder/mail_parser.rb +111 -0
- data/lib/phisher_phinder/mail_parser/body_parser.rb +94 -0
- data/lib/phisher_phinder/mail_parser/header_value_parser.rb +24 -0
- data/lib/phisher_phinder/mail_parser/received_headers/by_parser.rb +45 -0
- data/lib/phisher_phinder/mail_parser/received_headers/classifier.rb +27 -0
- data/lib/phisher_phinder/mail_parser/received_headers/for_parser.rb +23 -0
- data/lib/phisher_phinder/mail_parser/received_headers/from_parser.rb +40 -0
- data/lib/phisher_phinder/mail_parser/received_headers/parser.rb +74 -0
- data/lib/phisher_phinder/mail_parser/received_headers/starttls_parser.rb +24 -0
- data/lib/phisher_phinder/mail_parser/received_headers/timestamp_parser.rb +32 -0
- data/lib/phisher_phinder/simple_ip.rb +15 -0
- data/lib/phisher_phinder/version.rb +3 -0
- data/phisher_phinder.gemspec +32 -0
- metadata +112 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
module PhisherPhinder
  # Pairs a mail with the outcome of looking up each hyperlink found in it.
  class ExpandedDataProcessor
    # Looks up every hypertext link of +mail+.
    #
    # @param mail [#hypertext_links] object exposing the links to look up
    # @return [Hash] +:linked_content+ holds one lookup result per link (in link
    #   order); +:mail+ is the object that was passed in
    def process(mail)
      lookups = mail.hypertext_links.map { |hyperlink| lookup_content(hyperlink) }

      { linked_content: lookups, mail: mail }
    end

    private

    # Produces the lookup result for a single link. Links that do not support
    # retrieval are reported with +content_requested: false+ and are never
    # requested.
    def lookup_content(link)
      skeleton = {
        href: link.href,
        link_text: link.text,
        content_requested: true,
        response: nil,
        error: nil
      }

      return skeleton.merge(content_requested: false) unless link.supports_retrieval?

      # net/http is only required on the retrieval path.
      require 'net/http'

      skeleton.merge(retrieve(link))
    end

    # Performs the GET request and returns either a +:response+ entry or, when
    # any StandardError is raised along the way, an +:error+ entry carrying the
    # exception class and message.
    def retrieve(link)
      reply = Net::HTTP.get_response(link.href)
      summary = reply.is_a?(Net::HTTPOK) ? response_with_body(reply) : response_status_only(reply)

      { response: summary }
    rescue => e
      { error: { class: e.class, message: e.message } }
    end

    # Summary for a 200 response: numeric status, raw body and every
    # http(s) URL-like string found in the body.
    def response_with_body(response)
      payload = response.body

      {
        status: response.code.to_i,
        body: payload,
        links_within_body: payload.scan(/https?:\/\/[a-z0-9\/._?=,&#!*~();:@+$%\[\]-]+/i)
      }
    end

    # Summary for any other response: only the numeric status is kept.
    def response_status_only(response)
      { status: response.code.to_i, body: nil, links_within_body: [] }
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  # Value object pairing an IP address with the GeoIP data looked up for it.
  class ExtendedIp
    attr_reader :ip_address, :geoip_ip_data

    # @param ip_address [Object] the address (ExtendedIpFactory passes an IPAddr)
    # @param geoip_ip_data [Object, nil] GeoIP lookup result for the address
    def initialize(ip_address:, geoip_ip_data:)
      @ip_address = ip_address
      @geoip_ip_data = geoip_ip_data
    end

    # Two instances are equal when both the address and the GeoIP data are equal.
    #
    # Objects that do not expose both readers (nil, strings, ...) compare as
    # unequal instead of raising NoMethodError.
    #
    # @param other [Object]
    # @return [Boolean]
    def ==(other)
      return false unless other.respond_to?(:ip_address) && other.respond_to?(:geoip_ip_data)

      ip_address == other.ip_address && geoip_ip_data == other.geoip_ip_data
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'ipaddr'
|
3
|
+
|
4
|
+
module PhisherPhinder
  # Builds IP value objects from strings: SimpleIp for loopback/private
  # addresses, ExtendedIp (with GeoIP data attached) for everything else.
  class ExtendedIpFactory
    # RFC 1918 private IPv4 ranges; parsed once instead of on every call.
    NON_PUBLIC_IPV4_RANGES = [
      IPAddr.new('10.0.0.0/8'),
      IPAddr.new('172.16.0.0/12'),
      IPAddr.new('192.168.0.0/16')
    ].freeze

    # @param geoip_client [#lookup] client queried with the IP string for
    #   addresses outside the loopback/private ranges
    def initialize(geoip_client:)
      @geoip_client = geoip_client
    end

    # @param ip_string [String] candidate IP address
    # @return [SimpleIp, ExtendedIp, nil] nil when +ip_string+ cannot be parsed
    #   as an IP address. This includes nil and other non-String input, which
    #   IPAddr rejects with AddressFamilyError rather than InvalidAddressError.
    def build(ip_string)
      ip = IPAddr.new(ip_string)

      if non_public_ip?(ip)
        SimpleIp.new(ip_address: ip)
      else
        ExtendedIp.new(ip_address: ip, geoip_ip_data: geoip_data(ip_string))
      end
    rescue IPAddr::Error
      # IPAddr::Error is the parent of InvalidAddressError and AddressFamilyError.
      nil
    end

    private

    def non_public_ip?(ip)
      ip.loopback? || NON_PUBLIC_IPV4_RANGES.any? { |range| range.include?(ip) }
    end

    # Returns the client's lookup result, or nil when the client reports that
    # the address is unknown.
    def geoip_data(ip_string)
      @geoip_client.lookup(ip_string)
    rescue MaxMind::GeoIP2::AddressNotFoundError
      nil
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  # A parsed email: the raw text, its raw header and body sections, and the
  # structures derived from them.
  class Mail
    attr_reader :original_email, :original_headers, :original_body, :headers, :tracing_headers, :body

    def initialize(
      original_email:, original_headers:, original_body:, headers:, tracing_headers:, body:
    )
      @original_email = original_email
      @original_headers = original_headers
      @original_body = original_body
      @headers = headers
      @tracing_headers = tracing_headers
      @body = body
    end

    # Unique, lower-cased addresses listed in the Reply-To header(s).
    #
    # @return [Array<String>] empty when the mail has no Reply-To header
    def reply_to_addresses
      reply_to_values.
        flat_map { |value| value.split(',') }.
        map { |candidate| extract_email_address(candidate) }.
        uniq
    end

    # One BodyHyperlink per <a> element of the HTML body that carries an href
    # attribute, built from the attribute value and the element text.
    def hypertext_links
      body_as_html.
        xpath('//a').
        select { |el| el.attributes['href'] }.
        map { |el| BodyHyperlink.new(el.attributes['href'].value, el.text) }
    end

    private

    # Raw Reply-To strings. Accepts a missing header (nil), a single value or a
    # list of values. Each value is either a plain string or a
    # {data:, sequence:} hash of the kind MailParser::Parser builds.
    def reply_to_values
      raw = @headers[:reply_to]
      # Array() would explode a Hash into key/value pairs, so wrap it by hand.
      entries = raw.is_a?(Hash) ? [raw] : Array(raw)

      entries.map { |entry| entry.is_a?(Hash) ? entry[:data] : entry }.compact
    end

    def body_as_html
      require 'nokogiri'

      Nokogiri::HTML(body[:html])
    end

    # Pulls the address out of "Name <addr>" or returns the string itself when
    # there is no complete <...> section.
    def extract_email_address(email_address_string)
      bracketed = email_address_string[/<([^>]+)>/, 1]

      (bracketed || email_address_string).downcase.strip
    end
  end
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative('mail_parser/body_parser')
|
3
|
+
require_relative('mail_parser/header_value_parser')
|
4
|
+
|
5
|
+
module PhisherPhinder
  module MailParser
    # Splits a raw email into headers and body and builds a Mail from them.
    class Parser
      # @param enriched_ip_factory [#build] handed to the Received-header
      #   by/from parsers
      # @param line_ending_type [String] 'dos' selects "\r\n"; anything else "\n"
      def initialize(enriched_ip_factory, line_ending_type)
        @line_end = line_ending_type == 'dos' ? "\r\n" : "\n"
        @enriched_ip_factory = enriched_ip_factory
      end

      # @param contents [String] the complete raw email
      # @return [Mail]
      def parse(contents)
        original_headers, original_body = separate(contents)
        headers = extract_headers(original_headers)
        Mail.new(
          original_email: contents,
          original_headers: original_headers,
          original_body: original_body,
          headers: headers,
          tracing_headers: generate_tracing_headers(headers),
          body: parse_body(original_body, headers)
        )
      end

      private

      # Header section and body are separated by the first blank line.
      def separate(contents)
        contents.split("#{@line_end}#{@line_end}", 2)
      end

      def extract_headers(headers)
        # Empty input has no header section at all.
        return {} if headers.nil?

        parse_headers(unfold_headers(headers).split(@line_end))
      end

      # Joins continuation lines (line end followed by whitespace) onto the
      # header line they belong to.
      def unfold_headers(headers)
        headers.gsub(/#{@line_end}[\s\t]+/, ' ')
      end

      # Builds { header_name => {data:, sequence:} }. A header that occurs more
      # than once maps to an array of such hashes in order of appearance.
      # +sequence+ counts down from the first line, so the last line is 0.
      def parse_headers(headers_array)
        headers_array.each_with_index.each_with_object({}) do |(header_string, index), parsed|
          header, value = header_string.split(':', 2)
          # A line without a colon is not a header field; skip it instead of
          # failing on the missing value.
          next if value.nil?

          name = convert_header_name(header)
          enriched = enrich_header_value(value, headers_array.length - index - 1)
          parsed[name] = parsed.key?(name) ? append_occurrence(parsed[name], enriched) : enriched
        end
      end

      def append_occurrence(existing, addition)
        existing.is_a?(Array) ? existing << addition : [existing, addition]
      end

      def convert_header_name(header)
        header.gsub(/-/, '_').downcase.to_sym
      end

      def enrich_header_value(value, sequence)
        {data: HeaderValueParser.new.parse(value), sequence: sequence}
      end

      # Collects all Received / X-Received values and parses them in
      # descending sequence order.
      def generate_tracing_headers(headers)
        received_header_values = headers.each_with_object([]) do |(header_name, header_value), collected|
          next unless [:received, :x_received].include?(header_name)

          header_value.is_a?(Array) ? collected.concat(header_value) : collected << header_value
        end

        {
          received: restore_sequence(received_header_values).map { |v| parse_received_header(v[:data]) }
        }
      end

      def parse_received_header(value)
        parser = MailParser::ReceivedHeaders::Parser.new(
          by_parser: MailParser::ReceivedHeaders::ByParser.new(@enriched_ip_factory),
          for_parser: MailParser::ReceivedHeaders::ForParser.new,
          from_parser: MailParser::ReceivedHeaders::FromParser.new(@enriched_ip_factory),
          starttls_parser: MailParser::ReceivedHeaders::StarttlsParser.new,
          timestamp_parser: MailParser::ReceivedHeaders::TimestampParser.new,
          classifier: MailParser::ReceivedHeaders::Classifier.new
        )
        parser.parse(value)
      end

      def restore_sequence(values)
        values.sort { |a, b| b[:sequence] <=> a[:sequence] }
      end

      def parse_body(original_body, headers)
        MailParser::BodyParser.new(@line_end).parse(
          body_contents: original_body,
          content_type: header_data(headers, :content_type),
          content_transfer_encoding: header_data(headers, :content_transfer_encoding)
        )
      end

      # Data of a header that is expected once. When the header is repeated the
      # first occurrence is used (Hash#dig would raise TypeError on the array).
      def header_data(headers, name)
        value = headers[name]
        value = value.first if value.is_a?(Array)

        value && value[:data]
      end

      # Returns the decoded text when +text+ round-trips through strict base64
      # encoding, nil otherwise. Not called from within this class.
      def valid_base64_decoded(text)
        require 'base64'

        if Base64.strict_encode64(Base64.decode64(text)) == text.gsub(/#{@line_end}/, '')
          Base64.decode64(text)
        end
      end
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  module MailParser
    # Turns a raw mail body into { text:, html: } based on its content type.
    class BodyParser
      # @param line_end [String] line terminator used by the mail
      def initialize(line_end)
        @line_end = line_end
      end

      # @param body_contents [String, nil] raw body
      # @param content_type [String, nil] value of the Content-Type header
      # @param content_transfer_encoding [String, nil] value of the
      #   Content-Transfer-Encoding header
      # @return [Hash] +:text+ and +:html+. For multipart/alternative both are
      #   strings (possibly empty); otherwise the part that does not apply is nil
      def parse(body_contents:, content_type:, content_transfer_encoding:)
        if multipart_alternative?(content_type)
          parse_multipart_alternative(content_type, body_contents)
        elsif html?(content_type)
          {
            text: nil,
            html: decode_body(body_contents, content_transfer_encoding)
          }
        else
          {
            text: body_contents,
            html: nil
          }
        end
      end

      private

      def html?(content_type)
        content_type && content_type.split(';').first == 'text/html'
      end

      # Only base64 bodies are decoded. Any other transfer encoding (7bit,
      # 8bit, quoted-printable, ...) is passed through untouched rather than
      # being run through the base64 decoder.
      def decode_body(body_contents, content_transfer_encoding)
        return body_contents unless content_transfer_encoding.to_s.strip.downcase == 'base64'

        require 'base64'

        Base64.decode64(body_contents)
      end

      def multipart_alternative?(content_type)
        content_type =~ /\Amultipart\/alternative/
      end

      def parse_multipart_alternative(content_type, contents)
        base_boundary = extract_boundary(content_type)
        raw_blocks = base_boundary ? contents.to_s.split('--' + base_boundary + @line_end) : []

        # Without a usable boundary the parts cannot be told apart; keep the
        # raw body as text instead of failing.
        return {html: '', text: contents.to_s} if raw_blocks.length < 2

        trimmed_blocks = strip_epilogue(strip_prologue(raw_blocks), '--' + base_boundary + '--')

        categorise_blocks(trimmed_blocks).each_with_object({html: '', text: ''}) do |block, memo|
          key = block[:html] ? :html : :text
          # += builds a new string, so the (possibly frozen) literals are never mutated.
          memo[key] += block[:contents]
        end
      end

      # Reads the boundary parameter wherever it appears in the header value.
      # Handles the quoted form (boundary="----=_Part_1"), whose value may
      # itself contain '=', as well as the bare form.
      def extract_boundary(content_type)
        match = content_type.match(/boundary\s*=\s*(?:"([^"]+)"|([^\s;]+))/i)

        match && (match[1] || match[2])
      end

      # Everything before the first boundary is preamble.
      def strip_prologue(blocks)
        blocks[1..-1]
      end

      # Drops the closing boundary and whatever follows it from the last block.
      def strip_epilogue(blocks, end_boundary)
        blocks[0..-2] << blocks[-1].split(end_boundary).first.to_s
      end

      # Reads each part's headers up to the first blank line and returns
      # { html:, contents: } per part; base64 parts are decoded line by line.
      def categorise_blocks(blocks)
        require 'base64'

        blocks.map do |block|
          lines = block.split(@line_end)
          html = false
          base64_encoded = false

          # Stops at the blank line, or when the part has no body at all.
          until lines.empty?
            line = lines.shift.strip
            break if line.empty?

            html = true if line =~ /\AContent-Type:\s*text\/html/i
            base64_encoded = true if line =~ /\AContent-Transfer-Encoding:\s*base64/i
          end

          contents = if base64_encoded
            lines.map { |l| Base64.decode64(l) }.join
          else
            lines.join(@line_end)
          end

          {
            html: html,
            contents: contents
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  module MailParser
    # Cleans up a raw header value, decoding UTF-8 base64 encoded words
    # (=?UTF-8?B?...?=) where present.
    class HeaderValueParser
      # Start of an encoded word; charset and encoding letter match in any case.
      ENCODED_WORD_PREFIX = /=\?UTF-8\?B\?/i.freeze
      # A single encoded word; the closing "?=" is tolerated when missing.
      ENCODED_WORD = '=\?UTF-8\?B\?[^?\s]*(?:\?=)?'
      # One or more encoded words separated only by whitespace.
      ENCODED_WORD_RUN = /#{ENCODED_WORD}(?:\s+#{ENCODED_WORD})*/i.freeze

      # @param raw_value [String, nil] header value as it appears after the colon
      # @return [String] the stripped value with every encoded word replaced by
      #   its decoded text. Whitespace between adjacent encoded words is
      #   dropped, while text outside encoded words (e.g. "<user@host>") is
      #   left as it is. nil yields an empty string.
      def parse(raw_value)
        raw_value.to_s.strip.gsub(ENCODED_WORD_RUN) do |run|
          run.split(/\s+/).map { |word| parse_utf8_base64(word) }.join
        end
      end

      private

      # Decodes the payload of one encoded word and tags the result as UTF-8.
      def parse_utf8_base64(raw_value)
        require 'base64'

        payload = raw_value.strip.sub(ENCODED_WORD_PREFIX, '').sub(/\?=\z/, '')
        Base64.decode64(payload).force_encoding('UTF-8')
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PhisherPhinder
  module MailParser
    module ReceivedHeaders
      # Extracts recipient, protocol and id from the "by ..." part of a
      # Received header.
      class ByParser
        # Tried in order; the first pattern that matches wins.
        PATTERNS = [
          /by\s(?<recipient>\S+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s(?<additional>.+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sID\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)/,
        ].freeze

        # @param extended_ip_factory [#build] returns an IP object for IP
        #   strings and a falsy value otherwise
        def initialize(extended_ip_factory)
          @extended_ip_factory = extended_ip_factory
        end

        # @param component [String, nil] the "by ..." fragment
        # @return [Hash] +:recipient+, +:protocol+, +:id+ and
        #   +:recipient_additional+. All four are nil when +component+ is nil
        #   or matches none of the known formats.
        def parse(component)
          return empty_result unless component

          matches = first_match(component)
          # An unrecognised format is reported like a missing component rather
          # than failing on the missing match data.
          return empty_result unless matches

          {
            recipient: enrich_recipient(matches[:recipient]),
            protocol: capture(matches, 'protocol'),
            id: capture(matches, 'id'),
            recipient_additional: capture(matches, 'additional')
          }
        end

        private

        def empty_result
          {recipient: nil, protocol: nil, id: nil, recipient_additional: nil}
        end

        def first_match(component)
          PATTERNS.each do |pattern|
            found = component.match(pattern)
            return found if found
          end

          nil
        end

        # Not every pattern defines every group; absent groups yield nil.
        def capture(matches, group)
          matches.names.include?(group) ? matches[group] : nil
        end

        # Uses the factory's object when it builds one; otherwise keeps the
        # recipient string.
        def enrich_recipient(recipient)
          @extended_ip_factory.build(recipient) || recipient
        end
      end
    end
  end
end
|