phisher_phinder 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.env.example +3 -0
  3. data/.gitignore +18 -0
  4. data/.rspec +3 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +6 -0
  8. data/CHANGELOG.md +1 -0
  9. data/Gemfile +14 -0
  10. data/Gemfile.lock +93 -0
  11. data/LICENSE +21 -0
  12. data/LICENSE.txt +21 -0
  13. data/README.md +38 -0
  14. data/Rakefile +38 -0
  15. data/bin/console +20 -0
  16. data/bin/setup +8 -0
  17. data/db/migrations/0001_create_geo_ip_cache.rb +35 -0
  18. data/lib/phisher_phinder.rb +28 -0
  19. data/lib/phisher_phinder/body_hyperlink.rb +47 -0
  20. data/lib/phisher_phinder/cached_geoip_client.rb +95 -0
  21. data/lib/phisher_phinder/expanded_data_processor.rb +61 -0
  22. data/lib/phisher_phinder/extended_ip.rb +16 -0
  23. data/lib/phisher_phinder/extended_ip_factory.rb +51 -0
  24. data/lib/phisher_phinder/geoip_ip_data.rb +6 -0
  25. data/lib/phisher_phinder/mail.rb +50 -0
  26. data/lib/phisher_phinder/mail_parser.rb +111 -0
  27. data/lib/phisher_phinder/mail_parser/body_parser.rb +94 -0
  28. data/lib/phisher_phinder/mail_parser/header_value_parser.rb +24 -0
  29. data/lib/phisher_phinder/mail_parser/received_headers/by_parser.rb +45 -0
  30. data/lib/phisher_phinder/mail_parser/received_headers/classifier.rb +27 -0
  31. data/lib/phisher_phinder/mail_parser/received_headers/for_parser.rb +23 -0
  32. data/lib/phisher_phinder/mail_parser/received_headers/from_parser.rb +40 -0
  33. data/lib/phisher_phinder/mail_parser/received_headers/parser.rb +74 -0
  34. data/lib/phisher_phinder/mail_parser/received_headers/starttls_parser.rb +24 -0
  35. data/lib/phisher_phinder/mail_parser/received_headers/timestamp_parser.rb +32 -0
  36. data/lib/phisher_phinder/simple_ip.rb +15 -0
  37. data/lib/phisher_phinder/version.rb +3 -0
  38. data/phisher_phinder.gemspec +32 -0
  39. metadata +112 -0
@@ -0,0 +1,61 @@
1
require 'net/http'
require 'uri'

module PhisherPhinder
  # Follows the hyperlinks of a parsed mail and records what each one returns.
  #
  # The mail only needs to respond to `hypertext_links`; every link needs to
  # respond to `href`, `text` and `supports_retrieval?`.
  class ExpandedDataProcessor
    # Pattern used to pull http(s) URLs out of a retrieved response body.
    LINK_PATTERN = /https?:\/\/[a-z0-9\/._?=,&#!*~();:@+$%\[\]-]+/i

    # Builds the expanded data for a mail.
    #
    # @param mail [#hypertext_links] the parsed mail
    # @return [Hash] `:linked_content` (one entry per hyperlink, in order) and
    #   `:mail` (the mail that was passed in)
    def process(mail)
      {
        linked_content: mail.hypertext_links.map { |link| lookup_content(link) },
        mail: mail
      }
    end

    private

    # Describes a single link, fetching its target when the link allows it.
    #
    # Any StandardError raised while fetching or summarising the response is
    # recorded under `:error` instead of being propagated (best effort).
    def lookup_content(link)
      base_output = {
        href: link.href,
        link_text: link.text,
        content_requested: true,
        response: nil,
        error: nil
      }
      return base_output.merge(content_requested: false) unless link.supports_retrieval?

      begin
        # Net::HTTP.get_response needs a URI object; Kernel#URI returns a URI
        # unchanged and parses a String, so both href representations work.
        response = Net::HTTP.get_response(URI(link.href))
        base_output.merge(response: summarise(response))
      rescue StandardError => e
        base_output.merge(error: {class: e.class, message: e.message})
      end
    end

    # Only a 200 OK response has its body inspected.
    def summarise(response)
      if response.is_a?(Net::HTTPOK)
        response_with_body(response)
      else
        response_status_only(response)
      end
    end

    def response_with_body(response)
      body = response.body
      {
        status: response.code.to_i,
        body: body,
        # `to_s` keeps a body-less response from raising on `scan`.
        links_within_body: body.to_s.scan(LINK_PATTERN)
      }
    end

    def response_status_only(response)
      {
        status: response.code.to_i,
        body: nil,
        links_within_body: []
      }
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  # An IP address together with the GeoIP data that was looked up for it.
  class ExtendedIp
    attr_reader :ip_address, :geoip_ip_data

    # @param ip_address the address this object describes
    # @param geoip_ip_data the GeoIP data associated with the address
    def initialize(ip_address:, geoip_ip_data:)
      @ip_address = ip_address
      @geoip_ip_data = geoip_ip_data
    end

    # Two values are equal when both expose the same address and GeoIP data.
    #
    # Objects that do not expose both readers (nil, strings, ...) compare as
    # unequal rather than raising NoMethodError.
    #
    # @return [Boolean]
    def ==(other)
      other.respond_to?(:ip_address) &&
        other.respond_to?(:geoip_ip_data) &&
        ip_address == other.ip_address &&
        geoip_ip_data == other.geoip_ip_data
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+ require 'ipaddr'
3
+
4
module PhisherPhinder
  # Turns an IP string into a SimpleIp (loopback / private IPv4 addresses) or
  # an ExtendedIp carrying the data returned by the GeoIP client.
  class ExtendedIpFactory
    # Private IPv4 ranges that are never sent to the GeoIP client.
    PRIVATE_IPV4_RANGES = [
      IPAddr.new('10.0.0.0/8'),
      IPAddr.new('172.16.0.0/12'),
      IPAddr.new('192.168.0.0/16')
    ].freeze

    # @param geoip_client [#lookup] object used to look up public addresses
    def initialize(geoip_client:)
      @geoip_client = geoip_client
    end

    # @param ip_string [String] candidate IP address
    # @return [SimpleIp, ExtendedIp, nil] nil when `ip_string` is not a valid
    #   IP address
    def build(ip_string)
      ip = IPAddr.new(ip_string)

      if non_public_ip?(ip)
        SimpleIp.new(ip_address: ip)
      else
        ExtendedIp.new(ip_address: ip, geoip_ip_data: geoip_data(ip_string))
      end
    rescue IPAddr::Error
      # IPAddr::Error is the parent of InvalidAddressError, InvalidPrefixError
      # and AddressFamilyError, so host names, bad prefixes ("1.2.3.4/99") and
      # non-string input all yield nil instead of raising.
      nil
    end

    private

    def non_public_ip?(ip)
      ip.loopback? || PRIVATE_IPV4_RANGES.any? { |range| range.include?(ip) }
    end

    # Returns nil when the GeoIP client reports the address as unknown.
    def geoip_data(ip_string)
      @geoip_client.lookup(ip_string)
    rescue MaxMind::GeoIP2::AddressNotFoundError
      nil
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  # Sequel model for rows of the `geoip_ip_data` table.
  class GeoipIpData < Sequel::Model(:geoip_ip_data); end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  # Value object holding a parsed mail: the raw text, the parsed headers, the
  # tracing headers and the decoded body.
  class Mail
    attr_reader :original_email, :original_headers, :original_body, :headers, :tracing_headers, :body

    def initialize(
      original_email:, original_headers:, original_body:, headers:, tracing_headers:, body:
    )
      @original_email = original_email
      @original_headers = original_headers
      @original_body = original_body
      @headers = headers
      @tracing_headers = tracing_headers
      @body = body
    end

    # Unique, lower-cased addresses found in the Reply-To header(s).
    #
    # @return [Array<String>] empty when the mail has no Reply-To header
    def reply_to_addresses
      reply_to_values
        .flat_map { |value| value.split(',') }
        .map { |address| extract_email_address(address) }
        .uniq
    end

    # Every `<a>` element of the HTML body that carries an `href` attribute.
    #
    # @return [Array<BodyHyperlink>]
    def hypertext_links
      body_as_html.xpath('//a').each_with_object([]) do |element, links|
        href = element.attributes['href']
        links << BodyHyperlink.new(href.value, element.text) if href
      end
    end

    private

    # Raw Reply-To strings. Accepts a missing header, a single value or a list
    # of values, where each value is either a plain String or a
    # `{data:, sequence:}` entry.
    def reply_to_values
      raw = @headers[:reply_to]
      entries = raw.is_a?(Array) ? raw : [raw]
      entries.map { |entry| entry.is_a?(Hash) ? entry[:data] : entry }.compact
    end

    def body_as_html
      # Loaded lazily so nokogiri is only needed when links are requested.
      require 'nokogiri'

      Nokogiri::HTML(body[:html])
    end

    # "Name <user@host>" yields the bracketed part; anything else (including
    # an unterminated "<") is used as-is.
    def extract_email_address(email_address_string)
      bracketed = email_address_string[/<([^>]+)>/, 1]
      (bracketed || email_address_string).downcase.strip
    end
  end
end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+ require_relative('mail_parser/body_parser')
3
+ require_relative('mail_parser/header_value_parser')
4
+
5
module PhisherPhinder
  module MailParser
    # Splits a raw mail into headers and body and builds a Mail from them.
    class Parser
      # Header names whose values are collected as tracing headers.
      TRACING_HEADER_NAMES = %i[received x_received].freeze

      # @param enriched_ip_factory factory handed to the Received-header
      #   by/from parsers
      # @param line_ending_type [String] 'dos' selects "\r\n"; anything else
      #   selects "\n"
      def initialize(enriched_ip_factory, line_ending_type)
        @line_end = line_ending_type == 'dos' ? "\r\n" : "\n"
        @enriched_ip_factory = enriched_ip_factory
      end

      # @param contents [String] the complete raw mail
      # @return [Mail]
      def parse(contents)
        original_headers, original_body = separate(contents)
        # `to_s` keeps an empty mail (no header section at all) from raising.
        headers = extract_headers(original_headers.to_s)
        Mail.new(
          original_email: contents,
          original_headers: original_headers,
          original_body: original_body,
          headers: headers,
          tracing_headers: generate_tracing_headers(headers),
          body: parse_body(original_body, headers)
        )
      end

      private

      # Headers and body are separated by the first blank line.
      def separate(contents)
        contents.split("#{@line_end}#{@line_end}", 2)
      end

      def extract_headers(headers)
        parse_headers(unfold_headers(headers).split(@line_end))
      end

      # Joins folded (continuation) header lines onto the line they belong to.
      def unfold_headers(headers)
        headers.gsub(/#{@line_end}[\s\t]+/, ' ')
      end

      # Builds `{name => {data:, sequence:}}`; a header that occurs more than
      # once maps to an Array of such entries in order of appearance. The
      # sequence counts down, so the last header line gets 0.
      def parse_headers(headers_array)
        total = headers_array.length

        headers_array.each_with_index.each_with_object({}) do |(header_string, index), parsed|
          name, value = header_string.split(':', 2)
          # A line without a colon is not a header; skip it instead of raising.
          next if value.nil?

          key = convert_header_name(name)
          entry = enrich_header_value(value, total - index - 1)
          parsed[key] =
            if !parsed.key?(key)
              entry
            elsif parsed[key].is_a?(Array)
              parsed[key] << entry
            else
              [parsed[key], entry]
            end
        end
      end

      def convert_header_name(header)
        header.gsub(/-/, '_').downcase.to_sym
      end

      def enrich_header_value(value, sequence)
        {data: HeaderValueParser.new.parse(value), sequence: sequence}
      end

      # Collects the Received / X-Received entries, orders them by descending
      # sequence (i.e. as they appeared in the mail) and parses each one.
      def generate_tracing_headers(headers)
        received_header_values = headers.values_at(*TRACING_HEADER_NAMES).flatten.compact

        {
          received: restore_sequence(received_header_values).map { |v| parse_received_header(v[:data]) }
        }
      end

      def parse_received_header(value)
        parser = MailParser::ReceivedHeaders::Parser.new(
          by_parser: MailParser::ReceivedHeaders::ByParser.new(@enriched_ip_factory),
          for_parser: MailParser::ReceivedHeaders::ForParser.new,
          from_parser: MailParser::ReceivedHeaders::FromParser.new(@enriched_ip_factory),
          starttls_parser: MailParser::ReceivedHeaders::StarttlsParser.new,
          timestamp_parser: MailParser::ReceivedHeaders::TimestampParser.new,
          classifier: MailParser::ReceivedHeaders::Classifier.new
        )
        parser.parse(value)
      end

      def restore_sequence(values)
        values.sort { |a, b| b[:sequence] <=> a[:sequence] }
      end

      def parse_body(original_body, headers)
        MailParser::BodyParser.new(@line_end).parse(
          body_contents: original_body,
          content_type: single_header_data(headers, :content_type),
          content_transfer_encoding: single_header_data(headers, :content_transfer_encoding)
        )
      end

      # Value of a header expected to occur once. When the mail repeats the
      # header, the first occurrence is used (Hash#dig would raise on the
      # Array that repeated headers are stored in).
      def single_header_data(headers, name)
        entry = headers[name]
        entry = entry.first if entry.is_a?(Array)
        entry && entry[:data]
      end

      # Decoded text when `text` is strictly valid base64 (ignoring line
      # ends), nil otherwise. Uses pack/unpack so no extra require is needed.
      def valid_base64_decoded(text)
        decoded = text.unpack('m').first
        decoded if [decoded].pack('m0') == text.gsub(/#{@line_end}/, '')
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    # Splits a mail body into its text and HTML representations.
    class BodyParser
      # Extracts the boundary parameter, quoted or bare, wherever it appears.
      BOUNDARY_PATTERN = /boundary\s*=\s*(?:"([^"]+)"|([^;\s]+))/i

      # @param line_end [String] line terminator used by the mail
      def initialize(line_end)
        @line_end = line_end
      end

      # @param body_contents [String, nil] raw body
      # @param content_type [String, nil] value of the Content-Type header
      # @param content_transfer_encoding [String, nil] value of the
      #   Content-Transfer-Encoding header
      # @return [Hash] `{text:, html:}`
      def parse(body_contents:, content_type:, content_transfer_encoding:)
        if multipart_alternative?(content_type)
          parse_multipart_alternative(content_type, body_contents)
        elsif html?(content_type)
          {text: nil, html: decode_body(body_contents, content_transfer_encoding)}
        else
          {text: body_contents, html: nil}
        end
      end

      private

      # Media type without parameters, lower-cased ("" when absent).
      def media_type(content_type)
        content_type.to_s.split(';').first.to_s.strip.downcase
      end

      def html?(content_type)
        media_type(content_type) == 'text/html'
      end

      def multipart_alternative?(content_type)
        media_type(content_type).start_with?('multipart/alternative')
      end

      # Decodes according to the declared transfer encoding. Encodings that
      # need no decoding (7bit, 8bit, binary, none) return the body untouched.
      def decode_body(body_contents, content_transfer_encoding)
        return body_contents if body_contents.nil?

        case content_transfer_encoding.to_s.strip.downcase
        when 'base64' then decode_base64(body_contents)
        when 'quoted-printable' then body_contents.unpack('M').first
        else body_contents
        end
      end

      # Lenient base64 decoding (what Base64.decode64 does) without requiring
      # the base64 library.
      def decode_base64(text)
        text.unpack('m').first
      end

      # Concatenates the contents of all HTML parts and of all other parts.
      def parse_multipart_alternative(content_type, contents)
        result = {html: '', text: ''}

        categorise_blocks(split_parts(contents.to_s, boundary_from(content_type))).each do |block|
          key = block[:html] ? :html : :text
          result[key] += block[:contents]
        end

        result
      end

      def boundary_from(content_type)
        match = BOUNDARY_PATTERN.match(content_type)
        match && (match[1] || match[2])
      end

      # Body parts with the prologue (text before the first boundary) and the
      # epilogue (closing boundary and anything after it) removed. Returns an
      # empty list when there is no boundary or no part.
      def split_parts(contents, boundary)
        return [] unless boundary

        start_boundary = "--#{boundary}#{@line_end}"
        end_boundary = "--#{boundary}--"

        blocks = contents.split(start_boundary).drop(1)
        return blocks if blocks.empty?

        last_block = blocks.pop.split(end_boundary).first
        blocks << last_block.to_s
      end

      # Reads each part's headers (up to the first blank line) to find out
      # whether it is HTML and/or base64 encoded, then decodes its contents.
      def categorise_blocks(blocks)
        blocks.map do |block|
          lines = block.split(@line_end)
          html = false
          base64_encoded = false

          # Stops at the blank separator line, or when the part has no body.
          until lines.empty?
            line = lines.shift.strip
            break if line.empty?

            html = true if line =~ /\AContent-Type:\s*text\/html/i
            base64_encoded = true if line =~ /\AContent-Transfer-Encoding:\s*base64/i
          end

          contents =
            if base64_encoded
              lines.map { |l| decode_base64(l) }.join
            else
              lines.join(@line_end)
            end

          {html: html, contents: contents}
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    # Cleans up a raw header value, decoding UTF-8 / base64 encoded words.
    class HeaderValueParser
      # Start of a UTF-8, base64 encoded word. Charset and encoding are
      # matched case-insensitively, so both "?b?" and "?B?" are recognised.
      ENCODED_WORD_PREAMBLE = /=\?UTF-8\?B\?/i

      # @param raw_value [String, nil] header value as it appears in the mail
      # @return [String, nil] the stripped value, with encoded words decoded;
      #   nil when no value was given
      def parse(raw_value)
        return nil if raw_value.nil?
        return raw_value.strip unless raw_value =~ ENCODED_WORD_PREAMBLE

        previous_encoded = false
        pieces = raw_value.split(' ').each_with_object([]) do |snippet, output|
          encoded = !(snippet =~ ENCODED_WORD_PREAMBLE).nil?
          # Adjacent encoded words are joined directly; plain tokens keep a
          # single separating space and are never run through the decoder.
          output << ' ' unless output.empty? || (encoded && previous_encoded)
          output << (encoded ? parse_utf8_base64(snippet) : snippet)
          previous_encoded = encoded
        end

        pieces.join
      end

      private

      # Decodes one encoded word. `unpack('m')` is the lenient base64 decoding
      # that Base64.decode64 performs, without needing the base64 library.
      def parse_utf8_base64(raw_value)
        payload = raw_value.strip.sub(ENCODED_WORD_PREAMBLE, '').sub(/\?=\z/, '')
        payload.unpack('m').first.force_encoding('UTF-8')
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    module ReceivedHeaders
      # Parses the "by ..." component of a Received header.
      class ByParser
        # Candidate layouts, tried in order; the first one that matches wins.
        PATTERNS = [
          /by\s(?<recipient>\S+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s(?<additional>.+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sID\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)/
        ].freeze

        # @param extended_ip_factory [#build] returns an IP object for an IP
        #   string and nil for anything else
        def initialize(extended_ip_factory)
          @extended_ip_factory = extended_ip_factory
        end

        # @param component [String, nil] the "by" part of a Received header
        # @return [Hash] `:recipient`, `:protocol`, `:id` and
        #   `:recipient_additional`; all nil when the component is missing or
        #   matches none of the known layouts
        def parse(component)
          return empty_result unless component

          matches = first_match(component)
          # An unrecognised layout yields the empty result instead of raising.
          return empty_result unless matches

          {
            recipient: enrich_recipient(matches[:recipient]),
            protocol: capture(matches, 'protocol'),
            id: capture(matches, 'id'),
            recipient_additional: capture(matches, 'additional')
          }
        end

        private

        # A fresh hash on every call so callers may modify what they receive.
        def empty_result
          {recipient: nil, protocol: nil, id: nil, recipient_additional: nil}
        end

        def first_match(component)
          PATTERNS.each do |pattern|
            matches = component.match(pattern)
            return matches if matches
          end
          nil
        end

        # Value of a named group, or nil when the winning pattern lacks it.
        def capture(matches, name)
          matches.names.include?(name) ? matches[name] : nil
        end

        # The recipient becomes whatever the factory builds, or stays a plain
        # string when the factory returns nil.
        def enrich_recipient(recipient)
          @extended_ip_factory.build(recipient) || recipient
        end
      end
    end
  end
end