phisher_phinder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.env.example +3 -0
  3. data/.gitignore +18 -0
  4. data/.rspec +3 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/.travis.yml +6 -0
  8. data/CHANGELOG.md +1 -0
  9. data/Gemfile +14 -0
  10. data/Gemfile.lock +93 -0
  11. data/LICENSE +21 -0
  12. data/LICENSE.txt +21 -0
  13. data/README.md +38 -0
  14. data/Rakefile +38 -0
  15. data/bin/console +20 -0
  16. data/bin/setup +8 -0
  17. data/db/migrations/0001_create_geo_ip_cache.rb +35 -0
  18. data/lib/phisher_phinder.rb +28 -0
  19. data/lib/phisher_phinder/body_hyperlink.rb +47 -0
  20. data/lib/phisher_phinder/cached_geoip_client.rb +95 -0
  21. data/lib/phisher_phinder/expanded_data_processor.rb +61 -0
  22. data/lib/phisher_phinder/extended_ip.rb +16 -0
  23. data/lib/phisher_phinder/extended_ip_factory.rb +51 -0
  24. data/lib/phisher_phinder/geoip_ip_data.rb +6 -0
  25. data/lib/phisher_phinder/mail.rb +50 -0
  26. data/lib/phisher_phinder/mail_parser.rb +111 -0
  27. data/lib/phisher_phinder/mail_parser/body_parser.rb +94 -0
  28. data/lib/phisher_phinder/mail_parser/header_value_parser.rb +24 -0
  29. data/lib/phisher_phinder/mail_parser/received_headers/by_parser.rb +45 -0
  30. data/lib/phisher_phinder/mail_parser/received_headers/classifier.rb +27 -0
  31. data/lib/phisher_phinder/mail_parser/received_headers/for_parser.rb +23 -0
  32. data/lib/phisher_phinder/mail_parser/received_headers/from_parser.rb +40 -0
  33. data/lib/phisher_phinder/mail_parser/received_headers/parser.rb +74 -0
  34. data/lib/phisher_phinder/mail_parser/received_headers/starttls_parser.rb +24 -0
  35. data/lib/phisher_phinder/mail_parser/received_headers/timestamp_parser.rb +32 -0
  36. data/lib/phisher_phinder/simple_ip.rb +15 -0
  37. data/lib/phisher_phinder/version.rb +3 -0
  38. data/phisher_phinder.gemspec +32 -0
  39. metadata +112 -0
@@ -0,0 +1,61 @@
1
module PhisherPhinder
  # Augments a parsed mail with the content found behind each of its
  # hypertext links.
  class ExpandedDataProcessor
    # Absolute http(s) URLs that are picked out of a retrieved response body.
    LINK_PATTERN = /https?:\/\/[a-z0-9\/._?=,&#!*~();:@+$%\[\]-]+/i

    # Looks up every hypertext link of the mail.
    #
    # @param mail [#hypertext_links] object whose +hypertext_links+ each
    #   respond to +href+, +text+ and +supports_retrieval?+
    # @return [Hash] +:linked_content+ (one result hash per link, in link
    #   order) and +:mail+ (the object that was passed in)
    def process(mail)
      {
        linked_content: mail.hypertext_links.map { |link| lookup_content(link) },
        mail: mail
      }
    end

    private

    # Builds the result hash for a single link. Errors raised while fetching
    # are captured under +:error+ instead of being propagated.
    def lookup_content(link)
      base_output = {
        href: link.href,
        link_text: link.text,
        content_requested: true,
        response: nil,
        error: nil
      }

      return base_output.merge(content_requested: false) unless link.supports_retrieval?

      begin
        base_output.merge(response: fetch(link.href))
      rescue => e
        base_output.merge(error: {class: e.class, message: e.message})
      end
    end

    # Performs the GET request and summarises the response.
    def fetch(href)
      # Loaded lazily so the HTTP stack is only pulled in when a link is fetched.
      require 'net/http'

      # Net::HTTP.get_response needs a URI object when called with a single
      # argument; a plain String would fail with NoMethodError (#hostname).
      target = href.is_a?(String) ? URI(href) : href
      response = Net::HTTP.get_response(target)

      if response.is_a?(Net::HTTPOK)
        response_with_body(response)
      else
        response_status_only(response)
      end
    end

    # Summary for a 200 response: status, body and the URLs found in the body.
    def response_with_body(response)
      {
        status: response.code.to_i,
        body: response.body,
        links_within_body: response.body.scan(LINK_PATTERN)
      }
    end

    # Summary for any other response: only the numeric status is kept.
    def response_status_only(response)
      {
        status: response.code.to_i,
        body: nil,
        links_within_body: []
      }
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  # An IP address paired with the GeoIP data that was looked up for it.
  class ExtendedIp
    attr_reader :ip_address, :geoip_ip_data

    # @param ip_address the address itself
    # @param geoip_ip_data the GeoIP data associated with the address
    def initialize(ip_address:, geoip_ip_data:)
      @ip_address = ip_address
      @geoip_ip_data = geoip_ip_data
    end

    # Two instances are equal when both the address and the GeoIP data match.
    # Objects that do not expose both readers (nil, strings, ...) are simply
    # unequal instead of raising NoMethodError.
    #
    # @return [Boolean]
    def ==(other)
      other.respond_to?(:ip_address) &&
        other.respond_to?(:geoip_ip_data) &&
        ip_address == other.ip_address &&
        geoip_ip_data == other.geoip_ip_data
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+ require 'ipaddr'
3
+
4
module PhisherPhinder
  # Turns an IP string into a SimpleIp (loopback / private IPv4 addresses) or
  # an ExtendedIp carrying GeoIP data (every other address).
  class ExtendedIpFactory
    # The private IPv4 blocks 10/8, 172.16/12 and 192.168/16. Built once
    # instead of on every call.
    PRIVATE_IPV4_RANGES = [
      IPAddr.new('10.0.0.0/8'),
      IPAddr.new('172.16.0.0/12'),
      IPAddr.new('192.168.0.0/16')
    ].freeze

    # @param geoip_client [#lookup] client queried for non-private addresses
    def initialize(geoip_client:)
      @geoip_client = geoip_client
    end

    # @param ip_string [String] candidate IP address
    # @return [SimpleIp, ExtendedIp, nil] nil when the input cannot be parsed
    #   as an IP address
    def build(ip_string)
      ip = IPAddr.new(ip_string)

      if non_public_ip?(ip)
        SimpleIp.new(ip_address: ip)
      else
        ExtendedIp.new(ip_address: ip, geoip_ip_data: geoip_data(ip_string))
      end
    rescue IPAddr::Error
      # IPAddr::Error is the parent of InvalidAddressError, AddressFamilyError
      # (raised for nil / non-String input) and InvalidPrefixError, so every
      # unparsable input yields nil rather than only malformed strings.
      nil
    end

    private

    def non_public_ip?(ip)
      localhost_ip?(ip) || PRIVATE_IPV4_RANGES.any? { |range| range.include?(ip) }
    end

    def localhost_ip?(ip)
      ip.loopback?
    end

    # Returns the client's lookup result, or nil when the GeoIP database has
    # no record for the address.
    def geoip_data(ip_string)
      @geoip_client.lookup(ip_string)
    rescue MaxMind::GeoIP2::AddressNotFoundError
      nil
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  # Sequel model over the :geoip_ip_data table; adds no behaviour of its own.
  class GeoipIpData < Sequel::Model(:geoip_ip_data); end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  # Holds a parsed email: the raw text, its raw header and body sections, and
  # the parsed headers, tracing headers and body.
  class Mail
    attr_reader :original_email, :original_headers, :original_body, :headers, :tracing_headers, :body

    def initialize(
      original_email:, original_headers:, original_body:, headers:, tracing_headers:, body:
    )
      @original_email = original_email
      @original_headers = original_headers
      @original_body = original_body
      @headers = headers
      @tracing_headers = tracing_headers
      @body = body
    end

    # Unique, lower-cased addresses listed in the Reply-To header(s).
    #
    # @return [Array<String>] empty when the mail has no Reply-To header
    def reply_to_addresses
      header_values(:reply_to).
        flat_map { |value_string| value_string.split(',') }.
        map { |email_address_string| extract_email_address(email_address_string) }.
        uniq
    end

    # Every <a> element of the HTML body that carries an href attribute.
    #
    # @return [Array<BodyHyperlink>]
    def hypertext_links
      body_as_html.
        xpath('//a').
        select { |el| el.attributes['href'] }.
        map { |el| BodyHyperlink.new(el.attributes['href'].value, el.text) }
    end

    private

    # Normalises a header entry to a list of value strings. Accepts a missing
    # header (nil), a list of strings, and the {data:, sequence:} hashes (one
    # or a list of them) that MailParser::Parser stores per header.
    def header_values(name)
      raw = @headers[name]
      entries = raw.is_a?(Array) ? raw : [raw]
      entries.map { |entry| entry.is_a?(Hash) ? entry[:data] : entry }.compact
    end

    def body_as_html
      # Loaded lazily; only needed when links are requested.
      require 'nokogiri'

      Nokogiri::HTML(body[:html])
    end

    # Takes the part between angle brackets when present; otherwise (including
    # an unterminated '<') falls back to the whole string.
    def extract_email_address(email_address_string)
      (email_address_string[/<([^>]+)>/, 1] || email_address_string).downcase.strip
    end
  end
end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+ require_relative('mail_parser/body_parser')
3
+ require_relative('mail_parser/header_value_parser')
4
+
5
module PhisherPhinder
  module MailParser
    # Splits a raw email into its header and body sections and builds a Mail
    # from the parsed pieces.
    class Parser
      # @param enriched_ip_factory factory handed to the Received-header
      #   "by" and "from" parsers
      # @param line_ending_type [String] 'dos' selects CRLF line endings; any
      #   other value selects LF
      def initialize(enriched_ip_factory, line_ending_type)
        @enriched_ip_factory = enriched_ip_factory
        @line_end =
          if line_ending_type == 'dos'
            "\r\n"
          else
            "\n"
          end
      end

      # @param contents [String] the complete raw email
      # @return [Mail]
      def parse(contents)
        raw_headers, raw_body = separate(contents)
        parsed_headers = extract_headers(raw_headers)
        tracing = generate_tracing_headers(parsed_headers)
        parsed_body = parse_body(raw_body, parsed_headers)

        Mail.new(
          original_email: contents,
          original_headers: raw_headers,
          original_body: raw_body,
          headers: parsed_headers,
          tracing_headers: tracing,
          body: parsed_body
        )
      end

      private

      # Header and body sections are divided by the first empty line.
      def separate(contents)
        contents.split(@line_end * 2, 2)
      end

      def extract_headers(headers)
        unfolded = unfold_headers(headers)
        parse_headers(unfolded.split(@line_end))
      end

      # Joins continuation lines (line end followed by whitespace) onto the
      # header line they belong to.
      def unfold_headers(headers)
        headers.gsub(/#{@line_end}[\s\t]+/, ' ')
      end

      # Builds a hash keyed by header name. A header seen once maps to a
      # single {data:, sequence:} entry; a repeated header maps to an array of
      # entries in order of appearance. The last header line gets sequence 0.
      def parse_headers(headers_array)
        total = headers_array.length

        headers_array.each_with_index.each_with_object({}) do |(line, position), collected|
          name, raw_value = line.split(":", 2)
          key = convert_header_name(name)
          entry = enrich_header_value(raw_value, total - position - 1)

          if !collected.key?(key)
            collected[key] = entry
          elsif collected[key].is_a?(Array)
            collected[key] << entry
          else
            collected[key] = [collected[key], entry]
          end
        end
      end

      def convert_header_name(header)
        header.tr('-', '_').downcase.to_sym
      end

      def enrich_header_value(value, sequence)
        {
          data: HeaderValueParser.new.parse(value),
          sequence: sequence
        }
      end

      # Collects the Received / X-Received entries, orders them by descending
      # sequence and parses each one.
      def generate_tracing_headers(headers)
        collected = headers.each_with_object([]) do |(header_name, header_value), found|
          next unless [:received, :x_received].include?(header_name)

          if header_value.is_a?(Array)
            found.concat(header_value)
          else
            found << header_value
          end
        end

        ordered = restore_sequence(collected.flatten)

        {
          received: ordered.map { |entry| parse_received_header(entry[:data]) }
        }
      end

      def parse_received_header(value)
        MailParser::ReceivedHeaders::Parser.new(
          by_parser: MailParser::ReceivedHeaders::ByParser.new(@enriched_ip_factory),
          for_parser: MailParser::ReceivedHeaders::ForParser.new,
          from_parser: MailParser::ReceivedHeaders::FromParser.new(@enriched_ip_factory),
          starttls_parser: MailParser::ReceivedHeaders::StarttlsParser.new,
          timestamp_parser: MailParser::ReceivedHeaders::TimestampParser.new,
          classifier: MailParser::ReceivedHeaders::Classifier.new
        ).parse(value)
      end

      # Highest sequence first.
      def restore_sequence(values)
        values.sort { |left, right| right[:sequence] <=> left[:sequence] }
      end

      def parse_body(original_body, headers)
        body_parser = MailParser::BodyParser.new(@line_end)

        body_parser.parse(
          body_contents: original_body,
          content_type: headers.dig(:content_type, :data),
          content_transfer_encoding: headers.dig(:content_transfer_encoding, :data)
        )
      end

      # Returns the decoded text when re-encoding it reproduces the input
      # (line endings ignored); nil otherwise.
      # NOTE(review): not called from anywhere in this class.
      def valid_base64_decoded(text)
        decoded = Base64.decode64(text)
        return unless Base64.strict_encode64(decoded) == text.gsub(/#{@line_end}/, '')

        decoded
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    # Extracts the plain-text and HTML portions of a mail body.
    class BodyParser
      # Part headers are matched case-insensitively because MIME header names
      # and media types are not case-sensitive.
      HTML_PART_HEADER = /\AContent-Type:\s*text\/html/i
      BASE64_PART_HEADER = /\AContent-Transfer-Encoding:\s*base64/i
      # The boundary parameter, either quoted or bare, anywhere in the value.
      BOUNDARY_PARAMETER = /boundary=(?:"([^"]+)"|([^;\s]+))/i

      # @param line_end [String] line terminator used by the mail
      def initialize(line_end)
        @line_end = line_end
      end

      # @param body_contents [String, nil] raw body section of the mail
      # @param content_type [String, nil] value of the Content-Type header
      # @param content_transfer_encoding [String, nil] value of the
      #   Content-Transfer-Encoding header
      # @return [Hash] +:text+ and +:html+ entries
      def parse(body_contents:, content_type:, content_transfer_encoding:)
        if multipart_alternative?(content_type)
          parse_multipart_alternative(content_type, body_contents)
        elsif html?(content_type)
          {
            text: nil,
            html: decode_body(body_contents, content_transfer_encoding)
          }
        else
          {
            text: body_contents,
            html: nil
          }
        end
      end

      private

      def html?(content_type)
        content_type.to_s.split(';').first.to_s.strip.downcase == 'text/html'
      end

      # Only base64 bodies are decoded. Other transfer encodings (7bit, 8bit,
      # quoted-printable, ...) are returned untouched rather than being run
      # through the base64 decoder.
      def decode_body(body_contents, content_transfer_encoding)
        return body_contents unless body_contents
        return body_contents unless content_transfer_encoding.to_s.strip.downcase == 'base64'

        decode_base64(body_contents)
      end

      def decode_base64(encoded)
        # Loaded lazily, and from the one place every decoding path goes through.
        require 'base64'

        Base64.decode64(encoded)
      end

      def multipart_alternative?(content_type)
        !(content_type.to_s =~ /\Amultipart\/alternative/i).nil?
      end

      # Concatenates the HTML parts and the non-HTML parts of the body.
      # Returns empty strings when no boundary or no parts can be found.
      def parse_multipart_alternative(content_type, contents)
        boundary = extract_boundary(content_type)
        parts = boundary ? split_parts(contents.to_s, boundary) : []

        categorise_blocks(parts).inject({html: '', text: ''}) do |memo, block|
          key = block[:html] ? :html : :text
          memo.merge(key => memo[key] + block[:contents])
        end
      end

      # Handles both boundary=abc and boundary="abc", regardless of where the
      # parameter sits in the header value.
      def extract_boundary(content_type)
        match = BOUNDARY_PARAMETER.match(content_type.to_s)
        match && (match[1] || match[2])
      end

      def split_parts(contents, boundary)
        raw_blocks = contents.split("--#{boundary}#{@line_end}")
        strip_epilogue(strip_prologue(raw_blocks), "--#{boundary}--")
      end

      # Everything before the first boundary is not part of any block.
      def strip_prologue(blocks)
        blocks.drop(1)
      end

      # Cuts the closing boundary (and whatever follows it) off the last block.
      def strip_epilogue(blocks, end_boundary)
        return blocks if blocks.empty?

        blocks[0..-2] << blocks[-1].split(end_boundary).first.to_s
      end

      # Reads each block's headers up to the first empty line and returns the
      # (decoded) remainder together with a flag telling whether it is HTML.
      def categorise_blocks(blocks)
        blocks.map do |block|
          lines = block.split(@line_end)
          html = false
          base64_encoded = false

          # Stops at the blank separator line, or when the block runs out of
          # lines because it has no separator at all.
          until lines.empty?
            line = lines.shift.strip
            break if line.empty?

            if line =~ HTML_PART_HEADER
              html = true
            elsif line =~ BASE64_PART_HEADER
              base64_encoded = true
            end
          end

          contents = if base64_encoded
            # Decoded as one stream so lines need not be multiples of 4 chars.
            decode_base64(lines.join)
          else
            lines.join(@line_end)
          end

          {
            html: html,
            contents: contents
          }
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    # Cleans up a raw header value, decoding base64 UTF-8 encoded words
    # (=?UTF-8?B?...?=) where present.
    class HeaderValueParser
      # A complete encoded word; charset and encoding markers are matched
      # case-insensitively, so both ?B? and ?b? are recognised.
      ENCODED_WORD = /=\?UTF-8\?B\?([^?]*)\?=/i
      # Whitespace that merely separates two adjacent encoded words.
      ENCODED_WORD_SEPARATOR = /(?<=\?=)\s+(?==\?UTF-8\?B\?)/i

      # @param raw_value [String, nil] header text following the colon
      # @return [String] the stripped value with encoded words decoded; text
      #   outside encoded words is kept as it is
      def parse(raw_value)
        value = raw_value.to_s
        return value.strip unless value =~ ENCODED_WORD

        value.
          gsub(ENCODED_WORD_SEPARATOR, '').
          gsub(ENCODED_WORD) { parse_utf8_base64(Regexp.last_match(1)) }.
          strip
      end

      private

      # Decodes a base64 payload (a leading =?UTF-8?B? marker is tolerated)
      # and tags the result as UTF-8.
      def parse_utf8_base64(raw_value)
        require 'base64'

        Base64.decode64(raw_value.strip.sub(/=\?UTF-8\?B\?/i, '')).force_encoding('UTF-8')
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    module ReceivedHeaders
      # Parses the "by ..." component of a Received header.
      class ByParser
        # Tried in order; the first pattern that matches wins.
        PATTERNS = [
          /by\s(?<recipient>\S+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s(?<additional>.+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sID\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
          /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)/,
        ].freeze

        # @param extended_ip_factory [#build] returns an enriched IP for a
        #   string, or nil when the string is not an IP address
        def initialize(extended_ip_factory)
          @extended_ip_factory = extended_ip_factory
        end

        # @param component [String, nil] the "by" part of a Received header
        # @return [Hash] +:recipient+, +:protocol+, +:id+ and
        #   +:recipient_additional+; all nil when the component is missing or
        #   matches none of the known formats
        def parse(component)
          return empty_result unless component

          matches = first_match(component)
          # An unrecognised format is reported as "nothing found" rather than
          # failing with NoMethodError on nil.
          return empty_result unless matches

          {
            recipient: enrich_recipient(matches[:recipient]),
            protocol: capture(matches, 'protocol'),
            id: capture(matches, 'id'),
            recipient_additional: capture(matches, 'additional')
          }
        end

        private

        def empty_result
          {recipient: nil, protocol: nil, id: nil, recipient_additional: nil}
        end

        def first_match(component)
          PATTERNS.each do |pattern|
            found = component.match(pattern)
            return found if found
          end

          nil
        end

        # Not every pattern defines every group, so check before reading.
        def capture(matches, name)
          matches.names.include?(name) ? matches[name] : nil
        end

        # Uses the enriched IP when the factory builds one; otherwise keeps
        # the recipient string.
        def enrich_recipient(recipient)
          @extended_ip_factory.build(recipient) || recipient
        end
      end
    end
  end
end