phisher_phinder 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/.env.example +2 -1
  3. data/.gitignore +3 -0
  4. data/Gemfile +0 -11
  5. data/Gemfile.lock +45 -13
  6. data/README.md +108 -2
  7. data/exe/phisher_phinder +61 -0
  8. data/lib/phisher_phinder.rb +11 -2
  9. data/lib/phisher_phinder/command.rb +20 -0
  10. data/lib/phisher_phinder/display.rb +64 -0
  11. data/lib/phisher_phinder/extended_ip.rb +4 -0
  12. data/lib/phisher_phinder/extended_ip_factory.rb +4 -2
  13. data/lib/phisher_phinder/geoip_ip_data.rb +9 -2
  14. data/lib/phisher_phinder/mail.rb +10 -3
  15. data/lib/phisher_phinder/mail_parser.rb +43 -30
  16. data/lib/phisher_phinder/mail_parser/authentication_headers/auth_results_parser.rb +150 -0
  17. data/lib/phisher_phinder/mail_parser/authentication_headers/parser.rb +25 -0
  18. data/lib/phisher_phinder/mail_parser/authentication_headers/received_spf_parser.rb +222 -0
  19. data/lib/phisher_phinder/mail_parser/body/block_classifier.rb +106 -0
  20. data/lib/phisher_phinder/mail_parser/body/block_parser.rb +37 -0
  21. data/lib/phisher_phinder/mail_parser/body_parser.rb +26 -31
  22. data/lib/phisher_phinder/mail_parser/header_value_parser.rb +25 -10
  23. data/lib/phisher_phinder/mail_parser/received_headers/by_parser.rb +35 -5
  24. data/lib/phisher_phinder/mail_parser/received_headers/for_parser.rb +25 -5
  25. data/lib/phisher_phinder/mail_parser/received_headers/from_parser.rb +50 -6
  26. data/lib/phisher_phinder/mail_parser/received_headers/parser.rb +50 -29
  27. data/lib/phisher_phinder/mail_parser/received_headers/starttls_parser.rb +8 -1
  28. data/lib/phisher_phinder/null_lookup_client.rb +9 -0
  29. data/lib/phisher_phinder/null_response.rb +12 -0
  30. data/lib/phisher_phinder/sender_extractor.rb +74 -0
  31. data/lib/phisher_phinder/simple_ip.rb +4 -0
  32. data/lib/phisher_phinder/tracing_report.rb +47 -0
  33. data/lib/phisher_phinder/version.rb +1 -1
  34. data/phisher_phinder.gemspec +15 -1
  35. metadata +208 -13
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    module Body
      # Works out how a piece of mail body is represented: its content type
      # (:text or :html), its character set and its content transfer encoding.
      #
      # Unknown or missing values never raise; the defaults (:text, :utf_8 and
      # a nil transfer encoding) are kept instead.
      class BlockClassifier
        # Charset labels (lower-cased) mapped to the symbols used downstream.
        CHARACTER_SETS = {
          'utf-8' => :utf_8,
          'windows-1251' => :windows_1251,
          'iso-8859-1' => :iso_8859_1
        }.freeze

        # Transfer-encoding tokens (lower-cased) mapped to their symbols.
        # Insertion order is the order in which the tokens are tested.
        TRANSFER_ENCODINGS = {
          'base64' => :base64,
          'quoted-printable' => :quoted_printable,
          '7bit' => :seven_bit
        }.freeze

        # @param line_end [String] the line terminator used by the mail
        def initialize(line_end)
          @line_end = line_end
        end

        # Classifies one raw block: header lines, a blank line, then content.
        #
        # @param contents [String] the raw block
        # @return [Hash] :content_type, :character_set,
        #   :content_transfer_encoding and :content (everything after the first
        #   blank line, re-joined with the line terminator)
        def classify_block(contents)
          lines = contents.split(@line_end)
          header_lines = take_header_lines(lines)

          output = default_classification
          header_lines.each do |line|
            # Header field names are case-insensitive.
            if line =~ /\AContent-Type:/i
              output.merge!(extract_content_type(line))
              output.merge!(extract_character_set(line))
            elsif line =~ /\AContent-Transfer-Encoding/i
              output.merge!(extract_encoding(line))
            end
          end

          # take_header_lines consumed the headers and the blank separator, so
          # whatever is left in `lines` is the block's content.
          output[:content] = lines.join(@line_end)

          output
        end

        # Classifies a body from header values that were already extracted.
        #
        # @param headers [Hash] may contain :content_type and
        #   :content_transfer_encoding (String or nil)
        # @return [Hash] :content_type, :character_set and
        #   :content_transfer_encoding
        def classify_headers(headers)
          output = default_classification

          output.merge!(extract_content_type(headers[:content_type]))
          output.merge!(extract_character_set(headers[:content_type]))
          output.merge!(extract_encoding(headers[:content_transfer_encoding]))

          output
        end

        private

        # A fresh hash each time, because callers mutate it with merge!.
        def default_classification
          {
            content_type: :text,
            character_set: :utf_8,
            content_transfer_encoding: nil
          }
        end

        # Removes the header section (and the blank line that ends it) from the
        # front of `lines`, returning the headers stripped and unfolded.
        #
        # A line is folded into the previous header only when it starts with
        # whitespace and does not itself look like a "Name:" header, so that a
        # parameter such as charset placed on a continuation line is still seen.
        def take_header_lines(lines)
          headers = []

          until lines.empty?
            raw_line = lines.shift
            line = raw_line.strip
            break if line.empty?

            if headers.any? && raw_line =~ /\A[ \t]/ && line !~ /\A[\w-]+:/
              headers[-1] = "#{headers.last} #{line}"
            else
              headers << line
            end
          end

          headers
        end

        # @return [Hash] {content_type: ...} or {} when nothing is recognised
        def extract_content_type(content_type_string)
          return {} unless content_type_string

          # Media types are case-insensitive.
          normalised = content_type_string.downcase
          if normalised.include?('text/plain')
            {content_type: :text}
          elsif normalised.include?('text/html')
            {content_type: :html}
          else
            {}
          end
        end

        # @return [Hash] {character_set: ...} or {} when nothing is recognised
        def extract_character_set(content_type_string)
          return {} unless content_type_string

          # The value stops at a quote, semicolon or whitespace, so the charset
          # parameter is found even when other parameters follow it.
          charset = content_type_string[/charset\s*=\s*"?([^";\s]+)/i, 1]
          character_set = charset && CHARACTER_SETS[charset.downcase]

          character_set ? {character_set: character_set} : {}
        end

        # @return [Hash] {content_transfer_encoding: ...} or {} when nothing is
        #   recognised
        def extract_encoding(encoding_string)
          return {} unless encoding_string

          normalised = encoding_string.downcase
          _token, encoding = TRANSFER_ENCODINGS.find { |token, _| normalised.include?(token) }

          encoding ? {content_transfer_encoding: encoding} : {}
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    module Body
      # Decodes the content of a classified body block according to its content
      # transfer encoding, returning encoded payloads as UTF-8.
      class BlockParser
        # Classification symbols mapped to the Ruby encoding of the raw bytes.
        SOURCE_ENCODINGS = {
          utf_8: 'UTF-8',
          windows_1251: 'Windows-1251',
          iso_8859_1: 'ISO-8859-1'
        }.freeze

        # @param line_end [String] the line terminator used by the mail
        def initialize(line_end)
          @line_end = line_end
          # An "=" that cannot start a quoted-printable escape: it is followed
          # either by a character that is neither a hex digit nor part of the
          # line terminator, or by a single hex digit and then a non-hex
          # character. The terminator is escaped so that any of its characters
          # is safe inside the character class.
          @stray_equals = /=((?:[^a-f0-9#{Regexp.escape(line_end)}])|(?:[a-f0-9][^a-f0-9]))/i
        end

        # Decodes a block.
        #
        # @param block_data [Hash] expects :content (String) and optionally
        #   :content_transfer_encoding (:seven_bit, :base64, :quoted_printable
        #   or nil, which is treated as :seven_bit) and :character_set
        #   (:utf_8, :windows_1251 or :iso_8859_1)
        # @return [String, nil] the content unchanged for :seven_bit; the
        #   decoded content as UTF-8 for :base64 and :quoted_printable; nil for
        #   any other transfer encoding
        def parse(block_data)
          content = block_data[:content]

          case block_data[:content_transfer_encoding] || :seven_bit
          when :seven_bit
            content
          when :base64
            # unpack('m') is the lenient base64 decoder that Base64.decode64
            # wraps; using it directly needs no extra require.
            to_utf8(content.unpack('m').first, block_data[:character_set])
          when :quoted_printable
            to_utf8(remove_troublesome_sequences(content).unpack('M').first, block_data[:character_set])
          end
        end

        private

        # Interprets freshly decoded bytes in the block's character set and
        # returns them as UTF-8. An unrecognised or missing character set is
        # treated as UTF-8; bytes with no UTF-8 equivalent are replaced rather
        # than raising.
        def to_utf8(bytes, character_set)
          tagged = bytes.force_encoding(SOURCE_ENCODINGS.fetch(character_set, 'UTF-8'))
          return tagged if tagged.encoding == Encoding::UTF_8

          tagged.encode('UTF-8', invalid: :replace, undef: :replace)
        end

        # Rewrites every stray "=" as "=3D" so the quoted-printable decoder
        # reproduces it literally.
        def remove_troublesome_sequences(content)
          content.gsub(@stray_equals, '=3D\1')
        end
      end
    end
  end
end
@@ -10,16 +10,27 @@ module PhisherPhinder
10
10
  def parse(body_contents:, content_type:, content_transfer_encoding:)
11
11
  if multipart_alternative?(content_type)
12
12
  parse_multipart_alternative(content_type, body_contents)
13
- elsif html?(content_type)
14
- {
15
- text: nil,
16
- html: decode_body(body_contents, content_transfer_encoding)
17
- }
18
13
  else
19
- {
20
- text: body_contents,
21
- html: nil
22
- }
14
+ classifier = Body::BlockClassifier.new(@line_end)
15
+ parser = Body::BlockParser.new(@line_end)
16
+
17
+ classification = classifier.classify_headers(
18
+ content_type: content_type, content_transfer_encoding: content_transfer_encoding
19
+ ).merge(content: body_contents)
20
+
21
+ contents = parser.parse(classification)
22
+
23
+ if classification[:content_type] == :html
24
+ {
25
+ html: contents,
26
+ text: nil
27
+ }
28
+ else
29
+ {
30
+ html: nil,
31
+ text: contents
32
+ }
33
+ end
23
34
  end
24
35
  end
25
36
 
@@ -40,7 +51,7 @@ module PhisherPhinder
40
51
  end
41
52
 
42
53
  def parse_multipart_alternative(content_type, contents)
43
- base_boundary = content_type.split(';').last.strip.split('=').last
54
+ base_boundary = content_type.split(';').last.strip.gsub(/boundary=/, '').gsub(/"/, '')
44
55
  start_boundary = '--' + base_boundary + @line_end
45
56
  end_boundary = '--' + base_boundary + '--'
46
57
 
@@ -61,30 +72,14 @@ module PhisherPhinder
61
72
  end
62
73
 
63
74
  def categorise_blocks(blocks)
75
+ classifier = Body::BlockClassifier.new(@line_end)
76
+ parser = Body::BlockParser.new(@line_end)
64
77
  blocks.map do |block|
65
- lines = block.split(@line_end)
66
- processing_block_headers = true
67
- html = false
68
- base64_encoded = false
69
-
70
- while processing_block_headers do
71
- line = lines.shift.strip
72
- if line.empty?
73
- processing_block_headers = false
74
- elsif line =~/\AContent-Type: text\/html/
75
- html = true
76
- elsif line =~ /\AContent-Transfer-Encoding: base64/
77
- base64_encoded = true
78
- end
79
- end
78
+ classification = classifier.classify_block(block)
79
+ contents = parser.parse(classification)
80
80
 
81
- contents = if base64_encoded
82
- (lines.map { |l| Base64.decode64(l) }).join
83
- else
84
- lines.join(@line_end)
85
- end
86
81
  {
87
- html: html,
82
+ html: classification[:content_type] == :html,
88
83
  contents: contents
89
84
  }
90
85
  end
@@ -4,20 +4,35 @@ module PhisherPhinder
4
4
  module MailParser
5
5
  class HeaderValueParser
6
6
  def parse(raw_value)
7
- utf_8_preambles = raw_value.scan(/=\?UTF-8\?b\?/)
8
- if raw_value.scan(/=\?UTF-8\?b\?/).any?
9
- (raw_value.split(' ').map { |snippet| parse_utf8_base64(snippet) }).join
10
- else
11
- raw_value.strip
12
- end
7
+ stripped_value = raw_value.strip
8
+ words = stripped_value.split(' ')
9
+ words.map do |word|
10
+ if encoded?(word)
11
+ matches = word.match(/\A=\?(?<character_set>.+)\?(?<encoding>.)\?(?<content>.+)\z/)
12
+
13
+ unencoded_content = if matches[:encoding].downcase == 'b'
14
+ Base64.decode64(matches[:content])
15
+ elsif matches[:encoding].downcase == 'q'
16
+ matches[:content].unpack('M').first
17
+ end
18
+
19
+ content = if matches[:character_set] =~ /iso-8859-1/i
20
+ unencoded_content.force_encoding('ISO-8859-1').encode('UTF-8')
21
+ elsif matches[:character_set] =~ /windows-1251/i
22
+ unencoded_content.force_encoding('cp1251').encode('UTF-8')
23
+ elsif matches[:character_set] =~ /utf-8/i
24
+ unencoded_content.force_encoding('UTF-8')
25
+ end
26
+ else
27
+ word
28
+ end
29
+ end.join(' ')
13
30
  end
14
31
 
15
32
  private
16
33
 
17
- def parse_utf8_base64(raw_value)
18
- require 'base64'
19
-
20
- Base64.decode64(raw_value.strip.sub(/=\?UTF-8\?b\?/, '')).force_encoding('UTF-8')
34
+ def encoded?(raw_value)
35
+ raw_value =~ /=\?[a-z1-9-]+\?[bq]/i
21
36
  end
22
37
  end
23
38
  end
@@ -4,14 +4,34 @@ module PhisherPhinder
4
4
  module MailParser
5
5
  module ReceivedHeaders
6
6
  class ByParser
7
- def initialize(extended_ip_factory)
8
- @extended_ip_factory = extended_ip_factory
7
+ def initialize(ip_factory:, starttls_parser:)
8
+ @extended_ip_factory = ip_factory
9
+ @starttls_parser = starttls_parser
9
10
  end
10
11
 
11
12
  def parse(component)
12
- return {recipient: nil, protocol: nil, id: nil, recipient_additional: nil} unless component
13
+ unless component
14
+ return {
15
+ recipient: nil,
16
+ protocol: nil,
17
+ id: nil,
18
+ recipient_additional: nil,
19
+ authenticated_as: nil
20
+ }.merge(@starttls_parser.parse(nil))
21
+ end
13
22
 
14
23
  patterns = [
24
+ %r{by\s(?<recipient>\S+)\s
25
+ \((?<additional>[^)]+)\)\s
26
+ with\sMicrosoft\sSMTP\sServer\s(?<starttls>\([^\)]+\))\s
27
+ id\s(?<id>\S+)\s
28
+ via\s(?<protocol>Frontend\sTransport)
29
+ }x,
30
+ %r{by\s(?<recipient>\S+)\s
31
+ \((?<additional>[^)]+)\)\s
32
+ with\sMicrosoft\sSMTP\sServer\s(?<starttls>\([^\)]+\))\s
33
+ id\s(?<id>\S+)
34
+ }x,
15
35
  /by\s(?<recipient>\S+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
16
36
  /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
17
37
  /by\s(?<recipient>\S+)\s(?<additional>.+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
@@ -20,6 +40,9 @@ module PhisherPhinder
20
40
  /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sID\s(?<id>\S+)/,
21
41
  /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
22
42
  /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)/,
43
+ /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\s\(authenticated as (?<authenticated_as>[^\)]+)\)\sid\s(?<id>\S+)/,
44
+ /by\s(?<recipient>\S+)\sid\s(?<id>\S+)/,
45
+ /by\s(?<recipient>\S+)/
23
46
  ]
24
47
 
25
48
  matches = patterns.inject(nil) do |memo, pattern|
@@ -30,8 +53,15 @@ module PhisherPhinder
30
53
  recipient: enrich_recipient(matches[:recipient]),
31
54
  protocol: matches.names.include?('protocol') ? matches[:protocol]: nil,
32
55
  id: matches.names.include?('id') ? matches[:id]: nil,
33
- recipient_additional: matches.names.include?('additional') ? matches[:additional] : nil
34
- }
56
+ recipient_additional: matches.names.include?('additional') ? matches[:additional] : nil,
57
+ authenticated_as: matches.names.include?('authenticated_as') ? matches[:authenticated_as] : nil,
58
+ }.merge(
59
+ if matches.names.include?('starttls')
60
+ @starttls_parser.parse(matches[:starttls])
61
+ else
62
+ @starttls_parser.parse(nil)
63
+ end
64
+ )
35
65
  end
36
66
 
37
67
  private
@@ -4,18 +4,38 @@ module PhisherPhinder
4
4
  module MailParser
5
5
  module ReceivedHeaders
6
6
  class ForParser
7
+ def initialize(starttls_parser:)
8
+ @starttls_parser = starttls_parser
9
+ end
10
+
7
11
  def parse(component)
8
- component =~ /\Afor\s(\S+)\z/
12
+ return {recipient_mailbox: nil}.merge(@starttls_parser.parse(nil)) unless component
13
+
14
+ patterns = [
15
+ /\Afor\s(?<recipient_mailbox>\S+)\s\(Google Transport Security\)\z/,
16
+ /\Afor\s(?<recipient_mailbox>\S+)\s(?<starttls>\([^\)]+\))\z/,
17
+ /\Afor\s(?<recipient_mailbox>.+)\z/,
18
+ ]
19
+
20
+ matches = patterns.inject(nil) do |memo, pattern|
21
+ memo || component.match(pattern)
22
+ end
9
23
 
10
- {
11
- recipient_mailbox: strip_angle_brackets($1)
12
- }
24
+ output = {
25
+ recipient_mailbox: strip_angle_brackets(matches[:recipient_mailbox]),
26
+ }.merge(
27
+ if matches.names.include?('starttls')
28
+ @starttls_parser.parse(matches[:starttls])
29
+ else
30
+ @starttls_parser.parse(nil)
31
+ end
32
+ )
13
33
  end
14
34
 
15
35
  private
16
36
 
17
37
  def strip_angle_brackets(email_address_string)
18
- email_address_string =~ /\<([^>]+)\>/ ? $1 : email_address_string
38
+ email_address_string =~ /\<\s?([^>]+?)\s?\>/ ? $1 : email_address_string
19
39
  end
20
40
  end
21
41
  end
@@ -4,14 +4,41 @@ module PhisherPhinder
4
4
  module MailParser
5
5
  module ReceivedHeaders
6
6
  class FromParser
7
- def initialize(extended_ip_factory)
8
- @extended_ip_factory = extended_ip_factory
7
+ def initialize(ip_factory:, starttls_parser:)
8
+ @extended_ip_factory = ip_factory
9
+ @starttls_parser = starttls_parser
9
10
  end
10
11
 
11
12
  def parse(component)
12
- return {advertised_sender: nil, helo: nil, sender: nil} unless component
13
+ unless component
14
+ return {
15
+ advertised_authenticated_sender: nil,
16
+ advertised_sender: nil,
17
+ helo: nil,
18
+ sender: {
19
+ host: nil,
20
+ ip: nil
21
+ },
22
+ }.merge(@starttls_parser.parse(nil))
23
+ end
13
24
 
14
25
  patterns = [
26
+ %r{
27
+ from\s\[(?<advertised_sender>[\S]+)\]\s
28
+ \((?<sender_host>\S+?)\.?\s
29
+ \[(?<sender_ip>[^\]]+)\]\)\s
30
+ \(Authenticated\ssender:\s(?<advertised_authenticated_sender>[^\)]+)\)
31
+ }x,
32
+ /from\s\[(?<sender_ip>[^\]]+)\]\s\(helo=(?<helo>[^\)]+)\)/,
33
+ %r{
34
+ from\s\[(?<advertised_sender>[\S]+)\]\s
35
+ \((?<sender_host>\S+?)\.?\s
36
+ \[(?<sender_ip>[^\]]+)\]\)
37
+ }x,
38
+ /from\s(?<sender_ip>[^\]]+)\s\(EHLO\s(?<helo>[^\)]+)\)/,
39
+ /from\s(?<advertised_sender>[\S]+)\s\((?<sender_host>\S+?)\.?\s\[(?<sender_ip>[^\]]+)\]\) \((?<starttls>[^\)]+\))/,
40
+ /from\s(?<advertised_sender>[\S]+)\s\((?<sender_host>\S+?)\.?\s\[(?<sender_ip>[^\]]+)\]\) \((?<starttls>[^\)]+\))/,
41
+ /from\s(?<advertised_sender>[\S]+)\s\(HELO\s(?<helo>[^)]+)\)\s\(\)/,
15
42
  /from\s(?<advertised_sender>[\S]+)\s\(HELO\s(?<helo>[^)]+)\)\s\(\[(?<sender_ip>[^\]]+)\]\)/,
16
43
  /from\s(?<advertised_sender>[\S]+)\s\((?<sender_host>\S+?)\.?\s\[(?<sender_ip>[^\]]+)\]\)/,
17
44
  /from\s(?<advertised_sender>\S+)\s\((?<sender_host>\S+?)\.?\s(?<sender_ip>\S+?)\)/,
@@ -25,14 +52,31 @@ module PhisherPhinder
25
52
  memo || component.match(pattern)
26
53
  end
27
54
 
28
- {
29
- advertised_sender: matches[:advertised_sender],
55
+ output = {
56
+ advertised_sender: expand_advertised_sender(extract(matches, :advertised_sender)),
30
57
  helo: matches.names.include?('helo') ? matches[:helo] : nil,
31
58
  sender: {
32
59
  host: matches.names.include?('sender_host') ? matches[:sender_host] : nil,
33
60
  ip: matches.names.include?('sender_ip') ? @extended_ip_factory.build(matches[:sender_ip]) : nil
34
- }
61
+ },
62
+ advertised_authenticated_sender: matches.names.include?('advertised_authenticated_sender') ? matches[:advertised_authenticated_sender] : nil
35
63
  }
64
+
65
+ if matches.names.include?('starttls')
66
+ output.merge(@starttls_parser.parse(matches[:starttls]))
67
+ else
68
+ output.merge(@starttls_parser.parse(nil))
69
+ end
70
+ end
71
+
72
+ private
73
+
74
+ def extract(matches, key)
75
+ matches.names.include?(key.to_s) ? matches[key] : nil
76
+ end
77
+
78
+ def expand_advertised_sender(sender)
79
+ sender ? (@extended_ip_factory.build(sender) || sender) : nil
36
80
  end
37
81
  end
38
82
  end