phisher_phinder 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/.env.example +2 -1
  3. data/.gitignore +3 -0
  4. data/Gemfile +0 -11
  5. data/Gemfile.lock +45 -13
  6. data/README.md +108 -2
  7. data/exe/phisher_phinder +61 -0
  8. data/lib/phisher_phinder.rb +11 -2
  9. data/lib/phisher_phinder/command.rb +20 -0
  10. data/lib/phisher_phinder/display.rb +64 -0
  11. data/lib/phisher_phinder/extended_ip.rb +4 -0
  12. data/lib/phisher_phinder/extended_ip_factory.rb +4 -2
  13. data/lib/phisher_phinder/geoip_ip_data.rb +9 -2
  14. data/lib/phisher_phinder/mail.rb +10 -3
  15. data/lib/phisher_phinder/mail_parser.rb +43 -30
  16. data/lib/phisher_phinder/mail_parser/authentication_headers/auth_results_parser.rb +150 -0
  17. data/lib/phisher_phinder/mail_parser/authentication_headers/parser.rb +25 -0
  18. data/lib/phisher_phinder/mail_parser/authentication_headers/received_spf_parser.rb +222 -0
  19. data/lib/phisher_phinder/mail_parser/body/block_classifier.rb +106 -0
  20. data/lib/phisher_phinder/mail_parser/body/block_parser.rb +37 -0
  21. data/lib/phisher_phinder/mail_parser/body_parser.rb +26 -31
  22. data/lib/phisher_phinder/mail_parser/header_value_parser.rb +25 -10
  23. data/lib/phisher_phinder/mail_parser/received_headers/by_parser.rb +35 -5
  24. data/lib/phisher_phinder/mail_parser/received_headers/for_parser.rb +25 -5
  25. data/lib/phisher_phinder/mail_parser/received_headers/from_parser.rb +50 -6
  26. data/lib/phisher_phinder/mail_parser/received_headers/parser.rb +50 -29
  27. data/lib/phisher_phinder/mail_parser/received_headers/starttls_parser.rb +8 -1
  28. data/lib/phisher_phinder/null_lookup_client.rb +9 -0
  29. data/lib/phisher_phinder/null_response.rb +12 -0
  30. data/lib/phisher_phinder/sender_extractor.rb +74 -0
  31. data/lib/phisher_phinder/simple_ip.rb +4 -0
  32. data/lib/phisher_phinder/tracing_report.rb +47 -0
  33. data/lib/phisher_phinder/version.rb +1 -1
  34. data/phisher_phinder.gemspec +15 -1
  35. metadata +208 -13
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    module Body
      # Works out how a MIME body part is typed and encoded.
      #
      # Both entry points return a Hash with the keys :content_type
      # (:text or :html), :character_set (:utf_8, :windows_1251 or
      # :iso_8859_1) and :content_transfer_encoding (:base64,
      # :quoted_printable, :seven_bit or nil). Anything that is not
      # recognised leaves the default (:text, :utf_8, nil) in place.
      class BlockClassifier
        # Lookup tables are scanned in insertion order; the first token found
        # in the (downcased) header value wins.
        CONTENT_TYPES = {
          'text/plain' => :text,
          'text/html' => :html
        }.freeze

        CHARACTER_SETS = {
          'utf-8' => :utf_8,
          'windows-1251' => :windows_1251,
          'iso-8859-1' => :iso_8859_1
        }.freeze

        TRANSFER_ENCODINGS = {
          'base64' => :base64,
          'quoted-printable' => :quoted_printable,
          '7bit' => :seven_bit
        }.freeze

        # The charset parameter may appear anywhere in the parameter list and
        # may or may not be quoted, so stop at the first quote, semicolon or
        # whitespace instead of anchoring to the end of the header.
        CHARSET_PATTERN = /charset\s*=\s*"?(?<charset>[^";\s]+)/i

        private_constant :CONTENT_TYPES, :CHARACTER_SETS, :TRANSFER_ENCODINGS, :CHARSET_PATTERN

        # @param line_end [String] the line terminator used by the mail being
        #   parsed (for example "\r\n")
        def initialize(line_end)
          @line_end = line_end
        end

        # Classifies a raw multipart block made of header lines, a blank
        # line, and the block content.
        #
        # @param contents [String] the raw block
        # @return [Hash] the classification plus :content, which holds the
        #   lines after the first blank line re-joined with the line
        #   terminator ("" when the block has no blank line)
        def classify_block(contents)
          lines = contents.split(@line_end)
          separator = lines.index { |line| line.strip.empty? }
          header_lines = separator ? lines.take(separator) : lines
          body_lines = separator ? lines.drop(separator + 1) : []

          output = unfold(header_lines).each_with_object(defaults) do |header, classification|
            if header =~ /\AContent-Type:/i
              classification.merge!(extract_content_type(header))
              classification.merge!(extract_character_set(header))
            elsif header =~ /\AContent-Transfer-Encoding/i
              classification.merge!(extract_encoding(header))
            end
          end

          output[:content] = body_lines.join(@line_end)
          output
        end

        # Classifies a part from header values that were already extracted.
        #
        # @param headers [Hash] with :content_type and
        #   :content_transfer_encoding entries (either may be nil)
        # @return [Hash] the classification (no :content key is added)
        def classify_headers(headers)
          defaults
            .merge!(extract_content_type(headers[:content_type]))
            .merge!(extract_character_set(headers[:content_type]))
            .merge!(extract_encoding(headers[:content_transfer_encoding]))
        end

        private

        # A fresh copy of the default classification (callers mutate it).
        def defaults
          {
            content_type: :text,
            character_set: :utf_8,
            content_transfer_encoding: nil
          }
        end

        # Joins folded header lines (continuations start with a space or tab)
        # onto the header they belong to, so parameters such as charset are
        # seen even when they sit on the following line. An indented line
        # that itself looks like "Name: value" is still treated as its own
        # header, which keeps the previous handling of indented blocks.
        def unfold(header_lines)
          header_lines.each_with_object([]) do |line, unfolded|
            stripped = line.strip
            if unfolded.any? && line =~ /\A[ \t]/ && stripped !~ /\A[\w-]+:/
              unfolded[-1] = "#{unfolded.last} #{stripped}"
            else
              unfolded << stripped
            end
          end
        end

        def extract_content_type(content_type_string)
          lookup(CONTENT_TYPES, content_type_string, :content_type)
        end

        def extract_character_set(content_type_string)
          return {} unless content_type_string

          matches = content_type_string.match(CHARSET_PATTERN)
          character_set = matches && CHARACTER_SETS[matches[:charset].downcase]
          character_set ? {character_set: character_set} : {}
        end

        def extract_encoding(encoding_string)
          lookup(TRANSFER_ENCODINGS, encoding_string, :content_transfer_encoding)
        end

        # Returns {key => symbol} for the first table token contained in
        # value (compared case-insensitively), or {} when value is nil or
        # nothing matches.
        def lookup(table, value, key)
          return {} unless value

          normalised = value.downcase
          _, symbol = table.find { |token, _| normalised.include?(token) }
          symbol ? {key => symbol} : {}
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module PhisherPhinder
  module MailParser
    module Body
      # Decodes the content of a classified body block into a string.
      class BlockParser
        # @param line_end [String] the line terminator used by the mail being
        #   parsed; it is excluded from the stray "=" repair so that
        #   quoted-printable soft line breaks ("=" + line end) survive
        def initialize(line_end)
          @line_end = line_end
          # A "=" that is not followed by two hex digits (or by a line end)
          # is not a valid quoted-printable escape. Built once here, with the
          # line terminator escaped so it is safe inside the character class.
          @stray_equals = /=((?:[^a-f0-9#{Regexp.escape(line_end.to_s)}])|(?:[a-f0-9][^a-f0-9]))/i
        end

        # @param block_data [Hash] with :content (String),
        #   :content_transfer_encoding (:base64, :quoted_printable,
        #   :seven_bit or nil) and :character_set (:utf_8, :windows_1251,
        #   :iso_8859_1 or nil)
        # @return [String] the content; base64 and quoted-printable content
        #   is decoded and converted to UTF-8, any other transfer encoding
        #   returns the content untouched
        def parse(block_data)
          content = block_data[:content]

          case block_data[:content_transfer_encoding] || :seven_bit
          when :base64
            # unpack('m') is the lenient base64 decoding that
            # Base64.decode64 wraps; using it directly keeps this class
            # independent of where 'base64' gets required.
            to_utf8(content.unpack('m').first, block_data[:character_set])
          when :quoted_printable
            to_utf8(remove_troublesome_sequences(content).unpack('M').first, block_data[:character_set])
          else
            content
          end
        end

        private

        # Re-tags (UTF-8) or transcodes (single-byte character sets) the
        # decoded bytes. Previously only base64 content honoured the
        # character set, and ISO-8859-1 base64 content came back as nil.
        def to_utf8(decoded, character_set)
          case character_set
          when :windows_1251
            decoded.force_encoding('cp1251').encode('UTF-8')
          when :iso_8859_1
            decoded.force_encoding('ISO-8859-1').encode('UTF-8')
          else
            decoded.force_encoding('UTF-8')
          end
        end

        # Rewrites stray "=" characters as "=3D" so that unpack('M') does not
        # stop decoding at the first malformed escape sequence.
        def remove_troublesome_sequences(content)
          content.gsub(@stray_equals, '=3D\1')
        end
      end
    end
  end
end
@@ -10,16 +10,27 @@ module PhisherPhinder
10
10
  def parse(body_contents:, content_type:, content_transfer_encoding:)
11
11
  if multipart_alternative?(content_type)
12
12
  parse_multipart_alternative(content_type, body_contents)
13
- elsif html?(content_type)
14
- {
15
- text: nil,
16
- html: decode_body(body_contents, content_transfer_encoding)
17
- }
18
13
  else
19
- {
20
- text: body_contents,
21
- html: nil
22
- }
14
+ classifier = Body::BlockClassifier.new(@line_end)
15
+ parser = Body::BlockParser.new(@line_end)
16
+
17
+ classification = classifier.classify_headers(
18
+ content_type: content_type, content_transfer_encoding: content_transfer_encoding
19
+ ).merge(content: body_contents)
20
+
21
+ contents = parser.parse(classification)
22
+
23
+ if classification[:content_type] == :html
24
+ {
25
+ html: contents,
26
+ text: nil
27
+ }
28
+ else
29
+ {
30
+ html: nil,
31
+ text: contents
32
+ }
33
+ end
23
34
  end
24
35
  end
25
36
 
@@ -40,7 +51,7 @@ module PhisherPhinder
40
51
  end
41
52
 
42
53
  def parse_multipart_alternative(content_type, contents)
43
- base_boundary = content_type.split(';').last.strip.split('=').last
54
+ base_boundary = content_type.split(';').last.strip.gsub(/boundary=/, '').gsub(/"/, '')
44
55
  start_boundary = '--' + base_boundary + @line_end
45
56
  end_boundary = '--' + base_boundary + '--'
46
57
 
@@ -61,30 +72,14 @@ module PhisherPhinder
61
72
  end
62
73
 
63
74
  def categorise_blocks(blocks)
75
+ classifier = Body::BlockClassifier.new(@line_end)
76
+ parser = Body::BlockParser.new(@line_end)
64
77
  blocks.map do |block|
65
- lines = block.split(@line_end)
66
- processing_block_headers = true
67
- html = false
68
- base64_encoded = false
69
-
70
- while processing_block_headers do
71
- line = lines.shift.strip
72
- if line.empty?
73
- processing_block_headers = false
74
- elsif line =~/\AContent-Type: text\/html/
75
- html = true
76
- elsif line =~ /\AContent-Transfer-Encoding: base64/
77
- base64_encoded = true
78
- end
79
- end
78
+ classification = classifier.classify_block(block)
79
+ contents = parser.parse(classification)
80
80
 
81
- contents = if base64_encoded
82
- (lines.map { |l| Base64.decode64(l) }).join
83
- else
84
- lines.join(@line_end)
85
- end
86
81
  {
87
- html: html,
82
+ html: classification[:content_type] == :html,
88
83
  contents: contents
89
84
  }
90
85
  end
@@ -4,20 +4,35 @@ module PhisherPhinder
4
4
  module MailParser
5
5
  class HeaderValueParser
6
6
  def parse(raw_value)
7
- utf_8_preambles = raw_value.scan(/=\?UTF-8\?b\?/)
8
- if raw_value.scan(/=\?UTF-8\?b\?/).any?
9
- (raw_value.split(' ').map { |snippet| parse_utf8_base64(snippet) }).join
10
- else
11
- raw_value.strip
12
- end
7
+ stripped_value = raw_value.strip
8
+ words = stripped_value.split(' ')
9
+ words.map do |word|
10
+ if encoded?(word)
11
+ matches = word.match(/\A=\?(?<character_set>.+)\?(?<encoding>.)\?(?<content>.+)\z/)
12
+
13
+ unencoded_content = if matches[:encoding].downcase == 'b'
14
+ Base64.decode64(matches[:content])
15
+ elsif matches[:encoding].downcase == 'q'
16
+ matches[:content].unpack('M').first
17
+ end
18
+
19
+ content = if matches[:character_set] =~ /iso-8859-1/i
20
+ unencoded_content.force_encoding('ISO-8859-1').encode('UTF-8')
21
+ elsif matches[:character_set] =~ /windows-1251/i
22
+ unencoded_content.force_encoding('cp1251').encode('UTF-8')
23
+ elsif matches[:character_set] =~ /utf-8/i
24
+ unencoded_content.force_encoding('UTF-8')
25
+ end
26
+ else
27
+ word
28
+ end
29
+ end.join(' ')
13
30
  end
14
31
 
15
32
  private
16
33
 
17
- def parse_utf8_base64(raw_value)
18
- require 'base64'
19
-
20
- Base64.decode64(raw_value.strip.sub(/=\?UTF-8\?b\?/, '')).force_encoding('UTF-8')
34
+ def encoded?(raw_value)
35
+ raw_value =~ /=\?[a-z1-9-]+\?[bq]/i
21
36
  end
22
37
  end
23
38
  end
@@ -4,14 +4,34 @@ module PhisherPhinder
4
4
  module MailParser
5
5
  module ReceivedHeaders
6
6
  class ByParser
7
- def initialize(extended_ip_factory)
8
- @extended_ip_factory = extended_ip_factory
7
# Keeps hold of the two injected collaborators: the IP factory (stored as
# @extended_ip_factory) and the STARTTLS parser.
def initialize(ip_factory:, starttls_parser:)
  @extended_ip_factory, @starttls_parser = ip_factory, starttls_parser
end
10
11
 
11
12
  def parse(component)
12
- return {recipient: nil, protocol: nil, id: nil, recipient_additional: nil} unless component
13
+ unless component
14
+ return {
15
+ recipient: nil,
16
+ protocol: nil,
17
+ id: nil,
18
+ recipient_additional: nil,
19
+ authenticated_as: nil
20
+ }.merge(@starttls_parser.parse(nil))
21
+ end
13
22
 
14
23
  patterns = [
24
+ %r{by\s(?<recipient>\S+)\s
25
+ \((?<additional>[^)]+)\)\s
26
+ with\sMicrosoft\sSMTP\sServer\s(?<starttls>\([^\)]+\))\s
27
+ id\s(?<id>\S+)\s
28
+ via\s(?<protocol>Frontend\sTransport)
29
+ }x,
30
+ %r{by\s(?<recipient>\S+)\s
31
+ \((?<additional>[^)]+)\)\s
32
+ with\sMicrosoft\sSMTP\sServer\s(?<starttls>\([^\)]+\))\s
33
+ id\s(?<id>\S+)
34
+ }x,
15
35
  /by\s(?<recipient>\S+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
16
36
  /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
17
37
  /by\s(?<recipient>\S+)\s(?<additional>.+)\swith\s(?<protocol>\S+)\sid\s(?<id>\S+)/,
@@ -20,6 +40,9 @@ module PhisherPhinder
20
40
  /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\swith\s(?<protocol>\S+)\sID\s(?<id>\S+)/,
21
41
  /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)\sid\s(?<id>\S+)/,
22
42
  /by\s(?<recipient>\S+)\swith\s(?<protocol>.+)/,
43
+ /by\s(?<recipient>\S+)\s\((?<additional>[^)]+)\)\s\(authenticated as (?<authenticated_as>[^\)]+)\)\sid\s(?<id>\S+)/,
44
+ /by\s(?<recipient>\S+)\sid\s(?<id>\S+)/,
45
+ /by\s(?<recipient>\S+)/
23
46
  ]
24
47
 
25
48
  matches = patterns.inject(nil) do |memo, pattern|
@@ -30,8 +53,15 @@ module PhisherPhinder
30
53
  recipient: enrich_recipient(matches[:recipient]),
31
54
  protocol: matches.names.include?('protocol') ? matches[:protocol]: nil,
32
55
  id: matches.names.include?('id') ? matches[:id]: nil,
33
- recipient_additional: matches.names.include?('additional') ? matches[:additional] : nil
34
- }
56
+ recipient_additional: matches.names.include?('additional') ? matches[:additional] : nil,
57
+ authenticated_as: matches.names.include?('authenticated_as') ? matches[:authenticated_as] : nil,
58
+ }.merge(
59
+ if matches.names.include?('starttls')
60
+ @starttls_parser.parse(matches[:starttls])
61
+ else
62
+ @starttls_parser.parse(nil)
63
+ end
64
+ )
35
65
  end
36
66
 
37
67
  private
@@ -4,18 +4,38 @@ module PhisherPhinder
4
4
  module MailParser
5
5
  module ReceivedHeaders
6
6
  class ForParser
7
+ def initialize(starttls_parser:)
8
+ @starttls_parser = starttls_parser
9
+ end
10
+
7
11
  def parse(component)
8
- component =~ /\Afor\s(\S+)\z/
12
+ return {recipient_mailbox: nil}.merge(@starttls_parser.parse(nil)) unless component
13
+
14
+ patterns = [
15
+ /\Afor\s(?<recipient_mailbox>\S+)\s\(Google Transport Security\)\z/,
16
+ /\Afor\s(?<recipient_mailbox>\S+)\s(?<starttls>\([^\)]+\))\z/,
17
+ /\Afor\s(?<recipient_mailbox>.+)\z/,
18
+ ]
19
+
20
+ matches = patterns.inject(nil) do |memo, pattern|
21
+ memo || component.match(pattern)
22
+ end
9
23
 
10
- {
11
- recipient_mailbox: strip_angle_brackets($1)
12
- }
24
+ output = {
25
+ recipient_mailbox: strip_angle_brackets(matches[:recipient_mailbox]),
26
+ }.merge(
27
+ if matches.names.include?('starttls')
28
+ @starttls_parser.parse(matches[:starttls])
29
+ else
30
+ @starttls_parser.parse(nil)
31
+ end
32
+ )
13
33
  end
14
34
 
15
35
  private
16
36
 
17
37
  def strip_angle_brackets(email_address_string)
18
- email_address_string =~ /\<([^>]+)\>/ ? $1 : email_address_string
38
+ email_address_string =~ /\<\s?([^>]+?)\s?\>/ ? $1 : email_address_string
19
39
  end
20
40
  end
21
41
  end
@@ -4,14 +4,41 @@ module PhisherPhinder
4
4
  module MailParser
5
5
  module ReceivedHeaders
6
6
  class FromParser
7
- def initialize(extended_ip_factory)
8
- @extended_ip_factory = extended_ip_factory
7
# Keeps hold of the two injected collaborators: the IP factory (stored as
# @extended_ip_factory) and the STARTTLS parser.
def initialize(ip_factory:, starttls_parser:)
  @extended_ip_factory, @starttls_parser = ip_factory, starttls_parser
end
10
11
 
11
12
  def parse(component)
12
- return {advertised_sender: nil, helo: nil, sender: nil} unless component
13
+ unless component
14
+ return {
15
+ advertised_authenticated_sender: nil,
16
+ advertised_sender: nil,
17
+ helo: nil,
18
+ sender: {
19
+ host: nil,
20
+ ip: nil
21
+ },
22
+ }.merge(@starttls_parser.parse(nil))
23
+ end
13
24
 
14
25
  patterns = [
26
+ %r{
27
+ from\s\[(?<advertised_sender>[\S]+)\]\s
28
+ \((?<sender_host>\S+?)\.?\s
29
+ \[(?<sender_ip>[^\]]+)\]\)\s
30
+ \(Authenticated\ssender:\s(?<advertised_authenticated_sender>[^\)]+)\)
31
+ }x,
32
+ /from\s\[(?<sender_ip>[^\]]+)\]\s\(helo=(?<helo>[^\)]+)\)/,
33
+ %r{
34
+ from\s\[(?<advertised_sender>[\S]+)\]\s
35
+ \((?<sender_host>\S+?)\.?\s
36
+ \[(?<sender_ip>[^\]]+)\]\)
37
+ }x,
38
+ /from\s(?<sender_ip>[^\]]+)\s\(EHLO\s(?<helo>[^\)]+)\)/,
39
+ /from\s(?<advertised_sender>[\S]+)\s\((?<sender_host>\S+?)\.?\s\[(?<sender_ip>[^\]]+)\]\) \((?<starttls>[^\)]+\))/,
40
+ /from\s(?<advertised_sender>[\S]+)\s\((?<sender_host>\S+?)\.?\s\[(?<sender_ip>[^\]]+)\]\) \((?<starttls>[^\)]+\))/,
41
+ /from\s(?<advertised_sender>[\S]+)\s\(HELO\s(?<helo>[^)]+)\)\s\(\)/,
15
42
  /from\s(?<advertised_sender>[\S]+)\s\(HELO\s(?<helo>[^)]+)\)\s\(\[(?<sender_ip>[^\]]+)\]\)/,
16
43
  /from\s(?<advertised_sender>[\S]+)\s\((?<sender_host>\S+?)\.?\s\[(?<sender_ip>[^\]]+)\]\)/,
17
44
  /from\s(?<advertised_sender>\S+)\s\((?<sender_host>\S+?)\.?\s(?<sender_ip>\S+?)\)/,
@@ -25,14 +52,31 @@ module PhisherPhinder
25
52
  memo || component.match(pattern)
26
53
  end
27
54
 
28
- {
29
- advertised_sender: matches[:advertised_sender],
55
+ output = {
56
+ advertised_sender: expand_advertised_sender(extract(matches, :advertised_sender)),
30
57
  helo: matches.names.include?('helo') ? matches[:helo] : nil,
31
58
  sender: {
32
59
  host: matches.names.include?('sender_host') ? matches[:sender_host] : nil,
33
60
  ip: matches.names.include?('sender_ip') ? @extended_ip_factory.build(matches[:sender_ip]) : nil
34
- }
61
+ },
62
+ advertised_authenticated_sender: matches.names.include?('advertised_authenticated_sender') ? matches[:advertised_authenticated_sender] : nil
35
63
  }
64
+
65
+ if matches.names.include?('starttls')
66
+ output.merge(@starttls_parser.parse(matches[:starttls]))
67
+ else
68
+ output.merge(@starttls_parser.parse(nil))
69
+ end
70
+ end
71
+
72
+ private
73
+
74
# Reads a named capture from the match data, giving nil instead of raising
# when the pattern that matched does not define a group called +key+.
def extract(matches, key)
  return nil unless matches.names.include?(key.to_s)

  matches[key]
end
77
+
78
# Hands the advertised sender to the IP factory and returns what the factory
# builds; when the factory returns nothing the original string is returned.
# A missing sender stays nil.
def expand_advertised_sender(sender)
  return nil unless sender

  @extended_ip_factory.build(sender) || sender
end
37
81
  end
38
82
  end