homographic_spoofing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.md +117 -0
  4. data/lib/homographic_spoofing/detector/base.rb +41 -0
  5. data/lib/homographic_spoofing/detector/detection.rb +2 -0
  6. data/lib/homographic_spoofing/detector/email_address.rb +40 -0
  7. data/lib/homographic_spoofing/detector/idn.rb +78 -0
  8. data/lib/homographic_spoofing/detector/local.rb +14 -0
  9. data/lib/homographic_spoofing/detector/quoted_string.rb +13 -0
  10. data/lib/homographic_spoofing/detector/rule/base.rb +15 -0
  11. data/lib/homographic_spoofing/detector/rule/context.rb +19 -0
  12. data/lib/homographic_spoofing/detector/rule/data/allowed_idn_characters.txt +1 -0
  13. data/lib/homographic_spoofing/detector/rule/data/digits.csv +680 -0
  14. data/lib/homographic_spoofing/detector/rule/disallowed_characters.rb +140 -0
  15. data/lib/homographic_spoofing/detector/rule/idn/base.rb +3 -0
  16. data/lib/homographic_spoofing/detector/rule/idn/context.rb +8 -0
  17. data/lib/homographic_spoofing/detector/rule/idn/dangerous_pattern.rb +73 -0
  18. data/lib/homographic_spoofing/detector/rule/idn/deviation_characters.rb +10 -0
  19. data/lib/homographic_spoofing/detector/rule/idn/digits.rb +25 -0
  20. data/lib/homographic_spoofing/detector/rule/idn/invisible_characters.rb +14 -0
  21. data/lib/homographic_spoofing/detector/rule/idn/script_confusable.rb +59 -0
  22. data/lib/homographic_spoofing/detector/rule/idn/script_specific.rb +31 -0
  23. data/lib/homographic_spoofing/detector/rule/idn/unsafe_middle_dot.rb +12 -0
  24. data/lib/homographic_spoofing/detector/rule/local/dot_atom_text.rb +49 -0
  25. data/lib/homographic_spoofing/detector/rule/local/nfkc.rb +6 -0
  26. data/lib/homographic_spoofing/detector/rule/mixed_digits.rb +30 -0
  27. data/lib/homographic_spoofing/detector/rule/mixed_scripts.rb +30 -0
  28. data/lib/homographic_spoofing/detector/rule/quoted_string/bidi_control.rb +10 -0
  29. data/lib/homographic_spoofing/detector/rule/quoted_string/data/nonspacing_marks.txt +1 -0
  30. data/lib/homographic_spoofing/detector/rule/quoted_string/nfc.rb +6 -0
  31. data/lib/homographic_spoofing/detector/rule/quoted_string/nonspacing_marks.rb +21 -0
  32. data/lib/homographic_spoofing/railtie.rb +5 -0
  33. data/lib/homographic_spoofing/sanitizer/base.rb +39 -0
  34. data/lib/homographic_spoofing/sanitizer/email_address.rb +10 -0
  35. data/lib/homographic_spoofing/sanitizer/idn.rb +10 -0
  36. data/lib/homographic_spoofing/sanitizer/quoted_string.rb +10 -0
  37. data/lib/homographic_spoofing/version.rb +3 -0
  38. data/lib/homographic_spoofing.rb +47 -0
  39. metadata +166 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0271afba8ca392a2898b76894ca4be2cf4eb024f14fea0ea4e2467265b88c7e5
4
+ data.tar.gz: f77b9ca4563b4e7e59264cbd83b60d38ae89417fddad193dc0a1c39993bd4535
5
+ SHA512:
6
+ metadata.gz: f2639bca34a87a1b3f0e121608684fa37d50f83b125efab25baeef85fa111151a7e5ae8e40e4207b4fdfd5dfe8c502965834468190dc4efc7d9ad433c6a1916c
7
+ data.tar.gz: 351fe4d4b0c22105838d99404056e848cbe3f5f0b7bcfa8f9b770a651ac8c942d73f18bc0b709b5aa074d73670fd56a997afb44b6d4f6877e96cde5eda640970
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2024 37signals, LLC
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # HomographicSpoofing
2
+
3
+ Toolkit to both detect and sanitize [homographic spoofing attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack) in URLs and Email addresses.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem "homographic_spoofing"
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ ```bash
16
+ $ bundle
17
+ ```
18
+
19
+ Or install it yourself as:
20
+
21
+ ```bash
22
+ $ gem install homographic_spoofing
23
+ ```
24
+ ## Configuration
25
+
26
+ If `HomographicSpoofing.logger` is set to a Logger instance, the gem will log all the violations found. If you're using Rails,
27
+ it is automatically configured to use `Rails.logger`; otherwise you can set it manually:
28
+
29
+ ```ruby
30
+ HomographicSpoofing.logger = Logger.new("log/homographic_spoofing.log")
31
+ ```
32
+
33
+ ## Usage
34
+
35
+ ### IDN
36
+
37
+ [What is an IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)
38
+
39
+ **Check if an IDN is a homographic spoof**
40
+
41
+ ```ruby
42
+ HomographicSpoofing.idn_spoof?("www.basecаmp.com")
43
+ # => true, uses cyrillic 'а' instead of latin 'a'
44
+ HomographicSpoofing.idn_spoof?("www.basecamp.com")
45
+ # => false
46
+ ```
47
+
48
+ **Sanitize an IDN**
49
+
50
+ The library can also sanitize an IDN by converting all confusable characters to their punycode representation.
51
+
52
+ ```ruby
53
+ HomographicSpoofing.sanitize_idn("www.basecаmp.com")
54
+ # => "www.xn--basecmp-6fg.com"
55
+ HomographicSpoofing.sanitize_idn("www.basecamp.com")
56
+ # => "www.basecamp.com"
57
+ ```
58
+
59
+ ### Email addresses
60
+
61
+ An email address is formed from three main parts:
62
+
63
+ "Jacopo Beschi" <<jacopo.beschi@basecamp.com>>
64
+
65
+ - The domain-part is "basecamp.com"
66
+ - The local-part is "jacopo.beschi"
67
+ - The quoted-string-part is "Jacopo Beschi"
68
+
69
+ **Check if an email_address is a homographic spoof**
70
+
71
+ ```ruby
72
+ HomographicSpoofing.email_address_spoof?(%{"Jacopo Beschi" <jacopo.beschi@basecаmp.com>})
73
+ # => true, uses cyrillic 'а' instead of latin 'a'
74
+ ```
75
+
76
+ **Sanitize an email_address**
77
+
78
+ ```ruby
79
+ HomographicSpoofing.sanitize_email_address(%{"Jacopo Beschi" <jacopo.beschi@basecаmp.com>})
80
+ # => "\"Jacopo Beschi\" <jacopo.beschi@xn--basecmp-6fg.com>"
81
+ ```
82
+
83
+ **Check if an email_address local-part is a homographic spoof**
84
+
85
+ ```ruby
86
+ HomographicSpoofing.email_local_spoof?("jacopo.beschi")
87
+ # => false
88
+ ```
89
+
90
+ **Check if an email_address quoted-string-part is a homographic spoof**
91
+
92
+ ```ruby
93
+ HomographicSpoofing.email_name_spoof?("Jacopo Beschi")
94
+ # => false
95
+ ```
96
+
97
+ **Sanitize an email_address quoted-string-part**
98
+
99
+ ```ruby
100
+ HomographicSpoofing.sanitize_email_name("Jacopo Beschi")
101
+ # => "Jacopo Beschi"
102
+ ```
103
+
104
+ ## Development
105
+
106
+ To experiment, start the console with `bin/console`.
107
+ Run the tests via `bin/test`.
108
+
109
+ ## Contributing
110
+
111
+ Bug reports and pull requests are welcome on GitHub at https://github.com/basecamp/homographic_spoofing.
112
+
113
+ ## License
114
+
115
+ The IDN spoof detection algorithms are inspired by Chromium's [spoof_check](https://source.chromium.org/chromium/chromium/src/+/main:components/url_formatter/spoof_checks/) source code.
116
+
117
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,41 @@
1
+ class HomographicSpoofing::Detector::Base
2
+ def self.detected?(label)
3
+ new(label).detected?
4
+ end
5
+
6
+ def self.detections(label)
7
+ new(label).detections
8
+ end
9
+
10
+ def initialize(label)
11
+ @label = label
12
+ end
13
+
14
+ def detected?
15
+ detections.any?
16
+ end
17
+
18
+ def detections
19
+ rules.select(&:attack_detected?).map do |rule|
20
+ HomographicSpoofing::Detector::Detection.new(rule.reason, rule.label)
21
+ end
22
+ rescue Encoding::CompatibilityError
23
+ # String must be in Unicode.
24
+ [ HomographicSpoofing::Detector::Detection.new("invalid_unicode", label) ]
25
+ end
26
+
27
+ private
28
+ attr_reader :label
29
+
30
+ def rules
31
+ @rules ||= rule_classes.map { |klass| klass.new(context) }
32
+ end
33
+
34
+ def rule_classes
35
+ raise NotImplementedError, "subclasses must override this"
36
+ end
37
+
38
+ def context
39
+ @context ||= HomographicSpoofing::Detector::Rule::Context.new(label:)
40
+ end
41
+ end
@@ -0,0 +1,2 @@
1
+ # One finding reported by a detector: +reason+ names what was detected (a rule
2
+ # identifier, or a marker such as "invalid_unicode") and +label+ is the text it
3
+ # was detected in.
4
+ #
5
+ # Assigned directly rather than subclassing Struct.new, which would leave an
6
+ # extra anonymous class in the ancestor chain.
7
+ HomographicSpoofing::Detector::Detection = Struct.new(:reason, :label)
@@ -0,0 +1,40 @@
1
+ class HomographicSpoofing::Detector::EmailAddress
2
+ def self.detected?(email_address)
3
+ new(email_address).detected?
4
+ end
5
+
6
+ def self.detections(email_address)
7
+ new(email_address).detections
8
+ end
9
+
10
+ def initialize(email_address)
11
+ @email_address = email_address
12
+ end
13
+
14
+ def detected?
15
+ detections.any?
16
+ end
17
+
18
+ def detections
19
+ mail_address = mail_address_wrap(email_address)
20
+ [].tap do |result|
21
+ result.concat detector_for(part: mail_address.name, type: "quoted_string").detections if mail_address.name
22
+ result.concat detector_for(part: mail_address.local, type: "local").detections if mail_address.local
23
+ result.concat detector_for(part: mail_address.domain, type: "idn").detections if mail_address.domain
24
+ end
25
+ rescue Mail::Field::FieldError
26
+ # Do not analyse invalid email addresses.
27
+ []
28
+ end
29
+
30
+ private
31
+ attr_reader :email_address
32
+
33
+ def detector_for(type:, part:)
34
+ "HomographicSpoofing::Detector::#{type.camelize}".constantize.new(part)
35
+ end
36
+
37
+ def mail_address_wrap(email_address)
38
+ email_address.is_a?(Mail::Address) ? email_address : Mail::Address.new(email_address)
39
+ end
40
+ end
@@ -0,0 +1,78 @@
1
+ # Detects IDN homographic spoofing attacks
2
+ # (see https://en.wikipedia.org/wiki/IDN_homograph_attack).
3
+ #
4
+ # The checks follow the Google Chrome IDN policy
5
+ # (see https://chromium.googlesource.com/chromium/src.git/+/master/docs/idn.md#google-chrome_s-idn-policy)
6
+ # with the following limitations:
7
+ # - ICU4C uspoof.h (https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uspoof_8h.html)
8
+ #   is not used, hence the script confusable detection is not as precise.
9
+ # - Item 13 of the Google IDN policy is not implemented.
10
+ class HomographicSpoofing::Detector::Idn
11
+   class << self
12
+     # Shortcut for new(domain).detected?
13
+     def detected?(domain)
14
+       new(domain).detected?
15
+     end
16
+
17
+     # Shortcut for new(domain).detections
18
+     def detections(domain)
19
+       new(domain).detections
20
+     end
21
+   end
22
+
23
+   # The domain is stored lower-cased; all checks work on that form.
24
+   def initialize(domain)
25
+     @domain = domain.downcase
26
+   end
27
+
28
+   # True when at least one detection is reported for the domain.
29
+   def detected?
30
+     detections.any?
31
+   end
32
+
33
+   # Returns one Detection (reason, label) per rule that reports an attack.
34
+   # A PublicSuffix::Error raised along the way is turned into a single
35
+   # "invalid_domain" detection for the whole domain.
36
+   def detections
37
+     rules.filter_map do |rule|
38
+       next unless rule.attack_detected?
39
+
40
+       HomographicSpoofing::Detector::Detection.new(rule.reason, rule.label)
41
+     end
42
+   rescue PublicSuffix::Error
43
+     # Invalid IDN is a spoof.
44
+     [ HomographicSpoofing::Detector::Detection.new("invalid_domain", domain) ]
45
+   end
46
+
47
+   private
48
+     attr_reader :domain
49
+
50
+     # Every rule for every context, created on first use.
51
+     def rules
52
+       @rules ||= contexts.flat_map { |context| rules_for(context) }
53
+     end
54
+
55
+     # Instantiates the full IDN rule set for one context.
56
+     def rules_for(context)
57
+       rule_classes = [
58
+         HomographicSpoofing::Detector::Rule::DisallowedCharacters,
59
+         HomographicSpoofing::Detector::Rule::MixedScripts,
60
+         HomographicSpoofing::Detector::Rule::MixedDigits,
61
+         HomographicSpoofing::Detector::Rule::Idn::InvisibleCharacters,
62
+         HomographicSpoofing::Detector::Rule::Idn::UnsafeMiddleDot,
63
+         HomographicSpoofing::Detector::Rule::Idn::ScriptConfusable,
64
+         HomographicSpoofing::Detector::Rule::Idn::Digits,
65
+         HomographicSpoofing::Detector::Rule::Idn::DangerousPattern,
66
+         HomographicSpoofing::Detector::Rule::Idn::ScriptSpecific,
67
+         HomographicSpoofing::Detector::Rule::Idn::DeviationCharacters
68
+       ]
69
+
70
+       rule_classes.map { |rule_class| rule_class.new(context) }
71
+     end
72
+
73
+     # One context per non-nil label among the parsed domain's +sld+ and +trd+,
74
+     # each paired with the parsed +tld+.
75
+     def contexts
76
+       labels = [ public_suffix.sld, public_suffix.trd ].compact
77
+
78
+       labels.map do |label|
79
+         HomographicSpoofing::Detector::Rule::Idn::Context.new(label: label, tld: public_suffix.tld)
80
+       end
81
+     end
82
+
83
+     # Parsed domain, memoized: the regular parse when available, the fallback otherwise.
84
+     def public_suffix
85
+       @public_suffix ||= icann_domain || non_icann_domain
86
+     end
87
+
88
+     # Domain parsed with private rules ignored, or nil when PublicSuffix.valid? rejects it.
89
+     # NOTE(review): valid? is called without ignore_private while parse passes
90
+     # it - confirm the asymmetry is intended.
91
+     def icann_domain
92
+       return unless PublicSuffix.valid?(domain)
93
+
94
+       PublicSuffix.parse(domain, ignore_private: true)
95
+     end
96
+
97
+     # Fallback: wraps the raw domain in a PublicSuffix::Domain when the default
98
+     # list has a matching rule (private rules ignored); raises
99
+     # PublicSuffix::DomainInvalid otherwise.
100
+     def non_icann_domain
101
+       rule = PublicSuffix::List.default.find(domain, default: nil, ignore_private: true)
102
+       raise PublicSuffix::DomainInvalid unless rule.present?
103
+
104
+       PublicSuffix::Domain.new(domain)
105
+     end
106
+ end
@@ -0,0 +1,14 @@
1
+ # Detects spoofing homographic attacks for the Local-Part of an email address.
2
+ #
3
+ # The implementation strictly follows Unicode guidelines for Email Security Profiles for Identifiers.
4
+ #
5
+ # See http://www.unicode.org/reports/tr39/#Email_Security_Profiles.
6
+ class HomographicSpoofing::Detector::Local < HomographicSpoofing::Detector::Base
7
+ private
8
+ def rule_classes
9
+ [ HomographicSpoofing::Detector::Rule::Local::Nfkc,
10
+ HomographicSpoofing::Detector::Rule::MixedScripts,
11
+ HomographicSpoofing::Detector::Rule::MixedDigits,
12
+ HomographicSpoofing::Detector::Rule::Local::DotAtomText ]
13
+ end
14
+ end
@@ -0,0 +1,13 @@
1
+ # Detects spoofing homographic attacks for the Quoted-String-Part of an email address.
2
+ #
3
+ # The implementation strictly follows Unicode guidelines for Email Security Profiles for Identifiers.
4
+ #
5
+ # See http://www.unicode.org/reports/tr39/#Email_Security_Profiles.
6
+ class HomographicSpoofing::Detector::QuotedString < HomographicSpoofing::Detector::Base
7
+ private
8
+ def rule_classes
9
+ [ HomographicSpoofing::Detector::Rule::QuotedString::Nfc,
10
+ HomographicSpoofing::Detector::Rule::QuotedString::BidiControl,
11
+ HomographicSpoofing::Detector::Rule::QuotedString::NonspacingMarks ]
12
+ end
13
+ end
@@ -0,0 +1,15 @@
1
+ class HomographicSpoofing::Detector::Rule::Base
2
+ delegate :scripts, :label, :label_set, to: :@context
3
+
4
+ def initialize(context)
5
+ @context = context
6
+ end
7
+
8
+ def attack_detected?
9
+ raise NotImplementedError, "subclasses must override this"
10
+ end
11
+
12
+ def reason
13
+ self.class.name.demodulize.underscore
14
+ end
15
+ end
@@ -0,0 +1,19 @@
1
+ class HomographicSpoofing::Detector::Rule::Context
2
+ attr_reader :label
3
+
4
+ def initialize(label:)
5
+ @label = label
6
+ end
7
+
8
+ SCRIPT_COMMON = "Common"
9
+ SCRIPT_INHERITED = "Inherited"
10
+ IGNORED_SCRIPTS = Set[SCRIPT_COMMON, SCRIPT_INHERITED]
11
+
12
+ def scripts
13
+ @scripts ||= Unicode::Scripts.scripts(label).to_set - IGNORED_SCRIPTS
14
+ end
15
+
16
+ def label_set
17
+ @label_set ||= label.chars.to_set
18
+ end
19
+ end