homographic_spoofing 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.md +117 -0
- data/lib/homographic_spoofing/detector/base.rb +41 -0
- data/lib/homographic_spoofing/detector/detection.rb +2 -0
- data/lib/homographic_spoofing/detector/email_address.rb +40 -0
- data/lib/homographic_spoofing/detector/idn.rb +78 -0
- data/lib/homographic_spoofing/detector/local.rb +14 -0
- data/lib/homographic_spoofing/detector/quoted_string.rb +13 -0
- data/lib/homographic_spoofing/detector/rule/base.rb +15 -0
- data/lib/homographic_spoofing/detector/rule/context.rb +19 -0
- data/lib/homographic_spoofing/detector/rule/data/allowed_idn_characters.txt +1 -0
- data/lib/homographic_spoofing/detector/rule/data/digits.csv +680 -0
- data/lib/homographic_spoofing/detector/rule/disallowed_characters.rb +140 -0
- data/lib/homographic_spoofing/detector/rule/idn/base.rb +3 -0
- data/lib/homographic_spoofing/detector/rule/idn/context.rb +8 -0
- data/lib/homographic_spoofing/detector/rule/idn/dangerous_pattern.rb +73 -0
- data/lib/homographic_spoofing/detector/rule/idn/deviation_characters.rb +10 -0
- data/lib/homographic_spoofing/detector/rule/idn/digits.rb +25 -0
- data/lib/homographic_spoofing/detector/rule/idn/invisible_characters.rb +14 -0
- data/lib/homographic_spoofing/detector/rule/idn/script_confusable.rb +59 -0
- data/lib/homographic_spoofing/detector/rule/idn/script_specific.rb +31 -0
- data/lib/homographic_spoofing/detector/rule/idn/unsafe_middle_dot.rb +12 -0
- data/lib/homographic_spoofing/detector/rule/local/dot_atom_text.rb +49 -0
- data/lib/homographic_spoofing/detector/rule/local/nfkc.rb +6 -0
- data/lib/homographic_spoofing/detector/rule/mixed_digits.rb +30 -0
- data/lib/homographic_spoofing/detector/rule/mixed_scripts.rb +30 -0
- data/lib/homographic_spoofing/detector/rule/quoted_string/bidi_control.rb +10 -0
- data/lib/homographic_spoofing/detector/rule/quoted_string/data/nonspacing_marks.txt +1 -0
- data/lib/homographic_spoofing/detector/rule/quoted_string/nfc.rb +6 -0
- data/lib/homographic_spoofing/detector/rule/quoted_string/nonspacing_marks.rb +21 -0
- data/lib/homographic_spoofing/railtie.rb +5 -0
- data/lib/homographic_spoofing/sanitizer/base.rb +39 -0
- data/lib/homographic_spoofing/sanitizer/email_address.rb +10 -0
- data/lib/homographic_spoofing/sanitizer/idn.rb +10 -0
- data/lib/homographic_spoofing/sanitizer/quoted_string.rb +10 -0
- data/lib/homographic_spoofing/version.rb +3 -0
- data/lib/homographic_spoofing.rb +47 -0
- metadata +166 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 0271afba8ca392a2898b76894ca4be2cf4eb024f14fea0ea4e2467265b88c7e5
|
4
|
+
data.tar.gz: f77b9ca4563b4e7e59264cbd83b60d38ae89417fddad193dc0a1c39993bd4535
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f2639bca34a87a1b3f0e121608684fa37d50f83b125efab25baeef85fa111151a7e5ae8e40e4207b4fdfd5dfe8c502965834468190dc4efc7d9ad433c6a1916c
|
7
|
+
data.tar.gz: 351fe4d4b0c22105838d99404056e848cbe3f5f0b7bcfa8f9b770a651ac8c942d73f18bc0b709b5aa074d73670fd56a997afb44b6d4f6877e96cde5eda640970
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2024 37signals, LLC
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# HomographicSpoofing
|
2
|
+
|
3
|
+
Toolkit to both detect and sanitize [homographic spoofing attacks](https://en.wikipedia.org/wiki/IDN_homograph_attack) in URLs and email addresses.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem "homographic_spoofing"
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
```bash
|
16
|
+
$ bundle
|
17
|
+
```
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
```bash
|
22
|
+
$ gem install homographic_spoofing
|
23
|
+
```
|
24
|
+
## Configuration
|
25
|
+
|
26
|
+
If `HomographicSpoofing.logger` is set to a Logger instance, the gem will log all the violations found. If you're using Rails,
|
27
|
+
it is automatically configured to use `Rails.logger`; otherwise you can set it manually:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
HomographicSpoofing.logger = Logger.new("log/homographic_spoofing.log")
|
31
|
+
```
|
32
|
+
|
33
|
+
## Usage
|
34
|
+
|
35
|
+
### IDN
|
36
|
+
|
37
|
+
[What is an IDN](https://en.wikipedia.org/wiki/Internationalized_domain_name)
|
38
|
+
|
39
|
+
**Check if an IDN is a homographic spoof**
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
HomographicSpoofing.idn_spoof?("www.basecаmp.com")
|
43
|
+
# => true, uses cyrillic 'а' instead of latin 'a'
|
44
|
+
HomographicSpoofing.idn_spoof?("www.basecamp.com")
|
45
|
+
# => false
|
46
|
+
```
|
47
|
+
|
48
|
+
**Sanitize an IDN**
|
49
|
+
|
50
|
+
The library can also sanitize an IDN by converting all confusable characters to their punycode representation.
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
HomographicSpoofing.sanitize_idn("www.basecаmp.com")
|
54
|
+
# => "www.xn--basecmp-6fg.com"
|
55
|
+
HomographicSpoofing.sanitize_idn("www.basecamp.com")
|
56
|
+
# => "www.basecamp.com"
|
57
|
+
```
|
58
|
+
|
59
|
+
### Email addresses
|
60
|
+
|
61
|
+
An email address is formed from three main parts:
|
62
|
+
|
63
|
+
"Jacopo Beschi" <<jacopo.beschi@basecamp.com>>
|
64
|
+
|
65
|
+
- The domain-part is "basecamp.com"
|
66
|
+
- The local-part is "jacopo.beschi"
|
67
|
+
- The quoted-string-part is "Jacopo Beschi"
|
68
|
+
|
69
|
+
**Check if an email_address is a homographic spoof**
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
HomographicSpoofing.email_address_spoof?(%{"Jacopo Beschi" <jacopo.beschi@basecаmp.com>})
|
73
|
+
# => true, uses cyrillic 'а' instead of latin 'a'
|
74
|
+
```
|
75
|
+
|
76
|
+
**Sanitize an email_address**
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
>> HomographicSpoofing.sanitize_email_address(%{"Jacopo Beschi" <jacopo.beschi@basecаmp.com>})
|
80
|
+
# => "\"Jacopo Beschi\" <jacopo.beschi@xn--basecmp-6fg.com>"
|
81
|
+
```
|
82
|
+
|
83
|
+
**Check if an email_address local-part is a homographic spoof**
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
HomographicSpoofing.email_local_spoof?("jacopo.beschi")
|
87
|
+
# => false
|
88
|
+
```
|
89
|
+
|
90
|
+
**Check if an email_address quoted-string-part is a homographic spoof**
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
HomographicSpoofing.email_name_spoof?("Jacopo Beschi")
|
94
|
+
# => false
|
95
|
+
```
|
96
|
+
|
97
|
+
**Sanitize an email_address quoted-string-part**
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
HomographicSpoofing.sanitize_email_name("Jacopo Beschi")
|
101
|
+
# => "Jacopo Beschi"
|
102
|
+
```
|
103
|
+
|
104
|
+
## Development
|
105
|
+
|
106
|
+
To experiment, start the console with `bin/console`.
|
107
|
+
Run the tests via `bin/test`.
|
108
|
+
|
109
|
+
## Contributing
|
110
|
+
|
111
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/basecamp/homographic_spoofing.
|
112
|
+
|
113
|
+
## License
|
114
|
+
|
115
|
+
The IDN spoof detection algorithms are inspired by Chromium's [spoof_check](https://source.chromium.org/chromium/chromium/src/+/main:components/url_formatter/spoof_checks/) source code.
|
116
|
+
|
117
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Common skeleton for detectors that run a fixed list of rules against a
# single label. Subclasses supply the list by overriding +rule_classes+.
class HomographicSpoofing::Detector::Base
  class << self
    # Shortcut for +new(label).detected?+.
    def detected?(label) = new(label).detected?

    # Shortcut for +new(label).detections+.
    def detections(label) = new(label).detections
  end

  # label - the string the rules are evaluated against.
  def initialize(label)
    @label = label
  end

  # True when at least one rule reports an attack.
  def detected?
    detections.any?
  end

  # Returns one Detection (built from the rule's reason and label) for every
  # rule whose +attack_detected?+ is truthy.
  #
  # If evaluating the rules raises Encoding::CompatibilityError, a single
  # "invalid_unicode" Detection for the label is returned instead: the string
  # must be in Unicode.
  def detections
    triggered = rules.select(&:attack_detected?)
    triggered.map { |rule| HomographicSpoofing::Detector::Detection.new(rule.reason, rule.label) }
  rescue Encoding::CompatibilityError
    [ HomographicSpoofing::Detector::Detection.new("invalid_unicode", label) ]
  end

  private
    attr_reader :label

    # Memoized rule instances, one per class in +rule_classes+, all sharing
    # the same context.
    def rules
      @rules ||= rule_classes.map { |rule_class| rule_class.new(context) }
    end

    # Hook for subclasses: the rule classes to instantiate.
    def rule_classes
      raise NotImplementedError, "subclasses must override this"
    end

    # Memoized rule context wrapping the label.
    def context
      @context ||= HomographicSpoofing::Detector::Rule::Context.new(label: label)
    end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Runs the quoted-string, local and IDN detectors over the corresponding
# parts of an email address and gathers their detections.
class HomographicSpoofing::Detector::EmailAddress
  class << self
    # Shortcut for +new(email_address).detected?+.
    def detected?(email_address) = new(email_address).detected?

    # Shortcut for +new(email_address).detections+.
    def detections(email_address) = new(email_address).detections
  end

  # email_address - a Mail::Address, or a value accepted by Mail::Address.new.
  def initialize(email_address)
    @email_address = email_address
  end

  # True when any part of the address produced a detection.
  def detected?
    detections.any?
  end

  # Returns the detections of every part that is present, in the order
  # name (quoted string), local part, domain.
  #
  # Invalid email addresses are not analysed: when Mail::Field::FieldError is
  # raised the result is an empty array.
  def detections
    parsed = mail_address_wrap(email_address)
    collected = []

    if parsed.name
      collected.concat(detector_for(part: parsed.name, type: "quoted_string").detections)
    end

    if parsed.local
      collected.concat(detector_for(part: parsed.local, type: "local").detections)
    end

    if parsed.domain
      collected.concat(detector_for(part: parsed.domain, type: "idn").detections)
    end

    collected
  rescue Mail::Field::FieldError
    []
  end

  private
    attr_reader :email_address

    # Instantiates the HomographicSpoofing::Detector class named after +type+
    # (camelized) with +part+ as its input.
    def detector_for(type:, part:)
      detector_class = "HomographicSpoofing::Detector::#{type.camelize}".constantize
      detector_class.new(part)
    end

    # Returns +email_address+ untouched when it already is a Mail::Address,
    # otherwise wraps it in a new one.
    def mail_address_wrap(email_address)
      return email_address if email_address.is_a?(Mail::Address)

      Mail::Address.new(email_address)
    end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# Detects IDN Spoofing homographic attacks (See https://en.wikipedia.org/wiki/IDN_homograph_attack).
#
# The implementation follows the Google Chrome IDN policy
# (See https://chromium.googlesource.com/chromium/src.git/+/master/docs/idn.md#google-chrome_s-idn-policy)
# with the following limitations:
# - It doesn't rely on ICU4C uspoof.h (https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/uspoof_8h.html),
#   hence the script confusable detection is not as precise.
# - It doesn't implement item 13 of the Google Chrome IDN policy.
class HomographicSpoofing::Detector::Idn
  class << self
    # Shortcut for +new(domain).detected?+.
    def detected?(domain) = new(domain).detected?

    # Shortcut for +new(domain).detections+.
    def detections(domain) = new(domain).detections
  end

  # domain - the domain name to inspect; it is stored downcased.
  def initialize(domain)
    @domain = domain.downcase
  end

  # True when at least one rule reports an attack.
  def detected?
    detections.any?
  end

  # Returns one Detection (built from the rule's reason and label) for every
  # rule whose +attack_detected?+ is truthy.
  #
  # An invalid IDN is considered a spoof: when PublicSuffix::Error is raised
  # a single "invalid_domain" Detection for the domain is returned.
  def detections
    triggered = rules.select(&:attack_detected?)
    triggered.map { |rule| HomographicSpoofing::Detector::Detection.new(rule.reason, rule.label) }
  rescue PublicSuffix::Error
    [ HomographicSpoofing::Detector::Detection.new("invalid_domain", domain) ]
  end

  private
    attr_reader :domain

    # Memoized rule instances: the full rule set for every context.
    def rules
      @rules ||= contexts.flat_map { |context| rules_for(context) }
    end

    # Instantiates every rule class against the given context.
    def rules_for(context)
      rule_classes.map { |rule_class| rule_class.new(context) }
    end

    # The rules applied to each inspected label, in evaluation order.
    def rule_classes
      [
        HomographicSpoofing::Detector::Rule::DisallowedCharacters,
        HomographicSpoofing::Detector::Rule::MixedScripts,
        HomographicSpoofing::Detector::Rule::MixedDigits,
        HomographicSpoofing::Detector::Rule::Idn::InvisibleCharacters,
        HomographicSpoofing::Detector::Rule::Idn::UnsafeMiddleDot,
        HomographicSpoofing::Detector::Rule::Idn::ScriptConfusable,
        HomographicSpoofing::Detector::Rule::Idn::Digits,
        HomographicSpoofing::Detector::Rule::Idn::DangerousPattern,
        HomographicSpoofing::Detector::Rule::Idn::ScriptSpecific,
        HomographicSpoofing::Detector::Rule::Idn::DeviationCharacters
      ]
    end

    # One Idn rule context per non-nil label among the parsed domain's sld
    # and trd, each carrying the parsed tld.
    def contexts
      labels = [ public_suffix.sld, public_suffix.trd ].compact
      labels.map do |label|
        HomographicSpoofing::Detector::Rule::Idn::Context.new(label: label, tld: public_suffix.tld)
      end
    end

    # Memoized parsed domain; falls back to +non_icann_domain+ when
    # +icann_domain+ returns nil.
    def public_suffix
      @public_suffix ||= icann_domain || non_icann_domain
    end

    # Parses the domain (ignoring private suffixes) when PublicSuffix
    # considers it valid; returns nil otherwise.
    def icann_domain
      return unless PublicSuffix.valid?(domain)

      PublicSuffix.parse(domain, ignore_private: true)
    end

    # Wraps the domain in a PublicSuffix::Domain when the default list has a
    # matching rule for it (ignoring private suffixes); raises
    # PublicSuffix::DomainInvalid otherwise.
    def non_icann_domain
      matching_rule = PublicSuffix::List.default.find(domain, default: nil, ignore_private: true)
      raise PublicSuffix::DomainInvalid unless matching_rule.present?

      PublicSuffix::Domain.new(domain)
    end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Detects homographic spoofing attacks in the Local-Part of an email address.
#
# The implementation strictly follows the Unicode guidelines on Email Security
# Profiles for Identifiers.
#
# See http://www.unicode.org/reports/tr39/#Email_Security_Profiles.
class HomographicSpoofing::Detector::Local < HomographicSpoofing::Detector::Base
  private
    # Rules evaluated against the local part, in order.
    def rule_classes
      [
        HomographicSpoofing::Detector::Rule::Local::Nfkc,
        HomographicSpoofing::Detector::Rule::MixedScripts,
        HomographicSpoofing::Detector::Rule::MixedDigits,
        HomographicSpoofing::Detector::Rule::Local::DotAtomText
      ]
    end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Detects homographic spoofing attacks in the Quoted-String-Part of an email address.
#
# The implementation strictly follows the Unicode guidelines on Email Security
# Profiles for Identifiers.
#
# See http://www.unicode.org/reports/tr39/#Email_Security_Profiles.
class HomographicSpoofing::Detector::QuotedString < HomographicSpoofing::Detector::Base
  private
    # Rules evaluated against the quoted string, in order.
    def rule_classes
      [
        HomographicSpoofing::Detector::Rule::QuotedString::Nfc,
        HomographicSpoofing::Detector::Rule::QuotedString::BidiControl,
        HomographicSpoofing::Detector::Rule::QuotedString::NonspacingMarks
      ]
    end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Parent class of all detection rules. A rule is built around a context and
# exposes the context's +scripts+, +label+ and +label_set+ directly.
class HomographicSpoofing::Detector::Rule::Base
  delegate :scripts, :label, :label_set, to: :@context

  # context - the object the rule reads +scripts+, +label+ and +label_set+ from.
  def initialize(context)
    @context = context
  end

  # Whether this rule flags the context's label. Concrete rules must
  # implement it; the base implementation always raises NotImplementedError.
  def attack_detected?
    raise NotImplementedError, "subclasses must override this"
  end

  # Identifier of the rule, derived from its class name with the module
  # prefix removed and the remainder underscored.
  def reason
    rule_class_name = self.class.name
    rule_class_name.demodulize.underscore
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Holds the label under inspection together with memoized views of it that
# rules share.
class HomographicSpoofing::Detector::Rule::Context
  # Script names excluded from +scripts+. Frozen so the shared constants
  # cannot be mutated by accident at runtime.
  SCRIPT_COMMON = "Common".freeze
  SCRIPT_INHERITED = "Inherited".freeze
  IGNORED_SCRIPTS = Set[SCRIPT_COMMON, SCRIPT_INHERITED].freeze

  attr_reader :label

  # label - the string the rules are evaluated against.
  def initialize(label:)
    @label = label
  end

  # Memoized Set of what Unicode::Scripts.scripts reports for the label,
  # with the IGNORED_SCRIPTS entries removed.
  def scripts
    @scripts ||= Unicode::Scripts.scripts(label).to_set - IGNORED_SCRIPTS
  end

  # Memoized Set of the label's distinct characters.
  def label_set
    @label_set ||= label.chars.to_set
  end
end
|