typosquatting 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +10 -0
- data/LICENSE +21 -0
- data/README.md +218 -0
- data/Rakefile +8 -0
- data/exe/typosquatting +6 -0
- data/lib/typosquatting/algorithms/addition.rb +20 -0
- data/lib/typosquatting/algorithms/base.rb +34 -0
- data/lib/typosquatting/algorithms/delimiter.rb +48 -0
- data/lib/typosquatting/algorithms/homoglyph.rb +61 -0
- data/lib/typosquatting/algorithms/misspelling.rb +78 -0
- data/lib/typosquatting/algorithms/numeral.rb +45 -0
- data/lib/typosquatting/algorithms/omission.rb +16 -0
- data/lib/typosquatting/algorithms/plural.rb +74 -0
- data/lib/typosquatting/algorithms/repetition.rb +16 -0
- data/lib/typosquatting/algorithms/replacement.rb +59 -0
- data/lib/typosquatting/algorithms/transposition.rb +17 -0
- data/lib/typosquatting/algorithms/vowel_swap.rb +27 -0
- data/lib/typosquatting/algorithms/word_order.rb +25 -0
- data/lib/typosquatting/cli.rb +380 -0
- data/lib/typosquatting/confusion.rb +70 -0
- data/lib/typosquatting/ecosystems/base.rb +65 -0
- data/lib/typosquatting/ecosystems/cargo.rb +45 -0
- data/lib/typosquatting/ecosystems/composer.rb +64 -0
- data/lib/typosquatting/ecosystems/golang.rb +56 -0
- data/lib/typosquatting/ecosystems/hex.rb +42 -0
- data/lib/typosquatting/ecosystems/maven.rb +64 -0
- data/lib/typosquatting/ecosystems/npm.rb +66 -0
- data/lib/typosquatting/ecosystems/nuget.rb +41 -0
- data/lib/typosquatting/ecosystems/pub.rb +43 -0
- data/lib/typosquatting/ecosystems/pypi.rb +38 -0
- data/lib/typosquatting/ecosystems/rubygems.rb +42 -0
- data/lib/typosquatting/generator.rb +58 -0
- data/lib/typosquatting/lookup.rb +138 -0
- data/lib/typosquatting/sbom.rb +98 -0
- data/lib/typosquatting/version.rb +5 -0
- data/lib/typosquatting.rb +103 -0
- data/sig/typosquatting.rbs +4 -0
- metadata +114 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: e50bfd6b6ae458a3c588600cf0ae4e3fe7a551bf801d0133dca2c88d5df423d6
|
|
4
|
+
data.tar.gz: d9f0abe8dd964b970e0f760f807b4b76a2f71d3ad9a05b32dcf5b19ce1438f76
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 9cf712d35089a972dd4b9cd47139cb0b793a901f2665de3027629c0f6f39faa1216ef2df4092ee04bc21c89b5c9e2f44e4b26dcbf95598c9b64a151793b3f6be
|
|
7
|
+
data.tar.gz: 4bb644fb9af9173051c6de93b5d0b5e88f3dac01ff6873dba102dfb2a72d64e0acda77daf7116597a1a93945e050a04f9603c18a039f6faca0761be51e310142
|
data/CHANGELOG.md
ADDED
data/CODE_OF_CONDUCT.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Code of Conduct
|
|
2
|
+
|
|
3
|
+
"typosquatting" follows [The Ruby Community Conduct Guideline](https://www.ruby-lang.org/en/conduct) in all "collaborative spaces", which are defined as community communication channels (such as mailing lists, submitted patches, commit comments, etc.):
|
|
4
|
+
|
|
5
|
+
* Participants will be tolerant of opposing views.
|
|
6
|
+
* Participants must ensure that their language and actions are free of personal attacks and disparaging personal remarks.
|
|
7
|
+
* When interpreting the words and actions of others, participants should always assume good intentions.
|
|
8
|
+
* Behaviour which can be reasonably considered harassment will not be tolerated.
|
|
9
|
+
|
|
10
|
+
If you have any concerns about behaviour within this project, please contact us at [andrewnez@gmail.com](mailto:andrewnez@gmail.com).
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Andrew Nesbitt
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# Typosquatting
|
|
2
|
+
|
|
3
|
+
Detect potential typosquatting packages across package ecosystems. Generate typosquat variants of package names and check if they exist on package registries.
|
|
4
|
+
|
|
5
|
+
Supports PyPI, npm, RubyGems, Cargo, Go, Maven, NuGet, Composer, Hex, and Pub.
|
|
6
|
+
|
|
7
|
+
## When to use this
|
|
8
|
+
|
|
9
|
+
**Typosquatting** is when an attacker publishes a malicious package with a name similar to a popular one, hoping developers mistype the name or copy-paste a bad example. This tool generates those similar names and checks if they exist.
|
|
10
|
+
|
|
11
|
+
**Dependency confusion** is when an attacker publishes a public package with the same name as your private/internal package, hoping your build system fetches the public one. The `confusion` command checks which registries have your package name.
|
|
12
|
+
|
|
13
|
+
This tool helps you:
|
|
14
|
+
- Find existing typosquats of packages you maintain
|
|
15
|
+
- Audit your dependencies for packages that look like typosquats of popular ones
|
|
16
|
+
- Check if your internal package names are safe from dependency confusion
|
|
17
|
+
|
|
18
|
+
False positives are common. A package named `request` isn't necessarily a typosquat of `requests`. Use the output as a starting point for investigation, not as a definitive verdict.
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
gem install typosquatting
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Or add to your Gemfile:
|
|
27
|
+
|
|
28
|
+
```ruby
|
|
29
|
+
gem "typosquatting"
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## CLI Usage
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
# Generate typosquat variants for a package
|
|
36
|
+
typosquatting generate requests -e pypi
|
|
37
|
+
|
|
38
|
+
# Use specific algorithms only
|
|
39
|
+
typosquatting generate requests -e pypi -a omission,homoglyph
|
|
40
|
+
|
|
41
|
+
# Show which algorithm generated each variant
|
|
42
|
+
typosquatting generate requests -e pypi -v
|
|
43
|
+
|
|
44
|
+
# Check which variants actually exist on registries
|
|
45
|
+
typosquatting check requests -e pypi
|
|
46
|
+
|
|
47
|
+
# Only show existing packages
|
|
48
|
+
typosquatting check requests -e pypi --existing-only
|
|
49
|
+
|
|
50
|
+
# Preview what would be checked without API calls
|
|
51
|
+
typosquatting check requests -e pypi --dry-run
|
|
52
|
+
|
|
53
|
+
# Check for dependency confusion risks
|
|
54
|
+
typosquatting confusion com.company:internal-lib -e maven
|
|
55
|
+
|
|
56
|
+
# Check multiple packages from a file
|
|
57
|
+
typosquatting confusion -e maven --file internal-packages.txt
|
|
58
|
+
|
|
59
|
+
# Scan an SBOM for potential typosquats
|
|
60
|
+
typosquatting sbom bom.json
|
|
61
|
+
|
|
62
|
+
# Output as JSON
|
|
63
|
+
typosquatting check requests -e pypi -f json
|
|
64
|
+
|
|
65
|
+
# List available algorithms
|
|
66
|
+
typosquatting algorithms
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Example Output
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
$ typosquatting check lodash -e npm --existing-only -v
|
|
73
|
+
|
|
74
|
+
Checking 142 variants...
|
|
75
|
+
lodas (omission) - EXISTS
|
|
76
|
+
registries: npmjs.org
|
|
77
|
+
lodah (omission) - EXISTS
|
|
78
|
+
registries: npmjs.org
|
|
79
|
+
1odash (homoglyph) - EXISTS
|
|
80
|
+
registries: npmjs.org
|
|
81
|
+
|
|
82
|
+
Checked 142 variants, 3 exist
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
$ typosquatting sbom bom.json
|
|
87
|
+
|
|
88
|
+
Potential typosquats found:
|
|
89
|
+
|
|
90
|
+
reqests (pypi)
|
|
91
|
+
Version: 1.0.0
|
|
92
|
+
PURL: pkg:pypi/reqests@1.0.0
|
|
93
|
+
Similar to existing packages:
|
|
94
|
+
- requests (omission)
|
|
95
|
+
registries: pypi.org
|
|
96
|
+
|
|
97
|
+
Found 1 suspicious package(s)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Library Usage
|
|
101
|
+
|
|
102
|
+
```ruby
|
|
103
|
+
require "typosquatting"
|
|
104
|
+
|
|
105
|
+
# Generate variants (returns array of names)
|
|
106
|
+
variants = Typosquatting.generate("requests", ecosystem: "pypi")
|
|
107
|
+
# => ["reqests", "requets", "request", "reqeusts", ...]
|
|
108
|
+
|
|
109
|
+
# Generate with algorithm info
|
|
110
|
+
variants = Typosquatting.generate_with_algorithms("requests", ecosystem: "pypi")
|
|
111
|
+
variants.each do |v|
|
|
112
|
+
puts "#{v.name} (#{v.algorithm})"
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
# Check which variants exist on registries
|
|
116
|
+
results = Typosquatting.check("requests", ecosystem: "pypi")
|
|
117
|
+
results.each do |result|
|
|
118
|
+
puts "#{result.name} - #{result.exists? ? 'EXISTS' : 'available'}"
|
|
119
|
+
puts " registries: #{result.registries.map(&:name).join(', ')}" if result.exists?
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Dependency confusion check
|
|
123
|
+
confusion = Typosquatting.check_confusion("my-internal-package", ecosystem: "maven")
|
|
124
|
+
confusion.registries.each do |registry, exists|
|
|
125
|
+
puts "#{registry}: #{exists ? 'EXISTS' : 'available'}"
|
|
126
|
+
end
|
|
127
|
+
puts "Risk detected!" if confusion.confusion_risk?
|
|
128
|
+
|
|
129
|
+
# Access ecosystem rules
|
|
130
|
+
ecosystem = Typosquatting::Ecosystem.get("pypi")
|
|
131
|
+
ecosystem.valid_name?("some-package") # => true
|
|
132
|
+
ecosystem.normalise("Some_Package") # => "some-package"
|
|
133
|
+
|
|
134
|
+
# Scan an SBOM
|
|
135
|
+
checker = Typosquatting::SBOMChecker.new("bom.json")
|
|
136
|
+
results = checker.check
|
|
137
|
+
results.each do |result|
|
|
138
|
+
puts "#{result.name} may be a typosquat of:"
|
|
139
|
+
result.suspicions.each do |s|
|
|
140
|
+
puts " - #{s.name} (#{s.algorithm})"
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Supported Ecosystems
|
|
146
|
+
|
|
147
|
+
Use these identifiers with the `-e` / `--ecosystem` flag:
|
|
148
|
+
|
|
149
|
+
| ID | Registry | Case Sensitive | Delimiters | Notes |
|
|
150
|
+
|----|----------|----------------|------------|-------|
|
|
151
|
+
| `pypi` | PyPI | No | `-` `_` `.` | Normalizes to lowercase, collapses delimiters to `-` |
|
|
152
|
+
| `npm` | npmjs.org | No | `-` `_` `.` | Supports scoped packages (`@scope/name`) |
|
|
153
|
+
| `gem` | RubyGems | Yes | `-` `_` | No dots allowed |
|
|
154
|
+
| `cargo` | crates.io | No | `-` `_` | `_` and `-` are equivalent |
|
|
155
|
+
| `golang` | proxy.golang.org | Yes | `-` `_` `.` `/` | Module paths with `/`, version suffixes |
|
|
156
|
+
| `maven` | Maven Central | Yes | `-` `_` `.` | `groupId:artifactId` format |
|
|
157
|
+
| `nuget` | nuget.org | No | `-` `_` `.` | Dots common in names |
|
|
158
|
+
| `composer` | Packagist | No | `-` `_` `.` | `vendor/package` format |
|
|
159
|
+
| `hex` | hex.pm | No | `_` | Underscore only, no hyphens |
|
|
160
|
+
| `pub` | pub.dev | No | `_` | Underscore only, 2-64 chars |
|
|
161
|
+
|
|
162
|
+
## Algorithms
|
|
163
|
+
|
|
164
|
+
Use these names with the `-a` / `--algorithms` flag (comma-separated):
|
|
165
|
+
|
|
166
|
+
| Name | Description | Example |
|
|
167
|
+
|------|-------------|---------|
|
|
168
|
+
| `omission` | Drop single characters | `requests` -> `reqests` |
|
|
169
|
+
| `repetition` | Double characters | `requests` -> `rrequests` |
|
|
170
|
+
| `replacement` | Adjacent keyboard characters | `requests` -> `requezts` |
|
|
171
|
+
| `transposition` | Swap adjacent characters | `requests` -> `reqeusts` |
|
|
172
|
+
| `addition` | Insert characters at start/end | `requests` -> `arequests` |
|
|
173
|
+
| `homoglyph` | Lookalike characters | `requests` -> `reque5ts` |
|
|
174
|
+
| `vowel_swap` | Swap vowels | `requests` -> `raquests` |
|
|
175
|
+
| `delimiter` | Change/add/remove `-` `_` `.` | `my-package` -> `my_package` |
|
|
176
|
+
| `word_order` | Reorder words | `foo-bar` -> `bar-foo` |
|
|
177
|
+
| `plural` | Singularize/pluralize | `request` -> `requests` |
|
|
178
|
+
| `misspelling` | Common typos | `library` -> `libary` |
|
|
179
|
+
| `numeral` | Number/word swap | `lib2` -> `libtwo` |
|
|
180
|
+
|
|
181
|
+
## SBOM Support
|
|
182
|
+
|
|
183
|
+
The `sbom` command parses CycloneDX and SPDX JSON files. It reads the `purl` field from each component to determine the ecosystem and package name.
|
|
184
|
+
|
|
185
|
+
Supported formats:
|
|
186
|
+
- CycloneDX 1.4+ (JSON)
|
|
187
|
+
- SPDX 2.2+ (JSON)
|
|
188
|
+
|
|
189
|
+
The checker looks for packages in your SBOM that have names similar to existing popular packages, which could indicate you've installed a typosquat.
|
|
190
|
+
|
|
191
|
+
## API and Rate Limiting
|
|
192
|
+
|
|
193
|
+
Package lookups use the [ecosyste.ms](https://packages.ecosyste.ms) API. Requests are made in parallel (10 concurrent by default) to improve performance.
|
|
194
|
+
|
|
195
|
+
Be mindful when checking many packages. The `--dry-run` flag shows what would be checked without making API calls.
|
|
196
|
+
|
|
197
|
+
## Development
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
git clone https://github.com/andrew/typosquatting
|
|
201
|
+
cd typosquatting
|
|
202
|
+
bundle install
|
|
203
|
+
bundle exec rake test
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
Run locally without installing:
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
bundle exec ruby -Ilib exe/typosquatting help
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Contributing
|
|
213
|
+
|
|
214
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/andrew/typosquatting.
|
|
215
|
+
|
|
216
|
+
## License
|
|
217
|
+
|
|
218
|
+
MIT
|
data/Rakefile
ADDED
data/exe/typosquatting
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Typosquatting
  module Algorithms
    # Produces candidate names by adding a single extra character to the
    # very beginning or the very end of a package name.
    class Addition < Base
      # Characters tried as the extra character: "a".."z" followed by "0".."9".
      # Frozen so the shared list cannot be modified at runtime.
      CHARS = [*"a".."z", *"0".."9"].freeze

      # Builds every name obtained by prepending or appending one of CHARS.
      #
      # @param package_name [String] the name to derive variants from
      # @return [Array<String>] unique variants; for each character the
      #   prefixed form comes before the suffixed form
      def generate(package_name)
        CHARS.flat_map { |char| [char + package_name, package_name + char] }.uniq
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Typosquatting
  module Algorithms
    # Common superclass of the variant-generating algorithms.
    #
    # Every instance carries a snake_case +name+ derived from its class name
    # (VowelSwap becomes "vowel_swap"). Subclasses override #generate.
    class Base
      # @return [String] snake_case class name with the namespace removed
      attr_reader :name

      # Derives #name from the class name.
      #
      # Anonymous classes report a nil Class#name; they receive an empty
      # name here instead of failing with NoMethodError.
      def initialize
        short_name = self.class.name.to_s.split("::").last.to_s
        # Insert "_" at each lowercase-to-uppercase boundary, then downcase.
        @name = short_name.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
      end

      # Placeholder that subclasses replace with their own implementation.
      #
      # @param package_name [String] the name to derive variants from
      # @raise [NotImplementedError] always, in this base implementation
      def generate(package_name)
        raise NotImplementedError, "Subclasses must implement #generate"
      end

      # One instance of each listed algorithm class, built on the first call
      # and memoized on the receiving class afterwards.
      #
      # @return [Array<Base>] the algorithm instances in the order listed below
      def self.all
        @all ||= [
          Omission.new,
          Repetition.new,
          Replacement.new,
          Transposition.new,
          Addition.new,
          Homoglyph.new,
          VowelSwap.new,
          Delimiter.new,
          WordOrder.new,
          Plural.new,
          Misspelling.new,
          Numeral.new
        ]
      end
    end
  end
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Typosquatting
  module Algorithms
    # Produces candidate names by swapping, removing, or inserting the
    # delimiter characters listed in DELIMITERS.
    class Delimiter < Base
      DELIMITERS = %w[- _ .].freeze

      # Collects, in this order:
      # 1. for every delimiter present in the name: its replacement by each
      #    other delimiter (all occurrences, then progressively from the
      #    left), followed by its removal (all occurrences, then
      #    progressively from the left);
      # 2. the insertion of each delimiter at every position that has no
      #    delimiter on either side.
      #
      # @param package_name [String] the name to derive variants from
      # @return [Array<String>] unique variants in generation order
      def generate(package_name)
        present = DELIMITERS.select { |delim| package_name.include?(delim) }

        rewritten = present.flat_map do |from|
          swaps = (DELIMITERS - [from]).flat_map { |to| swap_variants(package_name, from, to) }
          swaps + [package_name.gsub(from, "")] + stepwise(package_name, from, "")
        end

        (rewritten + insertion_variants(package_name)).uniq
      end

      private

      # Full replacement first, then every partial replacement that differs
      # from the full one.
      def swap_variants(name, from, to)
        complete = name.gsub(from, to)
        [complete] + stepwise(name, from, to).reject { |step| step == complete }
      end

      # Replaces occurrences of +from+ one at a time, left to right, and
      # records the name after each single replacement.
      def stepwise(name, from, to)
        steps = []
        current = name
        while current.include?(from)
          current = current.sub(from, to)
          steps << current
        end
        steps
      end

      # Inserts each delimiter between every pair of adjacent characters
      # where neither character is itself a delimiter.
      def insertion_variants(name)
        gaps = (1...name.length).reject do |i|
          DELIMITERS.include?(name[i - 1]) || DELIMITERS.include?(name[i])
        end

        DELIMITERS.flat_map do |delim|
          gaps.map { |i| name[0...i] + delim + name[i..] }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Typosquatting
  module Algorithms
    # Produces candidate names by substituting characters (or short
    # character sequences) with the look-alikes listed in GLYPHS.
    class Homoglyph < Base
      # Maps a character or character sequence to its look-alike replacements.
      GLYPHS = {
        "a" => %w[4 @],
        "b" => %w[8 6],
        "c" => %w[( {],
        "e" => %w[3],
        "g" => %w[9 6],
        "i" => %w[1 l | !],
        "l" => %w[1 i | I],
        "o" => %w[0],
        "s" => %w[5 $],
        "t" => %w[7 +],
        "z" => %w[2],
        "0" => %w[o O],
        "1" => %w[l i I |],
        "2" => %w[z Z],
        "3" => %w[e E],
        "4" => %w[a A],
        "5" => %w[s S],
        "6" => %w[b g],
        "7" => %w[t T],
        "8" => %w[b B],
        "9" => %w[g q],
        "rn" => %w[m],
        "m" => %w[rn nn],
        "vv" => %w[w],
        "w" => %w[vv uu],
        "cl" => %w[d],
        "d" => %w[cl]
      }.freeze

      # Two passes, concatenated in this order:
      # 1. each single character (looked up lowercased) is replaced, one
      #    position at a time, by each of its look-alikes;
      # 2. each multi-character key found in the name has all of its
      #    occurrences replaced by each of its look-alikes.
      #
      # @param package_name [String] the name to derive variants from
      # @return [Array<String>] unique variants in generation order
      def generate(package_name)
        per_character = package_name.each_char.with_index.flat_map do |char, index|
          GLYPHS.fetch(char.downcase, []).map do |lookalike|
            package_name[0...index] + lookalike + package_name[(index + 1)..]
          end
        end

        per_sequence = GLYPHS.flat_map do |sequence, lookalikes|
          # Single-character keys were already handled position by position.
          next [] if sequence.length == 1 || !package_name.include?(sequence)

          lookalikes.map { |lookalike| package_name.gsub(sequence, lookalike) }
        end

        (per_character + per_sequence).uniq
      end
    end
  end
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Typosquatting
  module Algorithms
    # Produces candidate names by swapping words with their common
    # misspellings, and known misspellings with the correct word.
    class Misspelling < Base
      # Maps a correctly spelled word to its common misspellings.
      COMMON_MISSPELLINGS = {
        "accommodate" => %w[accomodate acommodate],
        "achieve" => %w[acheive],
        "acquire" => %w[aquire],
        "address" => %w[adress],
        "argument" => %w[arguement],
        "calendar" => %w[calender],
        "category" => %w[catagory],
        "commit" => %w[comit],
        "config" => %w[confg],
        "database" => %w[databse],
        "debug" => %w[debig],
        "environment" => %w[enviroment enviornment],
        "experience" => %w[experiance],
        "gauge" => %w[guage],
        "grammar" => %w[grammer],
        "independent" => %w[independant],
        "library" => %w[libary libraray],
        "license" => %w[licence lisense],
        "necessary" => %w[neccessary necessery],
        "occurrence" => %w[occurence occurrance],
        "parallel" => %w[paralel parrallel],
        "privilege" => %w[priviledge],
        "queue" => %w[que],
        "receive" => %w[recieve],
        "recommend" => %w[recomend reccommend],
        "reference" => %w[refrence referance],
        "separate" => %w[seperate],
        "successful" => %w[succesful succesfull],
        "async" => %w[asyc asnyc],
        "util" => %w[utl],
        "utils" => %w[utls utlis],
        "helper" => %w[hleper helpr],
        "client" => %w[clent cleint],
        "server" => %w[sever servre],
        "request" => %w[requst reuqest],
        "response" => %w[respnse responese],
        "parse" => %w[prase prse],
        "logger" => %w[loger logge],
        "handler" => %w[handlr hander],
        "manager" => %w[manger managr],
        "controller" => %w[controler controllr],
        "service" => %w[sevice servce],
        "module" => %w[modle moduel],
        "package" => %w[pakage packge],
        "python" => %w[pyhton pytohn],
        "ruby" => %w[rubu rby],
        "javascript" => %w[javscript javasript],
        "typescript" => %w[typscript tyepscript]
      }.freeze

      # For every dictionary entry:
      # * if the name contains the correct word, emits the name with that
      #   word replaced by each misspelling;
      # * if the name contains a misspelling, emits the name with the
      #   misspelling corrected.
      #
      # Some misspellings are substrings of their correct word ("que" in
      # "queue", "logge" in "logger"). Text that already spells the correct
      # word is left alone, so "queue" no longer yields "queueue".
      #
      # @param package_name [String] the name to derive variants from
      # @return [Array<String>] unique variants, never including package_name
      def generate(package_name)
        variants = []

        COMMON_MISSPELLINGS.each do |correct, misspellings|
          if package_name.include?(correct)
            misspellings.each { |misspelling| variants << package_name.gsub(correct, misspelling) }
          end

          misspellings.each do |misspelling|
            next unless package_name.include?(misspelling)

            variants << correct_misspelling(package_name, misspelling, correct)
          end
        end

        # Correction is a no-op when every match sat inside a correct word.
        variants.reject { |variant| variant == package_name }.uniq
      end

      private

      # Replaces +misspelling+ with +correct+ everywhere except inside text
      # that already spells +correct+.
      def correct_misspelling(package_name, misspelling, correct)
        # Splitting on the correct word (limit -1 keeps empty edge pieces)
        # isolates the segments that may still contain the misspelling.
        package_name
          .split(correct, -1)
          .map { |segment| segment.gsub(misspelling, correct) }
          .join(correct)
      end
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Typosquatting
  module Algorithms
    # Produces candidate names by swapping digits with number words and
    # number words with digits or with their sibling words.
    class Numeral < Base
      # Maps a numeral to the words that may stand in for it.
      NUMERALS = {
        "0" => %w[zero],
        "1" => %w[one first],
        "2" => %w[two second],
        "3" => %w[three third],
        "4" => %w[four fourth for],
        "5" => %w[five fifth],
        "6" => %w[six sixth],
        "7" => %w[seven seventh],
        "8" => %w[eight eighth],
        "9" => %w[nine ninth],
        "10" => %w[ten tenth]
      }.freeze

      # For every NUMERALS entry, in order:
      # * if the name contains the numeral, replaces it with each word;
      # * for each word the name contains, replaces the word with the
      #   numeral and then with every other word of the same entry.
      #
      # @param package_name [String] the name to derive variants from
      # @return [Array<String>] unique variants, never including package_name
      def generate(package_name)
        candidates = NUMERALS.flat_map do |numeral, words|
          spelled_out =
            if package_name.include?(numeral)
              words.map { |word| package_name.gsub(numeral, word) }
            else
              []
            end

          reworded = words.select { |word| package_name.include?(word) }.flat_map do |word|
            # The numeral comes first, then the sibling words in list order.
            ([numeral] + (words - [word])).map { |substitute| package_name.gsub(word, substitute) }
          end

          spelled_out + reworded
        end

        candidates.uniq - [package_name]
      end
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Typosquatting
  module Algorithms
    # Produces candidate names by leaving out one character at a time.
    class Omission < Base
      # @param package_name [String] the name to derive variants from
      # @return [Array<String>] unique non-empty variants, ordered by the
      #   position of the dropped character
      def generate(package_name)
        shortened = (0...package_name.length).map do |dropped|
          package_name[0...dropped] + package_name[(dropped + 1)..]
        end

        # A one-character name would otherwise yield an empty variant.
        shortened.reject(&:empty?).uniq
      end
    end
  end
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Typosquatting
  module Algorithms
    # Produces candidate names by pluralizing and singularizing the whole
    # package name using a small irregular-word table and suffix rules.
    class Plural < Base
      # Singular => plural for words the suffix rules would get wrong.
      IRREGULAR_PLURALS = {
        "child" => "children",
        "person" => "people",
        "man" => "men",
        "woman" => "women",
        "foot" => "feet",
        "tooth" => "teeth",
        "goose" => "geese",
        "mouse" => "mice",
        "ox" => "oxen",
        "index" => "indices",
        "matrix" => "matrices",
        "vertex" => "vertices",
        "analysis" => "analyses",
        "basis" => "bases",
        "crisis" => "crises",
        "datum" => "data",
        "medium" => "media",
        "criterion" => "criteria"
      }.freeze

      # Plural => singular, built once rather than on every #singularize call.
      IRREGULAR_SINGULARS = IRREGULAR_PLURALS.invert.freeze

      # @param package_name [String] the name to derive variants from
      # @return [Array<String>] the plural and singular forms, without
      #   duplicates, empty strings, or package_name itself
      def generate(package_name)
        [pluralize(package_name), singularize(package_name)]
          .compact
          # singularize("s") is "", which is not a usable name.
          .reject { |variant| variant.empty? || variant == package_name }
          .uniq
      end

      # Applies the irregular table first, then the first matching suffix rule.
      #
      # @param word [String]
      # @return [String] the pluralized form
      def pluralize(word)
        return IRREGULAR_PLURALS[word] if IRREGULAR_PLURALS.key?(word)

        # \z anchors at the true end of the string; $ would also match
        # before a line break.
        case word
        when /[^aeiou]y\z/ then "#{word[0..-2]}ies"
        when /(?:ss|x|z|ch|sh)\z/ then "#{word}es"
        when /fe\z/ then "#{word[0..-3]}ves"
        when /f\z/ then "#{word[0..-2]}ves"
        when /s\z/ then "#{word}es"
        else "#{word}s"
        end
      end

      # Applies the irregular table first, then the first matching suffix
      # rule; a word matching no rule is returned unchanged.
      #
      # @param word [String]
      # @return [String] the singularized form (may be empty, e.g. for "s")
      def singularize(word)
        return IRREGULAR_SINGULARS[word] if IRREGULAR_SINGULARS.key?(word)

        case word
        when /ies\z/ then "#{word[0..-4]}y"
        when /ves\z/ then "#{word[0..-4]}f"
        when /(?:ses|xes|zes|ches|shes)\z/ then word[0..-3]
        when /s\z/ then word[0..-2]
        else word
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Typosquatting
  module Algorithms
    # Produces candidate names by doubling one character at a time.
    class Repetition < Base
      # @param package_name [String] the name to derive variants from
      # @return [Array<String>] unique variants, ordered by the position of
      #   the doubled character
      def generate(package_name)
        doubled = package_name.chars.each_with_index.map do |letter, position|
          head = package_name[0..position]
          tail = package_name[(position + 1)..]
          head + letter + tail
        end

        # Doubling either letter of an existing pair gives the same name.
        doubled.uniq
      end
    end
  end
end
|