typosquatting 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +5 -0
  3. data/CODE_OF_CONDUCT.md +10 -0
  4. data/LICENSE +21 -0
  5. data/README.md +218 -0
  6. data/Rakefile +8 -0
  7. data/exe/typosquatting +6 -0
  8. data/lib/typosquatting/algorithms/addition.rb +20 -0
  9. data/lib/typosquatting/algorithms/base.rb +34 -0
  10. data/lib/typosquatting/algorithms/delimiter.rb +48 -0
  11. data/lib/typosquatting/algorithms/homoglyph.rb +61 -0
  12. data/lib/typosquatting/algorithms/misspelling.rb +78 -0
  13. data/lib/typosquatting/algorithms/numeral.rb +45 -0
  14. data/lib/typosquatting/algorithms/omission.rb +16 -0
  15. data/lib/typosquatting/algorithms/plural.rb +74 -0
  16. data/lib/typosquatting/algorithms/repetition.rb +16 -0
  17. data/lib/typosquatting/algorithms/replacement.rb +59 -0
  18. data/lib/typosquatting/algorithms/transposition.rb +17 -0
  19. data/lib/typosquatting/algorithms/vowel_swap.rb +27 -0
  20. data/lib/typosquatting/algorithms/word_order.rb +25 -0
  21. data/lib/typosquatting/cli.rb +380 -0
  22. data/lib/typosquatting/confusion.rb +70 -0
  23. data/lib/typosquatting/ecosystems/base.rb +65 -0
  24. data/lib/typosquatting/ecosystems/cargo.rb +45 -0
  25. data/lib/typosquatting/ecosystems/composer.rb +64 -0
  26. data/lib/typosquatting/ecosystems/golang.rb +56 -0
  27. data/lib/typosquatting/ecosystems/hex.rb +42 -0
  28. data/lib/typosquatting/ecosystems/maven.rb +64 -0
  29. data/lib/typosquatting/ecosystems/npm.rb +66 -0
  30. data/lib/typosquatting/ecosystems/nuget.rb +41 -0
  31. data/lib/typosquatting/ecosystems/pub.rb +43 -0
  32. data/lib/typosquatting/ecosystems/pypi.rb +38 -0
  33. data/lib/typosquatting/ecosystems/rubygems.rb +42 -0
  34. data/lib/typosquatting/generator.rb +58 -0
  35. data/lib/typosquatting/lookup.rb +138 -0
  36. data/lib/typosquatting/sbom.rb +98 -0
  37. data/lib/typosquatting/version.rb +5 -0
  38. data/lib/typosquatting.rb +103 -0
  39. data/sig/typosquatting.rbs +4 -0
  40. metadata +114 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e50bfd6b6ae458a3c588600cf0ae4e3fe7a551bf801d0133dca2c88d5df423d6
4
+ data.tar.gz: d9f0abe8dd964b970e0f760f807b4b76a2f71d3ad9a05b32dcf5b19ce1438f76
5
+ SHA512:
6
+ metadata.gz: 9cf712d35089a972dd4b9cd47139cb0b793a901f2665de3027629c0f6f39faa1216ef2df4092ee04bc21c89b5c9e2f44e4b26dcbf95598c9b64a151793b3f6be
7
+ data.tar.gz: 4bb644fb9af9173051c6de93b5d0b5e88f3dac01ff6873dba102dfb2a72d64e0acda77daf7116597a1a93945e050a04f9603c18a039f6faca0761be51e310142
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2025-12-16
4
+
5
+ - Initial release
data/CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,10 @@
1
+ # Code of Conduct
2
+
3
+ "typosquatting" follows [The Ruby Community Conduct Guideline](https://www.ruby-lang.org/en/conduct) in all "collaborative spaces", which are defined as community communication channels (such as mailing lists, submitted patches, commit comments, etc.):
4
+
5
+ * Participants will be tolerant of opposing views.
6
+ * Participants must ensure that their language and actions are free of personal attacks and disparaging personal remarks.
7
+ * When interpreting the words and actions of others, participants should always assume good intentions.
8
+ * Behaviour which can be reasonably considered harassment will not be tolerated.
9
+
10
+ If you have any concerns about behaviour within this project, please contact us at [andrewnez@gmail.com](mailto:andrewnez@gmail.com).
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Andrew Nesbitt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,218 @@
1
+ # Typosquatting
2
+
3
+ Detect potential typosquatting packages across package ecosystems. Generate typosquat variants of package names and check if they exist on package registries.
4
+
5
+ Supports PyPI, npm, RubyGems, Cargo, Go, Maven, NuGet, Composer, Hex, and Pub.
6
+
7
+ ## When to use this
8
+
9
+ **Typosquatting** is when an attacker publishes a malicious package with a name similar to a popular one, hoping developers mistype the name or copy-paste a bad example. This tool generates those similar names and checks if they exist.
10
+
11
+ **Dependency confusion** is when an attacker publishes a public package with the same name as your private/internal package, hoping your build system fetches the public one. The `confusion` command checks which registries have your package name.
12
+
13
+ This tool helps you:
14
+ - Find existing typosquats of packages you maintain
15
+ - Audit your dependencies for packages that look like typosquats of popular ones
16
+ - Check if your internal package names are safe from dependency confusion
17
+
18
+ False positives are common. A package named `request` isn't necessarily a typosquat of `requests`. Use the output as a starting point for investigation, not as a definitive verdict.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ gem install typosquatting
24
+ ```
25
+
26
+ Or add to your Gemfile:
27
+
28
+ ```ruby
29
+ gem "typosquatting"
30
+ ```
31
+
32
+ ## CLI Usage
33
+
34
+ ```bash
35
+ # Generate typosquat variants for a package
36
+ typosquatting generate requests -e pypi
37
+
38
+ # Use specific algorithms only
39
+ typosquatting generate requests -e pypi -a omission,homoglyph
40
+
41
+ # Show which algorithm generated each variant
42
+ typosquatting generate requests -e pypi -v
43
+
44
+ # Check which variants actually exist on registries
45
+ typosquatting check requests -e pypi
46
+
47
+ # Only show existing packages
48
+ typosquatting check requests -e pypi --existing-only
49
+
50
+ # Preview what would be checked without API calls
51
+ typosquatting check requests -e pypi --dry-run
52
+
53
+ # Check for dependency confusion risks
54
+ typosquatting confusion com.company:internal-lib -e maven
55
+
56
+ # Check multiple packages from a file
57
+ typosquatting confusion -e maven --file internal-packages.txt
58
+
59
+ # Scan an SBOM for potential typosquats
60
+ typosquatting sbom bom.json
61
+
62
+ # Output as JSON
63
+ typosquatting check requests -e pypi -f json
64
+
65
+ # List available algorithms
66
+ typosquatting algorithms
67
+ ```
68
+
69
+ ## Example Output
70
+
71
+ ```bash
72
+ $ typosquatting check lodash -e npm --existing-only -v
73
+
74
+ Checking 142 variants...
75
+ lodas (omission) - EXISTS
76
+ registries: npmjs.org
77
+ lodah (omission) - EXISTS
78
+ registries: npmjs.org
79
+ 1odash (homoglyph) - EXISTS
80
+ registries: npmjs.org
81
+
82
+ Checked 142 variants, 3 exist
83
+ ```
84
+
85
+ ```bash
86
+ $ typosquatting sbom bom.json
87
+
88
+ Potential typosquats found:
89
+
90
+ reqests (pypi)
91
+ Version: 1.0.0
92
+ PURL: pkg:pypi/reqests@1.0.0
93
+ Similar to existing packages:
94
+ - requests (omission)
95
+ registries: pypi.org
96
+
97
+ Found 1 suspicious package(s)
98
+ ```
99
+
100
+ ## Library Usage
101
+
102
+ ```ruby
103
+ require "typosquatting"
104
+
105
+ # Generate variants (returns array of names)
106
+ variants = Typosquatting.generate("requests", ecosystem: "pypi")
107
+ # => ["reqests", "requets", "request", "reqeusts", ...]
108
+
109
+ # Generate with algorithm info
110
+ variants = Typosquatting.generate_with_algorithms("requests", ecosystem: "pypi")
111
+ variants.each do |v|
112
+ puts "#{v.name} (#{v.algorithm})"
113
+ end
114
+
115
+ # Check which variants exist on registries
116
+ results = Typosquatting.check("requests", ecosystem: "pypi")
117
+ results.each do |result|
118
+ puts "#{result.name} - #{result.exists? ? 'EXISTS' : 'available'}"
119
+ puts " registries: #{result.registries.map(&:name).join(', ')}" if result.exists?
120
+ end
121
+
122
+ # Dependency confusion check
123
+ confusion = Typosquatting.check_confusion("my-internal-package", ecosystem: "maven")
124
+ confusion.registries.each do |registry, exists|
125
+ puts "#{registry}: #{exists ? 'EXISTS' : 'available'}"
126
+ end
127
+ puts "Risk detected!" if confusion.confusion_risk?
128
+
129
+ # Access ecosystem rules
130
+ ecosystem = Typosquatting::Ecosystem.get("pypi")
131
+ ecosystem.valid_name?("some-package") # => true
132
+ ecosystem.normalise("Some_Package") # => "some-package"
133
+
134
+ # Scan an SBOM
135
+ checker = Typosquatting::SBOMChecker.new("bom.json")
136
+ results = checker.check
137
+ results.each do |result|
138
+ puts "#{result.name} may be a typosquat of:"
139
+ result.suspicions.each do |s|
140
+ puts " - #{s.name} (#{s.algorithm})"
141
+ end
142
+ end
143
+ ```
144
+
145
+ ## Supported Ecosystems
146
+
147
+ Use these identifiers with the `-e` / `--ecosystem` flag:
148
+
149
+ | ID | Registry | Case Sensitive | Delimiters | Notes |
150
+ |----|----------|----------------|------------|-------|
151
+ | `pypi` | PyPI | No | `-` `_` `.` | Normalizes to lowercase, collapses delimiters to `-` |
152
+ | `npm` | npmjs.org | No | `-` `_` `.` | Supports scoped packages (`@scope/name`) |
153
+ | `gem` | RubyGems | Yes | `-` `_` | No dots allowed |
154
+ | `cargo` | crates.io | No | `-` `_` | `_` and `-` are equivalent |
155
+ | `golang` | proxy.golang.org | Yes | `-` `_` `.` `/` | Module paths with `/`, version suffixes |
156
+ | `maven` | Maven Central | Yes | `-` `_` `.` | `groupId:artifactId` format |
157
+ | `nuget` | nuget.org | No | `-` `_` `.` | Dots common in names |
158
+ | `composer` | Packagist | No | `-` `_` `.` | `vendor/package` format |
159
+ | `hex` | hex.pm | No | `_` | Underscore only, no hyphens |
160
+ | `pub` | pub.dev | No | `_` | Underscore only, 2-64 chars |
161
+
162
+ ## Algorithms
163
+
164
+ Use these names with the `-a` / `--algorithms` flag (comma-separated):
165
+
166
+ | Name | Description | Example |
167
+ |------|-------------|---------|
168
+ | `omission` | Drop single characters | `requests` -> `reqests` |
169
+ | `repetition` | Double characters | `requests` -> `rrequests` |
170
+ | `replacement` | Adjacent keyboard characters | `requests` -> `requezts` |
171
+ | `transposition` | Swap adjacent characters | `requests` -> `reqeusts` |
172
+ | `addition` | Insert characters at start/end | `requests` -> `arequests` |
173
+ | `homoglyph` | Lookalike characters | `requests` -> `reque5ts` |
174
+ | `vowel_swap` | Swap vowels | `requests` -> `raquests` |
175
+ | `delimiter` | Change/add/remove `-` `_` `.` | `my-package` -> `my_package` |
176
+ | `word_order` | Reorder words | `foo-bar` -> `bar-foo` |
177
+ | `plural` | Singularize/pluralize | `request` -> `requests` |
178
+ | `misspelling` | Common typos | `library` -> `libary` |
179
+ | `numeral` | Number/word swap | `lib2` -> `libtwo` |
180
+
181
+ ## SBOM Support
182
+
183
+ The `sbom` command parses CycloneDX and SPDX JSON files. It reads the `purl` field from each component to determine the ecosystem and package name.
184
+
185
+ Supported formats:
186
+ - CycloneDX 1.4+ (JSON)
187
+ - SPDX 2.2+ (JSON)
188
+
189
+ The checker looks for packages in your SBOM that have names similar to existing popular packages, which could indicate you've installed a typosquat.
190
+
191
+ ## API and Rate Limiting
192
+
193
+ Package lookups use the [ecosyste.ms](https://packages.ecosyste.ms) API. Requests are made in parallel (10 concurrent by default) to improve performance.
194
+
195
+ Be mindful when checking many packages. The `--dry-run` flag shows what would be checked without making API calls.
196
+
197
+ ## Development
198
+
199
+ ```bash
200
+ git clone https://github.com/andrew/typosquatting
201
+ cd typosquatting
202
+ bundle install
203
+ bundle exec rake test
204
+ ```
205
+
206
+ Run locally without installing:
207
+
208
+ ```bash
209
+ bundle exec ruby -Ilib exe/typosquatting help
210
+ ```
211
+
212
+ ## Contributing
213
+
214
+ Bug reports and pull requests are welcome on GitHub at https://github.com/andrew/typosquatting.
215
+
216
+ ## License
217
+
218
+ MIT
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "minitest/test_task"
5
+
6
+ Minitest::TestTask.create
7
+
8
+ task default: :test
data/exe/typosquatting ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "typosquatting"
5
+
6
+ Typosquatting::CLI.run(ARGV)
data/lib/typosquatting/algorithms/addition.rb ADDED
@@ -0,0 +1,20 @@
# frozen_string_literal: true

module Typosquatting
  module Algorithms
    # Produces variants by attaching one extra character to either end of a
    # package name, e.g. "requests" -> "arequests" and "requestsa".
    class Addition < Base
      # Characters tried as a prefix and as a suffix: a-z followed by 0-9.
      # Frozen so the shared list cannot be mutated at runtime.
      CHARS = (("a".."z").to_a + ("0".."9").to_a).freeze

      # @param package_name [String] name to derive variants from
      # @return [Array<String>] unique variants; for each character the
      #   prefixed form comes first, then the suffixed form
      def generate(package_name)
        CHARS.flat_map { |char| [char + package_name, package_name + char] }.uniq
      end
    end
  end
end
data/lib/typosquatting/algorithms/base.rb ADDED
@@ -0,0 +1,34 @@
# frozen_string_literal: true

module Typosquatting
  module Algorithms
    # Abstract parent of every variant-generation algorithm. Subclasses
    # override #generate; the algorithm's name is derived from its class name.
    class Base
      # @return [String] snake_case form of the unqualified class name
      #   (e.g. "vowel_swap" for VowelSwap)
      attr_reader :name

      # One memoized instance of each built-in algorithm, in a fixed order.
      #
      # @return [Array<Base>]
      def self.all
        @all ||= [
          Omission,
          Repetition,
          Replacement,
          Transposition,
          Addition,
          Homoglyph,
          VowelSwap,
          Delimiter,
          WordOrder,
          Plural,
          Misspelling,
          Numeral
        ].map(&:new)
      end

      def initialize
        # Drop the namespace, then insert "_" at every lower-to-upper boundary.
        short_name = self.class.name.split("::").last
        @name = short_name.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
      end

      # @param package_name [String] name to derive variants from
      # @raise [NotImplementedError] always; subclasses supply the behaviour
      def generate(package_name)
        raise NotImplementedError, "Subclasses must implement #generate"
      end
    end
  end
end
data/lib/typosquatting/algorithms/delimiter.rb ADDED
@@ -0,0 +1,48 @@
# frozen_string_literal: true

module Typosquatting
  module Algorithms
    # Produces variants by swapping, removing, or inserting the delimiter
    # characters "-", "_" and ".".
    class Delimiter < Base
      DELIMITERS = %w[- _ .].freeze

      # @param package_name [String] name to derive variants from
      # @return [Array<String>] unique variants: swaps and removals of the
      #   delimiters already present, followed by single-delimiter insertions
      def generate(package_name)
        (swaps_and_removals(package_name) + insertions(package_name)).uniq
      end

      private

      # For every delimiter present in the name: swap it for each other
      # delimiter, then remove it. Each operation contributes the
      # all-occurrences result first, then the left-to-right partial results.
      def swaps_and_removals(package_name)
        present = DELIMITERS.select { |delim| package_name.include?(delim) }

        present.flat_map do |from_delim|
          targets = (DELIMITERS - [from_delim]) + [""]
          targets.flat_map do |to_delim|
            # The last step has every occurrence replaced; earlier steps
            # still contain at least one from_delim.
            *partial, complete = stepwise(package_name, from_delim, to_delim)
            [complete, *partial]
          end
        end
      end

      # Replaces the leftmost remaining from_delim one occurrence at a time
      # and records the string after each replacement.
      def stepwise(text, from_delim, replacement)
        steps = []
        while text.include?(from_delim)
          text = text.sub(from_delim, replacement)
          steps << text
        end
        steps
      end

      # Inserts each delimiter at every interior position that is not already
      # next to a delimiter.
      def insertions(package_name)
        gaps = (1...package_name.length).reject do |i|
          DELIMITERS.include?(package_name[i - 1]) || DELIMITERS.include?(package_name[i])
        end

        DELIMITERS.flat_map do |delim|
          gaps.map { |i| package_name[0...i] + delim + package_name[i..] }
        end
      end
    end
  end
end
data/lib/typosquatting/algorithms/homoglyph.rb ADDED
@@ -0,0 +1,61 @@
# frozen_string_literal: true

module Typosquatting
  module Algorithms
    # Produces variants by substituting visually similar characters or
    # character sequences.
    class Homoglyph < Base
      # Pattern => lookalike replacements. Keys are either one character or a
      # two-character sequence.
      GLYPHS = {
        "a" => %w[4 @],
        "b" => %w[8 6],
        "c" => %w[( {],
        "e" => %w[3],
        "g" => %w[9 6],
        "i" => %w[1 l | !],
        "l" => %w[1 i | I],
        "o" => %w[0],
        "s" => %w[5 $],
        "t" => %w[7 +],
        "z" => %w[2],
        "0" => %w[o O],
        "1" => %w[l i I |],
        "2" => %w[z Z],
        "3" => %w[e E],
        "4" => %w[a A],
        "5" => %w[s S],
        "6" => %w[b g],
        "7" => %w[t T],
        "8" => %w[b B],
        "9" => %w[g q],
        "rn" => %w[m],
        "m" => %w[rn nn],
        "vv" => %w[w],
        "w" => %w[vv uu],
        "cl" => %w[d],
        "d" => %w[cl]
      }.freeze

      # @param package_name [String] name to derive variants from
      # @return [Array<String>] unique variants: per-position single-character
      #   substitutions first, then multi-character pattern substitutions
      def generate(package_name)
        # One position at a time; the character is downcased for the lookup.
        per_character = package_name.each_char.with_index.flat_map do |char, index|
          head = package_name[0...index]
          tail = package_name[(index + 1)..]
          GLYPHS.fetch(char.downcase, []).map { |glyph| head + glyph + tail }
        end

        # Multi-character patterns are matched as-is and every occurrence is
        # replaced at once.
        per_sequence = GLYPHS.flat_map do |pattern, replacements|
          next [] if pattern.length == 1 || !package_name.include?(pattern)

          replacements.map { |replacement| package_name.gsub(pattern, replacement) }
        end

        (per_character + per_sequence).uniq
      end
    end
  end
end
data/lib/typosquatting/algorithms/misspelling.rb ADDED
@@ -0,0 +1,78 @@
# frozen_string_literal: true

module Typosquatting
  module Algorithms
    # Produces variants by exchanging known words with their common
    # misspellings, in both directions.
    class Misspelling < Base
      # Correct spelling => list of frequent misspellings.
      COMMON_MISSPELLINGS = {
        "accommodate" => %w[accomodate acommodate],
        "achieve" => %w[acheive],
        "acquire" => %w[aquire],
        "address" => %w[adress],
        "argument" => %w[arguement],
        "calendar" => %w[calender],
        "category" => %w[catagory],
        "commit" => %w[comit],
        "config" => %w[confg],
        "database" => %w[databse],
        "debug" => %w[debig],
        "environment" => %w[enviroment enviornment],
        "experience" => %w[experiance],
        "gauge" => %w[guage],
        "grammar" => %w[grammer],
        "independent" => %w[independant],
        "library" => %w[libary libraray],
        "license" => %w[licence lisense],
        "necessary" => %w[neccessary necessery],
        "occurrence" => %w[occurence occurrance],
        "parallel" => %w[paralel parrallel],
        "privilege" => %w[priviledge],
        "queue" => %w[que],
        "receive" => %w[recieve],
        "recommend" => %w[recomend reccommend],
        "reference" => %w[refrence referance],
        "separate" => %w[seperate],
        "successful" => %w[succesful succesfull],
        "async" => %w[asyc asnyc],
        "util" => %w[utl],
        "utils" => %w[utls utlis],
        "helper" => %w[hleper helpr],
        "client" => %w[clent cleint],
        "server" => %w[sever servre],
        "request" => %w[requst reuqest],
        "response" => %w[respnse responese],
        "parse" => %w[prase prse],
        "logger" => %w[loger logge],
        "handler" => %w[handlr hander],
        "manager" => %w[manger managr],
        "controller" => %w[controler controllr],
        "service" => %w[sevice servce],
        "module" => %w[modle moduel],
        "package" => %w[pakage packge],
        "python" => %w[pyhton pytohn],
        "ruby" => %w[rubu rby],
        "javascript" => %w[javscript javasript],
        "typescript" => %w[typscript tyepscript]
      }.freeze

      # @param package_name [String] name to derive variants from
      # @return [Array<String>] unique variants; for each table entry the
      #   correct-to-misspelled forms come before the misspelled-to-correct ones
      def generate(package_name)
        candidates = COMMON_MISSPELLINGS.flat_map do |correct, misspellings|
          misspelled =
            if package_name.include?(correct)
              misspellings.map { |wrong| package_name.gsub(correct, wrong) }
            else
              []
            end

          # Reverse direction: a name that contains a misspelling yields the
          # corrected spelling.
          corrected = misspellings
                      .select { |wrong| package_name.include?(wrong) }
                      .map { |wrong| package_name.gsub(wrong, correct) }

          misspelled + corrected
        end

        candidates.uniq
      end
    end
  end
end
data/lib/typosquatting/algorithms/numeral.rb ADDED
@@ -0,0 +1,45 @@
# frozen_string_literal: true

module Typosquatting
  module Algorithms
    # Produces variants by exchanging digits with their word forms, and word
    # forms with each other.
    class Numeral < Base
      # Digit string => word forms that may stand in for it.
      NUMERALS = {
        "0" => %w[zero],
        "1" => %w[one first],
        "2" => %w[two second],
        "3" => %w[three third],
        "4" => %w[four fourth for],
        "5" => %w[five fifth],
        "6" => %w[six sixth],
        "7" => %w[seven seventh],
        "8" => %w[eight eighth],
        "9" => %w[nine ninth],
        "10" => %w[ten tenth]
      }.freeze

      # @param package_name [String] name to derive variants from
      # @return [Array<String>] unique variants, never including the original
      #   name
      def generate(package_name)
        candidates = NUMERALS.flat_map do |digit, words|
          spelled_out =
            if package_name.include?(digit)
              words.map { |word| package_name.gsub(digit, word) }
            else
              []
            end

          # For each word form found in the name: first the digit form, then
          # every alternative word form for the same digit.
          from_words = words.select { |word| package_name.include?(word) }.flat_map do |word|
            alternatives = (words - [word]).map { |other| package_name.gsub(word, other) }
            [package_name.gsub(word, digit), *alternatives]
          end

          spelled_out + from_words
        end

        candidates.uniq - [package_name]
      end
    end
  end
end
data/lib/typosquatting/algorithms/omission.rb ADDED
@@ -0,0 +1,16 @@
# frozen_string_literal: true

module Typosquatting
  module Algorithms
    # Produces variants by dropping one character at a time,
    # e.g. "requests" -> "reqests".
    class Omission < Base
      # @param package_name [String] name to derive variants from
      # @return [Array<String>] unique, non-empty variants in order of the
      #   dropped position
      def generate(package_name)
        (0...package_name.length)
          .map { |index| package_name[0...index] + package_name[(index + 1)..] }
          .reject(&:empty?) # a one-character name would otherwise yield ""
          .uniq
      end
    end
  end
end
data/lib/typosquatting/algorithms/plural.rb ADDED
@@ -0,0 +1,74 @@
# frozen_string_literal: true

module Typosquatting
  module Algorithms
    # Produces variants by pluralizing and singularizing the whole package
    # name with a small set of English suffix rules and irregular forms.
    class Plural < Base
      # Singular => plural for words the suffix rules would get wrong. Only an
      # exact match of the entire name is looked up here.
      IRREGULAR_PLURALS = {
        "child" => "children",
        "person" => "people",
        "man" => "men",
        "woman" => "women",
        "foot" => "feet",
        "tooth" => "teeth",
        "goose" => "geese",
        "mouse" => "mice",
        "ox" => "oxen",
        "index" => "indices",
        "matrix" => "matrices",
        "vertex" => "vertices",
        "analysis" => "analyses",
        "basis" => "bases",
        "crisis" => "crises",
        "datum" => "data",
        "medium" => "media",
        "criterion" => "criteria"
      }.freeze

      # Plural => singular, built once instead of on every #singularize call.
      IRREGULAR_SINGULARS = IRREGULAR_PLURALS.invert.freeze

      # @param package_name [String] name to derive variants from
      # @return [Array<String>] unique variants (plural form first), excluding
      #   the original name and excluding the empty string
      def generate(package_name)
        [pluralize(package_name), singularize(package_name)]
          .compact
          # singularize("s") strips the only character; an empty string is
          # not a usable package name, so it is discarded.
          .reject { |variant| variant.empty? || variant == package_name }
          .uniq
      end

      # @param word [String]
      # @return [String] plural form according to the irregular table or the
      #   first matching suffix rule
      def pluralize(word)
        return IRREGULAR_PLURALS[word] if IRREGULAR_PLURALS.key?(word)

        case word
        when /(.*)([^aeiou])y$/
          "#{Regexp.last_match(1)}#{Regexp.last_match(2)}ies"
        when /(.*)(ss|x|z|ch|sh)$/
          "#{word}es"
        when /(.*)fe$/
          "#{Regexp.last_match(1)}ves"
        when /(.*)f$/
          "#{Regexp.last_match(1)}ves"
        when /(.*)s$/
          "#{word}es"
        else
          "#{word}s"
        end
      end

      # @param word [String]
      # @return [String] singular form according to the irregular table or the
      #   first matching suffix rule; the word itself when no rule applies
      def singularize(word)
        return IRREGULAR_SINGULARS[word] if IRREGULAR_SINGULARS.key?(word)

        case word
        when /(.*)ies$/
          "#{Regexp.last_match(1)}y"
        when /(.*)ves$/
          "#{Regexp.last_match(1)}f"
        when /(.*)(ses|xes|zes|ches|shes)$/
          word[0..-3] # drop the trailing "es"
        when /(.*)s$/
          Regexp.last_match(1)
        else
          word
        end
      end
    end
  end
end
data/lib/typosquatting/algorithms/repetition.rb ADDED
@@ -0,0 +1,16 @@
# frozen_string_literal: true

module Typosquatting
  module Algorithms
    # Produces variants by doubling one character at a time,
    # e.g. "requests" -> "rrequests".
    class Repetition < Base
      # @param package_name [String] name to derive variants from
      # @return [Array<String>] unique variants in order of the doubled
      #   position
      def generate(package_name)
        # Prefix through index plus suffix from index repeats the character
        # at that index exactly once.
        (0...package_name.length)
          .map { |index| package_name[0..index] + package_name[index..] }
          .uniq
      end
    end
  end
end