typosquatting 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +34 -0
- data/lib/typosquatting/cli.rb +74 -0
- data/lib/typosquatting/lookup.rb +87 -0
- data/lib/typosquatting/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 59d9c744171a8ac32d88c7218078d24ce9a27da2822b03c5127569a9cf91be46
|
|
4
|
+
data.tar.gz: eb928d00e9d2f3eb5c195628c9a7bc8eaf33e02bc04ce07eab98449b48e3f12c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ef4a6f706d3bd5a53d603c1c7d124fd317241ecff5ec898dea1df810ae5e65173986ab3d329ddb7300c89c2608c797b99369425eeb7910cb40f7574de99dfd00
|
|
7
|
+
data.tar.gz: 93643869152bc1c8ee092a0289c5c49d8615f69adb849f1f5343d8f11748b425f48b6a1e6028223432cf1eecb725fae24181e9d218297657a9fa61e1309675ef
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -69,6 +69,12 @@ typosquatting check requests -e pypi -f json
|
|
|
69
69
|
|
|
70
70
|
# List available algorithms
|
|
71
71
|
typosquatting algorithms
|
|
72
|
+
|
|
73
|
+
# Discover existing packages similar to a target (by edit distance)
|
|
74
|
+
typosquatting discover requests -e pypi
|
|
75
|
+
|
|
76
|
+
# Discover with generated variants check
|
|
77
|
+
typosquatting discover requests -e pypi --with-variants
|
|
72
78
|
```
|
|
73
79
|
|
|
74
80
|
## Example Output
|
|
@@ -204,6 +210,34 @@ Package lookups use the [ecosyste.ms](https://packages.ecosyste.ms) API. Request
|
|
|
204
210
|
|
|
205
211
|
Be mindful when checking many packages. The `--dry-run` flag shows what would be checked without making API calls.
|
|
206
212
|
|
|
213
|
+
### packages.ecosyste.ms API
|
|
214
|
+
|
|
215
|
+
The package_names endpoint can help identify potential typosquats by searching for packages with similar prefixes or postfixes to popular package names.
|
|
216
|
+
|
|
217
|
+
```
|
|
218
|
+
GET /api/v1/registries/{registry}/package_names
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
**Parameters:**
|
|
222
|
+
- `prefix` - filter by package names starting with string (case insensitive)
|
|
223
|
+
- `postfix` - filter by package names ending with string (case insensitive)
|
|
224
|
+
- `page`, `per_page` - pagination
|
|
225
|
+
- `sort`, `order` - sorting
|
|
226
|
+
|
|
227
|
+
**Examples:**
|
|
228
|
+
```
|
|
229
|
+
# Find RubyGems packages ending in "ails" (potential "rails" typosquats)
|
|
230
|
+
https://packages.ecosyste.ms/api/v1/registries/rubygems.org/package_names?postfix=ails
|
|
231
|
+
|
|
232
|
+
# Find RubyGems packages starting with "rai" (potential "rails" typosquats)
|
|
233
|
+
https://packages.ecosyste.ms/api/v1/registries/rubygems.org/package_names?prefix=rai
|
|
234
|
+
|
|
235
|
+
# Find npm packages starting with "reac" (potential "react" typosquats)
|
|
236
|
+
https://packages.ecosyste.ms/api/v1/registries/npmjs.org/package_names?prefix=reac
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Full API documentation: [packages.ecosyste.ms/docs](https://packages.ecosyste.ms/docs)
|
|
240
|
+
|
|
207
241
|
## Dataset
|
|
208
242
|
|
|
209
243
|
The [ecosyste-ms/typosquatting-dataset](https://github.com/ecosyste-ms/typosquatting-dataset) contains 143 confirmed typosquatting attacks from security research, mapping malicious packages to their targets with classification and source attribution. Useful for testing detection tools and understanding real attack patterns.
|
data/lib/typosquatting/cli.rb
CHANGED
|
@@ -16,6 +16,8 @@ module Typosquatting
|
|
|
16
16
|
generate(args)
|
|
17
17
|
when "check"
|
|
18
18
|
check(args)
|
|
19
|
+
when "discover"
|
|
20
|
+
discover(args)
|
|
19
21
|
when "confusion"
|
|
20
22
|
confusion(args)
|
|
21
23
|
when "sbom"
|
|
@@ -101,6 +103,39 @@ module Typosquatting
|
|
|
101
103
|
output_check_results(results, options)
|
|
102
104
|
end
|
|
103
105
|
|
|
106
|
+
def discover(args)
|
|
107
|
+
options = { format: "text", max_distance: 2 }
|
|
108
|
+
parser = OptionParser.new do |opts|
|
|
109
|
+
opts.banner = "Usage: typosquatting discover PACKAGE -e ECOSYSTEM [options]"
|
|
110
|
+
opts.on("-e", "--ecosystem ECOSYSTEM", "Package ecosystem (required)") { |v| options[:ecosystem] = v }
|
|
111
|
+
opts.on("-f", "--format FORMAT", "Output format (text, json)") { |v| options[:format] = v }
|
|
112
|
+
opts.on("-d", "--distance N", Integer, "Maximum edit distance (default: 2)") { |v| options[:max_distance] = v }
|
|
113
|
+
opts.on("--with-variants", "Also show which generated variants exist") { options[:with_variants] = true }
|
|
114
|
+
end
|
|
115
|
+
parser.parse!(args)
|
|
116
|
+
|
|
117
|
+
package = args.shift
|
|
118
|
+
unless package && options[:ecosystem]
|
|
119
|
+
$stderr.puts "Error: Package name and ecosystem required"
|
|
120
|
+
$stderr.puts parser
|
|
121
|
+
exit 1
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
lookup = Lookup.new(ecosystem: options[:ecosystem])
|
|
125
|
+
|
|
126
|
+
$stderr.puts "Discovering similar packages to #{package}..." if $stderr.tty?
|
|
127
|
+
results = lookup.discover(package, max_distance: options[:max_distance])
|
|
128
|
+
|
|
129
|
+
if options[:with_variants]
|
|
130
|
+
generator = Generator.new(ecosystem: options[:ecosystem])
|
|
131
|
+
variants = generator.generate(package)
|
|
132
|
+
variant_results = lookup.check_with_variants(package, variants)
|
|
133
|
+
existing_variants = variant_results.select(&:exists?)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
output_discover_results(results, existing_variants, options)
|
|
137
|
+
end
|
|
138
|
+
|
|
104
139
|
def confusion(args)
|
|
105
140
|
options = { format: "text" }
|
|
106
141
|
parser = OptionParser.new do |opts|
|
|
@@ -212,6 +247,7 @@ module Typosquatting
|
|
|
212
247
|
puts "Commands:"
|
|
213
248
|
puts " generate PACKAGE -e ECOSYSTEM Generate typosquat variants"
|
|
214
249
|
puts " check PACKAGE -e ECOSYSTEM Check which variants exist"
|
|
250
|
+
puts " discover PACKAGE -e ECOSYSTEM Find similar packages by edit distance"
|
|
215
251
|
puts " confusion PACKAGE -e ECOSYSTEM Check for dependency confusion"
|
|
216
252
|
puts " sbom FILE Check SBOM for potential typosquats"
|
|
217
253
|
puts " ecosystems List supported ecosystems"
|
|
@@ -222,6 +258,7 @@ module Typosquatting
|
|
|
222
258
|
puts "Examples:"
|
|
223
259
|
puts " typosquatting generate requests -e pypi"
|
|
224
260
|
puts " typosquatting check requests -e pypi --existing-only"
|
|
261
|
+
puts " typosquatting discover rails -e gem --with-variants"
|
|
225
262
|
puts " typosquatting confusion my-package -e maven"
|
|
226
263
|
puts " typosquatting sbom bom.json"
|
|
227
264
|
end
|
|
@@ -379,5 +416,42 @@ module Typosquatting
|
|
|
379
416
|
puts "Found #{results.length} suspicious package(s)"
|
|
380
417
|
end
|
|
381
418
|
end
|
|
419
|
+
|
|
420
|
+
def output_discover_results(discovered, existing_variants, options)
|
|
421
|
+
case options[:format]
|
|
422
|
+
when "json"
|
|
423
|
+
data = {
|
|
424
|
+
discovered: discovered.map(&:to_h),
|
|
425
|
+
existing_variants: existing_variants&.map(&:to_h)
|
|
426
|
+
}.compact
|
|
427
|
+
puts JSON.pretty_generate(data)
|
|
428
|
+
else
|
|
429
|
+
if discovered.empty? && (existing_variants.nil? || existing_variants.empty?)
|
|
430
|
+
puts "No similar packages found"
|
|
431
|
+
return
|
|
432
|
+
end
|
|
433
|
+
|
|
434
|
+
if discovered.any?
|
|
435
|
+
puts "Similar packages found (by edit distance):"
|
|
436
|
+
puts ""
|
|
437
|
+
discovered.each do |result|
|
|
438
|
+
puts " #{result.name} (distance: #{result.distance})"
|
|
439
|
+
end
|
|
440
|
+
puts ""
|
|
441
|
+
end
|
|
442
|
+
|
|
443
|
+
if existing_variants&.any?
|
|
444
|
+
puts "Generated variants that exist:"
|
|
445
|
+
puts ""
|
|
446
|
+
existing_variants.each do |result|
|
|
447
|
+
puts " #{result.name}"
|
|
448
|
+
end
|
|
449
|
+
puts ""
|
|
450
|
+
end
|
|
451
|
+
|
|
452
|
+
puts "Found #{discovered.length} similar package(s)"
|
|
453
|
+
puts "Found #{existing_variants.length} existing variant(s)" if existing_variants&.any?
|
|
454
|
+
end
|
|
455
|
+
end
|
|
382
456
|
end
|
|
383
457
|
end
|
data/lib/typosquatting/lookup.rb
CHANGED
|
@@ -4,6 +4,7 @@ require "net/http"
|
|
|
4
4
|
require "json"
|
|
5
5
|
require "uri"
|
|
6
6
|
require "purl"
|
|
7
|
+
require "set"
|
|
7
8
|
|
|
8
9
|
module Typosquatting
|
|
9
10
|
class Lookup
|
|
@@ -51,6 +52,92 @@ module Typosquatting
|
|
|
51
52
|
response&.map { |r| Registry.new(r) } || []
|
|
52
53
|
end
|
|
53
54
|
|
|
55
|
+
def list_names(registry:, prefix: nil, postfix: nil)
|
|
56
|
+
params = []
|
|
57
|
+
params << "prefix=#{URI.encode_www_form_component(prefix)}" if prefix
|
|
58
|
+
params << "postfix=#{URI.encode_www_form_component(postfix)}" if postfix
|
|
59
|
+
query = params.empty? ? "" : "?#{params.join("&")}"
|
|
60
|
+
|
|
61
|
+
fetch("/registries/#{URI.encode_www_form_component(registry)}/package_names#{query}") || []
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def discover(package_name, max_distance: 2)
|
|
65
|
+
registry = registries.first
|
|
66
|
+
return [] unless registry
|
|
67
|
+
|
|
68
|
+
prefix = package_name[0, 3]
|
|
69
|
+
candidates = list_names(registry: registry.name, prefix: prefix)
|
|
70
|
+
|
|
71
|
+
candidates.filter_map do |candidate|
|
|
72
|
+
next if candidate == package_name
|
|
73
|
+
|
|
74
|
+
distance = levenshtein(package_name.downcase, candidate.downcase)
|
|
75
|
+
next if distance > max_distance || distance == 0
|
|
76
|
+
|
|
77
|
+
DiscoveryResult.new(
|
|
78
|
+
name: candidate,
|
|
79
|
+
target: package_name,
|
|
80
|
+
distance: distance
|
|
81
|
+
)
|
|
82
|
+
end.sort_by(&:distance)
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def check_with_variants(package_name, variants)
|
|
86
|
+
registry = registries.first
|
|
87
|
+
return [] unless registry
|
|
88
|
+
|
|
89
|
+
prefix = package_name[0, 3]
|
|
90
|
+
existing = list_names(registry: registry.name, prefix: prefix)
|
|
91
|
+
existing_set = existing.map(&:downcase).to_set
|
|
92
|
+
|
|
93
|
+
variant_names = variants.map { |v| v.is_a?(String) ? v : v.name }
|
|
94
|
+
|
|
95
|
+
variant_names.filter_map do |variant|
|
|
96
|
+
exists = existing_set.include?(variant.downcase)
|
|
97
|
+
VariantCheckResult.new(
|
|
98
|
+
name: variant,
|
|
99
|
+
exists: exists
|
|
100
|
+
)
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def levenshtein(s1, s2)
|
|
105
|
+
m, n = s1.length, s2.length
|
|
106
|
+
return n if m == 0
|
|
107
|
+
return m if n == 0
|
|
108
|
+
|
|
109
|
+
d = Array.new(m + 1) { |i| i }
|
|
110
|
+
x = nil
|
|
111
|
+
|
|
112
|
+
(1..n).each do |j|
|
|
113
|
+
d[0] = j
|
|
114
|
+
x = j - 1
|
|
115
|
+
|
|
116
|
+
(1..m).each do |i|
|
|
117
|
+
cost = s1[i - 1] == s2[j - 1] ? 0 : 1
|
|
118
|
+
x, d[i] = d[i], [d[i] + 1, d[i - 1] + 1, x + cost].min
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
d[m]
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
DiscoveryResult = Struct.new(:name, :target, :distance, keyword_init: true) do
|
|
126
|
+
def to_h
|
|
127
|
+
{ name: name, target: target, distance: distance }
|
|
128
|
+
end
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
VariantCheckResult = Struct.new(:name, :exists, keyword_init: true) do
|
|
132
|
+
def exists?
|
|
133
|
+
exists
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def to_h
|
|
137
|
+
{ name: name, exists: exists }
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
|
|
54
141
|
Result = Struct.new(:name, :purl, :packages, :ecosystem, keyword_init: true) do
|
|
55
142
|
def exists?
|
|
56
143
|
!packages.empty?
|