typosquatting 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57ce19f59014bac56c5922b53d59c794ef8b55c3ff13cde363db2bef133aff23
4
- data.tar.gz: facd26dd6b71803eadfb0c2395f7a0a1249d25992c38d5cf07a15a255de91d69
3
+ metadata.gz: 59d9c744171a8ac32d88c7218078d24ce9a27da2822b03c5127569a9cf91be46
4
+ data.tar.gz: eb928d00e9d2f3eb5c195628c9a7bc8eaf33e02bc04ce07eab98449b48e3f12c
5
5
  SHA512:
6
- metadata.gz: f1de5348e69ee48a5eadcd7fccc310b5f7224f888e84a69bac5ba5fd6bd7d01a64638650834e39ad9f7c55083ec6dd9b3613ed83aefe6019724325a5fcf3b07d
7
- data.tar.gz: 40fe04d1f03d7917bac5663a5a22f90b0bfef24307f9656d9401cb77dbff710f332de4c15c7165a564f81ca6a701ea783fd26ceea335b09309260d1526dc87ef
6
+ metadata.gz: ef4a6f706d3bd5a53d603c1c7d124fd317241ecff5ec898dea1df810ae5e65173986ab3d329ddb7300c89c2608c797b99369425eeb7910cb40f7574de99dfd00
7
+ data.tar.gz: 93643869152bc1c8ee092a0289c5c49d8615f69adb849f1f5343d8f11748b425f48b6a1e6028223432cf1eecb725fae24181e9d218297657a9fa61e1309675ef
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.0] - 2025-12-17
4
+
5
+ - Add `discover` command to find existing similar packages by edit distance using prefix/postfix API
6
+
3
7
  ## [0.2.0] - 2025-12-17
4
8
 
5
9
  - Add GitHub Actions ecosystem for CI/CD workflow typosquatting detection
data/README.md CHANGED
@@ -69,6 +69,12 @@ typosquatting check requests -e pypi -f json
69
69
 
70
70
  # List available algorithms
71
71
  typosquatting algorithms
72
+
73
+ # Discover existing packages similar to a target (by edit distance)
74
+ typosquatting discover requests -e pypi
75
+
76
+ # Discover with generated variants check
77
+ typosquatting discover requests -e pypi --with-variants
72
78
  ```
73
79
 
74
80
  ## Example Output
@@ -204,6 +210,34 @@ Package lookups use the [ecosyste.ms](https://packages.ecosyste.ms) API. Request
204
210
 
205
211
  Be mindful when checking many packages. The `--dry-run` flag shows what would be checked without making API calls.
206
212
 
213
+ ### packages.ecosyste.ms API
214
+
215
+ The `package_names` endpoint can help identify potential typosquats by listing the packages in a registry whose names share a prefix or postfix with a popular package name.
216
+
217
+ ```
218
+ GET /api/v1/registries/{registry}/package_names
219
+ ```
220
+
221
+ **Parameters:**
222
+ - `prefix` - return only package names that start with the given string (case-insensitive)
223
+ - `postfix` - return only package names that end with the given string (case-insensitive)
224
+ - `page`, `per_page` - pagination
225
+ - `sort`, `order` - sorting
226
+
227
+ **Examples:**
228
+ ```
229
+ # Find RubyGems packages ending in "ails" (potential "rails" typosquats)
230
+ https://packages.ecosyste.ms/api/v1/registries/rubygems.org/package_names?postfix=ails
231
+
232
+ # Find RubyGems packages starting with "rai" (potential "rails" typosquats)
233
+ https://packages.ecosyste.ms/api/v1/registries/rubygems.org/package_names?prefix=rai
234
+
235
+ # Find npm packages starting with "reac" (potential "react" typosquats)
236
+ https://packages.ecosyste.ms/api/v1/registries/npmjs.org/package_names?prefix=reac
237
+ ```
238
+
239
+ Full API documentation: [packages.ecosyste.ms/docs](https://packages.ecosyste.ms/docs)
240
+
207
241
  ## Dataset
208
242
 
209
243
  The [ecosyste-ms/typosquatting-dataset](https://github.com/ecosyste-ms/typosquatting-dataset) contains 143 confirmed typosquatting attacks from security research, mapping malicious packages to their targets with classification and source attribution. Useful for testing detection tools and understanding real attack patterns.
@@ -16,6 +16,8 @@ module Typosquatting
16
16
  generate(args)
17
17
  when "check"
18
18
  check(args)
19
+ when "discover"
20
+ discover(args)
19
21
  when "confusion"
20
22
  confusion(args)
21
23
  when "sbom"
@@ -101,6 +103,39 @@ module Typosquatting
101
103
  output_check_results(results, options)
102
104
  end
103
105
 
106
# CLI entry point for `typosquatting discover PACKAGE -e ECOSYSTEM`.
#
# Parses the command's flags out of +args+ (mutating it), asks Lookup for
# published names within the requested edit distance of PACKAGE and, when
# --with-variants is given, also reports which generated variants exist.
# Prints usage to stderr and exits with status 1 when the package name or
# the ecosystem is missing.
def discover(args)
  options = { format: "text", max_distance: 2 }
  parser = OptionParser.new do |o|
    o.banner = "Usage: typosquatting discover PACKAGE -e ECOSYSTEM [options]"
    o.on("-e", "--ecosystem ECOSYSTEM", "Package ecosystem (required)") do |value|
      options[:ecosystem] = value
    end
    o.on("-f", "--format FORMAT", "Output format (text, json)") do |value|
      options[:format] = value
    end
    o.on("-d", "--distance N", Integer, "Maximum edit distance (default: 2)") do |value|
      options[:max_distance] = value
    end
    o.on("--with-variants", "Also show which generated variants exist") do
      options[:with_variants] = true
    end
  end
  parser.parse!(args)

  # After parse! only positional arguments remain; the first is the package.
  package = args.shift
  ecosystem = options[:ecosystem]
  if !package || !ecosystem
    $stderr.puts "Error: Package name and ecosystem required"
    $stderr.puts parser
    exit 1
  end

  lookup = Lookup.new(ecosystem: ecosystem)

  # Progress message only when a human is watching stderr.
  $stderr.puts "Discovering similar packages to #{package}..." if $stderr.tty?
  similar = lookup.discover(package, max_distance: options[:max_distance])

  # Stays nil unless --with-variants was requested; the output routine uses
  # nil to mean "variant check not performed".
  existing_variants =
    if options[:with_variants]
      candidates = Generator.new(ecosystem: ecosystem).generate(package)
      lookup.check_with_variants(package, candidates).select(&:exists?)
    end

  output_discover_results(similar, existing_variants, options)
end
138
+
104
139
  def confusion(args)
105
140
  options = { format: "text" }
106
141
  parser = OptionParser.new do |opts|
@@ -212,6 +247,7 @@ module Typosquatting
212
247
  puts "Commands:"
213
248
  puts " generate PACKAGE -e ECOSYSTEM Generate typosquat variants"
214
249
  puts " check PACKAGE -e ECOSYSTEM Check which variants exist"
250
+ puts " discover PACKAGE -e ECOSYSTEM Find similar packages by edit distance"
215
251
  puts " confusion PACKAGE -e ECOSYSTEM Check for dependency confusion"
216
252
  puts " sbom FILE Check SBOM for potential typosquats"
217
253
  puts " ecosystems List supported ecosystems"
@@ -222,6 +258,7 @@ module Typosquatting
222
258
  puts "Examples:"
223
259
  puts " typosquatting generate requests -e pypi"
224
260
  puts " typosquatting check requests -e pypi --existing-only"
261
+ puts " typosquatting discover rails -e gem --with-variants"
225
262
  puts " typosquatting confusion my-package -e maven"
226
263
  puts " typosquatting sbom bom.json"
227
264
  end
@@ -379,5 +416,42 @@ module Typosquatting
379
416
  puts "Found #{results.length} suspicious package(s)"
380
417
  end
381
418
  end
419
+
420
# Prints the outcome of the discover command to stdout.
#
# discovered        - results responding to #name, #distance and #to_h.
# existing_variants - results responding to #name and #to_h, or nil when the
#                     variant check was not requested.
# options           - CLI options hash; a :format of "json" selects JSON
#                     output, anything else the text report.
def output_discover_results(discovered, existing_variants, options)
  if options[:format] == "json"
    payload = { discovered: discovered.map(&:to_h) }
    # The key is left out entirely when no variant check was performed.
    payload[:existing_variants] = existing_variants.map(&:to_h) unless existing_variants.nil?
    puts JSON.pretty_generate(payload)
    return
  end

  variants_present = existing_variants&.any?

  if discovered.empty? && (existing_variants.nil? || existing_variants.empty?)
    puts "No similar packages found"
    return
  end

  if discovered.any?
    puts "Similar packages found (by edit distance):", ""
    discovered.each { |hit| puts " #{hit.name} (distance: #{hit.distance})" }
    puts ""
  end

  if variants_present
    puts "Generated variants that exist:", ""
    existing_variants.each { |variant| puts " #{variant.name}" }
    puts ""
  end

  puts "Found #{discovered.length} similar package(s)"
  puts "Found #{existing_variants.length} existing variant(s)" if variants_present
end
382
456
  end
383
457
  end
@@ -4,6 +4,7 @@ require "net/http"
4
4
  require "json"
5
5
  require "uri"
6
6
  require "purl"
7
+ require "set"
7
8
 
8
9
  module Typosquatting
9
10
  class Lookup
@@ -51,6 +52,92 @@ module Typosquatting
51
52
  response&.map { |r| Registry.new(r) } || []
52
53
  end
53
54
 
55
# Requests the package_names listing for +registry+, optionally narrowed to
# names starting with +prefix+ and/or ending with +postfix+.
#
# Returns whatever +fetch+ yields for the request, or an empty array when it
# yields nil/false.
def list_names(registry:, prefix: nil, postfix: nil)
  # Only filters that were actually supplied are sent, prefix first.
  filters = { prefix: prefix, postfix: postfix }.select { |_key, value| value }

  path = "/registries/#{URI.encode_www_form_component(registry)}/package_names"
  path += "?#{URI.encode_www_form(filters)}" unless filters.empty?

  fetch(path) || []
end
63
+
64
# Finds published package names within +max_distance+ edits of
# +package_name+.
#
# Candidates are gathered from the first registry returned by +registries+
# with two package_names queries: names sharing the target's first three
# characters and names sharing its last three. Querying both ends means a
# lookalike whose edit falls inside the prefix (e.g. a swapped first letter)
# can still be seen through the postfix query. A name that differs at both
# ends is still missed.
#
# NOTE(review): only what a single +list_names+ call returns per query is
# considered; whether that covers every matching name depends on the API's
# paging - confirm.
#
# package_name - the name to compare against.
# max_distance - largest Levenshtein distance to report (default 2).
#
# Returns an Array of DiscoveryResult ordered by distance, then name. Names
# equal to the target (ignoring case) are never reported. Returns [] when no
# registry is available.
def discover(package_name, max_distance: 2)
  registry = registries.first
  return [] unless registry

  prefix = package_name[0, 3]
  # chars.last copes with names shorter than three characters, where
  # package_name[-3, 3] would be nil.
  postfix = package_name.chars.last(3).join

  candidates = (
    list_names(registry: registry.name, prefix: prefix) +
    list_names(registry: registry.name, postfix: postfix)
  ).uniq

  target = package_name.downcase
  matches = candidates.filter_map do |candidate|
    next if candidate == package_name

    distance = levenshtein(target, candidate.downcase)
    next if distance.zero? || distance > max_distance

    DiscoveryResult.new(name: candidate, target: package_name, distance: distance)
  end

  # Secondary key keeps the order deterministic among equal distances.
  matches.sort_by { |match| [match.distance, match.name] }
end
84
+
85
# Reports, for each generated variant, whether a package of that name is
# present in the registry's name listing.
#
# A single package_names query is made, for names starting with the first
# three characters of +package_name+, and variants are matched against it
# case-insensitively.
#
# NOTE(review): because only that one prefix is queried, a variant whose
# first three characters differ from the target's is always reported as not
# existing, whether or not it is published.
#
# package_name - the target package the variants were generated from.
# variants     - Array of Strings or of objects responding to #name.
#
# Returns an Array of VariantCheckResult, one per variant in input order.
# Returns [] when there is no registry or there are no variants.
def check_with_variants(package_name, variants)
  registry = registries.first
  return [] unless registry

  variant_names = variants.map { |v| v.is_a?(String) ? v : v.name }
  # Nothing to check, so skip the registry request altogether.
  return [] if variant_names.empty?

  prefix = package_name[0, 3]
  known_names = list_names(registry: registry.name, prefix: prefix).map(&:downcase).to_set

  variant_names.map do |variant|
    VariantCheckResult.new(name: variant, exists: known_names.include?(variant.downcase))
  end
end
103
+
104
# Computes the Levenshtein edit distance between two strings: the minimum
# number of single-character insertions, deletions and substitutions needed
# to turn +s1+ into +s2+. Comparison is case-sensitive.
#
# Uses a single rolling row of length s1.length + 1 instead of a full matrix.
#
# s1 - first String.
# s2 - second String.
#
# Returns the distance as an Integer.
def levenshtein(s1, s2)
  return 0 if s1 == s2

  # Split once up front so the inner loop compares array elements instead of
  # indexing into the strings (and allocating) for every cell.
  a = s1.chars
  b = s2.chars
  return b.length if a.empty?
  return a.length if b.empty?

  # row[i] is the distance between the first i chars of s1 and the prefix of
  # s2 processed so far.
  row = (0..a.length).to_a

  b.each_with_index do |cb, j|
    diagonal = row[0] # value from the previous row, one column to the left
    row[0] = j + 1

    a.each_with_index do |ca, i|
      above = row[i + 1]
      cost = ca == cb ? 0 : 1
      row[i + 1] = [above + 1, row[i] + 1, diagonal + cost].min
      diagonal = above
    end
  end

  row[a.length]
end
124
+
125
# One package name found near a target name.
#
# name     - the similar package name that was found.
# target   - the package name it was compared against.
# distance - edit distance between the two.
#
# Struct#to_h already yields { name:, target:, distance: } in member order,
# so no custom to_h is defined.
DiscoveryResult = Struct.new(:name, :target, :distance, keyword_init: true)
130
+
131
# Outcome of checking a single generated variant name.
#
# name   - the variant name that was checked.
# exists - whether the name was found in the registry listing.
#
# Struct#to_h already yields { name:, exists: }, so only the predicate
# reader is added.
VariantCheckResult = Struct.new(:name, :exists, keyword_init: true) do
  alias_method :exists?, :exists
end
140
+
54
141
  Result = Struct.new(:name, :purl, :packages, :ecosystem, keyword_init: true) do
55
142
  def exists?
56
143
  !packages.empty?
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Typosquatting
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: typosquatting
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Nesbitt