typosquatting 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +5 -0
  3. data/CODE_OF_CONDUCT.md +10 -0
  4. data/LICENSE +21 -0
  5. data/README.md +218 -0
  6. data/Rakefile +8 -0
  7. data/exe/typosquatting +6 -0
  8. data/lib/typosquatting/algorithms/addition.rb +20 -0
  9. data/lib/typosquatting/algorithms/base.rb +34 -0
  10. data/lib/typosquatting/algorithms/delimiter.rb +48 -0
  11. data/lib/typosquatting/algorithms/homoglyph.rb +61 -0
  12. data/lib/typosquatting/algorithms/misspelling.rb +78 -0
  13. data/lib/typosquatting/algorithms/numeral.rb +45 -0
  14. data/lib/typosquatting/algorithms/omission.rb +16 -0
  15. data/lib/typosquatting/algorithms/plural.rb +74 -0
  16. data/lib/typosquatting/algorithms/repetition.rb +16 -0
  17. data/lib/typosquatting/algorithms/replacement.rb +59 -0
  18. data/lib/typosquatting/algorithms/transposition.rb +17 -0
  19. data/lib/typosquatting/algorithms/vowel_swap.rb +27 -0
  20. data/lib/typosquatting/algorithms/word_order.rb +25 -0
  21. data/lib/typosquatting/cli.rb +380 -0
  22. data/lib/typosquatting/confusion.rb +70 -0
  23. data/lib/typosquatting/ecosystems/base.rb +65 -0
  24. data/lib/typosquatting/ecosystems/cargo.rb +45 -0
  25. data/lib/typosquatting/ecosystems/composer.rb +64 -0
  26. data/lib/typosquatting/ecosystems/golang.rb +56 -0
  27. data/lib/typosquatting/ecosystems/hex.rb +42 -0
  28. data/lib/typosquatting/ecosystems/maven.rb +64 -0
  29. data/lib/typosquatting/ecosystems/npm.rb +66 -0
  30. data/lib/typosquatting/ecosystems/nuget.rb +41 -0
  31. data/lib/typosquatting/ecosystems/pub.rb +43 -0
  32. data/lib/typosquatting/ecosystems/pypi.rb +38 -0
  33. data/lib/typosquatting/ecosystems/rubygems.rb +42 -0
  34. data/lib/typosquatting/generator.rb +58 -0
  35. data/lib/typosquatting/lookup.rb +138 -0
  36. data/lib/typosquatting/sbom.rb +98 -0
  37. data/lib/typosquatting/version.rb +5 -0
  38. data/lib/typosquatting.rb +103 -0
  39. data/sig/typosquatting.rbs +4 -0
  40. metadata +114 -0
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  module Ecosystems
    # Naming rules for the ecosystem whose purl type is "composer", where a
    # package is addressed as "vendor/package".
    class Composer < Base
      # Shape required of both the vendor and the package segment: it starts
      # and ends with an alphanumeric, with "_", "." and "-" allowed in
      # between. Matched case-insensitively.
      SEGMENT_PATTERN = /\A[a-z0-9]([a-z0-9_.-]*[a-z0-9])?\z/i

      def initialize
        super
        @purl_type = "composer"
      end

      # @return [Regexp] pattern describing a full, lower-case
      #   "vendor/package" name
      def name_pattern
        /\A[a-z0-9]([a-z0-9_.-]*[a-z0-9])?\/[a-z0-9]([a-z0-9_.-]*[a-z0-9])?\z/
      end

      # @return [Regexp] pattern matching any single character that may
      #   appear in a name, including the "/" separator
      def allowed_characters
        /[a-z0-9_.\/-]/
      end

      # @return [Array<String>] characters treated as word delimiters
      def allowed_delimiters
        %w[- _ .]
      end

      # @return [Boolean] always false; names are compared case-insensitively
      def case_sensitive?
        false
      end

      # @return [Boolean] always true; names carry a vendor namespace
      def supports_namespaces?
        true
      end

      # @param name [String]
      # @return [String] the lower-cased name
      def normalise(name)
        name.downcase
      end

      # Splits a name on its first "/".
      #
      # @param name [String]
      # @return [Array(String, String), Array(nil, String)] +[vendor, package]+,
      #   or +[nil, name]+ when the name contains no "/"
      def parse_namespace(name)
        parts = name.split("/", 2)
        parts.length == 2 ? parts : [nil, name]
      end

      # Checks that the name is a "vendor/package" pair whose two halves both
      # satisfy SEGMENT_PATTERN.
      #
      # @param name [String, nil]
      # @return [Boolean] strictly true or false
      def valid_name?(name)
        return false if name.nil? || name.empty?

        # A missing vendor comes back as nil; an empty segment cannot match
        # the pattern, so both cases fall out as false here.
        parse_namespace(name).all? do |segment|
          !segment.nil? && SEGMENT_PATTERN.match?(segment)
        end
      end

      # @param vendor [String]
      # @param package [String]
      # @return [String] the two halves joined as "vendor/package"
      def format_name(vendor, package)
        "#{vendor}/#{package}"
      end
    end

    Base.register(Composer.new)
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  module Ecosystems
    # Naming rules for the ecosystem whose purl type is "golang", where a
    # name is a "/"-separated path.
    class Golang < Base
      def initialize
        super
        # NOTE(review): presumably overrides a name that Base would otherwise
        # assign — confirm against Base#initialize.
        @name = "golang"
        @purl_type = "golang"
      end

      # @return [Regexp] pattern for a full name: an alphanumeric followed by
      #   any mix of alphanumerics, ".", "_", "/" and "-"
      def name_pattern
        /\A[a-zA-Z0-9][a-zA-Z0-9._\/-]*\z/
      end

      # @return [Regexp] pattern matching any single character that may
      #   appear in a name
      def allowed_characters
        /[a-zA-Z0-9._\/-]/
      end

      # @return [Array<String>] characters treated as word delimiters
      def allowed_delimiters
        %w[- _ . /]
      end

      # @return [Boolean] always true; names are compared case-sensitively
      def case_sensitive?
        true
      end

      # @return [Boolean] always true; everything before the last "/" is the
      #   namespace
      def supports_namespaces?
        true
      end

      # Removes a trailing "/v<digits>" path element, e.g. "/v2".
      #
      # The pattern is anchored with \z rather than $ so that only the true
      # end of the string is considered; $ would also match before a newline.
      #
      # @param name [String]
      # @return [String]
      def normalise(name)
        name.sub(/\/v\d+\z/, "")
      end

      # Splits a name at its last "/".
      #
      # @param name [String]
      # @return [Array(String, String), Array(nil, String)] +[path, last]+,
      #   or +[nil, name]+ when the name has a single path element
      def parse_namespace(name)
        parts = name.split("/")
        return [nil, name] unless parts.length > 1

        [parts[0..-2].join("/"), parts.last]
      end

      # Rejects nil/empty names, names with a leading or trailing "/", names
      # containing an empty path element ("//"), and anything that does not
      # match #name_pattern.
      #
      # @param name [String, nil]
      # @return [Boolean]
      def valid_name?(name)
        return false if name.nil? || name.empty?
        return false if name.start_with?("/") || name.end_with?("/")
        return false if name.include?("//")

        name_pattern.match?(name)
      end
    end

    Base.register(Golang.new)
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  module Ecosystems
    # Naming rules for the ecosystem whose purl type is "hex".
    class Hex < Base
      def initialize
        super
        @purl_type = "hex"
      end

      # @return [Regexp] pattern for a full name: a lower-case letter followed
      #   by lower-case letters, digits or underscores
      def name_pattern
        /\A[a-z][a-z0-9_]*\z/
      end

      # @return [Regexp] pattern matching any single permitted character
      def allowed_characters
        /[a-z0-9_]/
      end

      # @return [Array<String>] the only delimiter is the underscore
      def allowed_delimiters
        %w[_]
      end

      # @return [Boolean] always false
      def case_sensitive?
        false
      end

      # @param name [String]
      # @return [String] the lower-cased name
      def normalise(name)
        name.downcase
      end

      # A name is valid when it is present, contains neither "-" nor ".",
      # and matches #name_pattern.
      #
      # @param name [String, nil]
      # @return [Boolean]
      def valid_name?(name)
        return false if name.nil? || name.empty?
        return false if name.include?("-") || name.include?(".")

        name_pattern.match?(name)
      end
    end

    Base.register(Hex.new)
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  module Ecosystems
    # Naming rules for the ecosystem whose purl type is "maven", where a
    # package is addressed as "group_id:artifact_id".
    class Maven < Base
      # Shape required of both the group id and the artifact id: one or more
      # alphanumerics, ".", "_" or "-".
      COORDINATE_PATTERN = /\A[a-zA-Z0-9._-]+\z/

      def initialize
        super
        @purl_type = "maven"
      end

      # @return [Regexp] pattern describing a full "group:artifact" name
      def name_pattern
        /\A[a-zA-Z0-9._-]+:[a-zA-Z0-9._-]+\z/
      end

      # @return [Regexp] pattern matching any single character that may
      #   appear in a name, including the ":" separator
      def allowed_characters
        /[a-zA-Z0-9._:-]/
      end

      # @return [Array<String>] characters treated as word delimiters
      def allowed_delimiters
        %w[- _ .]
      end

      # @return [Boolean] always true; names are compared case-sensitively
      def case_sensitive?
        true
      end

      # @return [Boolean] always true; the group id acts as the namespace
      def supports_namespaces?
        true
      end

      # @param name [String]
      # @return [String] the name, unchanged
      def normalise(name)
        name
      end

      # Splits a name on its first ":".
      #
      # @param name [String]
      # @return [Array(String, String), Array(nil, String)]
      #   +[group_id, artifact_id]+, or +[nil, name]+ when there is no ":"
      def parse_namespace(name)
        parts = name.split(":", 2)
        parts.length == 2 ? parts : [nil, name]
      end

      # Checks that the name is a "group:artifact" pair whose two halves both
      # satisfy COORDINATE_PATTERN.
      #
      # @param name [String, nil]
      # @return [Boolean] strictly true or false
      def valid_name?(name)
        return false if name.nil? || name.empty?

        # A missing group id comes back as nil; an empty half cannot match
        # the pattern, so both cases fall out as false here.
        parse_namespace(name).all? do |part|
          !part.nil? && COORDINATE_PATTERN.match?(part)
        end
      end

      # @param group_id [String]
      # @param artifact_id [String]
      # @return [String] the two halves joined as "group_id:artifact_id"
      def format_name(group_id, artifact_id)
        "#{group_id}:#{artifact_id}"
      end
    end

    Base.register(Maven.new)
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  module Ecosystems
    # Naming rules for the ecosystem whose purl type is "npm", where a name is
    # either "package" or "@scope/package".
    class Npm < Base
      # Longest name accepted by #valid_name?.
      MAX_NAME_LENGTH = 214

      # Shape of the scope half, including its leading "@".
      SCOPE_PATTERN = /\A@[a-z0-9~-][a-z0-9._~-]*\z/

      # Shape of the package half. The first character class excludes "." and
      # "_", so names starting with either are rejected by this pattern alone.
      PACKAGE_PATTERN = /\A[a-z0-9~-][a-z0-9._~-]*\z/

      def initialize
        super
        @purl_type = "npm"
      end

      # @return [Regexp] pattern describing a full name with optional scope
      def name_pattern
        /\A(@[a-z0-9~-][a-z0-9._~-]*\/)?[a-z0-9~-][a-z0-9._~-]*\z/
      end

      # @return [Regexp] pattern matching any single character permitted
      #   inside a scope or package segment
      def allowed_characters
        /[a-z0-9._~-]/
      end

      # @return [Array<String>] characters treated as word delimiters
      def allowed_delimiters
        %w[- _ .]
      end

      # @return [Boolean] always false
      def case_sensitive?
        false
      end

      # @return [Boolean] always true; the "@scope" acts as the namespace
      def supports_namespaces?
        true
      end

      # @param name [String]
      # @return [String] the lower-cased name
      def normalise(name)
        name.downcase
      end

      # Splits a scoped name on its first "/".
      #
      # @param name [String]
      # @return [Array] +[scope, package]+ for names starting with "@"
      #   (package is nil when there is no "/"), otherwise +[nil, name]+
      def parse_namespace(name)
        return [nil, name] unless name.start_with?("@")

        scope, package = name.split("/", 2)
        [scope, package]
      end

      # Checks overall length, then the scope (when present) and the package
      # segment against their patterns.
      #
      # @param name [String, nil]
      # @return [Boolean]
      def valid_name?(name)
        return false if name.nil? || name.empty?
        return false if name.length > MAX_NAME_LENGTH

        scope, pkg_name = parse_namespace(name)
        return false if scope && !SCOPE_PATTERN.match?(scope)

        # pkg_name is nil for "@scope" with no "/", and empty for "@scope/";
        # neither can be a valid package segment.
        !pkg_name.nil? && PACKAGE_PATTERN.match?(pkg_name)
      end
    end

    Base.register(Npm.new)
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  module Ecosystems
    # Naming rules for the ecosystem whose purl type is "nuget".
    class Nuget < Base
      def initialize
        super
        @purl_type = "nuget"
      end

      # @return [Regexp] pattern for a full name: an alphanumeric followed by
      #   any mix of alphanumerics, ".", "_" and "-"
      def name_pattern
        /\A[a-zA-Z0-9][a-zA-Z0-9._-]*\z/
      end

      # @return [Regexp] pattern matching any single permitted character
      def allowed_characters
        /[a-zA-Z0-9._-]/
      end

      # @return [Array<String>] characters treated as word delimiters
      def allowed_delimiters
        %w[- _ .]
      end

      # @return [Boolean] always false
      def case_sensitive?
        false
      end

      # @param name [String]
      # @return [String] the lower-cased name
      def normalise(name)
        name.downcase
      end

      # A name is valid when it is present, at most 100 characters long, and
      # matches #name_pattern.
      #
      # @param name [String, nil]
      # @return [Boolean]
      def valid_name?(name)
        return false if name.nil? || name.empty? || name.length > 100

        name_pattern.match?(name)
      end
    end

    Base.register(Nuget.new)
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  module Ecosystems
    # Naming rules for the ecosystem whose purl type is "pub".
    class Pub < Base
      def initialize
        super
        @purl_type = "pub"
      end

      # @return [Regexp] pattern for a full name: a lower-case letter followed
      #   by lower-case letters, digits or underscores
      def name_pattern
        /\A[a-z][a-z0-9_]*\z/
      end

      # @return [Regexp] pattern matching any single permitted character
      def allowed_characters
        /[a-z0-9_]/
      end

      # @return [Array<String>] the only delimiter is the underscore
      def allowed_delimiters
        %w[_]
      end

      # @return [Boolean] always false
      def case_sensitive?
        false
      end

      # @param name [String]
      # @return [String] the lower-cased name
      def normalise(name)
        name.downcase
      end

      # A name is valid when it is present, contains neither "-" nor ".",
      # is between 2 and 64 characters long, and matches #name_pattern.
      #
      # @param name [String, nil]
      # @return [Boolean]
      def valid_name?(name)
        return false if name.nil? || name.empty?
        return false if name.include?("-") || name.include?(".")
        return false unless name.length.between?(2, 64)

        name_pattern.match?(name)
      end
    end

    Base.register(Pub.new)
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  module Ecosystems
    # Naming rules for the ecosystem whose purl type is "pypi".
    class Pypi < Base
      def initialize
        super
        @purl_type = "pypi"
      end

      # @return [Regexp] pattern for a full name: it starts and ends with an
      #   alphanumeric, with ".", "_" and "-" allowed in between
      def name_pattern
        /\A[a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?\z/
      end

      # @return [Regexp] pattern matching any single permitted character
      def allowed_characters
        /[a-zA-Z0-9._-]/
      end

      # @return [Array<String>] characters treated as word delimiters
      def allowed_delimiters
        %w[- _ .]
      end

      # @return [Boolean] always false
      def case_sensitive?
        false
      end

      # Lower-cases the name and collapses every run of "-", "_" and "." into
      # a single "-".
      #
      # @param name [String]
      # @return [String]
      def normalise(name)
        lowered = name.downcase
        lowered.gsub(/[-_.]+/, "-")
      end

      # @param name1 [String]
      # @param name2 [String]
      # @return [Boolean] true when both names normalise to the same string
      def equivalent?(name1, name2)
        first = normalise(name1)
        second = normalise(name2)
        first == second
      end
    end

    Base.register(Pypi.new)
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  module Ecosystems
    # Naming rules for the ecosystem whose purl type is "gem".
    class Rubygems < Base
      def initialize
        super
        # NOTE(review): presumably overrides a name that Base would otherwise
        # assign — confirm against Base#initialize.
        @name = "rubygems"
        @purl_type = "gem"
      end

      # @return [Regexp] pattern for a full name: an alphanumeric followed by
      #   any mix of alphanumerics, "_" and "-"
      def name_pattern
        /\A[a-zA-Z0-9][a-zA-Z0-9_-]*\z/
      end

      # @return [Regexp] pattern matching any single permitted character
      def allowed_characters
        /[a-zA-Z0-9_-]/
      end

      # @return [Array<String>] characters treated as word delimiters
      def allowed_delimiters
        %w[- _]
      end

      # @return [Boolean] always true; names are compared case-sensitively
      def case_sensitive?
        true
      end

      # @param name [String]
      # @return [String] the name, unchanged
      def normalise(name)
        name
      end

      # A name is valid when it is present, contains no ".", and matches
      # #name_pattern.
      #
      # @param name [String, nil]
      # @return [Boolean]
      def valid_name?(name)
        return false if name.nil? || name.empty? || name.include?(".")

        name_pattern.match?(name)
      end
    end

    Base.register(Rubygems.new)
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
module Typosquatting
  # Runs a set of algorithms over a package name and collects the variants
  # that are valid for, and distinct within, a given ecosystem.
  class Generator
    # One generated variant, tagged with the algorithm that produced it and
    # the name it was derived from.
    Variant = Struct.new(:name, :algorithm, :original, keyword_init: true) do
      # @return [Hash{Symbol => Object}]
      def to_h
        { name: name, algorithm: algorithm, original: original }
      end

      # @return [String] the variant name
      def to_s
        name
      end
    end

    attr_reader :ecosystem, :algorithms

    # @param ecosystem [String, Object] an ecosystem object, or a String that
    #   is resolved through Ecosystems::Base.get
    # @param algorithms [Array, nil] algorithms to run; defaults to
    #   Algorithms::Base.all when nil
    def initialize(ecosystem:, algorithms: nil)
      @ecosystem = ecosystem.is_a?(String) ? Ecosystems::Base.get(ecosystem) : ecosystem
      @algorithms = algorithms || Algorithms::Base.all
    end

    # Generates variants of +package_name+ with every configured algorithm.
    #
    # A variant is dropped when it equals the original, is not a valid name in
    # the ecosystem, or normalises to the same string as the original. When
    # several surviving variants normalise to the same string, only the first
    # one (in algorithm order) is kept.
    #
    # @param package_name [String]
    # @return [Array<Variant>]
    def generate(package_name)
      candidates = algorithms.flat_map do |algorithm|
        algorithm.generate(package_name)
                 .select { |variant| candidate?(package_name, variant) }
                 .map do |variant|
          Variant.new(name: variant, algorithm: algorithm.name, original: package_name)
        end
      end

      dedupe_by_normalised_name(candidates)
    end

    # @param original [String]
    # @param variant [String]
    # @return [Boolean] true when both names normalise to the same string
    def same_after_normalisation?(original, variant)
      ecosystem.normalise(original) == ecosystem.normalise(variant)
    end

    # Keeps the first variant for each distinct normalised name.
    #
    # @param variants [Array<Variant>]
    # @return [Array<Variant>]
    def dedupe_by_normalised_name(variants)
      variants.uniq { |variant| ecosystem.normalise(variant.name) }
    end

    private

    # Whether +variant+ is worth reporting for +package_name+. The checks run
    # cheapest first, in the same order as the filters listed on #generate.
    def candidate?(package_name, variant)
      return false if variant == package_name
      return false unless ecosystem.valid_name?(variant)

      !same_after_normalisation?(package_name, variant)
    end
  end
end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+ require "purl"
7
+
8
module Typosquatting
  # Raised by Lookup#fetch when a request fails, whether because of an
  # unexpected HTTP status or an exception raised while making the request or
  # parsing the response.
  class APIError < StandardError; end

  # Looks package names up through the HTTP API rooted at API_BASE.
  class Lookup
    API_BASE = "https://packages.ecosyste.ms/api/v1"
    USER_AGENT = "typosquatting-ruby/#{VERSION} (https://github.com/andrew/typosquatting)"

    # Outcome of looking up a single package name.
    Result = Struct.new(:name, :purl, :packages, :ecosystem, keyword_init: true) do
      # @return [Boolean] true when the lookup returned at least one package
      def exists?
        !packages.empty?
      end

      # @return [Array<String>] distinct registry names found in the packages
      def registries
        packages.map { |package| package.dig("registry", "name") }.compact.uniq
      end

      # @return [Hash{Symbol => Object}]
      def to_h
        {
          name: name,
          purl: purl,
          exists: exists?,
          registries: registries,
          packages: packages
        }
      end
    end

    # Thin wrapper around one registry entry of the API response.
    Registry = Struct.new(:data) do
      def name
        data["name"]
      end

      def url
        data["url"]
      end

      def ecosystem
        data["ecosystem"]
      end

      def packages_count
        data["packages_count"]
      end

      # @return [Object] the wrapped data, unchanged
      def to_h
        data
      end
    end

    attr_reader :ecosystem

    # @param ecosystem [String, Object] an ecosystem object, or a String that
    #   is resolved through Ecosystems::Base.get
    def initialize(ecosystem:)
      @ecosystem = ecosystem.is_a?(String) ? Ecosystems::Base.get(ecosystem) : ecosystem
    end

    # Looks up a single package by its purl.
    #
    # @param package_name [String]
    # @return [Result]
    # @raise [APIError] when the request fails
    def check(package_name)
      purl = build_purl(package_name).to_s
      packages = fetch("/packages/lookup?purl=#{URI.encode_www_form_component(purl)}")

      Result.new(
        name: package_name,
        purl: purl,
        packages: packages || [],
        ecosystem: ecosystem.purl_type
      )
    end

    # Looks up several packages using a pool of worker threads.
    #
    # @param package_names [Array<String>]
    # @param concurrency [Integer] upper bound on worker threads; values
    #   below 1 are treated as 1
    # @return [Array<Result>] one result per input name, in input order
    # @raise [APIError] when any lookup fails
    def check_many(package_names, concurrency: 10)
      return [] if package_names.empty?

      # Each job carries its input position so results can be written
      # straight into place instead of being searched for afterwards.
      jobs = Queue.new
      package_names.each_with_index { |name, index| jobs << [name, index] }
      jobs.close # pop now returns nil once the queue is drained

      results = Array.new(package_names.length)
      lock = Mutex.new
      # Never start more workers than there are jobs.
      worker_count = concurrency.clamp(1, package_names.length)

      workers = Array.new(worker_count) do
        Thread.new do
          while (job = jobs.pop)
            name, index = job
            result = check(name)
            lock.synchronize { results[index] = result }
          end
        end
      end

      workers.each(&:join)
      results
    end

    # Lists the registries the API reports for this ecosystem's purl type.
    #
    # @return [Array<Registry>]
    # @raise [APIError] when the request fails
    def registries
      response = fetch("/registries?ecosystem=#{URI.encode_www_form_component(ecosystem.purl_type)}")
      response&.map { |entry| Registry.new(entry) } || []
    end

    # Builds the package URL for a name, splitting off the namespace when the
    # ecosystem supports one.
    #
    # @param package_name [String]
    # @return [Purl::PackageURL]
    def build_purl(package_name)
      unless ecosystem.supports_namespaces?
        return Purl::PackageURL.new(type: ecosystem.purl_type, name: package_name)
      end

      namespace, name = ecosystem.parse_namespace(package_name)
      Purl::PackageURL.new(
        type: ecosystem.purl_type,
        namespace: namespace,
        name: name
      )
    end

    # Performs a GET against API_BASE + path and returns the parsed JSON body.
    #
    # @param path [String] path and query string, starting with "/"
    # @return [Object] parsed JSON on success; an empty array on 404
    # @raise [APIError] on any other status, or when the request or JSON
    #   parsing raises
    def fetch(path)
      uri = URI("#{API_BASE}#{path}")
      request = Net::HTTP::Get.new(uri)
      request["User-Agent"] = USER_AGENT
      request["Accept"] = "application/json"

      response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
        http.request(request)
      end

      case response
      when Net::HTTPSuccess
        JSON.parse(response.body)
      when Net::HTTPNotFound
        []
      else
        raise APIError, "API request failed: #{response.code} #{response.message}"
      end
    rescue APIError
      # Already the right type; let it through untouched.
      raise
    rescue StandardError => e
      raise APIError, "API request failed: #{e.message}"
    end
  end
end