purl 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 02a4895e51ce7c9ace65a53d97fd8d4c5d288d7137daa71c9e8b8a778595e6cb
4
- data.tar.gz: 55d348a442e23a0ffddc2b236bc1be22c9cf29d8eac0ce3ff289ce637e1ba6f5
3
+ metadata.gz: e6632d8fa11800fb9a4b376c1555505ba87047f98de52a42103c27c2d2b46e2a
4
+ data.tar.gz: c248313079783e3d5ac6c9e7dfc8663067671d4a19356757cd493e4fb23c2d8f
5
5
  SHA512:
6
- metadata.gz: 4f8ff4ff13c1184c4b1adf6950fc1d04acedb8ce2cbbf4d62f2430455567e98f8b153b5493732919e1921a55132a2f3f51b61157b6eba85e207e6675d8675771
7
- data.tar.gz: 9f203b73ef705fe70a27de5aae3b85558bd34204a2362a7429c441dc7f66d4a5ae403741fcb36471ab33e972868e4d31e6121a5eeaf23fb88832e3d38fec1e9c
6
+ metadata.gz: fc9b6cdde9d6efe15a93cd6760ec3c88aa8be1594d962682c9c27642c72408ebd242d2fa4cbbfc9f87e72e135f337bac8fbc278c69f991f4cbf7d5b3388a4b1b
7
+ data.tar.gz: db4bffcde1032b421fba9b68d717bad523482b2d457b304c997606d538bee59bbfbe185bc9f456c8e72a4150ed74f5bd80d38d757bfeba41ae533ff5260c7ffb
data/CHANGELOG.md CHANGED
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [1.7.1] - 2026-01-14
11
+
12
+ ### Added
13
+ - ecosyste.ms API URL generation for packages and versions
14
+ - `ecosystems_registry` method returns the ecosyste.ms registry name for a PURL type
15
+ - `ecosystems_api_url` method generates the full API URL (version URL if version present, otherwise package URL)
16
+ - `ecosystems_package_api_url` method generates package API URL
17
+ - `ecosystems_version_api_url` method generates version API URL
18
+ - New `ecosystems_registry` field in purl-types.json for types where the registry name differs from the registry URL host
19
+
10
20
  ## [1.7.0] - 2026-01-02
11
21
 
12
22
  ### Added
data/README.md CHANGED
@@ -466,6 +466,30 @@ puts Purl.download_supported_types
466
466
  # "npm", "nuget", "pub", "swift"]
467
467
  ```
468
468
 
469
+ ### ecosyste.ms API URLs
470
+
471
+ Generate API URLs for the packages.ecosyste.ms service:
472
+
473
+ ```ruby
474
+ # Get the ecosyste.ms registry name for a package type
475
+ purl = Purl.parse("pkg:gem/rake@13.3.1")
476
+ purl.ecosystems_registry # => "rubygems.org"
477
+
478
+ # Generate API URLs
479
+ purl.ecosystems_api_url # => "https://packages.ecosyste.ms/api/v1/registries/rubygems.org/packages/rake/versions/13.3.1"
480
+ purl.ecosystems_package_api_url # => "https://packages.ecosyste.ms/api/v1/registries/rubygems.org/packages/rake"
481
+ purl.ecosystems_version_api_url # => "https://packages.ecosyste.ms/api/v1/registries/rubygems.org/packages/rake/versions/13.3.1"
482
+
483
+ # Works with namespaced packages
484
+ purl = Purl.parse("pkg:npm/@babel/core@7.20.0")
485
+ purl.ecosystems_registry # => "npmjs.org"
486
+ purl.ecosystems_api_url # => "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages/%40babel%2Fcore/versions/7.20.0"
487
+
488
+ # Without version, returns package URL
489
+ purl = Purl.parse("pkg:cargo/serde")
490
+ purl.ecosystems_api_url # => "https://packages.ecosyste.ms/api/v1/registries/crates.io/packages/serde"
491
+ ```
492
+
469
493
  ### Reverse Parsing: Registry URLs to PURLs
470
494
 
471
495
  ```ruby
data/Rakefile CHANGED
@@ -790,12 +790,146 @@ namespace :benchmark do
790
790
  puts "✅ Registry URL benchmarks completed!"
791
791
  end
792
792
 
793
+ desc "Benchmark hot paths (to_s, equality, known_type?, type_info, supported_types)"
794
+ task :hotpaths do
795
+ require "benchmark"
796
+ require_relative "lib/purl"
797
+
798
+ iterations = 10_000
799
+
800
+ sample_purls = [
801
+ "pkg:gem/rails@7.0.0",
802
+ "pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js",
803
+ "pkg:maven/org.apache.commons/commons-lang3@3.12.0?classifier=sources",
804
+ "pkg:cargo/rand@0.7.2",
805
+ "pkg:pypi/django@4.0.0",
806
+ "pkg:docker/nginx@sha256:abc123def",
807
+ "pkg:golang/github.com/gorilla/mux@1.8.0",
808
+ ]
809
+
810
+ parsed = sample_purls.map { |p| Purl.parse(p) }
811
+
812
+ puts "Hot Path Benchmarks (#{iterations} iterations)"
813
+ puts "=" * 50
814
+
815
+ Benchmark.bm(28) do |x|
816
+ x.report("to_s") do
817
+ iterations.times { parsed.each(&:to_s) }
818
+ end
819
+
820
+ x.report("== (equal)") do
821
+ pairs = parsed.map { |p| [p, Purl.parse(p.to_s)] }
822
+ iterations.times { pairs.each { |a, b| a == b } }
823
+ end
824
+
825
+ x.report("hash") do
826
+ iterations.times { parsed.each(&:hash) }
827
+ end
828
+
829
+ types = %w[gem npm maven cargo pypi docker golang unknown fake_type]
830
+ x.report("known_type?") do
831
+ iterations.times { types.each { |t| Purl.known_type?(t) } }
832
+ end
833
+
834
+ info_types = %w[gem npm maven cargo pypi]
835
+ x.report("type_info") do
836
+ 1_000.times { info_types.each { |t| Purl.type_info(t) } }
837
+ end
838
+
839
+ x.report("all_type_info") do
840
+ 100.times { Purl.all_type_info }
841
+ end
842
+
843
+ x.report("download_supported_types") do
844
+ iterations.times { Purl::DownloadURL.supported_types }
845
+ end
846
+
847
+ x.report("supported_reverse_types") do
848
+ iterations.times { Purl::RegistryURL.supported_reverse_types }
849
+ end
850
+
851
+ x.report("registry_supported_types") do
852
+ iterations.times { Purl::RegistryURL.supported_types }
853
+ end
854
+
855
+ x.report("parse (simple)") do
856
+ iterations.times { Purl.parse("pkg:gem/rails@7.0.0") }
857
+ end
858
+
859
+ x.report("parse (complex)") do
860
+ iterations.times { Purl.parse("pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js") }
861
+ end
862
+
863
+ x.report("parse (namespaced)") do
864
+ iterations.times { Purl.parse("pkg:maven/org.apache.commons/commons-lang3@3.12.0") }
865
+ end
866
+
867
+ x.report("from_url (domain match)") do
868
+ 1_000.times { Purl.from_registry_url("https://rubygems.org/gems/rails") }
869
+ end
870
+
871
+ x.report("from_url (type hint)") do
872
+ 1_000.times { Purl.from_registry_url("https://gems.internal.com/gems/rails", type: "gem") }
873
+ end
874
+ end
875
+ end
876
+
877
+ desc "Benchmark memory allocations for hot paths"
878
+ task :memory do
879
+ require "memory_profiler"
880
+ require_relative "lib/purl"
881
+
882
+ sample_purls = [
883
+ "pkg:gem/rails@7.0.0",
884
+ "pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js",
885
+ "pkg:maven/org.apache.commons/commons-lang3@3.12.0?classifier=sources",
886
+ "pkg:cargo/rand@0.7.2",
887
+ "pkg:pypi/django@4.0.0",
888
+ "pkg:docker/nginx@sha256:abc123def",
889
+ "pkg:golang/github.com/gorilla/mux@1.8.0",
890
+ ]
891
+
892
+ parsed = sample_purls.map { |p| Purl.parse(p) }
893
+
894
+ benchmarks = {
895
+ "parse (simple)" => -> { Purl.parse("pkg:gem/rails@7.0.0") },
896
+ "parse (complex)" => -> { Purl.parse("pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js") },
897
+ "parse (namespaced)" => -> { Purl.parse("pkg:maven/org.apache.commons/commons-lang3@3.12.0") },
898
+ "to_s" => -> { parsed.each(&:to_s) },
899
+ "to_s (cold)" => -> { sample_purls.map { |p| Purl.parse(p) }.each(&:to_s) },
900
+ "== (equal)" => -> { parsed.each_cons(2) { |a, b| a == b } },
901
+ "known_type?" => -> { %w[gem npm maven cargo pypi].each { |t| Purl.known_type?(t) } },
902
+ "type_info" => -> { %w[gem npm maven].each { |t| Purl.type_info(t) } },
903
+ "supported_types" => -> { Purl::RegistryURL.supported_types },
904
+ "supported_reverse_types" => -> { Purl::RegistryURL.supported_reverse_types },
905
+ "download_supported_types" => -> { Purl::DownloadURL.supported_types },
906
+ "from_url (domain match)" => -> { Purl.from_registry_url("https://rubygems.org/gems/rails") },
907
+ "from_url (type hint)" => -> { Purl.from_registry_url("https://gems.internal.com/gems/rails", type: "gem") },
908
+ }
909
+
910
+ puts "Memory Allocation Benchmarks"
911
+ puts "=" * 70
912
+ printf "%-30s %10s %10s %10s\n", "Benchmark", "Objects", "Memsize", "Strings"
913
+ puts "-" * 70
914
+
915
+ benchmarks.each do |name, block|
916
+ report = MemoryProfiler.report { 100.times { block.call } }
917
+ printf "%-30s %10d %10d %10d\n",
918
+ name,
919
+ report.total_allocated,
920
+ report.total_allocated_memsize,
921
+ report.strings_allocated.size
922
+ end
923
+ end
924
+
793
925
  desc "Run all benchmarks"
794
- task all: [:parse, :types, :registry] do
926
+ task all: [:parse, :types, :registry, :hotpaths] do
795
927
  puts
796
- puts "🎉 All benchmarks completed!"
928
+ puts "All benchmarks completed!"
797
929
  puts " Use 'rake benchmark:parse' for parsing performance"
798
- puts " Use 'rake benchmark:types' for type comparison"
930
+ puts " Use 'rake benchmark:types' for type comparison"
799
931
  puts " Use 'rake benchmark:registry' for URL generation"
932
+ puts " Use 'rake benchmark:hotpaths' for memoization and lookup paths"
933
+ puts " Use 'rake benchmark:memory' for memory allocations"
800
934
  end
801
935
  end
@@ -159,12 +159,10 @@ module Purl
159
159
  NAMESPACE_REQUIRED_TYPES = %w[maven elm github gitlab bitbucket luarocks swift].freeze
160
160
 
161
161
  def self.supported_types
162
- DOWNLOAD_PATTERNS.keys.select do |k|
162
+ @supported_types ||= DOWNLOAD_PATTERNS.keys.select do |k|
163
163
  pattern = DOWNLOAD_PATTERNS[k]
164
- # Skip types with notes (they're not really supported)
165
164
  next false if pattern[:note]
166
165
 
167
- # Test with appropriate namespace for types that need it
168
166
  namespace = if NAMESPACE_REQUIRED_TYPES.include?(k)
169
167
  k == "swift" ? "github.com/test" : "test"
170
168
  end
@@ -174,7 +172,7 @@ module Purl
174
172
  rescue
175
173
  false
176
174
  end
177
- end.sort
175
+ end.sort.freeze
178
176
  end
179
177
 
180
178
  def self.supports?(type)
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Purl
4
+ class EcosystemsURL
5
+ API_BASE = "https://packages.ecosyste.ms/api/v1"
6
+ NAMESPACED_PACKAGE_TYPES = %w[npm composer maven golang swift elm clojars].freeze
7
+
8
+ def self.registry_name(purl)
9
+ new(purl).registry_name
10
+ end
11
+
12
+ def self.api_url(purl)
13
+ new(purl).api_url
14
+ end
15
+
16
+ def self.package_api_url(purl)
17
+ new(purl).package_api_url
18
+ end
19
+
20
+ def self.version_api_url(purl)
21
+ new(purl).version_api_url
22
+ end
23
+
24
+ def initialize(purl)
25
+ @purl = purl.is_a?(PackageURL) ? purl : PackageURL.parse(purl.to_s)
26
+ end
27
+
28
+ def registry_name
29
+ # Check for explicit ecosystems_registry in config first
30
+ type_config = Purl.type_config(@purl.type)
31
+ return type_config["ecosystems_registry"] if type_config&.dig("ecosystems_registry")
32
+
33
+ # Fall back to extracting host from registry_url
34
+ return nil unless @purl.supports_registry_url?
35
+
36
+ host = URI.parse(@purl.registry_url).host
37
+ host.sub(/^www\./, "")
38
+ rescue URI::InvalidURIError, RegistryError
39
+ nil
40
+ end
41
+
42
+ def api_url
43
+ @purl.version ? version_api_url : package_api_url
44
+ end
45
+
46
+ def package_api_url
47
+ registry = registry_name
48
+ return nil unless registry
49
+
50
+ name = package_name_for_api
51
+ "#{API_BASE}/registries/#{registry}/packages/#{encode_path_segment(name)}"
52
+ end
53
+
54
+ def version_api_url
55
+ registry = registry_name
56
+ return nil unless registry
57
+ return nil unless @purl.version
58
+
59
+ name = package_name_for_api
60
+ "#{API_BASE}/registries/#{registry}/packages/#{encode_path_segment(name)}/versions/#{encode_path_segment(@purl.version)}"
61
+ end
62
+
63
+ private
64
+
65
+ def package_name_for_api
66
+ # Some ecosystems use namespace/name format
67
+ if @purl.namespace && namespaced_package_types.include?(@purl.type.downcase)
68
+ "#{@purl.namespace}/#{@purl.name}"
69
+ else
70
+ @purl.name
71
+ end
72
+ end
73
+
74
+ def namespaced_package_types
75
+ NAMESPACED_PACKAGE_TYPES
76
+ end
77
+
78
+ def encode_path_segment(str)
79
+ URI.encode_www_form_component(str)
80
+ end
81
+
82
+ attr_reader :purl
83
+ end
84
+
85
+ class PackageURL
86
+ def ecosystems_registry
87
+ EcosystemsURL.registry_name(self)
88
+ end
89
+
90
+ def ecosystems_api_url
91
+ EcosystemsURL.api_url(self)
92
+ end
93
+
94
+ def ecosystems_package_api_url
95
+ EcosystemsURL.package_api_url(self)
96
+ end
97
+
98
+ def ecosystems_version_api_url
99
+ EcosystemsURL.version_api_url(self)
100
+ end
101
+ end
102
+ end
data/lib/purl/lookup.rb CHANGED
@@ -99,17 +99,39 @@ module Purl
99
99
 
100
100
  private
101
101
 
102
- def make_request(uri)
103
- http = Net::HTTP.new(uri.host, uri.port)
104
- http.use_ssl = true
105
- http.read_timeout = @timeout
106
- http.open_timeout = @timeout
107
-
102
+ def http_for(uri)
103
+ key = "#{uri.host}:#{uri.port}"
104
+ @connections ||= {}
105
+ @connections[key] ||= begin
106
+ http = Net::HTTP.new(uri.host, uri.port)
107
+ http.use_ssl = (uri.scheme == "https")
108
+ http.read_timeout = @timeout
109
+ http.open_timeout = @timeout
110
+ http.start
111
+ http
112
+ end
113
+ end
114
+
115
+ def reset_connection(uri)
116
+ key = "#{uri.host}:#{uri.port}"
117
+ old = @connections&.delete(key)
118
+ old&.finish rescue nil
119
+ end
120
+
121
+ def close
122
+ return unless @connections
123
+ @connections.each_value { |http| http.finish rescue nil }
124
+ @connections.clear
125
+ end
126
+
127
+ def make_request(uri, retried: false)
128
+ http = http_for(uri)
129
+
108
130
  request = Net::HTTP::Get.new(uri)
109
131
  request["User-Agent"] = @user_agent
110
-
132
+
111
133
  response = http.request(request)
112
-
134
+
113
135
  case response.code.to_i
114
136
  when 200
115
137
  JSON.parse(response.body)
@@ -118,6 +140,10 @@ module Purl
118
140
  else
119
141
  raise LookupError, "API request failed with status #{response.code}"
120
142
  end
143
+ rescue IOError, Errno::EPIPE, Errno::ECONNRESET => e
144
+ raise LookupError, "Connection failed: #{e.message}" if retried
145
+ reset_connection(uri)
146
+ make_request(uri, retried: true)
121
147
  rescue JSON::ParserError => e
122
148
  raise LookupError, "Failed to parse API response: #{e.message}"
123
149
  rescue Timeout::Error, Net::OpenTimeout, Net::ReadTimeout => e
@@ -48,8 +48,14 @@ module Purl
48
48
  # @return [String, nil] subpath within the package
49
49
  attr_reader :subpath
50
50
 
51
+ # Fast-path decode: skip URI.decode_www_form_component when no encoding present
52
+ def self.fast_decode(str)
53
+ str.include?("%") || str.include?("+") ? URI.decode_www_form_component(str) : str
54
+ end
55
+
51
56
  VALID_TYPE_CHARS = /\A[a-zA-Z0-9\.\+\-]+\z/.freeze
52
57
  VALID_QUALIFIER_KEY_CHARS = /\A[a-zA-Z0-9\.\-_]+\z/.freeze
58
+ STARTS_WITH_DIGIT = /\A\d/.freeze
53
59
 
54
60
  # Create a new PackageURL instance
55
61
  #
@@ -133,7 +139,7 @@ module Purl
133
139
 
134
140
  unless subpath_clean.empty?
135
141
  # Decode each component separately to handle paths properly
136
- subpath_components = subpath_clean.split("/").map { |part| URI.decode_www_form_component(part) }
142
+ subpath_components = subpath_clean.split("/").map { |part| fast_decode(part) }
137
143
  subpath = subpath_components.join("/")
138
144
  end
139
145
  end
@@ -147,7 +153,7 @@ module Purl
147
153
  at_index = path_and_version_part.rindex("@")
148
154
  path_part = path_and_version_part[0...at_index]
149
155
  version_part = path_and_version_part[at_index + 1..-1]
150
- version = URI.decode_www_form_component(version_part) unless version_part.empty?
156
+ version = fast_decode(version_part) unless version_part.empty?
151
157
  else
152
158
  path_part = path_and_version_part
153
159
  end
@@ -161,7 +167,7 @@ module Purl
161
167
  raise MalformedUrlError, "PURL path cannot be empty" if path_components.empty? || path_components == [""]
162
168
 
163
169
  # First component is always the type
164
- type = URI.decode_www_form_component(path_components.shift)
170
+ type = fast_decode(path_components.shift)
165
171
  raise MalformedUrlError, "PURL must have a name component" if path_components.empty?
166
172
 
167
173
  # Handle empty name component (trailing slash case)
@@ -175,29 +181,29 @@ module Purl
175
181
  # All non-type components become namespace
176
182
  name = nil
177
183
  if path_components.length == 1
178
- namespace = URI.decode_www_form_component(path_components[0])
184
+ namespace = fast_decode(path_components[0])
179
185
  else
180
- namespace = path_components.map { |part| URI.decode_www_form_component(part) }.join("/")
186
+ namespace = path_components.map { |part| fast_decode(part) }.join("/")
181
187
  end
182
188
  end
183
189
  else
184
190
  # Normal parsing logic
185
191
  # For simple cases like gem/rails, there's just the name
186
- # For namespaced cases like npm/@babel/core, @babel is namespace, core is name
192
+ # For namespaced cases like npm/@babel/core, @babel is namespace, core is name
187
193
  if path_components.length == 1
188
194
  # Simple case: just type/name
189
- name = URI.decode_www_form_component(path_components[0])
195
+ name = fast_decode(path_components[0])
190
196
  namespace = nil
191
197
  else
192
198
  # Multiple components - assume last is name, others are namespace
193
- name = URI.decode_www_form_component(path_components.pop)
194
-
199
+ name = fast_decode(path_components.pop)
200
+
195
201
  # Everything else is namespace
196
202
  if path_components.length == 1
197
- namespace = URI.decode_www_form_component(path_components[0])
203
+ namespace = fast_decode(path_components[0])
198
204
  else
199
205
  # Multiple remaining components - treat as namespace joined together
200
- namespace = path_components.map { |part| URI.decode_www_form_component(part) }.join("/")
206
+ namespace = path_components.map { |part| fast_decode(part) }.join("/")
201
207
  end
202
208
  end
203
209
  end
@@ -223,6 +229,8 @@ module Purl
223
229
  # purl = PackageURL.new(type: "gem", name: "rails", version: "7.0.0")
224
230
  # puts purl.to_s # "pkg:gem/rails@7.0.0"
225
231
  def to_s
232
+ return @canonical if @canonical
233
+
226
234
  parts = ["pkg:", type.downcase]
227
235
 
228
236
  if namespace
@@ -236,11 +244,10 @@ module Purl
236
244
  parts << "/" << URI.encode_www_form_component(name)
237
245
 
238
246
  if version
239
- # Special handling for version encoding - don't encode colon in certain contexts
240
247
  encoded_version = case type&.downcase
241
248
  when "docker"
242
249
  # Docker versions with sha256: should not encode the colon
243
- version.gsub("sha256:", "sha256:")
250
+ version
244
251
  else
245
252
  URI.encode_www_form_component(version)
246
253
  end
@@ -268,7 +275,7 @@ module Purl
268
275
  parts << "?" << query_parts.join("&")
269
276
  end
270
277
 
271
- parts.join
278
+ @canonical = parts.join.freeze
272
279
  end
273
280
 
274
281
  # Convert the PackageURL to a hash representation
@@ -427,7 +434,7 @@ module Purl
427
434
  )
428
435
  end
429
436
 
430
- if type_str.match?(/\A\d/)
437
+ if type_str.match?(STARTS_WITH_DIGIT)
431
438
  raise InvalidTypeError.new(
432
439
  "Type cannot start with a number",
433
440
  component: :type,
@@ -446,18 +453,15 @@ module Purl
446
453
  name_str = name.to_s.strip
447
454
  raise InvalidNameError.new("Name cannot contain only whitespace", component: :name, value: name) if name_str.empty?
448
455
 
449
- # Apply type-specific normalization
450
- case @type&.downcase
456
+ # Apply type-specific normalization (@type is already lowercased)
457
+ case @type
451
458
  when "bitbucket", "github"
452
459
  name_str.downcase
453
460
  when "pypi"
454
- # PyPI names are case-insensitive and _ should be normalized to -
455
461
  name_str.downcase.gsub("_", "-")
456
462
  when "mlflow"
457
- # MLflow name normalization happens after qualifiers are validated
458
463
  name_str
459
464
  when "composer"
460
- # Composer names should be lowercase
461
465
  name_str.downcase
462
466
  else
463
467
  name_str
@@ -482,7 +486,7 @@ module Purl
482
486
 
483
487
  # Check that decoded namespace segments don't contain '/'
484
488
  namespace_str.split("/").each do |segment|
485
- decoded_segment = URI.decode_www_form_component(segment)
489
+ decoded_segment = self.class.fast_decode(segment)
486
490
  if decoded_segment.include?("/")
487
491
  raise InvalidNamespaceError.new(
488
492
  "Namespace segments cannot contain '/' after URL decoding",
@@ -493,12 +497,11 @@ module Purl
493
497
  end
494
498
  end
495
499
 
496
- # Apply type-specific normalization
497
- case @type&.downcase
500
+ # Apply type-specific normalization (@type is already lowercased)
501
+ case @type
498
502
  when "bitbucket", "github"
499
503
  namespace_str.downcase
500
504
  when "composer"
501
- # Composer namespaces should be lowercase
502
505
  namespace_str.downcase
503
506
  else
504
507
  namespace_str
@@ -511,10 +514,9 @@ module Purl
511
514
  version_str = version.to_s.strip
512
515
  return nil if version_str.empty?
513
516
 
514
- # Apply type-specific normalization
515
- case @type&.downcase
517
+ # Apply type-specific normalization (@type is already lowercased)
518
+ case @type
516
519
  when "huggingface"
517
- # HuggingFace versions (git commit hashes) should be lowercase
518
520
  version_str.downcase
519
521
  else
520
522
  version_str
@@ -574,35 +576,28 @@ module Purl
574
576
  end
575
577
 
576
578
  def apply_post_validation_normalization
577
- # MLflow names are case sensitive or insensitive based on repository per spec
578
- if @type&.downcase == "mlflow" && @qualifiers && @qualifiers["repository_url"] && @qualifiers["repository_url"].include?("azuredatabricks")
579
+ if @type == "mlflow" && @qualifiers && @qualifiers["repository_url"] && @qualifiers["repository_url"].include?("azuredatabricks")
579
580
  # Databricks MLflow is case insensitive - normalize to lowercase per spec
580
581
  @name = @name.downcase
581
582
  end
582
583
  # Other MLflow repositories (like Azure ML) are case sensitive - no normalization needed
583
584
  end
584
585
 
586
+ def self.namespace_required_types
587
+ @namespace_required_types ||= begin
588
+ config = Purl.load_types_config
589
+ types = config["types"].select { |_, v| v["namespace_requirement"] == "required" }.keys
590
+ Set.new(types).freeze
591
+ end
592
+ end
593
+
585
594
  def namespace_required_for_type?(type)
586
595
  return false unless type
587
-
588
- # Read from purl-types.json (included in gem)
589
- types_data = self.class.purl_types_data
590
- type_config = types_data.dig("types", type.downcase)
591
- return false unless type_config
592
-
593
- # Check namespace_requirement field
594
- type_config["namespace_requirement"] == "required"
596
+ self.class.namespace_required_types.include?(type.downcase)
595
597
  end
596
598
 
597
599
  def self.purl_types_data
598
- @purl_types_data ||= begin
599
- require "json"
600
- types_file = File.join(File.dirname(__FILE__), "..", "..", "purl-types.json")
601
- JSON.parse(File.read(types_file))
602
- rescue
603
- # Fallback to empty structure if file can't be read
604
- {"types" => {}}
605
- end
600
+ Purl.load_types_config
606
601
  end
607
602
 
608
603
  def self.parse_qualifiers(query_string)
@@ -7,10 +7,7 @@ module Purl
7
7
  # Load registry patterns from JSON configuration
8
8
  def self.load_registry_patterns
9
9
  @registry_patterns ||= begin
10
- # Load extended registry configs
11
- config_path = File.join(__dir__, "..", "..", "purl-types.json")
12
- require "json"
13
- config = JSON.parse(File.read(config_path))
10
+ config = Purl.load_types_config
14
11
  patterns = {}
15
12
 
16
13
  config["types"].each do |type, type_config|
@@ -44,9 +41,21 @@ module Purl
44
41
  end
45
42
  end
46
43
 
44
+ # Precompute domain-agnostic regex for from_url with type: hint
45
+ domain_agnostic_regex = nil
46
+ if reverse_regex
47
+ original_source = reverse_regex.source
48
+ if config["reverse_regex"].start_with?("/")
49
+ domain_agnostic_regex = Regexp.new("^https?://[^/]+" + config["reverse_regex"])
50
+ elsif original_source =~ /\^https?:\/\/[^\/]+(.+)$/
51
+ domain_agnostic_regex = Regexp.new("^https?://[^/]+" + $1)
52
+ end
53
+ end
54
+
47
55
  {
48
56
  base_url: config["base_url"] || (default_registry ? default_registry + config["path_template"]&.split('/:').first : nil),
49
57
  reverse_regex: reverse_regex,
58
+ domain_agnostic_regex: domain_agnostic_regex,
50
59
  pattern: build_generation_lambda(type, config, default_registry),
51
60
  reverse_parser: reverse_regex ? build_reverse_parser(type, config) : nil,
52
61
  uri_template: config["uri_template"] ? Addressable::Template.new(config["uri_template"]) : nil,
@@ -58,11 +67,7 @@ module Purl
58
67
 
59
68
  # Load types config (needed for accessing default_registry)
60
69
  def self.load_types_config
61
- @types_config ||= begin
62
- config_path = File.join(__dir__, "..", "..", "purl-types.json")
63
- require "json"
64
- JSON.parse(File.read(config_path))
65
- end
70
+ Purl.load_types_config
66
71
  end
67
72
 
68
73
  def self.build_generation_lambda(type, config, default_registry = nil)
@@ -294,8 +299,10 @@ module Purl
294
299
  new(purl).generate(base_url: base_url)
295
300
  end
296
301
 
302
+ SUPPORTED_TYPES = REGISTRY_PATTERNS.keys.sort.freeze
303
+
297
304
  def self.supported_types
298
- REGISTRY_PATTERNS.keys.sort
305
+ SUPPORTED_TYPES
299
306
  end
300
307
 
301
308
  def self.supports?(type)
@@ -307,39 +314,20 @@ module Purl
307
314
  if type
308
315
  normalized_type = type.to_s.downcase
309
316
  config = REGISTRY_PATTERNS[normalized_type]
310
-
311
- if config && config[:reverse_regex] && config[:reverse_parser]
312
- # Create a domain-agnostic version of the regex by replacing the base domain
313
- original_regex = config[:reverse_regex].source
314
-
315
- # For simplified JSON patterns that start with /, create domain-agnostic regex
316
- domain_agnostic_regex = nil
317
- if original_regex.start_with?("/")
318
- # Domain-agnostic pattern - match any domain with this path
319
- domain_agnostic_regex = Regexp.new("^https?://[^/]+" + original_regex)
320
- else
321
- # Legacy full regex pattern
322
- if original_regex =~ /\^https?:\/\/[^\/]+(.+)$/
323
- path_pattern = $1
324
- # Create domain-agnostic regex that matches any domain with the same path structure
325
- domain_agnostic_regex = Regexp.new("^https?://[^/]+" + path_pattern)
326
- end
327
- end
328
-
329
- if domain_agnostic_regex
330
- match = registry_url.match(domain_agnostic_regex)
331
- if match
332
- parsed_data = config[:reverse_parser].call(match)
333
- return PackageURL.new(
334
- type: parsed_data[:type],
335
- namespace: parsed_data[:namespace],
336
- name: parsed_data[:name],
337
- version: parsed_data[:version]
338
- )
339
- end
317
+
318
+ if config && config[:domain_agnostic_regex] && config[:reverse_parser]
319
+ match = registry_url.match(config[:domain_agnostic_regex])
320
+ if match
321
+ parsed_data = config[:reverse_parser].call(match)
322
+ return PackageURL.new(
323
+ type: parsed_data[:type],
324
+ namespace: parsed_data[:namespace],
325
+ name: parsed_data[:name],
326
+ version: parsed_data[:version]
327
+ )
340
328
  end
341
329
  end
342
-
330
+
343
331
  # If specified type didn't work, fall through to normal domain-matching logic
344
332
  end
345
333
 
@@ -368,12 +356,14 @@ module Purl
368
356
 
369
357
  raise UnsupportedTypeError.new(
370
358
  error_message,
371
- supported_types: REGISTRY_PATTERNS.keys.select { |k| REGISTRY_PATTERNS[k][:reverse_regex] }
359
+ supported_types: SUPPORTED_REVERSE_TYPES
372
360
  )
373
361
  end
374
362
 
363
+ SUPPORTED_REVERSE_TYPES = REGISTRY_PATTERNS.select { |_, config| config[:reverse_regex] }.keys.sort.freeze
364
+
375
365
  def self.supported_reverse_types
376
- REGISTRY_PATTERNS.select { |_, config| config[:reverse_regex] }.keys.sort
366
+ SUPPORTED_REVERSE_TYPES
377
367
  end
378
368
 
379
369
  def self.route_patterns_for(type)
data/lib/purl/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Purl
4
- VERSION = "1.7.0"
4
+ VERSION = "1.8.0"
5
5
  end
data/lib/purl.rb CHANGED
@@ -2,13 +2,6 @@
2
2
 
3
3
  require_relative "purl/version"
4
4
  require_relative "purl/errors"
5
- require_relative "purl/package_url"
6
- require_relative "purl/registry_url"
7
- require_relative "purl/download_url"
8
- require_relative "purl/lookup"
9
- require_relative "purl/lookup_formatter"
10
- require_relative "purl/advisory"
11
- require_relative "purl/advisory_formatter"
12
5
 
13
6
  # The main PURL (Package URL) module providing functionality to parse,
14
7
  # validate, and generate package URLs according to the PURL specification.
@@ -36,18 +29,49 @@ require_relative "purl/advisory_formatter"
36
29
  module Purl
37
30
  # Base error class for all PURL-related errors
38
31
  class Error < StandardError; end
39
-
32
+
33
+ # Deep-freeze a parsed JSON structure so callers don't need defensive dups
34
+ def self.deep_freeze(obj)
35
+ case obj
36
+ when Hash
37
+ obj.each_value { |v| deep_freeze(v) }
38
+ obj.freeze
39
+ when Array
40
+ obj.each { |v| deep_freeze(v) }
41
+ obj.freeze
42
+ when String
43
+ obj.freeze
44
+ end
45
+ obj
46
+ end
47
+
40
48
  # Load PURL types configuration from JSON file
41
49
  def self.load_types_config
42
50
  @types_config ||= begin
43
51
  config_path = File.join(__dir__, "..", "purl-types.json")
44
52
  require "json"
45
- JSON.parse(File.read(config_path))
53
+ deep_freeze(JSON.parse(File.read(config_path)))
46
54
  end
47
55
  end
56
+ end
57
+
58
+ require_relative "purl/package_url"
59
+ require_relative "purl/registry_url"
60
+ require_relative "purl/download_url"
61
+ require_relative "purl/ecosystems_url"
62
+ require_relative "purl/lookup"
63
+ require_relative "purl/lookup_formatter"
64
+ require_relative "purl/advisory"
65
+ require_relative "purl/advisory_formatter"
66
+
67
+ module Purl
48
68
 
49
69
  # Known PURL types loaded from JSON configuration
50
70
  KNOWN_TYPES = load_types_config["types"].keys.sort.freeze
71
+
72
+ # Set for O(1) lookups
73
+ require "set"
74
+ KNOWN_TYPES_SET = Set.new(KNOWN_TYPES).freeze
51
75
 
52
76
  # Convenience method for parsing PURL strings
53
77
  #
@@ -123,7 +147,7 @@ module Purl
123
147
  # Purl.known_type?("gem") # true
124
148
  # Purl.known_type?("unknown") # false
125
149
  def self.known_type?(type)
126
- KNOWN_TYPES.include?(type.to_s.downcase)
150
+ KNOWN_TYPES_SET.include?(type.to_s.downcase)
127
151
  end
128
152
 
129
153
  # Get comprehensive type information including registry support
@@ -144,12 +168,13 @@ module Purl
144
168
  # puts info[:description] # "Ruby gems from RubyGems.org"
145
169
  def self.type_info(type)
146
170
  normalized_type = type.to_s.downcase
171
+ config = type_config(normalized_type)
147
172
  {
148
173
  type: normalized_type,
149
174
  known: known_type?(normalized_type),
150
- description: type_description(normalized_type),
151
- default_registry: default_registry(normalized_type),
152
- examples: type_examples(normalized_type),
175
+ description: config ? config["description"] : nil,
176
+ default_registry: config ? config["default_registry"] : nil,
177
+ examples: config ? (config["examples"] || []) : [],
153
178
  registry_url_generation: RegistryURL.supports?(normalized_type),
154
179
  reverse_parsing: RegistryURL.supported_reverse_types.include?(normalized_type),
155
180
  download_url_generation: DownloadURL.supports?(normalized_type),
@@ -191,8 +216,8 @@ module Purl
191
216
  def self.type_config(type)
192
217
  config = load_types_config["types"][type.to_s.downcase]
193
218
  return nil unless config
194
-
195
- config.dup # Return a copy to prevent modification
219
+
220
+ config
196
221
  end
197
222
 
198
223
  # Get human-readable description for a type
data/purl-types.json CHANGED
@@ -222,6 +222,7 @@
222
222
  "golang": {
223
223
  "description": "Go packages",
224
224
  "default_registry": "https://pkg.go.dev",
225
+ "ecosystems_registry": "proxy.golang.org",
225
226
  "examples": [
226
227
  "pkg:golang/github.com/gorilla/context@234fd47e07d1004f0aed9c",
227
228
  "pkg:golang/google.golang.org/genproto#googleapis/api/annotations",
@@ -319,7 +320,8 @@
319
320
  "maven": {
320
321
  "description": "PURL type for Maven JARs and related artifacts.",
321
322
  "default_registry": "https://repo.maven.apache.org/maven2",
322
- "namespace_requirement": "required",
323
+ "ecosystems_registry": "repo1.maven.org",
324
+ "namespace_requirement": "required",
323
325
  "examples": [
324
326
  "pkg:maven/org.apache.commons/commons-lang3@3.12.0",
325
327
  "pkg:maven/junit/junit@4.13.2",
@@ -349,6 +351,7 @@
349
351
  "npm": {
350
352
  "description": "PURL type for npm packages.",
351
353
  "default_registry": "https://registry.npmjs.org",
354
+ "ecosystems_registry": "npmjs.org",
352
355
  "namespace_requirement": "optional",
353
356
  "examples": [
354
357
  "pkg:npm/@babel/core@7.20.0",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Nesbitt
@@ -46,6 +46,7 @@ files:
46
46
  - lib/purl/advisory.rb
47
47
  - lib/purl/advisory_formatter.rb
48
48
  - lib/purl/download_url.rb
49
+ - lib/purl/ecosystems_url.rb
49
50
  - lib/purl/errors.rb
50
51
  - lib/purl/lookup.rb
51
52
  - lib/purl/lookup_formatter.rb
@@ -75,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
76
  - !ruby/object:Gem::Version
76
77
  version: '0'
77
78
  requirements: []
78
- rubygems_version: 4.0.1
79
+ rubygems_version: 4.0.6
79
80
  specification_version: 4
80
81
  summary: Parse and convert package urls (purls)
81
82
  test_files: []