purl 1.7.1 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fd8e9d9c9a3cafa22edc44525dccb49e48a3d845d3449e34af58503fe073f108
4
- data.tar.gz: f53708d2eef113aa44d3d5518f6258785b475d7fa92c762110693543efd3dadf
3
+ metadata.gz: e6632d8fa11800fb9a4b376c1555505ba87047f98de52a42103c27c2d2b46e2a
4
+ data.tar.gz: c248313079783e3d5ac6c9e7dfc8663067671d4a19356757cd493e4fb23c2d8f
5
5
  SHA512:
6
- metadata.gz: e2c169dbd6baf5749e733fb689eb7ceac90f0f8e6570e79b1bf6a85968bab6cb95da9be8ac97662ea6b5efb5f1f2841250daa8c84bfeb861aac71af1cc05e02d
7
- data.tar.gz: 3bf9997d1529682639fe3f9d465688cd109178cd0902c57b08e3236f7c6d9657b9b94b8c80127c22153d3f05d5ea4cbb77219c615284ddc2eb775efdd6ea5632
6
+ metadata.gz: fc9b6cdde9d6efe15a93cd6760ec3c88aa8be1594d962682c9c27642c72408ebd242d2fa4cbbfc9f87e72e135f337bac8fbc278c69f991f4cbf7d5b3388a4b1b
7
+ data.tar.gz: db4bffcde1032b421fba9b68d717bad523482b2d457b304c997606d538bee59bbfbe185bc9f456c8e72a4150ed74f5bd80d38d757bfeba41ae533ff5260c7ffb
data/Rakefile CHANGED
@@ -790,12 +790,146 @@ namespace :benchmark do
790
790
  puts "✅ Registry URL benchmarks completed!"
791
791
  end
792
792
 
793
+ desc "Benchmark hot paths (to_s, equality, known_type?, type_info, supported_types)"
794
+ task :hotpaths do
795
+ require "benchmark"
796
+ require_relative "lib/purl"
797
+
798
+ iterations = 10_000
799
+
800
+ sample_purls = [
801
+ "pkg:gem/rails@7.0.0",
802
+ "pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js",
803
+ "pkg:maven/org.apache.commons/commons-lang3@3.12.0?classifier=sources",
804
+ "pkg:cargo/rand@0.7.2",
805
+ "pkg:pypi/django@4.0.0",
806
+ "pkg:docker/nginx@sha256:abc123def",
807
+ "pkg:golang/github.com/gorilla/mux@1.8.0",
808
+ ]
809
+
810
+ parsed = sample_purls.map { |p| Purl.parse(p) }
811
+
812
+ puts "Hot Path Benchmarks (#{iterations} iterations)"
813
+ puts "=" * 50
814
+
815
+ Benchmark.bm(28) do |x|
816
+ x.report("to_s") do
817
+ iterations.times { parsed.each(&:to_s) }
818
+ end
819
+
820
+ x.report("== (equal)") do
821
+ pairs = parsed.map { |p| [p, Purl.parse(p.to_s)] }
822
+ iterations.times { pairs.each { |a, b| a == b } }
823
+ end
824
+
825
+ x.report("hash") do
826
+ iterations.times { parsed.each(&:hash) }
827
+ end
828
+
829
+ types = %w[gem npm maven cargo pypi docker golang unknown fake_type]
830
+ x.report("known_type?") do
831
+ iterations.times { types.each { |t| Purl.known_type?(t) } }
832
+ end
833
+
834
+ info_types = %w[gem npm maven cargo pypi]
835
+ x.report("type_info") do
836
+ 1_000.times { info_types.each { |t| Purl.type_info(t) } }
837
+ end
838
+
839
+ x.report("all_type_info") do
840
+ 100.times { Purl.all_type_info }
841
+ end
842
+
843
+ x.report("download_supported_types") do
844
+ iterations.times { Purl::DownloadURL.supported_types }
845
+ end
846
+
847
+ x.report("supported_reverse_types") do
848
+ iterations.times { Purl::RegistryURL.supported_reverse_types }
849
+ end
850
+
851
+ x.report("registry_supported_types") do
852
+ iterations.times { Purl::RegistryURL.supported_types }
853
+ end
854
+
855
+ x.report("parse (simple)") do
856
+ iterations.times { Purl.parse("pkg:gem/rails@7.0.0") }
857
+ end
858
+
859
+ x.report("parse (complex)") do
860
+ iterations.times { Purl.parse("pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js") }
861
+ end
862
+
863
+ x.report("parse (namespaced)") do
864
+ iterations.times { Purl.parse("pkg:maven/org.apache.commons/commons-lang3@3.12.0") }
865
+ end
866
+
867
+ x.report("from_url (domain match)") do
868
+ 1_000.times { Purl.from_registry_url("https://rubygems.org/gems/rails") }
869
+ end
870
+
871
+ x.report("from_url (type hint)") do
872
+ 1_000.times { Purl.from_registry_url("https://gems.internal.com/gems/rails", type: "gem") }
873
+ end
874
+ end
875
+ end
876
+
877
+ desc "Benchmark memory allocations for hot paths"
878
+ task :memory do
879
+ require "memory_profiler"
880
+ require_relative "lib/purl"
881
+
882
+ sample_purls = [
883
+ "pkg:gem/rails@7.0.0",
884
+ "pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js",
885
+ "pkg:maven/org.apache.commons/commons-lang3@3.12.0?classifier=sources",
886
+ "pkg:cargo/rand@0.7.2",
887
+ "pkg:pypi/django@4.0.0",
888
+ "pkg:docker/nginx@sha256:abc123def",
889
+ "pkg:golang/github.com/gorilla/mux@1.8.0",
890
+ ]
891
+
892
+ parsed = sample_purls.map { |p| Purl.parse(p) }
893
+
894
+ benchmarks = {
895
+ "parse (simple)" => -> { Purl.parse("pkg:gem/rails@7.0.0") },
896
+ "parse (complex)" => -> { Purl.parse("pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js") },
897
+ "parse (namespaced)" => -> { Purl.parse("pkg:maven/org.apache.commons/commons-lang3@3.12.0") },
898
+ "to_s" => -> { parsed.each(&:to_s) },
899
+ "to_s (cold)" => -> { sample_purls.map { |p| Purl.parse(p) }.each(&:to_s) },
900
+ "== (equal)" => -> { parsed.each_cons(2) { |a, b| a == b } },
901
+ "known_type?" => -> { %w[gem npm maven cargo pypi].each { |t| Purl.known_type?(t) } },
902
+ "type_info" => -> { %w[gem npm maven].each { |t| Purl.type_info(t) } },
903
+ "supported_types" => -> { Purl::RegistryURL.supported_types },
904
+ "supported_reverse_types" => -> { Purl::RegistryURL.supported_reverse_types },
905
+ "download_supported_types" => -> { Purl::DownloadURL.supported_types },
906
+ "from_url (domain match)" => -> { Purl.from_registry_url("https://rubygems.org/gems/rails") },
907
+ "from_url (type hint)" => -> { Purl.from_registry_url("https://gems.internal.com/gems/rails", type: "gem") },
908
+ }
909
+
910
+ puts "Memory Allocation Benchmarks"
911
+ puts "=" * 70
912
+ printf "%-30s %10s %10s %10s\n", "Benchmark", "Objects", "Memsize", "Strings"
913
+ puts "-" * 70
914
+
915
+ benchmarks.each do |name, block|
916
+ report = MemoryProfiler.report { 100.times { block.call } }
917
+ printf "%-30s %10d %10d %10d\n",
918
+ name,
919
+ report.total_allocated,
920
+ report.total_allocated_memsize,
921
+ report.strings_allocated.size
922
+ end
923
+ end
924
+
793
925
  desc "Run all benchmarks"
794
- task all: [:parse, :types, :registry] do
926
+ task all: [:parse, :types, :registry, :hotpaths] do
795
927
  puts
796
- puts "🎉 All benchmarks completed!"
928
+ puts "All benchmarks completed!"
797
929
  puts " Use 'rake benchmark:parse' for parsing performance"
798
- puts " Use 'rake benchmark:types' for type comparison"
930
+ puts " Use 'rake benchmark:types' for type comparison"
799
931
  puts " Use 'rake benchmark:registry' for URL generation"
932
+ puts " Use 'rake benchmark:hotpaths' for memoization and lookup paths"
933
+ puts " Use 'rake benchmark:memory' for memory allocations"
800
934
  end
801
935
  end
data/lib/purl/download_url.rb CHANGED
@@ -159,12 +159,10 @@ module Purl
159
159
  NAMESPACE_REQUIRED_TYPES = %w[maven elm github gitlab bitbucket luarocks swift].freeze
160
160
 
161
161
  def self.supported_types
162
- DOWNLOAD_PATTERNS.keys.select do |k|
162
+ @supported_types ||= DOWNLOAD_PATTERNS.keys.select do |k|
163
163
  pattern = DOWNLOAD_PATTERNS[k]
164
- # Skip types with notes (they're not really supported)
165
164
  next false if pattern[:note]
166
165
 
167
- # Test with appropriate namespace for types that need it
168
166
  namespace = if NAMESPACE_REQUIRED_TYPES.include?(k)
169
167
  k == "swift" ? "github.com/test" : "test"
170
168
  end
@@ -174,7 +172,7 @@ module Purl
174
172
  rescue
175
173
  false
176
174
  end
177
- end.sort
175
+ end.sort.freeze
178
176
  end
179
177
 
180
178
  def self.supports?(type)
data/lib/purl/ecosystems_url.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  module Purl
4
4
  class EcosystemsURL
5
5
  API_BASE = "https://packages.ecosyste.ms/api/v1"
6
+ NAMESPACED_PACKAGE_TYPES = %w[npm composer maven golang swift elm clojars].freeze
6
7
 
7
8
  def self.registry_name(purl)
8
9
  new(purl).registry_name
@@ -71,7 +72,7 @@ module Purl
71
72
  end
72
73
 
73
74
  def namespaced_package_types
74
- %w[npm composer maven golang swift elm clojars]
75
+ NAMESPACED_PACKAGE_TYPES
75
76
  end
76
77
 
77
78
  def encode_path_segment(str)
data/lib/purl/lookup.rb CHANGED
@@ -99,17 +99,39 @@ module Purl
99
99
 
100
100
  private
101
101
 
102
- def make_request(uri)
103
- http = Net::HTTP.new(uri.host, uri.port)
104
- http.use_ssl = true
105
- http.read_timeout = @timeout
106
- http.open_timeout = @timeout
107
-
102
+ def http_for(uri)
103
+ key = "#{uri.host}:#{uri.port}"
104
+ @connections ||= {}
105
+ @connections[key] ||= begin
106
+ http = Net::HTTP.new(uri.host, uri.port)
107
+ http.use_ssl = (uri.scheme == "https")
108
+ http.read_timeout = @timeout
109
+ http.open_timeout = @timeout
110
+ http.start
111
+ http
112
+ end
113
+ end
114
+
115
+ def reset_connection(uri)
116
+ key = "#{uri.host}:#{uri.port}"
117
+ old = @connections&.delete(key)
118
+ old&.finish rescue nil
119
+ end
120
+
121
+ def close
122
+ return unless @connections
123
+ @connections.each_value { |http| http.finish rescue nil }
124
+ @connections.clear
125
+ end
126
+
127
+ def make_request(uri, retried: false)
128
+ http = http_for(uri)
129
+
108
130
  request = Net::HTTP::Get.new(uri)
109
131
  request["User-Agent"] = @user_agent
110
-
132
+
111
133
  response = http.request(request)
112
-
134
+
113
135
  case response.code.to_i
114
136
  when 200
115
137
  JSON.parse(response.body)
@@ -118,6 +140,10 @@ module Purl
118
140
  else
119
141
  raise LookupError, "API request failed with status #{response.code}"
120
142
  end
143
+ rescue IOError, Errno::EPIPE, Errno::ECONNRESET => e
144
+ raise LookupError, "Connection failed: #{e.message}" if retried
145
+ reset_connection(uri)
146
+ make_request(uri, retried: true)
121
147
  rescue JSON::ParserError => e
122
148
  raise LookupError, "Failed to parse API response: #{e.message}"
123
149
  rescue Timeout::Error, Net::OpenTimeout, Net::ReadTimeout => e
data/lib/purl/package_url.rb CHANGED
@@ -48,8 +48,14 @@ module Purl
48
48
  # @return [String, nil] subpath within the package
49
49
  attr_reader :subpath
50
50
 
51
+ # Fast-path decode: skip URI.decode_www_form_component when no encoding present
52
+ def self.fast_decode(str)
53
+ str.include?("%") || str.include?("+") ? URI.decode_www_form_component(str) : str
54
+ end
55
+
51
56
  VALID_TYPE_CHARS = /\A[a-zA-Z0-9\.\+\-]+\z/.freeze
52
57
  VALID_QUALIFIER_KEY_CHARS = /\A[a-zA-Z0-9\.\-_]+\z/.freeze
58
+ STARTS_WITH_DIGIT = /\A\d/.freeze
53
59
 
54
60
  # Create a new PackageURL instance
55
61
  #
@@ -133,7 +139,7 @@ module Purl
133
139
 
134
140
  unless subpath_clean.empty?
135
141
  # Decode each component separately to handle paths properly
136
- subpath_components = subpath_clean.split("/").map { |part| URI.decode_www_form_component(part) }
142
+ subpath_components = subpath_clean.split("/").map { |part| fast_decode(part) }
137
143
  subpath = subpath_components.join("/")
138
144
  end
139
145
  end
@@ -147,7 +153,7 @@ module Purl
147
153
  at_index = path_and_version_part.rindex("@")
148
154
  path_part = path_and_version_part[0...at_index]
149
155
  version_part = path_and_version_part[at_index + 1..-1]
150
- version = URI.decode_www_form_component(version_part) unless version_part.empty?
156
+ version = fast_decode(version_part) unless version_part.empty?
151
157
  else
152
158
  path_part = path_and_version_part
153
159
  end
@@ -161,7 +167,7 @@ module Purl
161
167
  raise MalformedUrlError, "PURL path cannot be empty" if path_components.empty? || path_components == [""]
162
168
 
163
169
  # First component is always the type
164
- type = URI.decode_www_form_component(path_components.shift)
170
+ type = fast_decode(path_components.shift)
165
171
  raise MalformedUrlError, "PURL must have a name component" if path_components.empty?
166
172
 
167
173
  # Handle empty name component (trailing slash case)
@@ -175,29 +181,29 @@ module Purl
175
181
  # All non-type components become namespace
176
182
  name = nil
177
183
  if path_components.length == 1
178
- namespace = URI.decode_www_form_component(path_components[0])
184
+ namespace = fast_decode(path_components[0])
179
185
  else
180
- namespace = path_components.map { |part| URI.decode_www_form_component(part) }.join("/")
186
+ namespace = path_components.map { |part| fast_decode(part) }.join("/")
181
187
  end
182
188
  end
183
189
  else
184
190
  # Normal parsing logic
185
191
  # For simple cases like gem/rails, there's just the name
186
- # For namespaced cases like npm/@babel/core, @babel is namespace, core is name
192
+ # For namespaced cases like npm/@babel/core, @babel is namespace, core is name
187
193
  if path_components.length == 1
188
194
  # Simple case: just type/name
189
- name = URI.decode_www_form_component(path_components[0])
195
+ name = fast_decode(path_components[0])
190
196
  namespace = nil
191
197
  else
192
198
  # Multiple components - assume last is name, others are namespace
193
- name = URI.decode_www_form_component(path_components.pop)
194
-
199
+ name = fast_decode(path_components.pop)
200
+
195
201
  # Everything else is namespace
196
202
  if path_components.length == 1
197
- namespace = URI.decode_www_form_component(path_components[0])
203
+ namespace = fast_decode(path_components[0])
198
204
  else
199
205
  # Multiple remaining components - treat as namespace joined together
200
- namespace = path_components.map { |part| URI.decode_www_form_component(part) }.join("/")
206
+ namespace = path_components.map { |part| fast_decode(part) }.join("/")
201
207
  end
202
208
  end
203
209
  end
@@ -223,6 +229,8 @@ module Purl
223
229
  # purl = PackageURL.new(type: "gem", name: "rails", version: "7.0.0")
224
230
  # puts purl.to_s # "pkg:gem/rails@7.0.0"
225
231
  def to_s
232
+ return @canonical if @canonical
233
+
226
234
  parts = ["pkg:", type.downcase]
227
235
 
228
236
  if namespace
@@ -236,11 +244,10 @@ module Purl
236
244
  parts << "/" << URI.encode_www_form_component(name)
237
245
 
238
246
  if version
239
- # Special handling for version encoding - don't encode colon in certain contexts
240
247
  encoded_version = case type&.downcase
241
248
  when "docker"
242
249
  # Docker versions with sha256: should not encode the colon
243
- version.gsub("sha256:", "sha256:")
250
+ version
244
251
  else
245
252
  URI.encode_www_form_component(version)
246
253
  end
@@ -268,7 +275,7 @@ module Purl
268
275
  parts << "?" << query_parts.join("&")
269
276
  end
270
277
 
271
- parts.join
278
+ @canonical = parts.join.freeze
272
279
  end
273
280
 
274
281
  # Convert the PackageURL to a hash representation
@@ -427,7 +434,7 @@ module Purl
427
434
  )
428
435
  end
429
436
 
430
- if type_str.match?(/\A\d/)
437
+ if type_str.match?(STARTS_WITH_DIGIT)
431
438
  raise InvalidTypeError.new(
432
439
  "Type cannot start with a number",
433
440
  component: :type,
@@ -446,18 +453,15 @@ module Purl
446
453
  name_str = name.to_s.strip
447
454
  raise InvalidNameError.new("Name cannot contain only whitespace", component: :name, value: name) if name_str.empty?
448
455
 
449
- # Apply type-specific normalization
450
- case @type&.downcase
456
+ # Apply type-specific normalization (@type is already lowercased)
457
+ case @type
451
458
  when "bitbucket", "github"
452
459
  name_str.downcase
453
460
  when "pypi"
454
- # PyPI names are case-insensitive and _ should be normalized to -
455
461
  name_str.downcase.gsub("_", "-")
456
462
  when "mlflow"
457
- # MLflow name normalization happens after qualifiers are validated
458
463
  name_str
459
464
  when "composer"
460
- # Composer names should be lowercase
461
465
  name_str.downcase
462
466
  else
463
467
  name_str
@@ -482,7 +486,7 @@ module Purl
482
486
 
483
487
  # Check that decoded namespace segments don't contain '/'
484
488
  namespace_str.split("/").each do |segment|
485
- decoded_segment = URI.decode_www_form_component(segment)
489
+ decoded_segment = self.class.fast_decode(segment)
486
490
  if decoded_segment.include?("/")
487
491
  raise InvalidNamespaceError.new(
488
492
  "Namespace segments cannot contain '/' after URL decoding",
@@ -493,12 +497,11 @@ module Purl
493
497
  end
494
498
  end
495
499
 
496
- # Apply type-specific normalization
497
- case @type&.downcase
500
+ # Apply type-specific normalization (@type is already lowercased)
501
+ case @type
498
502
  when "bitbucket", "github"
499
503
  namespace_str.downcase
500
504
  when "composer"
501
- # Composer namespaces should be lowercase
502
505
  namespace_str.downcase
503
506
  else
504
507
  namespace_str
@@ -511,10 +514,9 @@ module Purl
511
514
  version_str = version.to_s.strip
512
515
  return nil if version_str.empty?
513
516
 
514
- # Apply type-specific normalization
515
- case @type&.downcase
517
+ # Apply type-specific normalization (@type is already lowercased)
518
+ case @type
516
519
  when "huggingface"
517
- # HuggingFace versions (git commit hashes) should be lowercase
518
520
  version_str.downcase
519
521
  else
520
522
  version_str
@@ -574,35 +576,28 @@ module Purl
574
576
  end
575
577
 
576
578
  def apply_post_validation_normalization
577
- # MLflow names are case sensitive or insensitive based on repository per spec
578
- if @type&.downcase == "mlflow" && @qualifiers && @qualifiers["repository_url"] && @qualifiers["repository_url"].include?("azuredatabricks")
579
+ if @type == "mlflow" && @qualifiers && @qualifiers["repository_url"] && @qualifiers["repository_url"].include?("azuredatabricks")
579
580
  # Databricks MLflow is case insensitive - normalize to lowercase per spec
580
581
  @name = @name.downcase
581
582
  end
582
583
  # Other MLflow repositories (like Azure ML) are case sensitive - no normalization needed
583
584
  end
584
585
 
586
+ def self.namespace_required_types
587
+ @namespace_required_types ||= begin
588
+ config = Purl.load_types_config
589
+ types = config["types"].select { |_, v| v["namespace_requirement"] == "required" }.keys
590
+ Set.new(types).freeze
591
+ end
592
+ end
593
+
585
594
  def namespace_required_for_type?(type)
586
595
  return false unless type
587
-
588
- # Read from purl-types.json (included in gem)
589
- types_data = self.class.purl_types_data
590
- type_config = types_data.dig("types", type.downcase)
591
- return false unless type_config
592
-
593
- # Check namespace_requirement field
594
- type_config["namespace_requirement"] == "required"
596
+ self.class.namespace_required_types.include?(type.downcase)
595
597
  end
596
598
 
597
599
  def self.purl_types_data
598
- @purl_types_data ||= begin
599
- require "json"
600
- types_file = File.join(File.dirname(__FILE__), "..", "..", "purl-types.json")
601
- JSON.parse(File.read(types_file))
602
- rescue
603
- # Fallback to empty structure if file can't be read
604
- {"types" => {}}
605
- end
600
+ Purl.load_types_config
606
601
  end
607
602
 
608
603
  def self.parse_qualifiers(query_string)
data/lib/purl/registry_url.rb CHANGED
@@ -7,10 +7,7 @@ module Purl
7
7
  # Load registry patterns from JSON configuration
8
8
  def self.load_registry_patterns
9
9
  @registry_patterns ||= begin
10
- # Load extended registry configs
11
- config_path = File.join(__dir__, "..", "..", "purl-types.json")
12
- require "json"
13
- config = JSON.parse(File.read(config_path))
10
+ config = Purl.load_types_config
14
11
  patterns = {}
15
12
 
16
13
  config["types"].each do |type, type_config|
@@ -44,9 +41,21 @@ module Purl
44
41
  end
45
42
  end
46
43
 
44
+ # Precompute domain-agnostic regex for from_url with type: hint
45
+ domain_agnostic_regex = nil
46
+ if reverse_regex
47
+ original_source = reverse_regex.source
48
+ if config["reverse_regex"].start_with?("/")
49
+ domain_agnostic_regex = Regexp.new("^https?://[^/]+" + config["reverse_regex"])
50
+ elsif original_source =~ /\^https?:\/\/[^\/]+(.+)$/
51
+ domain_agnostic_regex = Regexp.new("^https?://[^/]+" + $1)
52
+ end
53
+ end
54
+
47
55
  {
48
56
  base_url: config["base_url"] || (default_registry ? default_registry + config["path_template"]&.split('/:').first : nil),
49
57
  reverse_regex: reverse_regex,
58
+ domain_agnostic_regex: domain_agnostic_regex,
50
59
  pattern: build_generation_lambda(type, config, default_registry),
51
60
  reverse_parser: reverse_regex ? build_reverse_parser(type, config) : nil,
52
61
  uri_template: config["uri_template"] ? Addressable::Template.new(config["uri_template"]) : nil,
@@ -58,11 +67,7 @@ module Purl
58
67
 
59
68
  # Load types config (needed for accessing default_registry)
60
69
  def self.load_types_config
61
- @types_config ||= begin
62
- config_path = File.join(__dir__, "..", "..", "purl-types.json")
63
- require "json"
64
- JSON.parse(File.read(config_path))
65
- end
70
+ Purl.load_types_config
66
71
  end
67
72
 
68
73
  def self.build_generation_lambda(type, config, default_registry = nil)
@@ -294,8 +299,10 @@ module Purl
294
299
  new(purl).generate(base_url: base_url)
295
300
  end
296
301
 
302
+ SUPPORTED_TYPES = REGISTRY_PATTERNS.keys.sort.freeze
303
+
297
304
  def self.supported_types
298
- REGISTRY_PATTERNS.keys.sort
305
+ SUPPORTED_TYPES
299
306
  end
300
307
 
301
308
  def self.supports?(type)
@@ -307,39 +314,20 @@ module Purl
307
314
  if type
308
315
  normalized_type = type.to_s.downcase
309
316
  config = REGISTRY_PATTERNS[normalized_type]
310
-
311
- if config && config[:reverse_regex] && config[:reverse_parser]
312
- # Create a domain-agnostic version of the regex by replacing the base domain
313
- original_regex = config[:reverse_regex].source
314
-
315
- # For simplified JSON patterns that start with /, create domain-agnostic regex
316
- domain_agnostic_regex = nil
317
- if original_regex.start_with?("/")
318
- # Domain-agnostic pattern - match any domain with this path
319
- domain_agnostic_regex = Regexp.new("^https?://[^/]+" + original_regex)
320
- else
321
- # Legacy full regex pattern
322
- if original_regex =~ /\^https?:\/\/[^\/]+(.+)$/
323
- path_pattern = $1
324
- # Create domain-agnostic regex that matches any domain with the same path structure
325
- domain_agnostic_regex = Regexp.new("^https?://[^/]+" + path_pattern)
326
- end
327
- end
328
-
329
- if domain_agnostic_regex
330
- match = registry_url.match(domain_agnostic_regex)
331
- if match
332
- parsed_data = config[:reverse_parser].call(match)
333
- return PackageURL.new(
334
- type: parsed_data[:type],
335
- namespace: parsed_data[:namespace],
336
- name: parsed_data[:name],
337
- version: parsed_data[:version]
338
- )
339
- end
317
+
318
+ if config && config[:domain_agnostic_regex] && config[:reverse_parser]
319
+ match = registry_url.match(config[:domain_agnostic_regex])
320
+ if match
321
+ parsed_data = config[:reverse_parser].call(match)
322
+ return PackageURL.new(
323
+ type: parsed_data[:type],
324
+ namespace: parsed_data[:namespace],
325
+ name: parsed_data[:name],
326
+ version: parsed_data[:version]
327
+ )
340
328
  end
341
329
  end
342
-
330
+
343
331
  # If specified type didn't work, fall through to normal domain-matching logic
344
332
  end
345
333
 
@@ -368,12 +356,14 @@ module Purl
368
356
 
369
357
  raise UnsupportedTypeError.new(
370
358
  error_message,
371
- supported_types: REGISTRY_PATTERNS.keys.select { |k| REGISTRY_PATTERNS[k][:reverse_regex] }
359
+ supported_types: SUPPORTED_REVERSE_TYPES
372
360
  )
373
361
  end
374
362
 
363
+ SUPPORTED_REVERSE_TYPES = REGISTRY_PATTERNS.select { |_, config| config[:reverse_regex] }.keys.sort.freeze
364
+
375
365
  def self.supported_reverse_types
376
- REGISTRY_PATTERNS.select { |_, config| config[:reverse_regex] }.keys.sort
366
+ SUPPORTED_REVERSE_TYPES
377
367
  end
378
368
 
379
369
  def self.route_patterns_for(type)
data/lib/purl/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Purl
4
- VERSION = "1.7.1"
4
+ VERSION = "1.8.0"
5
5
  end
data/lib/purl.rb CHANGED
@@ -2,14 +2,6 @@
2
2
 
3
3
  require_relative "purl/version"
4
4
  require_relative "purl/errors"
5
- require_relative "purl/package_url"
6
- require_relative "purl/registry_url"
7
- require_relative "purl/download_url"
8
- require_relative "purl/ecosystems_url"
9
- require_relative "purl/lookup"
10
- require_relative "purl/lookup_formatter"
11
- require_relative "purl/advisory"
12
- require_relative "purl/advisory_formatter"
13
5
 
14
6
  # The main PURL (Package URL) module providing functionality to parse,
15
7
  # validate, and generate package URLs according to the PURL specification.
@@ -37,18 +29,49 @@ require_relative "purl/advisory_formatter"
37
29
  module Purl
38
30
  # Base error class for all PURL-related errors
39
31
  class Error < StandardError; end
40
-
32
+
33
+ # Deep-freeze a parsed JSON structure so callers don't need defensive dups
34
+ def self.deep_freeze(obj)
35
+ case obj
36
+ when Hash
37
+ obj.each_value { |v| deep_freeze(v) }
38
+ obj.freeze
39
+ when Array
40
+ obj.each { |v| deep_freeze(v) }
41
+ obj.freeze
42
+ when String
43
+ obj.freeze
44
+ end
45
+ obj
46
+ end
47
+
41
48
  # Load PURL types configuration from JSON file
42
49
  def self.load_types_config
43
50
  @types_config ||= begin
44
51
  config_path = File.join(__dir__, "..", "purl-types.json")
45
52
  require "json"
46
- JSON.parse(File.read(config_path))
53
+ deep_freeze(JSON.parse(File.read(config_path)))
47
54
  end
48
55
  end
56
+ end
57
+
58
+ require_relative "purl/package_url"
59
+ require_relative "purl/registry_url"
60
+ require_relative "purl/download_url"
61
+ require_relative "purl/ecosystems_url"
62
+ require_relative "purl/lookup"
63
+ require_relative "purl/lookup_formatter"
64
+ require_relative "purl/advisory"
65
+ require_relative "purl/advisory_formatter"
66
+
67
+ module Purl
49
68
 
50
69
  # Known PURL types loaded from JSON configuration
51
70
  KNOWN_TYPES = load_types_config["types"].keys.sort.freeze
71
+
72
+ # Set for O(1) lookups
73
+ require "set"
74
+ KNOWN_TYPES_SET = Set.new(KNOWN_TYPES).freeze
52
75
 
53
76
  # Convenience method for parsing PURL strings
54
77
  #
@@ -124,7 +147,7 @@ module Purl
124
147
  # Purl.known_type?("gem") # true
125
148
  # Purl.known_type?("unknown") # false
126
149
  def self.known_type?(type)
127
- KNOWN_TYPES.include?(type.to_s.downcase)
150
+ KNOWN_TYPES_SET.include?(type.to_s.downcase)
128
151
  end
129
152
 
130
153
  # Get comprehensive type information including registry support
@@ -145,12 +168,13 @@ module Purl
145
168
  # puts info[:description] # "Ruby gems from RubyGems.org"
146
169
  def self.type_info(type)
147
170
  normalized_type = type.to_s.downcase
171
+ config = type_config(normalized_type)
148
172
  {
149
173
  type: normalized_type,
150
174
  known: known_type?(normalized_type),
151
- description: type_description(normalized_type),
152
- default_registry: default_registry(normalized_type),
153
- examples: type_examples(normalized_type),
175
+ description: config ? config["description"] : nil,
176
+ default_registry: config ? config["default_registry"] : nil,
177
+ examples: config ? (config["examples"] || []) : [],
154
178
  registry_url_generation: RegistryURL.supports?(normalized_type),
155
179
  reverse_parsing: RegistryURL.supported_reverse_types.include?(normalized_type),
156
180
  download_url_generation: DownloadURL.supports?(normalized_type),
@@ -192,8 +216,8 @@ module Purl
192
216
  def self.type_config(type)
193
217
  config = load_types_config["types"][type.to_s.downcase]
194
218
  return nil unless config
195
-
196
- config.dup # Return a copy to prevent modification
219
+
220
+ config
197
221
  end
198
222
 
199
223
  # Get human-readable description for a type
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: purl
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.1
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Nesbitt
@@ -76,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
76
76
  - !ruby/object:Gem::Version
77
77
  version: '0'
78
78
  requirements: []
79
- rubygems_version: 4.0.1
79
+ rubygems_version: 4.0.6
80
80
  specification_version: 4
81
81
  summary: Parse and convert package urls (purls)
82
82
  test_files: []