purl 1.7.1 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +137 -3
- data/lib/purl/download_url.rb +2 -4
- data/lib/purl/ecosystems_url.rb +2 -1
- data/lib/purl/lookup.rb +34 -8
- data/lib/purl/package_url.rb +40 -45
- data/lib/purl/registry_url.rb +33 -43
- data/lib/purl/version.rb +1 -1
- data/lib/purl.rb +40 -16
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e6632d8fa11800fb9a4b376c1555505ba87047f98de52a42103c27c2d2b46e2a
|
|
4
|
+
data.tar.gz: c248313079783e3d5ac6c9e7dfc8663067671d4a19356757cd493e4fb23c2d8f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: fc9b6cdde9d6efe15a93cd6760ec3c88aa8be1594d962682c9c27642c72408ebd242d2fa4cbbfc9f87e72e135f337bac8fbc278c69f991f4cbf7d5b3388a4b1b
|
|
7
|
+
data.tar.gz: db4bffcde1032b421fba9b68d717bad523482b2d457b304c997606d538bee59bbfbe185bc9f456c8e72a4150ed74f5bd80d38d757bfeba41ae533ff5260c7ffb
|
data/Rakefile
CHANGED
|
@@ -790,12 +790,146 @@ namespace :benchmark do
|
|
|
790
790
|
puts "✅ Registry URL benchmarks completed!"
|
|
791
791
|
end
|
|
792
792
|
|
|
793
|
+
desc "Benchmark hot paths (to_s, equality, known_type?, type_info, supported_types)"
|
|
794
|
+
task :hotpaths do
|
|
795
|
+
require "benchmark"
|
|
796
|
+
require_relative "lib/purl"
|
|
797
|
+
|
|
798
|
+
iterations = 10_000
|
|
799
|
+
|
|
800
|
+
sample_purls = [
|
|
801
|
+
"pkg:gem/rails@7.0.0",
|
|
802
|
+
"pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js",
|
|
803
|
+
"pkg:maven/org.apache.commons/commons-lang3@3.12.0?classifier=sources",
|
|
804
|
+
"pkg:cargo/rand@0.7.2",
|
|
805
|
+
"pkg:pypi/django@4.0.0",
|
|
806
|
+
"pkg:docker/nginx@sha256:abc123def",
|
|
807
|
+
"pkg:golang/github.com/gorilla/mux@1.8.0",
|
|
808
|
+
]
|
|
809
|
+
|
|
810
|
+
parsed = sample_purls.map { |p| Purl.parse(p) }
|
|
811
|
+
|
|
812
|
+
puts "Hot Path Benchmarks (#{iterations} iterations)"
|
|
813
|
+
puts "=" * 50
|
|
814
|
+
|
|
815
|
+
Benchmark.bm(28) do |x|
|
|
816
|
+
x.report("to_s") do
|
|
817
|
+
iterations.times { parsed.each(&:to_s) }
|
|
818
|
+
end
|
|
819
|
+
|
|
820
|
+
x.report("== (equal)") do
|
|
821
|
+
pairs = parsed.map { |p| [p, Purl.parse(p.to_s)] }
|
|
822
|
+
iterations.times { pairs.each { |a, b| a == b } }
|
|
823
|
+
end
|
|
824
|
+
|
|
825
|
+
x.report("hash") do
|
|
826
|
+
iterations.times { parsed.each(&:hash) }
|
|
827
|
+
end
|
|
828
|
+
|
|
829
|
+
types = %w[gem npm maven cargo pypi docker golang unknown fake_type]
|
|
830
|
+
x.report("known_type?") do
|
|
831
|
+
iterations.times { types.each { |t| Purl.known_type?(t) } }
|
|
832
|
+
end
|
|
833
|
+
|
|
834
|
+
info_types = %w[gem npm maven cargo pypi]
|
|
835
|
+
x.report("type_info") do
|
|
836
|
+
1_000.times { info_types.each { |t| Purl.type_info(t) } }
|
|
837
|
+
end
|
|
838
|
+
|
|
839
|
+
x.report("all_type_info") do
|
|
840
|
+
100.times { Purl.all_type_info }
|
|
841
|
+
end
|
|
842
|
+
|
|
843
|
+
x.report("download_supported_types") do
|
|
844
|
+
iterations.times { Purl::DownloadURL.supported_types }
|
|
845
|
+
end
|
|
846
|
+
|
|
847
|
+
x.report("supported_reverse_types") do
|
|
848
|
+
iterations.times { Purl::RegistryURL.supported_reverse_types }
|
|
849
|
+
end
|
|
850
|
+
|
|
851
|
+
x.report("registry_supported_types") do
|
|
852
|
+
iterations.times { Purl::RegistryURL.supported_types }
|
|
853
|
+
end
|
|
854
|
+
|
|
855
|
+
x.report("parse (simple)") do
|
|
856
|
+
iterations.times { Purl.parse("pkg:gem/rails@7.0.0") }
|
|
857
|
+
end
|
|
858
|
+
|
|
859
|
+
x.report("parse (complex)") do
|
|
860
|
+
iterations.times { Purl.parse("pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js") }
|
|
861
|
+
end
|
|
862
|
+
|
|
863
|
+
x.report("parse (namespaced)") do
|
|
864
|
+
iterations.times { Purl.parse("pkg:maven/org.apache.commons/commons-lang3@3.12.0") }
|
|
865
|
+
end
|
|
866
|
+
|
|
867
|
+
x.report("from_url (domain match)") do
|
|
868
|
+
1_000.times { Purl.from_registry_url("https://rubygems.org/gems/rails") }
|
|
869
|
+
end
|
|
870
|
+
|
|
871
|
+
x.report("from_url (type hint)") do
|
|
872
|
+
1_000.times { Purl.from_registry_url("https://gems.internal.com/gems/rails", type: "gem") }
|
|
873
|
+
end
|
|
874
|
+
end
|
|
875
|
+
end
|
|
876
|
+
|
|
877
|
+
desc "Benchmark memory allocations for hot paths"
|
|
878
|
+
task :memory do
|
|
879
|
+
require "memory_profiler"
|
|
880
|
+
require_relative "lib/purl"
|
|
881
|
+
|
|
882
|
+
sample_purls = [
|
|
883
|
+
"pkg:gem/rails@7.0.0",
|
|
884
|
+
"pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js",
|
|
885
|
+
"pkg:maven/org.apache.commons/commons-lang3@3.12.0?classifier=sources",
|
|
886
|
+
"pkg:cargo/rand@0.7.2",
|
|
887
|
+
"pkg:pypi/django@4.0.0",
|
|
888
|
+
"pkg:docker/nginx@sha256:abc123def",
|
|
889
|
+
"pkg:golang/github.com/gorilla/mux@1.8.0",
|
|
890
|
+
]
|
|
891
|
+
|
|
892
|
+
parsed = sample_purls.map { |p| Purl.parse(p) }
|
|
893
|
+
|
|
894
|
+
benchmarks = {
|
|
895
|
+
"parse (simple)" => -> { Purl.parse("pkg:gem/rails@7.0.0") },
|
|
896
|
+
"parse (complex)" => -> { Purl.parse("pkg:npm/@babel/core@7.20.0?arch=x64&dev=true#lib/index.js") },
|
|
897
|
+
"parse (namespaced)" => -> { Purl.parse("pkg:maven/org.apache.commons/commons-lang3@3.12.0") },
|
|
898
|
+
"to_s" => -> { parsed.each(&:to_s) },
|
|
899
|
+
"to_s (cold)" => -> { sample_purls.map { |p| Purl.parse(p) }.each(&:to_s) },
|
|
900
|
+
"== (equal)" => -> { parsed.each_cons(2) { |a, b| a == b } },
|
|
901
|
+
"known_type?" => -> { %w[gem npm maven cargo pypi].each { |t| Purl.known_type?(t) } },
|
|
902
|
+
"type_info" => -> { %w[gem npm maven].each { |t| Purl.type_info(t) } },
|
|
903
|
+
"supported_types" => -> { Purl::RegistryURL.supported_types },
|
|
904
|
+
"supported_reverse_types" => -> { Purl::RegistryURL.supported_reverse_types },
|
|
905
|
+
"download_supported_types" => -> { Purl::DownloadURL.supported_types },
|
|
906
|
+
"from_url (domain match)" => -> { Purl.from_registry_url("https://rubygems.org/gems/rails") },
|
|
907
|
+
"from_url (type hint)" => -> { Purl.from_registry_url("https://gems.internal.com/gems/rails", type: "gem") },
|
|
908
|
+
}
|
|
909
|
+
|
|
910
|
+
puts "Memory Allocation Benchmarks"
|
|
911
|
+
puts "=" * 70
|
|
912
|
+
printf "%-30s %10s %10s %10s\n", "Benchmark", "Objects", "Memsize", "Strings"
|
|
913
|
+
puts "-" * 70
|
|
914
|
+
|
|
915
|
+
benchmarks.each do |name, block|
|
|
916
|
+
report = MemoryProfiler.report { 100.times { block.call } }
|
|
917
|
+
printf "%-30s %10d %10d %10d\n",
|
|
918
|
+
name,
|
|
919
|
+
report.total_allocated,
|
|
920
|
+
report.total_allocated_memsize,
|
|
921
|
+
report.strings_allocated.size
|
|
922
|
+
end
|
|
923
|
+
end
|
|
924
|
+
|
|
793
925
|
desc "Run all benchmarks"
|
|
794
|
-
task all: [:parse, :types, :registry] do
|
|
926
|
+
task all: [:parse, :types, :registry, :hotpaths] do
|
|
795
927
|
puts
|
|
796
|
-
puts "
|
|
928
|
+
puts "All benchmarks completed!"
|
|
797
929
|
puts " Use 'rake benchmark:parse' for parsing performance"
|
|
798
|
-
puts " Use 'rake benchmark:types' for type comparison"
|
|
930
|
+
puts " Use 'rake benchmark:types' for type comparison"
|
|
799
931
|
puts " Use 'rake benchmark:registry' for URL generation"
|
|
932
|
+
puts " Use 'rake benchmark:hotpaths' for memoization and lookup paths"
|
|
933
|
+
puts " Use 'rake benchmark:memory' for memory allocations"
|
|
800
934
|
end
|
|
801
935
|
end
|
data/lib/purl/download_url.rb
CHANGED
|
@@ -159,12 +159,10 @@ module Purl
|
|
|
159
159
|
NAMESPACE_REQUIRED_TYPES = %w[maven elm github gitlab bitbucket luarocks swift].freeze
|
|
160
160
|
|
|
161
161
|
def self.supported_types
|
|
162
|
-
DOWNLOAD_PATTERNS.keys.select do |k|
|
|
162
|
+
@supported_types ||= DOWNLOAD_PATTERNS.keys.select do |k|
|
|
163
163
|
pattern = DOWNLOAD_PATTERNS[k]
|
|
164
|
-
# Skip types with notes (they're not really supported)
|
|
165
164
|
next false if pattern[:note]
|
|
166
165
|
|
|
167
|
-
# Test with appropriate namespace for types that need it
|
|
168
166
|
namespace = if NAMESPACE_REQUIRED_TYPES.include?(k)
|
|
169
167
|
k == "swift" ? "github.com/test" : "test"
|
|
170
168
|
end
|
|
@@ -174,7 +172,7 @@ module Purl
|
|
|
174
172
|
rescue
|
|
175
173
|
false
|
|
176
174
|
end
|
|
177
|
-
end.sort
|
|
175
|
+
end.sort.freeze
|
|
178
176
|
end
|
|
179
177
|
|
|
180
178
|
def self.supports?(type)
|
data/lib/purl/ecosystems_url.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
module Purl
|
|
4
4
|
class EcosystemsURL
|
|
5
5
|
API_BASE = "https://packages.ecosyste.ms/api/v1"
|
|
6
|
+
NAMESPACED_PACKAGE_TYPES = %w[npm composer maven golang swift elm clojars].freeze
|
|
6
7
|
|
|
7
8
|
def self.registry_name(purl)
|
|
8
9
|
new(purl).registry_name
|
|
@@ -71,7 +72,7 @@ module Purl
|
|
|
71
72
|
end
|
|
72
73
|
|
|
73
74
|
def namespaced_package_types
|
|
74
|
-
|
|
75
|
+
NAMESPACED_PACKAGE_TYPES
|
|
75
76
|
end
|
|
76
77
|
|
|
77
78
|
def encode_path_segment(str)
|
data/lib/purl/lookup.rb
CHANGED
|
@@ -99,17 +99,39 @@ module Purl
|
|
|
99
99
|
|
|
100
100
|
private
|
|
101
101
|
|
|
102
|
-
def
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
102
|
+
def http_for(uri)
|
|
103
|
+
key = "#{uri.host}:#{uri.port}"
|
|
104
|
+
@connections ||= {}
|
|
105
|
+
@connections[key] ||= begin
|
|
106
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
107
|
+
http.use_ssl = (uri.scheme == "https")
|
|
108
|
+
http.read_timeout = @timeout
|
|
109
|
+
http.open_timeout = @timeout
|
|
110
|
+
http.start
|
|
111
|
+
http
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def reset_connection(uri)
|
|
116
|
+
key = "#{uri.host}:#{uri.port}"
|
|
117
|
+
old = @connections&.delete(key)
|
|
118
|
+
old&.finish rescue nil
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
def close
|
|
122
|
+
return unless @connections
|
|
123
|
+
@connections.each_value { |http| http.finish rescue nil }
|
|
124
|
+
@connections.clear
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def make_request(uri, retried: false)
|
|
128
|
+
http = http_for(uri)
|
|
129
|
+
|
|
108
130
|
request = Net::HTTP::Get.new(uri)
|
|
109
131
|
request["User-Agent"] = @user_agent
|
|
110
|
-
|
|
132
|
+
|
|
111
133
|
response = http.request(request)
|
|
112
|
-
|
|
134
|
+
|
|
113
135
|
case response.code.to_i
|
|
114
136
|
when 200
|
|
115
137
|
JSON.parse(response.body)
|
|
@@ -118,6 +140,10 @@ module Purl
|
|
|
118
140
|
else
|
|
119
141
|
raise LookupError, "API request failed with status #{response.code}"
|
|
120
142
|
end
|
|
143
|
+
rescue IOError, Errno::EPIPE, Errno::ECONNRESET => e
|
|
144
|
+
raise LookupError, "Connection failed: #{e.message}" if retried
|
|
145
|
+
reset_connection(uri)
|
|
146
|
+
make_request(uri, retried: true)
|
|
121
147
|
rescue JSON::ParserError => e
|
|
122
148
|
raise LookupError, "Failed to parse API response: #{e.message}"
|
|
123
149
|
rescue Timeout::Error, Net::OpenTimeout, Net::ReadTimeout => e
|
data/lib/purl/package_url.rb
CHANGED
|
@@ -48,8 +48,14 @@ module Purl
|
|
|
48
48
|
# @return [String, nil] subpath within the package
|
|
49
49
|
attr_reader :subpath
|
|
50
50
|
|
|
51
|
+
# Fast-path decode: skip URI.decode_www_form_component when no encoding present
|
|
52
|
+
def self.fast_decode(str)
|
|
53
|
+
str.include?("%") || str.include?("+") ? URI.decode_www_form_component(str) : str
|
|
54
|
+
end
|
|
55
|
+
|
|
51
56
|
VALID_TYPE_CHARS = /\A[a-zA-Z0-9\.\+\-]+\z/.freeze
|
|
52
57
|
VALID_QUALIFIER_KEY_CHARS = /\A[a-zA-Z0-9\.\-_]+\z/.freeze
|
|
58
|
+
STARTS_WITH_DIGIT = /\A\d/.freeze
|
|
53
59
|
|
|
54
60
|
# Create a new PackageURL instance
|
|
55
61
|
#
|
|
@@ -133,7 +139,7 @@ module Purl
|
|
|
133
139
|
|
|
134
140
|
unless subpath_clean.empty?
|
|
135
141
|
# Decode each component separately to handle paths properly
|
|
136
|
-
subpath_components = subpath_clean.split("/").map { |part|
|
|
142
|
+
subpath_components = subpath_clean.split("/").map { |part| fast_decode(part) }
|
|
137
143
|
subpath = subpath_components.join("/")
|
|
138
144
|
end
|
|
139
145
|
end
|
|
@@ -147,7 +153,7 @@ module Purl
|
|
|
147
153
|
at_index = path_and_version_part.rindex("@")
|
|
148
154
|
path_part = path_and_version_part[0...at_index]
|
|
149
155
|
version_part = path_and_version_part[at_index + 1..-1]
|
|
150
|
-
version =
|
|
156
|
+
version = fast_decode(version_part) unless version_part.empty?
|
|
151
157
|
else
|
|
152
158
|
path_part = path_and_version_part
|
|
153
159
|
end
|
|
@@ -161,7 +167,7 @@ module Purl
|
|
|
161
167
|
raise MalformedUrlError, "PURL path cannot be empty" if path_components.empty? || path_components == [""]
|
|
162
168
|
|
|
163
169
|
# First component is always the type
|
|
164
|
-
type =
|
|
170
|
+
type = fast_decode(path_components.shift)
|
|
165
171
|
raise MalformedUrlError, "PURL must have a name component" if path_components.empty?
|
|
166
172
|
|
|
167
173
|
# Handle empty name component (trailing slash case)
|
|
@@ -175,29 +181,29 @@ module Purl
|
|
|
175
181
|
# All non-type components become namespace
|
|
176
182
|
name = nil
|
|
177
183
|
if path_components.length == 1
|
|
178
|
-
namespace =
|
|
184
|
+
namespace = fast_decode(path_components[0])
|
|
179
185
|
else
|
|
180
|
-
namespace = path_components.map { |part|
|
|
186
|
+
namespace = path_components.map { |part| fast_decode(part) }.join("/")
|
|
181
187
|
end
|
|
182
188
|
end
|
|
183
189
|
else
|
|
184
190
|
# Normal parsing logic
|
|
185
191
|
# For simple cases like gem/rails, there's just the name
|
|
186
|
-
# For namespaced cases like npm/@babel/core, @babel is namespace, core is name
|
|
192
|
+
# For namespaced cases like npm/@babel/core, @babel is namespace, core is name
|
|
187
193
|
if path_components.length == 1
|
|
188
194
|
# Simple case: just type/name
|
|
189
|
-
name =
|
|
195
|
+
name = fast_decode(path_components[0])
|
|
190
196
|
namespace = nil
|
|
191
197
|
else
|
|
192
198
|
# Multiple components - assume last is name, others are namespace
|
|
193
|
-
name =
|
|
194
|
-
|
|
199
|
+
name = fast_decode(path_components.pop)
|
|
200
|
+
|
|
195
201
|
# Everything else is namespace
|
|
196
202
|
if path_components.length == 1
|
|
197
|
-
namespace =
|
|
203
|
+
namespace = fast_decode(path_components[0])
|
|
198
204
|
else
|
|
199
205
|
# Multiple remaining components - treat as namespace joined together
|
|
200
|
-
namespace = path_components.map { |part|
|
|
206
|
+
namespace = path_components.map { |part| fast_decode(part) }.join("/")
|
|
201
207
|
end
|
|
202
208
|
end
|
|
203
209
|
end
|
|
@@ -223,6 +229,8 @@ module Purl
|
|
|
223
229
|
# purl = PackageURL.new(type: "gem", name: "rails", version: "7.0.0")
|
|
224
230
|
# puts purl.to_s # "pkg:gem/rails@7.0.0"
|
|
225
231
|
def to_s
|
|
232
|
+
return @canonical if @canonical
|
|
233
|
+
|
|
226
234
|
parts = ["pkg:", type.downcase]
|
|
227
235
|
|
|
228
236
|
if namespace
|
|
@@ -236,11 +244,10 @@ module Purl
|
|
|
236
244
|
parts << "/" << URI.encode_www_form_component(name)
|
|
237
245
|
|
|
238
246
|
if version
|
|
239
|
-
# Special handling for version encoding - don't encode colon in certain contexts
|
|
240
247
|
encoded_version = case type&.downcase
|
|
241
248
|
when "docker"
|
|
242
249
|
# Docker versions with sha256: should not encode the colon
|
|
243
|
-
version
|
|
250
|
+
version
|
|
244
251
|
else
|
|
245
252
|
URI.encode_www_form_component(version)
|
|
246
253
|
end
|
|
@@ -268,7 +275,7 @@ module Purl
|
|
|
268
275
|
parts << "?" << query_parts.join("&")
|
|
269
276
|
end
|
|
270
277
|
|
|
271
|
-
parts.join
|
|
278
|
+
@canonical = parts.join.freeze
|
|
272
279
|
end
|
|
273
280
|
|
|
274
281
|
# Convert the PackageURL to a hash representation
|
|
@@ -427,7 +434,7 @@ module Purl
|
|
|
427
434
|
)
|
|
428
435
|
end
|
|
429
436
|
|
|
430
|
-
if type_str.match?(
|
|
437
|
+
if type_str.match?(STARTS_WITH_DIGIT)
|
|
431
438
|
raise InvalidTypeError.new(
|
|
432
439
|
"Type cannot start with a number",
|
|
433
440
|
component: :type,
|
|
@@ -446,18 +453,15 @@ module Purl
|
|
|
446
453
|
name_str = name.to_s.strip
|
|
447
454
|
raise InvalidNameError.new("Name cannot contain only whitespace", component: :name, value: name) if name_str.empty?
|
|
448
455
|
|
|
449
|
-
# Apply type-specific normalization
|
|
450
|
-
case @type
|
|
456
|
+
# Apply type-specific normalization (@type is already lowercased)
|
|
457
|
+
case @type
|
|
451
458
|
when "bitbucket", "github"
|
|
452
459
|
name_str.downcase
|
|
453
460
|
when "pypi"
|
|
454
|
-
# PyPI names are case-insensitive and _ should be normalized to -
|
|
455
461
|
name_str.downcase.gsub("_", "-")
|
|
456
462
|
when "mlflow"
|
|
457
|
-
# MLflow name normalization happens after qualifiers are validated
|
|
458
463
|
name_str
|
|
459
464
|
when "composer"
|
|
460
|
-
# Composer names should be lowercase
|
|
461
465
|
name_str.downcase
|
|
462
466
|
else
|
|
463
467
|
name_str
|
|
@@ -482,7 +486,7 @@ module Purl
|
|
|
482
486
|
|
|
483
487
|
# Check that decoded namespace segments don't contain '/'
|
|
484
488
|
namespace_str.split("/").each do |segment|
|
|
485
|
-
decoded_segment =
|
|
489
|
+
decoded_segment = self.class.fast_decode(segment)
|
|
486
490
|
if decoded_segment.include?("/")
|
|
487
491
|
raise InvalidNamespaceError.new(
|
|
488
492
|
"Namespace segments cannot contain '/' after URL decoding",
|
|
@@ -493,12 +497,11 @@ module Purl
|
|
|
493
497
|
end
|
|
494
498
|
end
|
|
495
499
|
|
|
496
|
-
# Apply type-specific normalization
|
|
497
|
-
case @type
|
|
500
|
+
# Apply type-specific normalization (@type is already lowercased)
|
|
501
|
+
case @type
|
|
498
502
|
when "bitbucket", "github"
|
|
499
503
|
namespace_str.downcase
|
|
500
504
|
when "composer"
|
|
501
|
-
# Composer namespaces should be lowercase
|
|
502
505
|
namespace_str.downcase
|
|
503
506
|
else
|
|
504
507
|
namespace_str
|
|
@@ -511,10 +514,9 @@ module Purl
|
|
|
511
514
|
version_str = version.to_s.strip
|
|
512
515
|
return nil if version_str.empty?
|
|
513
516
|
|
|
514
|
-
# Apply type-specific normalization
|
|
515
|
-
case @type
|
|
517
|
+
# Apply type-specific normalization (@type is already lowercased)
|
|
518
|
+
case @type
|
|
516
519
|
when "huggingface"
|
|
517
|
-
# HuggingFace versions (git commit hashes) should be lowercase
|
|
518
520
|
version_str.downcase
|
|
519
521
|
else
|
|
520
522
|
version_str
|
|
@@ -574,35 +576,28 @@ module Purl
|
|
|
574
576
|
end
|
|
575
577
|
|
|
576
578
|
def apply_post_validation_normalization
|
|
577
|
-
|
|
578
|
-
if @type&.downcase == "mlflow" && @qualifiers && @qualifiers["repository_url"] && @qualifiers["repository_url"].include?("azuredatabricks")
|
|
579
|
+
if @type == "mlflow" && @qualifiers && @qualifiers["repository_url"] && @qualifiers["repository_url"].include?("azuredatabricks")
|
|
579
580
|
# Databricks MLflow is case insensitive - normalize to lowercase per spec
|
|
580
581
|
@name = @name.downcase
|
|
581
582
|
end
|
|
582
583
|
# Other MLflow repositories (like Azure ML) are case sensitive - no normalization needed
|
|
583
584
|
end
|
|
584
585
|
|
|
586
|
+
def self.namespace_required_types
|
|
587
|
+
@namespace_required_types ||= begin
|
|
588
|
+
config = Purl.load_types_config
|
|
589
|
+
types = config["types"].select { |_, v| v["namespace_requirement"] == "required" }.keys
|
|
590
|
+
Set.new(types).freeze
|
|
591
|
+
end
|
|
592
|
+
end
|
|
593
|
+
|
|
585
594
|
def namespace_required_for_type?(type)
|
|
586
595
|
return false unless type
|
|
587
|
-
|
|
588
|
-
# Read from purl-types.json (included in gem)
|
|
589
|
-
types_data = self.class.purl_types_data
|
|
590
|
-
type_config = types_data.dig("types", type.downcase)
|
|
591
|
-
return false unless type_config
|
|
592
|
-
|
|
593
|
-
# Check namespace_requirement field
|
|
594
|
-
type_config["namespace_requirement"] == "required"
|
|
596
|
+
self.class.namespace_required_types.include?(type.downcase)
|
|
595
597
|
end
|
|
596
598
|
|
|
597
599
|
def self.purl_types_data
|
|
598
|
-
|
|
599
|
-
require "json"
|
|
600
|
-
types_file = File.join(File.dirname(__FILE__), "..", "..", "purl-types.json")
|
|
601
|
-
JSON.parse(File.read(types_file))
|
|
602
|
-
rescue
|
|
603
|
-
# Fallback to empty structure if file can't be read
|
|
604
|
-
{"types" => {}}
|
|
605
|
-
end
|
|
600
|
+
Purl.load_types_config
|
|
606
601
|
end
|
|
607
602
|
|
|
608
603
|
def self.parse_qualifiers(query_string)
|
data/lib/purl/registry_url.rb
CHANGED
|
@@ -7,10 +7,7 @@ module Purl
|
|
|
7
7
|
# Load registry patterns from JSON configuration
|
|
8
8
|
def self.load_registry_patterns
|
|
9
9
|
@registry_patterns ||= begin
|
|
10
|
-
|
|
11
|
-
config_path = File.join(__dir__, "..", "..", "purl-types.json")
|
|
12
|
-
require "json"
|
|
13
|
-
config = JSON.parse(File.read(config_path))
|
|
10
|
+
config = Purl.load_types_config
|
|
14
11
|
patterns = {}
|
|
15
12
|
|
|
16
13
|
config["types"].each do |type, type_config|
|
|
@@ -44,9 +41,21 @@ module Purl
|
|
|
44
41
|
end
|
|
45
42
|
end
|
|
46
43
|
|
|
44
|
+
# Precompute domain-agnostic regex for from_url with type: hint
|
|
45
|
+
domain_agnostic_regex = nil
|
|
46
|
+
if reverse_regex
|
|
47
|
+
original_source = reverse_regex.source
|
|
48
|
+
if config["reverse_regex"].start_with?("/")
|
|
49
|
+
domain_agnostic_regex = Regexp.new("^https?://[^/]+" + config["reverse_regex"])
|
|
50
|
+
elsif original_source =~ /\^https?:\/\/[^\/]+(.+)$/
|
|
51
|
+
domain_agnostic_regex = Regexp.new("^https?://[^/]+" + $1)
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
47
55
|
{
|
|
48
56
|
base_url: config["base_url"] || (default_registry ? default_registry + config["path_template"]&.split('/:').first : nil),
|
|
49
57
|
reverse_regex: reverse_regex,
|
|
58
|
+
domain_agnostic_regex: domain_agnostic_regex,
|
|
50
59
|
pattern: build_generation_lambda(type, config, default_registry),
|
|
51
60
|
reverse_parser: reverse_regex ? build_reverse_parser(type, config) : nil,
|
|
52
61
|
uri_template: config["uri_template"] ? Addressable::Template.new(config["uri_template"]) : nil,
|
|
@@ -58,11 +67,7 @@ module Purl
|
|
|
58
67
|
|
|
59
68
|
# Load types config (needed for accessing default_registry)
|
|
60
69
|
def self.load_types_config
|
|
61
|
-
|
|
62
|
-
config_path = File.join(__dir__, "..", "..", "purl-types.json")
|
|
63
|
-
require "json"
|
|
64
|
-
JSON.parse(File.read(config_path))
|
|
65
|
-
end
|
|
70
|
+
Purl.load_types_config
|
|
66
71
|
end
|
|
67
72
|
|
|
68
73
|
def self.build_generation_lambda(type, config, default_registry = nil)
|
|
@@ -294,8 +299,10 @@ module Purl
|
|
|
294
299
|
new(purl).generate(base_url: base_url)
|
|
295
300
|
end
|
|
296
301
|
|
|
302
|
+
SUPPORTED_TYPES = REGISTRY_PATTERNS.keys.sort.freeze
|
|
303
|
+
|
|
297
304
|
def self.supported_types
|
|
298
|
-
|
|
305
|
+
SUPPORTED_TYPES
|
|
299
306
|
end
|
|
300
307
|
|
|
301
308
|
def self.supports?(type)
|
|
@@ -307,39 +314,20 @@ module Purl
|
|
|
307
314
|
if type
|
|
308
315
|
normalized_type = type.to_s.downcase
|
|
309
316
|
config = REGISTRY_PATTERNS[normalized_type]
|
|
310
|
-
|
|
311
|
-
if config && config[:
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
# Legacy full regex pattern
|
|
322
|
-
if original_regex =~ /\^https?:\/\/[^\/]+(.+)$/
|
|
323
|
-
path_pattern = $1
|
|
324
|
-
# Create domain-agnostic regex that matches any domain with the same path structure
|
|
325
|
-
domain_agnostic_regex = Regexp.new("^https?://[^/]+" + path_pattern)
|
|
326
|
-
end
|
|
327
|
-
end
|
|
328
|
-
|
|
329
|
-
if domain_agnostic_regex
|
|
330
|
-
match = registry_url.match(domain_agnostic_regex)
|
|
331
|
-
if match
|
|
332
|
-
parsed_data = config[:reverse_parser].call(match)
|
|
333
|
-
return PackageURL.new(
|
|
334
|
-
type: parsed_data[:type],
|
|
335
|
-
namespace: parsed_data[:namespace],
|
|
336
|
-
name: parsed_data[:name],
|
|
337
|
-
version: parsed_data[:version]
|
|
338
|
-
)
|
|
339
|
-
end
|
|
317
|
+
|
|
318
|
+
if config && config[:domain_agnostic_regex] && config[:reverse_parser]
|
|
319
|
+
match = registry_url.match(config[:domain_agnostic_regex])
|
|
320
|
+
if match
|
|
321
|
+
parsed_data = config[:reverse_parser].call(match)
|
|
322
|
+
return PackageURL.new(
|
|
323
|
+
type: parsed_data[:type],
|
|
324
|
+
namespace: parsed_data[:namespace],
|
|
325
|
+
name: parsed_data[:name],
|
|
326
|
+
version: parsed_data[:version]
|
|
327
|
+
)
|
|
340
328
|
end
|
|
341
329
|
end
|
|
342
|
-
|
|
330
|
+
|
|
343
331
|
# If specified type didn't work, fall through to normal domain-matching logic
|
|
344
332
|
end
|
|
345
333
|
|
|
@@ -368,12 +356,14 @@ module Purl
|
|
|
368
356
|
|
|
369
357
|
raise UnsupportedTypeError.new(
|
|
370
358
|
error_message,
|
|
371
|
-
supported_types:
|
|
359
|
+
supported_types: SUPPORTED_REVERSE_TYPES
|
|
372
360
|
)
|
|
373
361
|
end
|
|
374
362
|
|
|
363
|
+
SUPPORTED_REVERSE_TYPES = REGISTRY_PATTERNS.select { |_, config| config[:reverse_regex] }.keys.sort.freeze
|
|
364
|
+
|
|
375
365
|
def self.supported_reverse_types
|
|
376
|
-
|
|
366
|
+
SUPPORTED_REVERSE_TYPES
|
|
377
367
|
end
|
|
378
368
|
|
|
379
369
|
def self.route_patterns_for(type)
|
data/lib/purl/version.rb
CHANGED
data/lib/purl.rb
CHANGED
|
@@ -2,14 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative "purl/version"
|
|
4
4
|
require_relative "purl/errors"
|
|
5
|
-
require_relative "purl/package_url"
|
|
6
|
-
require_relative "purl/registry_url"
|
|
7
|
-
require_relative "purl/download_url"
|
|
8
|
-
require_relative "purl/ecosystems_url"
|
|
9
|
-
require_relative "purl/lookup"
|
|
10
|
-
require_relative "purl/lookup_formatter"
|
|
11
|
-
require_relative "purl/advisory"
|
|
12
|
-
require_relative "purl/advisory_formatter"
|
|
13
5
|
|
|
14
6
|
# The main PURL (Package URL) module providing functionality to parse,
|
|
15
7
|
# validate, and generate package URLs according to the PURL specification.
|
|
@@ -37,18 +29,49 @@ require_relative "purl/advisory_formatter"
|
|
|
37
29
|
module Purl
|
|
38
30
|
# Base error class for all PURL-related errors
|
|
39
31
|
class Error < StandardError; end
|
|
40
|
-
|
|
32
|
+
|
|
33
|
+
# Deep-freeze a parsed JSON structure so callers don't need defensive dups
|
|
34
|
+
def self.deep_freeze(obj)
|
|
35
|
+
case obj
|
|
36
|
+
when Hash
|
|
37
|
+
obj.each_value { |v| deep_freeze(v) }
|
|
38
|
+
obj.freeze
|
|
39
|
+
when Array
|
|
40
|
+
obj.each { |v| deep_freeze(v) }
|
|
41
|
+
obj.freeze
|
|
42
|
+
when String
|
|
43
|
+
obj.freeze
|
|
44
|
+
end
|
|
45
|
+
obj
|
|
46
|
+
end
|
|
47
|
+
|
|
41
48
|
# Load PURL types configuration from JSON file
|
|
42
49
|
def self.load_types_config
|
|
43
50
|
@types_config ||= begin
|
|
44
51
|
config_path = File.join(__dir__, "..", "purl-types.json")
|
|
45
52
|
require "json"
|
|
46
|
-
JSON.parse(File.read(config_path))
|
|
53
|
+
deep_freeze(JSON.parse(File.read(config_path)))
|
|
47
54
|
end
|
|
48
55
|
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
require_relative "purl/package_url"
|
|
59
|
+
require_relative "purl/registry_url"
|
|
60
|
+
require_relative "purl/download_url"
|
|
61
|
+
require_relative "purl/ecosystems_url"
|
|
62
|
+
require_relative "purl/lookup"
|
|
63
|
+
require_relative "purl/lookup_formatter"
|
|
64
|
+
require_relative "purl/advisory"
|
|
65
|
+
require_relative "purl/advisory_formatter"
|
|
66
|
+
|
|
67
|
+
module Purl
|
|
49
68
|
|
|
50
69
|
# Known PURL types loaded from JSON configuration
|
|
51
70
|
KNOWN_TYPES = load_types_config["types"].keys.sort.freeze
|
|
71
|
+
|
|
72
|
+
# Set for O(1) lookups
|
|
73
|
+
require "set"
|
|
74
|
+
KNOWN_TYPES_SET = Set.new(KNOWN_TYPES).freeze
|
|
52
75
|
|
|
53
76
|
# Convenience method for parsing PURL strings
|
|
54
77
|
#
|
|
@@ -124,7 +147,7 @@ module Purl
|
|
|
124
147
|
# Purl.known_type?("gem") # true
|
|
125
148
|
# Purl.known_type?("unknown") # false
|
|
126
149
|
def self.known_type?(type)
|
|
127
|
-
|
|
150
|
+
KNOWN_TYPES_SET.include?(type.to_s.downcase)
|
|
128
151
|
end
|
|
129
152
|
|
|
130
153
|
# Get comprehensive type information including registry support
|
|
@@ -145,12 +168,13 @@ module Purl
|
|
|
145
168
|
# puts info[:description] # "Ruby gems from RubyGems.org"
|
|
146
169
|
def self.type_info(type)
|
|
147
170
|
normalized_type = type.to_s.downcase
|
|
171
|
+
config = type_config(normalized_type)
|
|
148
172
|
{
|
|
149
173
|
type: normalized_type,
|
|
150
174
|
known: known_type?(normalized_type),
|
|
151
|
-
description:
|
|
152
|
-
default_registry: default_registry
|
|
153
|
-
examples:
|
|
175
|
+
description: config ? config["description"] : nil,
|
|
176
|
+
default_registry: config ? config["default_registry"] : nil,
|
|
177
|
+
examples: config ? (config["examples"] || []) : [],
|
|
154
178
|
registry_url_generation: RegistryURL.supports?(normalized_type),
|
|
155
179
|
reverse_parsing: RegistryURL.supported_reverse_types.include?(normalized_type),
|
|
156
180
|
download_url_generation: DownloadURL.supports?(normalized_type),
|
|
@@ -192,8 +216,8 @@ module Purl
|
|
|
192
216
|
def self.type_config(type)
|
|
193
217
|
config = load_types_config["types"][type.to_s.downcase]
|
|
194
218
|
return nil unless config
|
|
195
|
-
|
|
196
|
-
config
|
|
219
|
+
|
|
220
|
+
config
|
|
197
221
|
end
|
|
198
222
|
|
|
199
223
|
# Get human-readable description for a type
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: purl
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.7.1
|
|
4
|
+
version: 1.8.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Nesbitt
|
|
@@ -76,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
76
76
|
- !ruby/object:Gem::Version
|
|
77
77
|
version: '0'
|
|
78
78
|
requirements: []
|
|
79
|
-
rubygems_version: 4.0.
|
|
79
|
+
rubygems_version: 4.0.6
|
|
80
80
|
specification_version: 4
|
|
81
81
|
summary: Parse and convert package urls (purls)
|
|
82
82
|
test_files: []
|