purl 0.1.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,543 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Purl
4
+ class RegistryURL
5
+ # Load registry patterns from JSON configuration
6
# Builds the per-type registry pattern table from purl-types.json.
#
# Only types that carry a "registry_config" entry are included. The table is
# memoized in @registry_patterns, so the file is read at most once through
# this method.
#
# @return [Hash{String => Hash}] type name => result of build_pattern_config
def self.load_registry_patterns
  return @registry_patterns if @registry_patterns

  # The JSON file is parsed here directly; the original note says this avoids
  # a circular dependency.
  require "json"
  json_path = File.join(__dir__, "..", "..", "purl-types.json")
  parsed = JSON.parse(File.read(json_path))

  @registry_patterns = parsed["types"].each_with_object({}) do |(name, settings), table|
    registry_settings = settings["registry_config"]
    next unless registry_settings

    table[name] = build_pattern_config(name, registry_settings)
  end
end
25
+
26
# Assembles the pattern entry for one PURL type.
#
# @param type [String] PURL type name (a key of the "types" section)
# @param config [Hash] the type's "registry_config" section
# @return [Hash] with keys
#   :base_url       - config["base_url"], or default registry + the part of
#                     "path_template" before the first "/:" placeholder, or nil
#   :route_patterns - default registry + each configured template, falling back
#                     to config["route_patterns"] when no template produced a URL
#   :reverse_regex  - Regexp used to recognise registry URLs, or nil
#   :pattern        - result of build_generation_lambda
#   :reverse_parser - result of build_reverse_parser, or nil without a regex
def self.build_pattern_config(type, config)
  # The default registry lives on the parent type entry, not in registry_config.
  default_registry = load_types_config["types"][type]["default_registry"]

  route_patterns = []
  if default_registry
    %w[
      path_template
      namespace_path_template
      version_path_template
      namespace_version_path_template
    ].each do |template_key|
      template = config[template_key]
      route_patterns << default_registry + template if template
    end
  end
  # Fall back to legacy route_patterns if available
  route_patterns = config["route_patterns"] if route_patterns.empty? && config["route_patterns"]

  reverse_regex = nil
  if config["reverse_regex"]
    if config["reverse_regex"].start_with?("/") && default_registry
      # Path-only pattern: prefix it with the (dot-escaped) default registry host.
      domain_pattern = default_registry.sub(/^https?:\/\//, '').gsub('.', '\\.')
      reverse_regex = Regexp.new("^https?://#{domain_pattern}" + config["reverse_regex"])
    else
      # Legacy full pattern
      reverse_regex = Regexp.new(config["reverse_regex"])
    end
  end

  # `&.` guards only a single call, so a chained `&.split(...).first` would
  # still invoke `first` on nil when "path_template" is absent. Resolve the
  # base URL step by step and leave it nil when it cannot be derived.
  base_url = config["base_url"]
  if base_url.nil? && default_registry && config["path_template"]
    base_url = default_registry + config["path_template"].split('/:').first
  end

  {
    base_url: base_url,
    route_patterns: route_patterns,
    reverse_regex: reverse_regex,
    pattern: build_generation_lambda(type, config, default_registry),
    reverse_parser: reverse_regex ? build_reverse_parser(type, config) : nil
  }
end
72
+
73
+ # Load types config (needed for accessing default_registry)
74
# Reads and parses purl-types.json, memoizing the parsed document in
# @types_config so later calls return the same object.
#
# @return [Hash] the parsed JSON document
def self.load_types_config
  return @types_config if @types_config

  require "json"
  json_path = File.join(__dir__, "..", "..", "purl-types.json")
  @types_config = JSON.parse(File.read(json_path))
end
81
+
82
# Returns a lambda that turns a package URL object into a registry page URL.
#
# The base URL is config["base_url"] when present; otherwise it is
# default_registry plus the part of config["path_template"] before the first
# "/:" placeholder. When neither can be determined the method returns nil.
#
# @param type [String] PURL type name; selects the URL layout
# @param config [Hash] the type's "registry_config" section
# @param default_registry [String, nil] registry root for the type
# @return [Proc, nil] lambda taking an object that responds to
#   type/namespace/name/version, or nil
def self.build_generation_lambda(type, config, default_registry = nil)
  base_url = config["base_url"]
  unless base_url
    path_template = config["path_template"]
    return nil unless default_registry && path_template

    # Everything before the first :parameter is the fixed part of the path.
    base_url = default_registry + path_template.split('/:').first
  end

  case type
  when "npm", "golang", "clojars"
    # Namespace is optional for these layouts.
    lambda do |purl|
      purl.namespace ? "#{base_url}/#{purl.namespace}/#{purl.name}" : "#{base_url}/#{purl.name}"
    end
  when "composer", "maven", "swift"
    # Namespace is mandatory for these layouts.
    lambda do |purl|
      unless purl.namespace
        raise MissingRegistryInfoError.new(
          "#{type.capitalize} packages require a namespace",
          type: purl.type,
          missing: "namespace"
        )
      end

      "#{base_url}/#{purl.namespace}/#{purl.name}"
    end
  when "pypi"
    lambda { |purl| "#{base_url}/#{purl.name}/" }
  when "hackage"
    lambda do |purl|
      purl.version ? "#{base_url}/#{purl.name}-#{purl.version}" : "#{base_url}/#{purl.name}"
    end
  when "deno"
    lambda do |purl|
      purl.version ? "#{base_url}/#{purl.name}@#{purl.version}" : "#{base_url}/#{purl.name}"
    end
  when "elm"
    lambda do |purl|
      unless purl.namespace
        raise MissingRegistryInfoError.new(
          "Elm packages require a namespace",
          type: purl.type,
          missing: "namespace"
        )
      end

      "#{base_url}/#{purl.namespace}/#{purl.name}/#{purl.version || "latest"}"
    end
  else
    lambda { |purl| "#{base_url}/#{purl.name}" }
  end
end
165
+
166
# Returns a lambda that converts the match of a type's reverse regex into a
# hash with the keys :type, :namespace, :name and :version.
#
# Capture-group layout per type group:
#   npm, maven               -> namespace, name, version
#   gem, hackage, deno, nuget -> name, version
#   composer, swift          -> namespace, name
#   golang, clojars          -> namespace and name when both captures are
#                               present, otherwise whichever capture is set
#                               becomes the name
#   pypi                     -> name, version (dropped when it equals the name)
#   elm                      -> namespace, name, version (dropped when "latest")
#   anything else            -> name only
#
# @param type [String] PURL type name, copied into the :type field
# @param config [Hash] the type's "registry_config" section (not read here)
# @return [Proc] lambda taking an object indexable by capture number
def self.build_reverse_parser(type, config)
  case type
  when "npm", "maven"
    lambda { |match| { type: type, namespace: match[1], name: match[2], version: match[3] } }
  when "gem", "hackage", "deno", "nuget"
    lambda { |match| { type: type, namespace: nil, name: match[1], version: match[2] } }
  when "composer", "swift"
    lambda { |match| { type: type, namespace: match[1], name: match[2], version: nil } }
  when "golang", "clojars"
    lambda do |match|
      first = match[1]
      second = match[2]
      if first && second
        { type: type, namespace: first, name: second, version: nil }
      else
        { type: type, namespace: nil, name: first || second, version: nil }
      end
    end
  when "pypi"
    lambda do |match|
      package = match[1]
      candidate = match[2]
      # avoid duplicate name as version
      { type: type, namespace: nil, name: package, version: (candidate == package ? nil : candidate) }
    end
  when "elm"
    lambda do |match|
      candidate = match[3]
      { type: type, namespace: match[1], name: match[2], version: (candidate == "latest" ? nil : candidate) }
    end
  else
    lambda { |match| { type: type, namespace: nil, name: match[1], version: nil } }
  end
end
303
+
304
+ # Registry patterns loaded from JSON configuration
305
+ REGISTRY_PATTERNS = load_registry_patterns.freeze
306
+
307
# Class-level shortcut: wraps +purl+ in a new instance and asks it for the
# registry URL.
#
# @param purl [Object] package URL object handed to the constructor
# @param base_url [String, nil] optional custom registry base URL
# @return [String] whatever the instance-level #generate returns
def self.generate(purl, base_url: nil)
  generator = new(purl)
  generator.generate(base_url: base_url)
end
310
+
311
# Lists every type that has an entry in REGISTRY_PATTERNS.
#
# @return [Array<String>] type names in ascending order
def self.supported_types
  type_names = REGISTRY_PATTERNS.keys
  type_names.sort
end
314
+
315
# Tells whether REGISTRY_PATTERNS has an entry for +type+.
#
# @param type [String, Symbol, #to_s] type name; compared case-insensitively
# @return [Boolean]
def self.supports?(type)
  normalized = type.to_s.downcase
  REGISTRY_PATTERNS.key?(normalized)
end
318
+
319
+ def self.from_url(registry_url, type: nil)
320
+ # If type is specified, try that specific type first with domain-agnostic parsing
321
+ if type
322
+ normalized_type = type.to_s.downcase
323
+ config = REGISTRY_PATTERNS[normalized_type]
324
+
325
+ if config && config[:reverse_regex] && config[:reverse_parser]
326
+ # Create a domain-agnostic version of the regex by replacing the base domain
327
+ original_regex = config[:reverse_regex].source
328
+
329
+ # For simplified JSON patterns that start with /, create domain-agnostic regex
330
+ domain_agnostic_regex = nil
331
+ if original_regex.start_with?("/")
332
+ # Domain-agnostic pattern - match any domain with this path
333
+ domain_agnostic_regex = Regexp.new("^https?://[^/]+" + original_regex)
334
+ else
335
+ # Legacy full regex pattern
336
+ if original_regex =~ /\^https?:\/\/[^\/]+(.+)$/
337
+ path_pattern = $1
338
+ # Create domain-agnostic regex that matches any domain with the same path structure
339
+ domain_agnostic_regex = Regexp.new("^https?://[^/]+" + path_pattern)
340
+ end
341
+ end
342
+
343
+ if domain_agnostic_regex
344
+ match = registry_url.match(domain_agnostic_regex)
345
+ if match
346
+ parsed_data = config[:reverse_parser].call(match)
347
+ return PackageURL.new(
348
+ type: parsed_data[:type],
349
+ namespace: parsed_data[:namespace],
350
+ name: parsed_data[:name],
351
+ version: parsed_data[:version]
352
+ )
353
+ end
354
+ end
355
+ end
356
+
357
+ # If specified type didn't work, fall through to normal domain-matching logic
358
+ end
359
+
360
+ # Try to parse the registry URL back into a PURL using domain matching
361
+ REGISTRY_PATTERNS.each do |registry_type, config|
362
+ next unless config[:reverse_regex] && config[:reverse_parser]
363
+
364
+ match = registry_url.match(config[:reverse_regex])
365
+ if match
366
+ parsed_data = config[:reverse_parser].call(match)
367
+ return PackageURL.new(
368
+ type: parsed_data[:type],
369
+ namespace: parsed_data[:namespace],
370
+ name: parsed_data[:name],
371
+ version: parsed_data[:version]
372
+ )
373
+ end
374
+ end
375
+
376
+ error_message = if type
377
+ "Unable to parse registry URL: #{registry_url} as type '#{type}'. " +
378
+ "URL structure doesn't match expected pattern for this type."
379
+ else
380
+ "Unable to parse registry URL: #{registry_url}. No matching pattern found."
381
+ end
382
+
383
+ raise UnsupportedTypeError.new(
384
+ error_message,
385
+ supported_types: REGISTRY_PATTERNS.keys.select { |k| REGISTRY_PATTERNS[k][:reverse_regex] }
386
+ )
387
+ end
388
+
389
# Lists the types whose REGISTRY_PATTERNS entry carries a reverse regex,
# i.e. the types from_url can recognise.
#
# @return [Array<String>] type names in ascending order
def self.supported_reverse_types
  reversible = REGISTRY_PATTERNS.keys.select { |name| REGISTRY_PATTERNS[name][:reverse_regex] }
  reversible.sort
end
392
+
393
# Looks up the route patterns registered for +type+.
#
# @param type [String, Symbol, #to_s] type name; compared case-insensitively
# @return [Array] the entry's :route_patterns, or an empty array when the
#   type is unknown or has none
def self.route_patterns_for(type)
  entry = REGISTRY_PATTERNS[type.to_s.downcase]
  entry ? (entry[:route_patterns] || []) : []
end
399
+
400
# Collects the route patterns of every type that has them.
#
# @return [Hash{String => Array}] type name => its :route_patterns; types
#   whose :route_patterns is nil are left out
def self.all_route_patterns
  REGISTRY_PATTERNS.each_with_object({}) do |(name, entry), collected|
    routes = entry[:route_patterns]
    collected[name] = routes if routes
  end
end
409
+
410
# Stores the package URL object the instance methods read from (@purl).
#
# @param purl [Object] package URL object; the generators call type,
#   namespace, name and version on it
def initialize(purl)
  @purl = purl
end
413
+
414
# Builds the registry page URL for the wrapped package URL.
#
# @param base_url [String, nil] custom registry base; when given, the type's
#   URL layout is applied to it instead of the configured default
# @return [String] registry URL
# @raise [UnsupportedTypeError] when the type has no entry in REGISTRY_PATTERNS
# @raise [MissingRegistryInfoError] re-raised untouched from the URL builders
# @raise [RegistryError] when no default URL can be built for the type, or
#   when the URL builder fails with any other StandardError
def generate(base_url: nil)
  pattern_config = REGISTRY_PATTERNS[@purl.type.downcase]

  unless pattern_config
    raise UnsupportedTypeError.new(
      "No registry URL pattern defined for type '#{@purl.type}'. Supported types: #{self.class.supported_types.join(", ")}",
      type: @purl.type,
      supported_types: self.class.supported_types
    )
  end

  # A type can be registered without a default URL builder (:pattern is nil
  # when neither a base URL nor a default registry + path template is
  # configured). Report that explicitly instead of letting `nil.call` surface
  # as a wrapped NoMethodError. Raised outside the begin/rescue below so the
  # message is not wrapped a second time.
  if !base_url && pattern_config[:pattern].nil?
    raise RegistryError,
          "Failed to generate registry URL for #{@purl.type}: " \
          "no default registry URL is configured for this type; pass base_url: explicitly"
  end

  begin
    if base_url
      # Use custom base URL with the same URL structure
      generate_with_custom_base_url(base_url, pattern_config)
    else
      pattern_config[:pattern].call(@purl)
    end
  rescue MissingRegistryInfoError
    raise
  rescue => e
    raise RegistryError, "Failed to generate registry URL for #{@purl.type}: #{e.message}"
  end
end
439
+
440
# Builds the registry URL and, for the types with a known version page
# layout (npm, pypi, gem, maven, nuget), appends the version to it.
#
# For every other type, or when the package URL has no version, the result of
# #generate is returned unchanged.
#
# @param base_url [String, nil] custom registry base passed on to #generate
# @return [String]
def generate_with_version(base_url: nil)
  registry_url = generate(base_url: base_url)
  version = @purl.version
  return registry_url unless version

  case @purl.type.downcase
  when "npm"
    "#{registry_url}/v/#{version}"
  when "pypi"
    # No separator is added before the version here.
    "#{registry_url}#{version}/"
  when "gem"
    "#{registry_url}/versions/#{version}"
  when "maven", "nuget"
    "#{registry_url}/#{version}"
  else
    # For other types, just return the base URL since version-specific URLs vary
    registry_url
  end
end
459
+
460
+ private
461
+
462
# Applies the type's URL layout to a caller-supplied base URL.
#
# Trailing slashes on +custom_base_url+ are removed first, so
# "https://host/path/" and "https://host/path" produce the same result
# instead of a URL containing "//".
#
# @param custom_base_url [String] registry base URL to build on
# @param pattern_config [Hash] the type's REGISTRY_PATTERNS entry (not read here)
# @return [String] registry URL
# @raise [MissingRegistryInfoError] for composer, maven, swift and elm
#   package URLs without a namespace
def generate_with_custom_base_url(custom_base_url, pattern_config)
  root = custom_base_url.to_s.sub(%r{/+\z}, "")
  namespace = @purl.namespace
  name = @purl.name
  version = @purl.version

  case @purl.type.downcase
  when "npm", "golang", "clojars"
    # Namespace is optional for these layouts.
    namespace ? "#{root}/#{namespace}/#{name}" : "#{root}/#{name}"
  when "composer", "maven", "swift"
    unless namespace
      raise MissingRegistryInfoError.new(
        "#{@purl.type.capitalize} packages require a namespace",
        type: @purl.type,
        missing: "namespace"
      )
    end

    "#{root}/#{namespace}/#{name}"
  when "pypi"
    "#{root}/#{name}/"
  when "hackage"
    version ? "#{root}/#{name}-#{version}" : "#{root}/#{name}"
  when "deno"
    version ? "#{root}/#{name}@#{version}" : "#{root}/#{name}"
  when "elm"
    unless namespace
      raise MissingRegistryInfoError.new(
        "Elm packages require a namespace",
        type: @purl.type,
        missing: "namespace"
      )
    end

    "#{root}/#{namespace}/#{name}/#{version || "latest"}"
  else
    "#{root}/#{name}"
  end
end
523
+
524
+ private
525
+
526
+ attr_reader :purl
527
+ end
528
+
529
+ # Add registry URL generation methods to PackageURL
530
# Reopens PackageURL to add convenience methods that delegate to RegistryURL.
class PackageURL
  # Registry page URL for this package.
  #
  # @param base_url [String, nil] optional custom registry base URL
  # @return [String]
  def registry_url(base_url: nil)
    RegistryURL.new(self).generate(base_url: base_url)
  end

  # Registry page URL including the version where RegistryURL knows how to
  # append it.
  #
  # @param base_url [String, nil] optional custom registry base URL
  # @return [String]
  def registry_url_with_version(base_url: nil)
    generator = RegistryURL.new(self)
    generator.generate_with_version(base_url: base_url)
  end

  # Whether RegistryURL has a pattern for this package's type.
  #
  # @return [Boolean]
  def supports_registry_url?
    RegistryURL.supports?(type)
  end
end
543
+ end
data/lib/purl/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Purl
4
- VERSION = "0.1.0"
4
+ VERSION = "1.1.0"
5
5
  end
data/lib/purl.rb CHANGED
@@ -1,8 +1,140 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "purl/version"
4
+ require_relative "purl/errors"
5
+ require_relative "purl/package_url"
6
+ require_relative "purl/registry_url"
4
7
 
5
8
  module Purl
6
9
  class Error < StandardError; end
7
- # Your code goes here...
10
+
11
+ # Load PURL types configuration from JSON file
12
# Reads and parses purl-types.json (one directory above this file),
# memoizing the parsed document in @types_config.
#
# @return [Hash] the parsed JSON document
def self.load_types_config
  return @types_config if @types_config

  require "json"
  json_path = File.join(__dir__, "..", "purl-types.json")
  @types_config = JSON.parse(File.read(json_path))
end
19
+
20
+ # Known PURL types loaded from JSON configuration
21
+ KNOWN_TYPES = load_types_config["types"].keys.sort.freeze
22
+
23
+ # Convenience method for parsing PURL strings
24
# Delegates to PackageURL.parse.
#
# @param purl_string [String] PURL text to parse
# @return [Object] whatever PackageURL.parse returns
def self.parse(purl_string)
  PackageURL.parse(purl_string)
end
27
+
28
+ # Convenience method for parsing registry URLs back to PURLs
29
+ # @param registry_url [String] The registry URL to parse
30
+ # @param type [String, Symbol, nil] Optional type hint for custom domains
31
# Delegates to RegistryURL.from_url.
#
# @return [Object] whatever RegistryURL.from_url returns
def self.from_registry_url(registry_url, type: nil)
  RegistryURL.from_url(registry_url, type: type)
end
34
+
35
+ # Returns all known PURL types
36
# Returns a copy of KNOWN_TYPES, so callers get an array they can modify
# without touching the frozen constant.
#
# @return [Array<String>]
def self.known_types
  KNOWN_TYPES.dup
end
39
+
40
+ # Returns types that have registry URL support
41
# Delegates to RegistryURL.supported_types.
#
# @return [Array<String>]
def self.registry_supported_types
  RegistryURL.supported_types
end
44
+
45
+ # Returns types that support reverse parsing from registry URLs
46
# Delegates to RegistryURL.supported_reverse_types.
#
# @return [Array<String>]
def self.reverse_parsing_supported_types
  RegistryURL.supported_reverse_types
end
49
+
50
+ # Check if a type is known/valid
51
# Tells whether +type+ appears in KNOWN_TYPES.
#
# @param type [String, Symbol, #to_s] type name; compared case-insensitively
# @return [Boolean]
def self.known_type?(type)
  normalized = type.to_s.downcase
  KNOWN_TYPES.include?(normalized)
end
54
+
55
+ # Get type information including registry support
56
# Gathers what this module and RegistryURL know about one type.
#
# @param type [String, Symbol, #to_s] type name; lower-cased before lookup
# @return [Hash] with the keys :type, :known, :description,
#   :default_registry, :examples, :registry_url_generation,
#   :reverse_parsing and :route_patterns
def self.type_info(type)
  name = type.to_s.downcase

  known = known_type?(name)
  description = type_description(name)
  registry = default_registry(name)
  examples = type_examples(name)
  can_generate = RegistryURL.supports?(name)
  can_reverse = RegistryURL.supported_reverse_types.include?(name)
  routes = RegistryURL.route_patterns_for(name)

  {
    type: name,
    known: known,
    description: description,
    default_registry: registry,
    examples: examples,
    registry_url_generation: can_generate,
    reverse_parsing: can_reverse,
    route_patterns: routes
  }
end
69
+
70
+ # Get comprehensive information about all types
71
# Builds type_info for every type in KNOWN_TYPES, followed by any type that
# RegistryURL supports but KNOWN_TYPES does not list.
#
# @return [Hash{String => Hash}] type name => type_info(type)
def self.all_type_info
  every_type = KNOWN_TYPES | RegistryURL.supported_types
  every_type.each_with_object({}) do |name, collected|
    collected[name] = type_info(name)
  end
end
88
+
89
+ # Get type configuration from JSON
90
# Returns the configuration entry of one type from the loaded JSON document.
#
# The result is a deep copy. Hash#dup alone is shallow, so nested values such
# as "registry_config" or "examples" would still be the very objects held by
# the memoized configuration, and mutating them would change what every later
# caller sees.
#
# @param type [String, Symbol, #to_s] type name; compared case-insensitively
# @return [Hash, nil] independent copy of the type's entry, or nil when the
#   type is not present
def self.type_config(type)
  config = load_types_config["types"][type.to_s.downcase]
  return nil unless config

  # A Marshal round-trip copies the whole nested structure.
  Marshal.load(Marshal.dump(config))
end
96
+
97
+ # Get description for a type
98
# Looks up the "description" field of a type's configuration.
#
# @param type [String, Symbol, #to_s] type name
# @return [Object, nil] the configured description, or nil when the type has
#   no configuration
def self.type_description(type)
  config = type_config(type)
  return nil unless config

  config["description"]
end
102
+
103
+ # Get examples for a type
104
# Looks up the "examples" field of a type's configuration.
#
# @param type [String, Symbol, #to_s] type name
# @return [Array] the configured examples; an empty array when the type has
#   no configuration or no examples
def self.type_examples(type)
  config = type_config(type)
  config ? (config["examples"] || []) : []
end
110
+
111
+ # Get registry configuration for a type
112
# Looks up the "registry_config" section of a type's configuration.
#
# @param type [String, Symbol, #to_s] type name
# @return [Object, nil] the section, or nil when the type has no
#   configuration or no such section
def self.registry_config(type)
  config = type_config(type)
  config ? config["registry_config"] : nil
end
118
+
119
+ # Get default registry URL for a type
120
# Looks up the "default_registry" field of a type's configuration.
#
# @param type [String, Symbol, #to_s] type name
# @return [Object, nil] the configured value, or nil when the type has no
#   configuration or no default registry
def self.default_registry(type)
  config = type_config(type)
  config ? config["default_registry"] : nil
end
126
+
127
+ # Get metadata about the types configuration
128
# Summarises the loaded types configuration.
#
# @return [Hash] the document's "version", "description", "source" and
#   "last_updated" fields, plus three counts: all types, types having a
#   "registry_config", and types having a "default_registry"
def self.types_config_metadata
  document = load_types_config
  types = document["types"]

  {
    version: document["version"],
    description: document["description"],
    source: document["source"],
    last_updated: document["last_updated"],
    total_types: types.size,
    registry_supported_types: types.count { |_, entry| entry["registry_config"] },
    types_with_default_registry: types.count { |_, entry| entry["default_registry"] }
  }
end
8
140
  end