spandx 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6b3a0275ae468012967376aa3bd813b3dc1efef7783eb11d0151d7babf4ac6a8
4
- data.tar.gz: b86bd82707ad5afa0b83c59674e4f0d08c12733d5f1788038845f30079bb8ba6
3
+ metadata.gz: ddd706dad19138c25501144fed49ae2148e7b5a703cdf06b9c4cd4bed4a940aa
4
+ data.tar.gz: c4596fdfa833988f80f7e3b17bce65e6fda8b8a0c059e441ff5b32583687b95d
5
5
  SHA512:
6
- metadata.gz: f71dd4bf8438ec8857f622076ea345f3c48c8dce3354949459b564ace2c0c002bcc95d09d9d906ba9063bdeb092ade90160f51bdcfb9900a0225114d1aa5aa80
7
- data.tar.gz: e0bbbdd0156924ac21d9730aa322b332f4d3c1ef43b530f58c56f411c57bfb95c154574bda77c61dda3fbd80b9c8fc9e8a2fea4c09053fa6bfda2c5a6ac86dc5
6
+ metadata.gz: 8749463ff0bacbe4e125b9822c317d910dded8688e04122989777c42ac37b908f4c06a0419c824cde4698c7bab823934b751520fba786ca497b26ef9d266f9b7
7
+ data.tar.gz: 5229f971f6b36428ef9a09d3b91bab6f5b049f1312e12daee926bc27baf1f463e39204e30d69daa51566df3277712d901f8790871f3d272e0d353eea66dbaaa3
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- Version 0.6.0
1
+ Version 0.7.0
2
2
 
3
3
  # Changelog
4
4
 
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ## [Unreleased]
11
11
 
12
+ ## [0.7.0] - 2020-03-11
13
+ ### Changed
14
+ - Improve how the `nuget` index is built.
15
+
12
16
  ## [0.6.0] - 2020-03-03
13
17
  ### Added
14
18
  - Add `spandx index update` command to fetch the latest `spandx-rubygems` index.
data/README.md CHANGED
@@ -25,7 +25,7 @@ Or install it yourself as:
25
25
  To fetch the latest version of the catalogue data from [SPDX](https://spdx.org/licenses/licenses.json).
26
26
 
27
27
  ```ruby
28
- catalogue = Spandx::Catalogue.latest
28
+ catalogue = Spandx::Spdx::Catalogue.latest
29
29
  catalogue.each do |license|
30
30
  puts license.inspect
31
31
  end
@@ -35,7 +35,7 @@ To load an offline copy of the data.
35
35
 
36
36
  ```ruby
37
37
  path = File.join(Dir.pwd, 'licenses.json')
38
- catalogue = Spandx::Catalogue.from_file(path)
38
+ catalogue = Spandx::Spdx::Catalogue.from_file(path)
39
39
  catalogue.each do |license|
40
40
  puts license.inspect
41
41
  end
@@ -12,7 +12,7 @@ module Spandx
12
12
  def execute(output: $stdout)
13
13
  catalogue = Spandx::Spdx::Catalogue.from_git
14
14
  indexes.each do |index|
15
- index.update!(catalogue: catalogue)
15
+ index.update!(catalogue: catalogue, output: output)
16
16
  end
17
17
  output.puts 'OK'
18
18
  end
@@ -10,62 +10,87 @@ module Spandx
10
10
  @directory = directory ? File.expand_path(directory) : DEFAULT_DIR
11
11
  end
12
12
 
13
- def update!(catalogue:, limit: nil)
14
- counter = 0
15
- gateway = Spandx::Dotnet::NugetGateway.new(catalogue: catalogue)
16
- gateway.each do |spec|
17
- next unless spec['licenseExpression']
18
-
19
- write([gateway.host, spec['id'], spec['version']], spec['licenseExpression'])
20
-
21
- if limit
22
- counter += 1
23
- break if counter > limit
24
- end
13
+ def licenses_for(name:, version:)
14
+ search_key = [name, version].join
15
+ open_data(name, mode: 'r') do |io|
16
+ found = io.readlines.bsearch { |x| search_key <=> [x[0], x[1]].join }
17
+ found ? found[2].split('-|-') : []
25
18
  end
26
19
  end
27
20
 
28
- def indexed?(key)
29
- File.exist?(data_file_for(digest_for(key)))
30
- end
31
-
32
- def read(key)
33
- open_data(digest_for(key), mode: 'r', &:read)
21
+ def update!(catalogue:, output: StringIO.new)
22
+ insert_latest(Spandx::Dotnet::NugetGateway.new(catalogue: catalogue)) do |page|
23
+ output.puts "Checkpoint #{page}"
24
+ checkpoint!(page)
25
+ end
26
+ sort_index!
34
27
  end
35
28
 
36
- def write(key, data)
37
- return if data.nil? || data.empty?
29
+ private
38
30
 
39
- open_data(digest_for(key)) do |x|
40
- x.write(data)
31
+ def files(pattern)
32
+ Dir.glob(pattern, base: directory).sort.each do |file|
33
+ fullpath = File.join(directory, file)
34
+ yield fullpath unless File.directory?(fullpath)
41
35
  end
42
36
  end
43
37
 
44
- private
38
+ def sort_index!
39
+ files('**/*') do |path|
40
+ IO.write(path, IO.readlines(path).sort.join)
41
+ end
42
+ end
45
43
 
46
44
  def digest_for(components)
47
45
  Digest::SHA1.hexdigest(Array(components).join('/'))
48
46
  end
49
47
 
50
- def open_data(key, mode: 'w')
51
- FileUtils.mkdir_p(data_dir_for(key))
52
- File.open(data_file_for(key), mode) do |file|
53
- yield file
48
+ def open_data(name, mode: 'a')
49
+ data_dir = data_dir_for(name)
50
+ FileUtils.mkdir_p(data_dir)
51
+ CSV.open(data_file_for(name), mode, force_quotes: true) do |csv|
52
+ yield csv
54
53
  end
55
54
  end
56
55
 
57
- def data_dir_for(index_key)
58
- File.join(directory, *index_key.scan(/../)).downcase
56
+ def data_dir_for(name)
57
+ digest = digest_for(name)
58
+ File.join(directory, digest[0...2].downcase)
59
59
  end
60
60
 
61
- def data_file_for(key)
62
- File.join(data_dir_for(key), 'data')
61
+ def data_file_for(name)
62
+ File.join(data_dir_for(name), 'nuget')
63
63
  end
64
64
 
65
- def upsert!(spec)
66
- return unless spec['licenseExpression']
65
+ def checkpoints_filepath
66
+ @checkpoints_filepath ||= File.join(directory, 'nuget.checkpoints')
67
+ end
67
68
 
68
- write([host, spec['id'], spec['version']], spec['licenseExpression'])
69
+ def checkpoints
70
+ @checkpoints ||= File.exist?(checkpoints_filepath) ? JSON.parse(IO.read(checkpoints_filepath)) : {}
71
+ end
72
+
73
+ def checkpoint!(page)
74
+ checkpoints[page.to_s] = Time.now.utc
75
+ IO.write(checkpoints_filepath, JSON.pretty_generate(checkpoints))
76
+ end
77
+
78
+ def insert(id, version, license)
79
+ open_data(id) do |io|
80
+ io << [id, version, license]
81
+ end
82
+ end
83
+
84
+ def insert_latest(gateway)
85
+ current_page = nil
86
+ gateway.each do |spec, page|
87
+ next unless spec['licenseExpression']
88
+ break if checkpoints[page.to_s]
89
+
90
+ yield current_page if current_page && page != current_page
91
+ current_page = page
92
+ insert(spec['id'], spec['version'], spec['licenseExpression'])
93
+ end
69
94
  end
70
95
  end
71
96
  end
@@ -21,10 +21,10 @@ module Spandx
21
21
  guess_licenses_from(document)
22
22
  end
23
23
 
24
- def each
25
- each_page do |page|
26
- items_from(page).each do |item|
27
- yield fetch_json(item['@id'])
24
+ def each(page: Float::INFINITY)
25
+ each_page(start_page: page) do |page_json|
26
+ items_from(page_json).each do |item|
27
+ yield(fetch_json(item['@id']), page_number_from(page_json['@id']))
28
28
  end
29
29
  end
30
30
  end
@@ -33,11 +33,11 @@ module Spandx
33
33
 
34
34
  attr_reader :http, :guess
35
35
 
36
- def each_page
36
+ def each_page(start_page:)
37
37
  url = "https://#{host}/v3/catalog0/index.json"
38
- items_from(fetch_json(url)).each do |page|
39
- yield fetch_json(page['@id'])
40
- end
38
+ items_from(fetch_json(url))
39
+ .find_all { |page| page_number_from(page['@id']) <= start_page }
40
+ .each { |page| yield fetch_json(page['@id']) }
41
41
  end
42
42
 
43
43
  def nuspec_url_for(name, version)
@@ -86,6 +86,10 @@ module Spandx
86
86
  .sort_by { |x| x['commitTimeStamp'] }
87
87
  .reverse
88
88
  end
89
+
90
+ def page_number_from(url)
91
+ url.match(/page(?<page_number>\d+)\.json/)[:page_number].to_i
92
+ end
89
93
  end
90
94
  end
91
95
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spandx
4
- VERSION = '0.6.0'
4
+ VERSION = '0.7.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spandx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mo khan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-03 00:00:00.000000000 Z
11
+ date: 2020-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable