spandx 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6b3a0275ae468012967376aa3bd813b3dc1efef7783eb11d0151d7babf4ac6a8
4
- data.tar.gz: b86bd82707ad5afa0b83c59674e4f0d08c12733d5f1788038845f30079bb8ba6
3
+ metadata.gz: ddd706dad19138c25501144fed49ae2148e7b5a703cdf06b9c4cd4bed4a940aa
4
+ data.tar.gz: c4596fdfa833988f80f7e3b17bce65e6fda8b8a0c059e441ff5b32583687b95d
5
5
  SHA512:
6
- metadata.gz: f71dd4bf8438ec8857f622076ea345f3c48c8dce3354949459b564ace2c0c002bcc95d09d9d906ba9063bdeb092ade90160f51bdcfb9900a0225114d1aa5aa80
7
- data.tar.gz: e0bbbdd0156924ac21d9730aa322b332f4d3c1ef43b530f58c56f411c57bfb95c154574bda77c61dda3fbd80b9c8fc9e8a2fea4c09053fa6bfda2c5a6ac86dc5
6
+ metadata.gz: 8749463ff0bacbe4e125b9822c317d910dded8688e04122989777c42ac37b908f4c06a0419c824cde4698c7bab823934b751520fba786ca497b26ef9d266f9b7
7
+ data.tar.gz: 5229f971f6b36428ef9a09d3b91bab6f5b049f1312e12daee926bc27baf1f463e39204e30d69daa51566df3277712d901f8790871f3d272e0d353eea66dbaaa3
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- Version 0.6.0
1
+ Version 0.7.0
2
2
 
3
3
  # Changelog
4
4
 
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ## [Unreleased]
11
11
 
12
+ ## [0.7.0] - 2020-03-11
13
+ ### Changed
14
+ - Improve how the `nuget` index is built.
15
+
12
16
  ## [0.6.0] - 2020-03-03
13
17
  ### Added
14
18
  - Add `spandx index update` command to fetch the latest `spandx-rubygems` index.
data/README.md CHANGED
@@ -25,7 +25,7 @@ Or install it yourself as:
25
25
  To fetch the latest version of the catalogue data from [SPDX](https://spdx.org/licenses/licenses.json).
26
26
 
27
27
  ```ruby
28
- catalogue = Spandx::Catalogue.latest
28
+ catalogue = Spandx::Spdx::Catalogue.latest
29
29
  catalogue.each do |license|
30
30
  puts license.inspect
31
31
  end
@@ -35,7 +35,7 @@ To load an offline copy of the data.
35
35
 
36
36
  ```ruby
37
37
  path = File.join(Dir.pwd, 'licenses.json')
38
- catalogue = Spandx::Catalogue.from_file(path)
38
+ catalogue = Spandx::Spdx::Catalogue.from_file(path)
39
39
  catalogue.each do |license|
40
40
  puts license.inspect
41
41
  end
@@ -12,7 +12,7 @@ module Spandx
12
12
  def execute(output: $stdout)
13
13
  catalogue = Spandx::Spdx::Catalogue.from_git
14
14
  indexes.each do |index|
15
- index.update!(catalogue: catalogue)
15
+ index.update!(catalogue: catalogue, output: output)
16
16
  end
17
17
  output.puts 'OK'
18
18
  end
@@ -10,62 +10,87 @@ module Spandx
10
10
  @directory = directory ? File.expand_path(directory) : DEFAULT_DIR
11
11
  end
12
12
 
13
- def update!(catalogue:, limit: nil)
14
- counter = 0
15
- gateway = Spandx::Dotnet::NugetGateway.new(catalogue: catalogue)
16
- gateway.each do |spec|
17
- next unless spec['licenseExpression']
18
-
19
- write([gateway.host, spec['id'], spec['version']], spec['licenseExpression'])
20
-
21
- if limit
22
- counter += 1
23
- break if counter > limit
24
- end
13
+ def licenses_for(name:, version:)
14
+ search_key = [name, version].join
15
+ open_data(name, mode: 'r') do |io|
16
+ found = io.readlines.bsearch { |x| search_key <=> [x[0], x[1]].join }
17
+ found ? found[2].split('-|-') : []
25
18
  end
26
19
  end
27
20
 
28
- def indexed?(key)
29
- File.exist?(data_file_for(digest_for(key)))
30
- end
31
-
32
- def read(key)
33
- open_data(digest_for(key), mode: 'r', &:read)
21
+ def update!(catalogue:, output: StringIO.new)
22
+ insert_latest(Spandx::Dotnet::NugetGateway.new(catalogue: catalogue)) do |page|
23
+ output.puts "Checkpoint #{page}"
24
+ checkpoint!(page)
25
+ end
26
+ sort_index!
34
27
  end
35
28
 
36
- def write(key, data)
37
- return if data.nil? || data.empty?
29
+ private
38
30
 
39
- open_data(digest_for(key)) do |x|
40
- x.write(data)
31
+ def files(pattern)
32
+ Dir.glob(pattern, base: directory).sort.each do |file|
33
+ fullpath = File.join(directory, file)
34
+ yield fullpath unless File.directory?(fullpath)
41
35
  end
42
36
  end
43
37
 
44
- private
38
+ def sort_index!
39
+ files('**/*') do |path|
40
+ IO.write(path, IO.readlines(path).sort.join)
41
+ end
42
+ end
45
43
 
46
44
  def digest_for(components)
47
45
  Digest::SHA1.hexdigest(Array(components).join('/'))
48
46
  end
49
47
 
50
- def open_data(key, mode: 'w')
51
- FileUtils.mkdir_p(data_dir_for(key))
52
- File.open(data_file_for(key), mode) do |file|
53
- yield file
48
+ def open_data(name, mode: 'a')
49
+ data_dir = data_dir_for(name)
50
+ FileUtils.mkdir_p(data_dir)
51
+ CSV.open(data_file_for(name), mode, force_quotes: true) do |csv|
52
+ yield csv
54
53
  end
55
54
  end
56
55
 
57
- def data_dir_for(index_key)
58
- File.join(directory, *index_key.scan(/../)).downcase
56
+ def data_dir_for(name)
57
+ digest = digest_for(name)
58
+ File.join(directory, digest[0...2].downcase)
59
59
  end
60
60
 
61
- def data_file_for(key)
62
- File.join(data_dir_for(key), 'data')
61
+ def data_file_for(name)
62
+ File.join(data_dir_for(name), 'nuget')
63
63
  end
64
64
 
65
- def upsert!(spec)
66
- return unless spec['licenseExpression']
65
+ def checkpoints_filepath
66
+ @checkpoints_filepath ||= File.join(directory, 'nuget.checkpoints')
67
+ end
67
68
 
68
- write([host, spec['id'], spec['version']], spec['licenseExpression'])
69
+ def checkpoints
70
+ @checkpoints ||= File.exist?(checkpoints_filepath) ? JSON.parse(IO.read(checkpoints_filepath)) : {}
71
+ end
72
+
73
+ def checkpoint!(page)
74
+ checkpoints[page.to_s] = Time.now.utc
75
+ IO.write(checkpoints_filepath, JSON.pretty_generate(checkpoints))
76
+ end
77
+
78
+ def insert(id, version, license)
79
+ open_data(id) do |io|
80
+ io << [id, version, license]
81
+ end
82
+ end
83
+
84
+ def insert_latest(gateway)
85
+ current_page = nil
86
+ gateway.each do |spec, page|
87
+ next unless spec['licenseExpression']
88
+ break if checkpoints[page.to_s]
89
+
90
+ yield current_page if current_page && page != current_page
91
+ current_page = page
92
+ insert(spec['id'], spec['version'], spec['licenseExpression'])
93
+ end
69
94
  end
70
95
  end
71
96
  end
@@ -21,10 +21,10 @@ module Spandx
21
21
  guess_licenses_from(document)
22
22
  end
23
23
 
24
- def each
25
- each_page do |page|
26
- items_from(page).each do |item|
27
- yield fetch_json(item['@id'])
24
+ def each(page: Float::INFINITY)
25
+ each_page(start_page: page) do |page_json|
26
+ items_from(page_json).each do |item|
27
+ yield(fetch_json(item['@id']), page_number_from(page_json['@id']))
28
28
  end
29
29
  end
30
30
  end
@@ -33,11 +33,11 @@ module Spandx
33
33
 
34
34
  attr_reader :http, :guess
35
35
 
36
- def each_page
36
+ def each_page(start_page:)
37
37
  url = "https://#{host}/v3/catalog0/index.json"
38
- items_from(fetch_json(url)).each do |page|
39
- yield fetch_json(page['@id'])
40
- end
38
+ items_from(fetch_json(url))
39
+ .find_all { |page| page_number_from(page['@id']) <= start_page }
40
+ .each { |page| yield fetch_json(page['@id']) }
41
41
  end
42
42
 
43
43
  def nuspec_url_for(name, version)
@@ -86,6 +86,10 @@ module Spandx
86
86
  .sort_by { |x| x['commitTimeStamp'] }
87
87
  .reverse
88
88
  end
89
+
90
+ def page_number_from(url)
91
+ url.match(/page(?<page_number>\d+)\.json/)[:page_number].to_i
92
+ end
89
93
  end
90
94
  end
91
95
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spandx
4
- VERSION = '0.6.0'
4
+ VERSION = '0.7.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spandx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mo khan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-03 00:00:00.000000000 Z
11
+ date: 2020-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable