spandx 0.12.3 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@
3
3
  module Spandx
4
4
  module Core
5
5
  class LicensePlugin < Spandx::Core::Plugin
6
- def initialize(catalogue: Spdx::Catalogue.from_git)
6
+ def initialize(catalogue: Spdx::Catalogue.default)
7
7
  @guess = Guess.new(catalogue)
8
8
  end
9
9
 
@@ -25,8 +25,8 @@ module Spandx
25
25
  end
26
26
 
27
27
  def cache_for(dependency, git: Spandx.git)
28
- db = git[dependency.package_manager.to_sym] || git[:cache]
29
- Spandx::Core::Cache.new(dependency.package_manager, db: db)
28
+ git = git[dependency.package_manager.to_sym] || git[:cache]
29
+ Spandx::Core::Cache.new(dependency.package_manager, root: "#{git.root}/.index")
30
30
  end
31
31
 
32
32
  def known?(package_manager)
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Core
    # Streams the lines of a file using unbuffered, fixed-size reads.
    #
    # Lines are split on $INPUT_RECORD_SEPARATOR (from the 'English' library)
    # and are yielded without the separator.
    class LineIo
      # Number of bytes requested from the file per read.
      CHUNK_SIZE = 512

      # @param absolute_path [String] path of the file to read.
      def initialize(absolute_path)
        file_descriptor = IO.sysopen(absolute_path)
        @io = IO.new(file_descriptor)
        # String.new (not a '' literal) keeps the buffer mutable even when
        # string literals are frozen.
        @buffer = String.new
      end

      # Yields every line of the file in order and then closes the file.
      # The file is closed even if the block raises.
      #
      # @yieldparam line [String] one line, without its separator.
      # @return [nil]
      def each
        # Iterative on purpose: one stack frame per line would overflow the
        # stack on long files.
        while (line = next_line)
          yield line
        end
        nil
      ensure
        @io.close unless @io.closed?
      end

      private

      # Returns the next line, or nil once the file is exhausted.
      def next_line
        separator = $INPUT_RECORD_SEPARATOR
        @buffer << @io.sysread(CHUNK_SIZE) until @buffer.include?(separator)

        line, @buffer = @buffer.split(separator, 2)
        line
      rescue EOFError
        # The file may not end with a separator; hand over whatever is left
        # instead of dropping it.
        return if @buffer.empty?

        remainder = @buffer
        @buffer = String.new
        remainder
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Core
    # Walks a file or directory and yields the path of every non-directory
    # entry that it finds.
    class PathTraversal
      attr_reader :root

      # @param root [String] file or directory to start from.
      # @param recursive [Boolean] whether sub-directories are descended into.
      def initialize(root, recursive: true)
        @root = root
        @recursive = recursive
      end

      # Calls the block with each file path found at or beneath root.
      def each(&block)
        each_file_in(root, &block)
      end

      # @return [Enumerator] an enumerator over the same paths as #each.
      def to_enum
        Enumerator.new do |yielder|
          each { |item| yielder.yield(item) }
        end
      end

      private

      def recursive?
        @recursive
      end

      # Visits the entries of dir, recursing into directories when enabled.
      def each_file_in(dir, &block)
        entries_of(dir).each do |path|
          if File.directory?(path)
            each_file_in(path, &block) if recursive?
          else
            Spandx.logger.debug(path)
            block.call(path)
          end
        end
      end

      # Lists the non-hidden entries of dir as paths prefixed with dir.
      # A path that is not a directory is returned as its own single entry.
      def entries_of(dir)
        return [dir] unless File.directory?(dir)

        # `base:` keeps the directory name out of the glob pattern, so
        # directories containing characters such as [ ] { } * ? still list
        # their contents.
        Dir.glob('*', base: dir).map { |entry| File.join(dir, entry) }
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Core
    # Row-oriented view over a seekable IO. An index supplies the offset at
    # which each row starts; the row's line is parsed with CsvParser.
    class Relation
      attr_reader :io, :index

      # @param io [#seek, #gets] stream holding one row per line.
      # @param index [#size, #position_for] maps a row number to the offset
      #   passed to io.seek.
      def initialize(io, index)
        @io = io
        @index = index
      end

      # Yields every row from 0 up to size - 1.
      #
      # @return [Enumerator] when called without a block.
      def each
        return enum_for(:each) { size } unless block_given?

        size.times do |n|
          yield row(n)
        end
      end

      # @return the number of rows reported by the index.
      def size
        index.size
      end

      # Reads and parses a single row.
      #
      # @param number [Integer] zero-based row number.
      # @return the parsed row, or nil when the index has no offset for the
      #   row or the stream has no line at that offset.
      def row(number)
        # Row 0 always starts at the beginning of the stream.
        offset = number.zero? ? 0 : index.position_for(number)
        return unless offset

        io.seek(offset)
        line = io.gets
        # gets returns nil at end of stream; never hand nil to the parser.
        return unless line

        parse_row(line)
      end

      private

      def parse_row(line)
        CsvParser.parse(line)
      end
    end
  end
end
@@ -5,12 +5,16 @@ module Spandx
5
5
  class ThreadPool
6
6
  def initialize(size: Etc.nprocessors)
7
7
  @size = size
8
- @jobs = Queue.new
9
- @pool = size.times { start_worker_thread }
8
+ @queue = Queue.new
9
+ @pool = size.times.map { start_worker_thread }
10
10
  end
11
11
 
12
12
  def schedule(*args, &block)
13
- @jobs << [block, args]
13
+ @queue.enq([block, args])
14
+ end
15
+
16
+ def done?
17
+ @queue.empty?
14
18
  end
15
19
 
16
20
  def shutdown
@@ -21,13 +25,20 @@ module Spandx
21
25
  @pool.map(&:join)
22
26
  end
23
27
 
28
# Creates a pool, yields it to the block and shuts it down afterwards,
# whether or not the block raises.
#
# @yieldparam pool the newly created pool.
# @return the value returned by the block.
def self.open
  # Build the pool outside the protected region: if `new` fails there is
  # nothing to shut down, and the original error must not be masked by a
  # NoMethodError on nil.
  pool = new
  begin
    yield pool
  ensure
    pool.shutdown
  end
end
34
+
24
35
  private
25
36
 
26
37
  def start_worker_thread
27
38
  Thread.new do
28
39
  catch(:exit) do
29
40
  loop do
30
- job, args = @jobs.deq
41
+ job, args = @queue.deq
31
42
  job.call(*args)
32
43
  end
33
44
  end
@@ -4,98 +4,40 @@ module Spandx
4
4
  module Dotnet
5
5
  class Index
6
6
  DEFAULT_DIR = File.expand_path(File.join(Dir.home, '.local', 'share', 'spandx'))
7
- attr_reader :directory, :name
7
+ attr_reader :cache, :directory, :name, :gateway
8
8
 
9
- def initialize(directory: DEFAULT_DIR)
9
# @param directory [String, nil] where the index is stored; nil falls back
#   to DEFAULT_DIR, any other value is expanded to an absolute path.
# @param gateway [#each] source of the package records that are indexed.
def initialize(directory: DEFAULT_DIR, gateway: Spandx::Dotnet::NugetGateway.new)
  @directory = directory ? File.expand_path(directory) : DEFAULT_DIR
  @name = 'nuget'
  @gateway = gateway
  # Root the cache at the normalised @directory rather than the raw
  # argument, which may be nil or a relative path.
  @cache = Spandx::Core::Cache.new(@name, root: @directory)
end
13
15
 
14
- def licenses_for(name:, version:)
15
- search_key = [name, version].join
16
- CSV.open(data_file_for(name), 'r') do |io|
17
- found = io.readlines.bsearch { |x| search_key <=> [x[0], x[1]].join }
18
- found ? found[2].split('-|-') : []
19
- end
20
- end
21
-
22
- def update!(catalogue:, output: StringIO.new)
23
- catalogue.version
24
- insert_latest(Spandx::Dotnet::NugetGateway.new) do |page|
25
- output.puts "Checkpoint #{page}"
26
- checkpoint!(page)
27
- end
28
- sort_index!
16
+ def update!(*)
17
+ queue = Queue.new
18
+ [fetch(queue), save(queue)].each(&:join)
19
+ cache.rebuild_index
29
20
  end
30
21
 
31
22
  private
32
23
 
33
- def files(pattern)
34
- Dir.glob(File.join(directory, pattern)).sort.each do |file|
35
- fullpath = File.join(directory, file)
36
- next if File.directory?(fullpath)
37
- next unless File.exist?(fullpath)
38
-
39
- yield fullpath
24
+ def fetch(queue)
25
+ Thread.new do
26
+ gateway.each do |item|
27
+ queue.enq(item)
28
+ end
29
+ queue.enq(:stop)
40
30
  end
41
31
  end
42
32
 
43
- def sort_index!
44
- files('**/nuget') do |path|
45
- next if File.extname(path) == '.checkpoints'
46
-
47
- IO.write(path, IO.readlines(path).sort.join)
48
- end
49
- end
50
-
51
- def digest_for(components)
52
- Digest::SHA1.hexdigest(Array(components).join('/'))
53
- end
54
-
55
- def data_dir_for(name)
56
- digest = digest_for(name)
57
- File.join(directory, digest[0...2].downcase)
58
- end
59
-
60
- def data_file_for(name)
61
- File.join(data_dir_for(name), 'nuget')
62
- end
63
-
64
- def checkpoints_filepath
65
- @checkpoints_filepath ||= File.join(directory, 'nuget.checkpoints')
66
- end
67
-
68
- def checkpoints
69
- @checkpoints ||= File.exist?(checkpoints_filepath) ? JSON.parse(IO.read(checkpoints_filepath)) : {}
70
- end
71
-
72
- def checkpoint!(page)
73
- checkpoints[page.to_s] = Time.now.utc
74
- IO.write(checkpoints_filepath, JSON.pretty_generate(checkpoints))
75
- end
76
-
77
- def insert(name, version, license)
78
- path = data_file_for(name)
79
- FileUtils.mkdir_p(File.dirname(path))
80
- IO.write(
81
- path,
82
- CSV.generate_line([name, version, license], force_quotes: true),
83
- mode: 'a'
84
- )
85
- end
86
-
87
- def completed_pages
88
- checkpoints.keys.map(&:to_i)
89
- end
90
-
91
- def insert_latest(gateway)
92
- current_page = completed_pages.max || 0
93
- gateway.each(start_page: current_page) do |spec, page|
94
- break if checkpoints[page.to_s]
33
+ def save(queue)
34
+ Thread.new do
35
+ loop do
36
+ item = queue.deq
37
+ break if item == :stop
95
38
 
96
- yield current_page if current_page && page != current_page
97
- current_page = page
98
- insert(spec['id'], spec['version'], spec['licenseExpression'])
39
+ cache.insert(item['id'], item['version'], [item['licenseExpression']])
40
+ end
99
41
  end
100
42
  end
101
43
  end
@@ -13,13 +13,16 @@ module Spandx
13
13
  @directory = directory
14
14
  @source = source
15
15
  @name = 'maven'
16
+ @cache = ::Spandx::Core::Cache.new(@name, root: directory)
16
17
  end
17
18
 
18
19
  def update!(catalogue:, output:)
19
20
  each do |metadata|
20
- name = "#{metadata.group_id}:#{metadata.artifact_id}:#{metadata.version}"
21
- output.puts [name, metadata.licenses_from(catalogue)].inspect
21
+ name = "#{metadata.group_id}:#{metadata.artifact_id}"
22
+ output.puts [name, metadata.version, metadata.licenses_from(catalogue)].inspect
23
+ @cache.insert(name, metadata.version, metadata.licenses_from(catalogue))
22
24
  end
25
+ @cache.rebuild_index
23
26
  end
24
27
 
25
28
  def each
@@ -12,28 +12,18 @@ module Spandx
12
12
  @name = 'pypi'
13
13
  @source = 'https://pypi.org'
14
14
  @pypi = Pypi.new
15
- Thread.abort_on_exception = true
15
+ @cache = ::Spandx::Core::Cache.new(@name, root: directory)
16
16
  end
17
17
 
18
18
  def update!(*)
19
19
  queue = Queue.new
20
20
  [fetch(queue), save(queue)].each(&:join)
21
+ cache.rebuild_index
21
22
  end
22
23
 
23
24
  private
24
25
 
25
- def files(pattern)
26
- Dir.glob(pattern, base: directory).sort.each do |file|
27
- fullpath = File.join(directory, file)
28
- yield fullpath unless File.directory?(fullpath)
29
- end
30
- end
31
-
32
- def sort_index!
33
- files('**/pypi') do |path|
34
- IO.write(path, IO.readlines(path).sort.join)
35
- end
36
- end
26
+ attr_reader :cache
37
27
 
38
28
  def fetch(queue)
39
29
  Thread.new do
@@ -50,29 +40,10 @@ module Spandx
50
40
  item = queue.deq
51
41
  break if item == :stop
52
42
 
53
- insert!(item[:name], item[:version], item[:license])
43
+ cache.insert(item[:name], item[:version], [item[:license]])
54
44
  end
55
45
  end
56
46
  end
57
-
58
- def digest_for(components)
59
- Digest::SHA1.hexdigest(Array(components).join('/'))
60
- end
61
-
62
- def data_dir_for(name)
63
- File.join(directory, digest_for(name)[0...2].downcase)
64
- end
65
-
66
- def data_file_for(name)
67
- File.join(data_dir_for(name), 'pypi')
68
- end
69
-
70
- def insert!(name, version, license)
71
- return if license.nil? || license.empty?
72
-
73
- csv = CSV.generate_line([name, version, license], force_quotes: true)
74
- IO.write(data_file_for(name), csv, mode: 'a')
75
- end
76
47
  end
77
48
  end
78
49
  end
@@ -40,6 +40,10 @@ module Spandx
40
40
  from_json(Spandx.git[:spdx].read('json/licenses.json'))
41
41
  end
42
42
 
43
+ def default
44
+ from_git
45
+ end
46
+
43
47
  def empty
44
48
  @empty ||= new(licenses: [])
45
49
  end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Spdx
    # A license backed by a parsed expression tree: a left operand and,
    # optionally, an operator and a right operand.
    class CompositeLicense < License
      # Parses expression and wraps the first element of the parse result.
      #
      # @return [CompositeLicense, nil] nil when Parslet cannot parse the
      #   expression.
      def self.from_expression(expression, catalogue)
        parsed = Spdx::Expression.new.parse(expression)
        new(parsed[0], catalogue)
      rescue Parslet::ParseFailed
        nil
      end

      # @param tree [Hash] node read through its :left, :op and :right keys.
      # @param catalogue used to look up license ids via catalogue[id].
      def initialize(tree, catalogue)
        @catalogue = catalogue
        @tree = tree
        super({})
      end

      # The left id alone, or "<left id> <OPERATOR> <right id>" when the
      # tree has a right operand.
      def id
        rhs = right
        return left.id.to_s unless rhs

        combine(left.id, rhs.id)
      end

      # The left name alone, or "<left name> <OPERATOR> <right name>" when
      # the tree has a right operand.
      def name
        rhs = right
        return left.name unless rhs

        combine(left.name, rhs.name)
      end

      private

      # Joins both operands around the operator, dropping nil parts and
      # collapsing repeated spaces.
      def combine(lhs_text, rhs_text)
        [lhs_text, operator, rhs_text].compact.join(' ').squeeze(' ').strip
      end

      def left
        node_for(@tree[:left])
      end

      def operator
        @tree[:op].to_s.upcase
      end

      def right
        node_for(@tree[:right])
      end

      # Turns a tree entry into a license: nested hashes become composite
      # licenses, anything else is looked up in the catalogue by its string
      # form and falls back to License.unknown.
      def node_for(item)
        case item
        when nil then nil
        when Hash then self.class.new(item, @catalogue)
        else @catalogue[item.to_s] || License.unknown(item.to_s)
        end
      end
    end
  end
end