spandx 0.12.3 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,7 +3,7 @@
3
3
  module Spandx
4
4
  module Core
5
5
  class LicensePlugin < Spandx::Core::Plugin
6
- def initialize(catalogue: Spdx::Catalogue.from_git)
6
+ def initialize(catalogue: Spdx::Catalogue.default)
7
7
  @guess = Guess.new(catalogue)
8
8
  end
9
9
 
@@ -25,8 +25,8 @@ module Spandx
25
25
  end
26
26
 
27
27
  def cache_for(dependency, git: Spandx.git)
28
- db = git[dependency.package_manager.to_sym] || git[:cache]
29
- Spandx::Core::Cache.new(dependency.package_manager, db: db)
28
+ git = git[dependency.package_manager.to_sym] || git[:cache]
29
+ Spandx::Core::Cache.new(dependency.package_manager, root: "#{git.root}/.index")
30
30
  end
31
31
 
32
32
  def known?(package_manager)
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Core
    # Iterates over the lines of a file using raw, fixed-size `sysread` calls
    # on a descriptor obtained from `IO.sysopen`.
    class LineIo
      # Number of bytes requested from the descriptor per `sysread` call.
      CHUNK_SIZE = 512

      # @param absolute_path [String] path handed to `IO.sysopen`.
      def initialize(absolute_path)
        file_descriptor = IO.sysopen(absolute_path)
        @io = IO.new(file_descriptor)
        # Unary plus returns a mutable string even when string literals are
        # frozen; the buffer is appended to in place.
        @buffer = +''
      end

      # Yields each line with the record separator removed. A trailing line
      # that is not terminated by the separator is yielded as well. The
      # underlying IO is closed once iteration ends, including when the block
      # raises or breaks.
      #
      # @yieldparam line [String]
      # @return [Enumerator] when no block is given
      # @return [nil] after iterating
      def each
        return enum_for(:each) unless block_given?

        # Iterative on purpose: one stack frame per line would overflow the
        # stack on large files.
        begin
          while (line = next_line)
            yield line
          end
        ensure
          @io.close unless @io.closed?
        end
      end

      private

      # Returns the next line, or nil once the file and buffer are exhausted.
      def next_line
        fill_buffer
        line, remainder = @buffer.split($INPUT_RECORD_SEPARATOR, 2)
        @buffer = remainder || +''
        line
      end

      # Reads chunks until the buffer holds a full line or the file ends.
      def fill_buffer
        @buffer << @io.sysread(CHUNK_SIZE) until @buffer.include?($INPUT_RECORD_SEPARATOR)
      rescue EOFError
        # End of file: whatever is left in the buffer is the final line.
        nil
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Core
    # Walks a root path and hands every non-directory entry to a block.
    class PathTraversal
      attr_reader :root

      # @param root [String] file or directory to start from.
      # @param recursive [Boolean] whether nested directories are descended into.
      def initialize(root, recursive: true)
        @root = root
        @recursive = recursive
      end

      # Yields the path of every non-directory entry found under `root`.
      # When `root` is not a directory it is yielded itself.
      #
      # @yieldparam path [String]
      # @return [Enumerator] when no block is given
      def each(&block)
        return to_enum unless block

        each_file_in(root, &block)
      end

      # @return [Enumerator] enumerator over the same paths `each` yields.
      def to_enum
        Enumerator.new do |yielder|
          each { |item| yielder.yield item }
        end
      end

      private

      def recursive?
        @recursive
      end

      # Visits `dir`, logging each file at debug level before yielding it.
      def each_file_in(dir, &block)
        entries_in(dir).each do |path|
          if File.directory?(path)
            each_file_in(path, &block) if recursive?
          else
            Spandx.logger.debug(path)
            block.call(path)
          end
        end
      end

      # Lists the `*` matches directly inside `path`, or `[path]` when it is
      # not a directory. The directory is passed as `base:` so that glob
      # metacharacters in its name (`[`, `{`, `*`, `?`) are taken literally.
      def entries_in(path)
        return [path] unless File.directory?(path)

        Dir.glob('*', base: path).map { |entry| File.join(path, entry) }
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Core
    # Presents rows read from an IO, using an index object to locate each row.
    # The index must respond to `size` and `position_for(number)`.
    class Relation
      attr_reader :io, :index

      # @param io [#seek, #gets] stream the rows are read from.
      # @param index [#size, #position_for] lookup of row number to offset.
      def initialize(io, index)
        @io = io
        @index = index
      end

      # Yields the parsed row for every row number from 0 up to `size - 1`.
      def each
        size.times { |position| yield row(position) }
      end

      # @return [Integer] whatever the index reports as its size.
      def size
        index.size
      end

      # Seeks to the offset of the requested row and parses the line found
      # there. Row 0 is always read from offset 0; other rows ask the index.
      #
      # @return the parsed row, or nil when the index has no offset for it.
      def row(number)
        offset =
          if number.zero?
            0
          else
            index.position_for(number)
          end

        if offset
          io.seek(offset)
          parse_row(io.gets)
        end
      end

      private

      # Delegates parsing of a raw line to CsvParser.
      def parse_row(line)
        CsvParser.parse(line)
      end
    end
  end
end
@@ -5,12 +5,16 @@ module Spandx
5
5
  class ThreadPool
6
6
  def initialize(size: Etc.nprocessors)
7
7
  @size = size
8
- @jobs = Queue.new
9
- @pool = size.times { start_worker_thread }
8
+ @queue = Queue.new
9
+ @pool = size.times.map { start_worker_thread }
10
10
  end
11
11
 
12
12
  def schedule(*args, &block)
13
- @jobs << [block, args]
13
+ @queue.enq([block, args])
14
+ end
15
+
16
+ def done?
17
+ @queue.empty?
14
18
  end
15
19
 
16
20
  def shutdown
@@ -21,13 +25,20 @@ module Spandx
21
25
  @pool.map(&:join)
22
26
  end
23
27
 
28
# Builds a pool, yields it to the block, and shuts it down afterwards,
# whether or not the block raises.
#
# @yieldparam pool the newly created pool
# @return the value of the block
def self.open
  pool = new
  yield pool
ensure
  # `pool` is still nil when `new` itself raised; skip the shutdown so the
  # original error is not replaced by a NoMethodError.
  pool&.shutdown
end
34
+
24
35
  private
25
36
 
26
37
  def start_worker_thread
27
38
  Thread.new do
28
39
  catch(:exit) do
29
40
  loop do
30
- job, args = @jobs.deq
41
+ job, args = @queue.deq
31
42
  job.call(*args)
32
43
  end
33
44
  end
@@ -4,98 +4,40 @@ module Spandx
4
4
  module Dotnet
5
5
  class Index
6
6
  DEFAULT_DIR = File.expand_path(File.join(Dir.home, '.local', 'share', 'spandx'))
7
- attr_reader :directory, :name
7
+ attr_reader :cache, :directory, :name, :gateway
8
8
 
9
- def initialize(directory: DEFAULT_DIR)
9
+ def initialize(directory: DEFAULT_DIR, gateway: Spandx::Dotnet::NugetGateway.new)
10
10
  @directory = directory ? File.expand_path(directory) : DEFAULT_DIR
11
11
  @name = 'nuget'
12
+ @gateway = gateway
13
+ @cache = Spandx::Core::Cache.new(@name, root: directory)
12
14
  end
13
15
 
14
- def licenses_for(name:, version:)
15
- search_key = [name, version].join
16
- CSV.open(data_file_for(name), 'r') do |io|
17
- found = io.readlines.bsearch { |x| search_key <=> [x[0], x[1]].join }
18
- found ? found[2].split('-|-') : []
19
- end
20
- end
21
-
22
- def update!(catalogue:, output: StringIO.new)
23
- catalogue.version
24
- insert_latest(Spandx::Dotnet::NugetGateway.new) do |page|
25
- output.puts "Checkpoint #{page}"
26
- checkpoint!(page)
27
- end
28
- sort_index!
16
+ def update!(*)
17
+ queue = Queue.new
18
+ [fetch(queue), save(queue)].each(&:join)
19
+ cache.rebuild_index
29
20
  end
30
21
 
31
22
  private
32
23
 
33
- def files(pattern)
34
- Dir.glob(File.join(directory, pattern)).sort.each do |file|
35
- fullpath = File.join(directory, file)
36
- next if File.directory?(fullpath)
37
- next unless File.exist?(fullpath)
38
-
39
- yield fullpath
24
+ def fetch(queue)
25
+ Thread.new do
26
+ gateway.each do |item|
27
+ queue.enq(item)
28
+ end
29
+ queue.enq(:stop)
40
30
  end
41
31
  end
42
32
 
43
- def sort_index!
44
- files('**/nuget') do |path|
45
- next if File.extname(path) == '.checkpoints'
46
-
47
- IO.write(path, IO.readlines(path).sort.join)
48
- end
49
- end
50
-
51
- def digest_for(components)
52
- Digest::SHA1.hexdigest(Array(components).join('/'))
53
- end
54
-
55
- def data_dir_for(name)
56
- digest = digest_for(name)
57
- File.join(directory, digest[0...2].downcase)
58
- end
59
-
60
- def data_file_for(name)
61
- File.join(data_dir_for(name), 'nuget')
62
- end
63
-
64
- def checkpoints_filepath
65
- @checkpoints_filepath ||= File.join(directory, 'nuget.checkpoints')
66
- end
67
-
68
- def checkpoints
69
- @checkpoints ||= File.exist?(checkpoints_filepath) ? JSON.parse(IO.read(checkpoints_filepath)) : {}
70
- end
71
-
72
- def checkpoint!(page)
73
- checkpoints[page.to_s] = Time.now.utc
74
- IO.write(checkpoints_filepath, JSON.pretty_generate(checkpoints))
75
- end
76
-
77
- def insert(name, version, license)
78
- path = data_file_for(name)
79
- FileUtils.mkdir_p(File.dirname(path))
80
- IO.write(
81
- path,
82
- CSV.generate_line([name, version, license], force_quotes: true),
83
- mode: 'a'
84
- )
85
- end
86
-
87
- def completed_pages
88
- checkpoints.keys.map(&:to_i)
89
- end
90
-
91
- def insert_latest(gateway)
92
- current_page = completed_pages.max || 0
93
- gateway.each(start_page: current_page) do |spec, page|
94
- break if checkpoints[page.to_s]
33
+ def save(queue)
34
+ Thread.new do
35
+ loop do
36
+ item = queue.deq
37
+ break if item == :stop
95
38
 
96
- yield current_page if current_page && page != current_page
97
- current_page = page
98
- insert(spec['id'], spec['version'], spec['licenseExpression'])
39
+ cache.insert(item['id'], item['version'], [item['licenseExpression']])
40
+ end
99
41
  end
100
42
  end
101
43
  end
@@ -13,13 +13,16 @@ module Spandx
13
13
  @directory = directory
14
14
  @source = source
15
15
  @name = 'maven'
16
+ @cache = ::Spandx::Core::Cache.new(@name, root: directory)
16
17
  end
17
18
 
18
19
  def update!(catalogue:, output:)
19
20
  each do |metadata|
20
- name = "#{metadata.group_id}:#{metadata.artifact_id}:#{metadata.version}"
21
- output.puts [name, metadata.licenses_from(catalogue)].inspect
21
+ name = "#{metadata.group_id}:#{metadata.artifact_id}"
22
+ output.puts [name, metadata.version, metadata.licenses_from(catalogue)].inspect
23
+ @cache.insert(name, metadata.version, metadata.licenses_from(catalogue))
22
24
  end
25
+ @cache.rebuild_index
23
26
  end
24
27
 
25
28
  def each
@@ -12,28 +12,18 @@ module Spandx
12
12
  @name = 'pypi'
13
13
  @source = 'https://pypi.org'
14
14
  @pypi = Pypi.new
15
- Thread.abort_on_exception = true
15
+ @cache = ::Spandx::Core::Cache.new(@name, root: directory)
16
16
  end
17
17
 
18
18
  def update!(*)
19
19
  queue = Queue.new
20
20
  [fetch(queue), save(queue)].each(&:join)
21
+ cache.rebuild_index
21
22
  end
22
23
 
23
24
  private
24
25
 
25
- def files(pattern)
26
- Dir.glob(pattern, base: directory).sort.each do |file|
27
- fullpath = File.join(directory, file)
28
- yield fullpath unless File.directory?(fullpath)
29
- end
30
- end
31
-
32
- def sort_index!
33
- files('**/pypi') do |path|
34
- IO.write(path, IO.readlines(path).sort.join)
35
- end
36
- end
26
+ attr_reader :cache
37
27
 
38
28
  def fetch(queue)
39
29
  Thread.new do
@@ -50,29 +40,10 @@ module Spandx
50
40
  item = queue.deq
51
41
  break if item == :stop
52
42
 
53
- insert!(item[:name], item[:version], item[:license])
43
+ cache.insert(item[:name], item[:version], [item[:license]])
54
44
  end
55
45
  end
56
46
  end
57
-
58
- def digest_for(components)
59
- Digest::SHA1.hexdigest(Array(components).join('/'))
60
- end
61
-
62
- def data_dir_for(name)
63
- File.join(directory, digest_for(name)[0...2].downcase)
64
- end
65
-
66
- def data_file_for(name)
67
- File.join(data_dir_for(name), 'pypi')
68
- end
69
-
70
- def insert!(name, version, license)
71
- return if license.nil? || license.empty?
72
-
73
- csv = CSV.generate_line([name, version, license], force_quotes: true)
74
- IO.write(data_file_for(name), csv, mode: 'a')
75
- end
76
47
  end
77
48
  end
78
49
  end
@@ -40,6 +40,10 @@ module Spandx
40
40
  from_json(Spandx.git[:spdx].read('json/licenses.json'))
41
41
  end
42
42
 
43
+ def default
44
+ from_git
45
+ end
46
+
43
47
  def empty
44
48
  @empty ||= new(licenses: [])
45
49
  end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Spdx
    # A license backed by a parsed expression tree. Each node has a `:left`
    # entry and may have `:op` and `:right` entries; nested Hash entries are
    # wrapped in further instances of this class.
    class CompositeLicense < License
      # Parses `expression` and wraps the first element of the parse result.
      #
      # @return [CompositeLicense, nil] nil when Parslet::ParseFailed is raised.
      def self.from_expression(expression, catalogue)
        parsed = Spdx::Expression.new.parse(expression)
        new(parsed[0], catalogue)
      rescue Parslet::ParseFailed
        nil
      end

      # @param tree [Hash] node read through the :left, :op and :right keys.
      # @param catalogue [#[]] lookup used to resolve leaf items.
      def initialize(tree, catalogue)
        @catalogue = catalogue
        @tree = tree
        super({})
      end

      # The left id alone (as a String) when there is no right-hand side,
      # otherwise "<left id> <OPERATOR> <right id>" with runs of spaces
      # collapsed and the ends stripped.
      def id
        rhs = right
        return left.id.to_s unless rhs

        parts = [left.id, operator, rhs.id]
        parts.compact.join(' ').squeeze(' ').strip
      end

      # Same composition as #id, built from the names of both sides.
      def name
        rhs = right
        return left.name unless rhs

        parts = [left.name, operator, rhs.name]
        parts.compact.join(' ').squeeze(' ').strip
      end

      private

      def left
        node_for(@tree[:left])
      end

      # Upper-cased string form of the :op entry ("" when it is absent).
      def operator
        @tree[:op].to_s.upcase
      end

      def right
        node_for(@tree[:right])
      end

      # Resolves a tree entry: nil stays nil, a Hash becomes a nested
      # composite, anything else is looked up in the catalogue by its string
      # form, falling back to License.unknown.
      def node_for(item)
        case item
        when nil
          nil
        when Hash
          self.class.new(item, @catalogue)
        else
          @catalogue[item.to_s] || License.unknown(item.to_s)
        end
      end
    end
  end
end