spandx 0.12.3 → 0.13.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +74 -25
  3. data/README.md +11 -7
  4. data/exe/spandx +1 -2
  5. data/ext/spandx/extconf.rb +5 -0
  6. data/ext/spandx/spandx.c +55 -0
  7. data/ext/spandx/spandx.h +6 -0
  8. data/lib/spandx.rb +6 -3
  9. data/lib/spandx/cli.rb +2 -0
  10. data/lib/spandx/cli/commands/build.rb +13 -2
  11. data/lib/spandx/cli/commands/scan.rb +11 -20
  12. data/lib/spandx/cli/main.rb +3 -2
  13. data/lib/spandx/core/cache.rb +38 -51
  14. data/lib/spandx/core/content.rb +5 -23
  15. data/lib/spandx/core/data_file.rb +66 -0
  16. data/lib/spandx/core/dependency.rb +47 -13
  17. data/lib/spandx/core/git.rb +8 -32
  18. data/lib/spandx/core/guess.rb +48 -40
  19. data/lib/spandx/core/http.rb +7 -2
  20. data/lib/spandx/core/index_file.rb +103 -0
  21. data/lib/spandx/core/license_plugin.rb +15 -4
  22. data/lib/spandx/core/parser.rb +10 -3
  23. data/lib/spandx/core/path_traversal.rb +35 -0
  24. data/lib/spandx/core/relation.rb +38 -0
  25. data/lib/spandx/core/report.rb +6 -12
  26. data/lib/spandx/core/spinner.rb +51 -0
  27. data/lib/spandx/dotnet/index.rb +21 -79
  28. data/lib/spandx/dotnet/parsers/csproj.rb +7 -7
  29. data/lib/spandx/dotnet/parsers/packages_config.rb +7 -7
  30. data/lib/spandx/dotnet/parsers/sln.rb +10 -13
  31. data/lib/spandx/dotnet/project_file.rb +3 -3
  32. data/lib/spandx/java/index.rb +5 -2
  33. data/lib/spandx/java/parsers/maven.rb +7 -7
  34. data/lib/spandx/js/parsers/npm.rb +6 -6
  35. data/lib/spandx/js/parsers/yarn.rb +7 -7
  36. data/lib/spandx/php/parsers/composer.rb +7 -7
  37. data/lib/spandx/python/index.rb +4 -33
  38. data/lib/spandx/python/parsers/pipfile_lock.rb +4 -4
  39. data/lib/spandx/python/pypi.rb +0 -2
  40. data/lib/spandx/python/source.rb +12 -0
  41. data/lib/spandx/ruby/parsers/gemfile_lock.rb +10 -9
  42. data/lib/spandx/spdx/catalogue.rb +5 -1
  43. data/lib/spandx/spdx/composite_license.rb +60 -0
  44. data/lib/spandx/spdx/expression.rb +114 -0
  45. data/lib/spandx/spdx/license.rb +4 -14
  46. data/lib/spandx/version.rb +1 -1
  47. data/spandx.gemspec +16 -10
  48. metadata +100 -30
  49. data/lib/spandx/core/null_gateway.rb +0 -11
  50. data/lib/spandx/core/table.rb +0 -29
  51. data/lib/spandx/core/thread_pool.rb +0 -38
@@ -13,30 +13,12 @@ module Spandx
13
13
  @tokens ||= tokenize(canonicalize(raw)).to_set
14
14
  end
15
15
 
16
- def similar?(other, algorithm: :dice_coefficient)
17
- case algorithm
18
- when :dice_coefficient
19
- similarity_score(other, algorithm: algorithm) > 89.0
20
- when :levenshtein
21
- similarity_score(other, algorithm: algorithm) < 3
22
- when :jaro_winkler
23
- similarity_score(other, algorithm: algorithm) > 89.0
24
- end
16
+ def similar?(other, threshold: 89.0)
17
+ similarity_score(other) > threshold
25
18
  end
26
19
 
27
- def similarity_score(other, algorithm: :dice_coefficient)
28
- case algorithm
29
- when :dice_coefficient
30
- dice_coefficient(other)
31
- when :levenshtein
32
- require 'text'
33
-
34
- Text::Levenshtein.distance(raw, other.raw, 100)
35
- when :jaro_winkler
36
- require 'jaro_winkler'
37
-
38
- JaroWinkler.distance(raw, other.raw) * 100.0
39
- end
20
+ def similarity_score(other)
21
+ dice_coefficient(other)
40
22
  end
41
23
 
42
24
  private
@@ -46,7 +28,7 @@ module Spandx
46
28
  end
47
29
 
48
30
  def tokenize(content)
49
- content.to_s.scan(/[a-zA-Z]+/)
31
+ content.to_s.scan(/[a-zA-Z\d.]+/)
50
32
  end
51
33
 
52
34
  def blank?(content)
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Core
    # A line-oriented data file addressed by an absolute path.
    #
    # Rows are appended by {#insert} as force-quoted CSV lines of the form
    # `name, version, licenses` (licenses joined with `-|-`), read back line
    # by line through {#each}, and looked up through the companion
    # {IndexFile} returned by {#index}.
    class DataFile
      include Enumerable

      # Separator used to pack several licenses into one CSV column.
      LICENSE_SEPARATOR = '-|-'

      # @return [Pathname] location of the data file on disk.
      attr_reader :absolute_path

      # Wraps the path and makes sure its parent directory exists so that a
      # later append can create the file.
      #
      # @param absolute_path [String, Pathname] location of the data file.
      def initialize(absolute_path)
        @absolute_path = Pathname.new(absolute_path)
        FileUtils.mkdir_p(@absolute_path.dirname)
      end

      # Yields every line of the file after handing it to `CsvParser.parse`.
      # Yields nothing when the file does not exist.
      #
      # @yieldparam row [Object] whatever `CsvParser.parse` returns for a line.
      # @return [Enumerator] when called without a block.
      def each
        return enum_for(:each) unless block_given?
        return unless exist?

        open_file(mode: 'rb') do |io|
          while (line = io.gets)
            yield CsvParser.parse(line)
          end
        end
      end

      # Looks a row up through the index, comparing `"name-version"` against
      # the first two columns of each candidate row.
      #
      # @param name [String, nil] dependency name.
      # @param version [String, nil] dependency version.
      # @return [Object, nil] the matching row, or nil when either argument is
      #   nil/empty, the file is missing, or nothing matches.
      def search(name:, version:)
        # The version must be validated on its own: checking `name` twice
        # let an empty version through and searched for the term "name-".
        return if blank?(name) || blank?(version)
        return unless exist?

        term = "#{name}-#{version}"
        index.search do |row|
          term <=> "#{row[0]}-#{row[1]}"
        end
      end

      # Appends one row to the end of the file. Nothing is written when the
      # name or the version is nil/empty.
      #
      # @param name [String] dependency name.
      # @param version [String] dependency version.
      # @param licenses [Array<String>] license identifiers for the row.
      def insert(name, version, licenses)
        return if blank?(name) || blank?(version)

        open_file(mode: 'a') do |io|
          io.write(to_csv([name, version, licenses.join(LICENSE_SEPARATOR)]))
        end
      end

      # @return [Boolean] whether the data file is present on disk.
      def exist?
        absolute_path.exist?
      end

      # Opens the file, yields the IO and closes it afterwards.
      #
      # @param mode [String] mode string passed to `Pathname#open`.
      # @yieldparam io [File] the opened file.
      # @return [Object, nil] the block's value; nil (after logging through
      #   `Spandx.logger`) when `Errno::ENOENT` is raised.
      def open_file(mode: 'rb')
        absolute_path.open(mode) { |io| yield io }
      rescue Errno::ENOENT => error
        Spandx.logger.error(error)
        nil
      end

      # @return [IndexFile] memoised index for this data file.
      def index
        @index ||= IndexFile.new(self)
      end

      private

      # True for nil and for anything that reports itself as empty.
      def blank?(value)
        value.nil? || value.empty?
      end

      # Serialises one row; every field is quoted.
      def to_csv(array)
        array.to_csv(force_quotes: true)
      end
    end
  end
end
@@ -3,46 +3,80 @@
3
3
  module Spandx
4
4
  module Core
5
5
  class Dependency
6
- attr_reader :package_manager, :name, :version, :licenses, :meta
6
+ PACKAGE_MANAGERS = {
7
+ Spandx::Dotnet::Parsers::Csproj => :nuget,
8
+ Spandx::Dotnet::Parsers::PackagesConfig => :nuget,
9
+ Spandx::Dotnet::Parsers::Sln => :nuget,
10
+ Spandx::Java::Parsers::Maven => :maven,
11
+ Spandx::Js::Parsers::Npm => :npm,
12
+ Spandx::Js::Parsers::Yarn => :yarn,
13
+ Spandx::Php::Parsers::Composer => :composer,
14
+ Spandx::Python::Parsers::PipfileLock => :pypi,
15
+ Spandx::Ruby::Parsers::GemfileLock => :rubygems,
16
+ }.freeze
17
+ attr_reader :path, :name, :version, :licenses, :meta
7
18
 
8
- def initialize(package_manager:, name:, version:, licenses: [], meta: {})
9
- @package_manager = package_manager
10
- @name = name
11
- @version = version
12
- @licenses = licenses
19
+ def initialize(name:, version:, path:, meta: {})
20
+ @path = Pathname.new(path).realpath
21
+ @name = name || @path.basename.to_s
22
+ @version = version || @path.mtime.to_i.to_s
23
+ @licenses = []
13
24
  @meta = meta
14
25
  end
15
26
 
16
- def managed_by?(value)
17
- package_manager == value&.to_sym
27
+ def package_manager
28
+ PACKAGE_MANAGERS[Parser.for(path).class]
18
29
  end
19
30
 
20
31
  def <=>(other)
21
- to_s <=> other.to_s
32
+ return 1 if other.nil?
33
+
34
+ score = (name <=> other.name)
35
+ score = score.zero? ? (version <=> other&.version) : score
36
+ score.zero? ? (path.to_s <=> other&.path.to_s) : score
22
37
  end
23
38
 
24
39
  def hash
25
40
  to_s.hash
26
41
  end
27
42
 
43
+ def ==(other)
44
+ eql?(other)
45
+ end
46
+
28
47
  def eql?(other)
29
48
  to_s == other.to_s
30
49
  end
31
50
 
32
51
  def to_s
33
- @to_s ||= [name, version].compact.join(' ')
52
+ @to_s ||= [name, version, path].compact.join(' ')
34
53
  end
35
54
 
36
55
  def inspect
37
- "#<Spandx::Core::Dependency name=#{name}, version=#{version}>"
56
+ "#<#{self.class} name=#{name} version=#{version} path=#{relative_path}>"
38
57
  end
39
58
 
40
59
  def to_a
41
- [name, version, licenses.map(&:id)]
60
+ [name, version, license_expression, relative_path.to_s]
42
61
  end
43
62
 
44
63
  def to_h
45
- { name: name, version: version, licenses: licenses.map(&:id) }
64
+ {
65
+ name: name,
66
+ version: version,
67
+ licenses: license_expression,
68
+ path: relative_path.to_s
69
+ }
70
+ end
71
+
72
+ private
73
+
74
+ def relative_path(from: Pathname.pwd)
75
+ path.relative_path_from(from)
76
+ end
77
+
78
+ def license_expression
79
+ licenses.map(&:id).join(' AND ')
46
80
  end
47
81
  end
48
82
  end
@@ -3,37 +3,21 @@
3
3
  module Spandx
4
4
  module Core
5
5
  class Git
6
- attr_reader :path, :url
6
+ attr_reader :root, :url
7
7
 
8
8
  def initialize(url:)
9
9
  @url = url
10
- @path = path_for(url)
11
- end
12
-
13
- def update!
14
- dotgit? ? pull! : clone!
15
- end
16
-
17
- def expand_path(relative_path)
18
- File.join(path, relative_path)
10
+ @root = path_for(url)
19
11
  end
20
12
 
21
13
  def read(path)
22
- update! unless dotgit?
14
+ full_path = File.join(root, path)
23
15
 
24
- full_path = expand_path(path)
25
16
  IO.read(full_path) if File.exist?(full_path)
26
17
  end
27
18
 
28
- def open(path, mode: 'r')
29
- update! unless dotgit?
30
-
31
- full_path = expand_path(path)
32
- return unless File.exist?(full_path)
33
-
34
- File.open(full_path, mode) do |io|
35
- yield io
36
- end
19
+ def update!
20
+ dotgit? ? pull! : clone!
37
21
  end
38
22
 
39
23
  private
@@ -45,26 +29,18 @@ module Spandx
45
29
  end
46
30
 
47
31
  def dotgit?
48
- File.directory?(File.join(path, '.git'))
32
+ File.directory?(File.join(root, '.git'))
49
33
  end
50
34
 
51
35
  def clone!
52
- system('git', 'clone', '--quiet', url, path)
36
+ system('git', 'clone', '--quiet', '--depth=1', '--single-branch', '--branch', 'master', url, root)
53
37
  end
54
38
 
55
39
  def pull!
56
- within do
40
+ Dir.chdir(root) do
57
41
  system('git', 'pull', '--no-rebase', '--quiet', 'origin', 'master')
58
42
  end
59
43
  end
60
-
61
- def within
62
- Dir.chdir(path) do
63
- yield
64
- end
65
- end
66
44
  end
67
-
68
- Database = Git
69
45
  end
70
46
  end
@@ -9,80 +9,88 @@ module Spandx
9
9
  @catalogue = catalogue
10
10
  end
11
11
 
12
- def license_for(raw, algorithm: :dice_coefficient)
13
- raw.is_a?(Hash) ? from_hash(raw, algorithm) : from_string(raw, algorithm)
12
+ def license_for(raw)
13
+ case raw
14
+ when Hash
15
+ from_hash(raw)
16
+ when Array
17
+ from_array(raw)
18
+ else
19
+ from_string(raw)
20
+ end
14
21
  end
15
22
 
16
23
  private
17
24
 
18
- def from_hash(hash, algorithm)
19
- from_string(hash[:name], algorithm) ||
20
- from_url(hash[:url], algorithm) ||
25
+ def from_hash(hash)
26
+ from_string(hash[:name]) ||
27
+ from_url(hash[:url]) ||
21
28
  unknown(hash[:name] || hash[:url])
22
29
  end
23
30
 
24
- def from_string(raw, algorithm)
31
+ def from_array(array)
32
+ from_string(array.join(' AND '))
33
+ end
34
+
35
+ def from_string(raw)
36
+ return if raw.nil?
37
+
25
38
  content = Content.new(raw)
26
39
 
27
40
  catalogue[raw] ||
28
- match_name(content, algorithm) ||
29
- match_body(content, algorithm) ||
41
+ catalogue[raw.split(' ').join('-')] ||
42
+ match_name(content) ||
43
+ match_body(content) ||
30
44
  unknown(raw)
31
45
  end
32
46
 
33
- def from_url(url, algorithm)
47
+ def from_url(url)
34
48
  return if url.nil? || url.empty?
35
49
 
36
50
  response = Spandx.http.get(url)
37
51
  return unless Spandx.http.ok?(response)
38
52
 
39
- license_for(response.body, algorithm: algorithm)
53
+ license_for(response.body)
40
54
  end
41
55
 
42
- def match_name(content, _algorithm)
56
+ def match_name(content)
57
+ return if content.tokens.size < 2 || content.tokens.size > 10
58
+
59
+ result = from_expression(content)
60
+ return result if result
61
+
62
+ threshold = 85.0
43
63
  catalogue.find do |license|
44
- score = content.similarity_score(::Spandx::Core::Content.new(license.name))
45
- score > 85
64
+ content.similar?(Content.new(license.name), threshold: threshold)
46
65
  end
47
66
  end
48
67
 
49
- def match_body(content, algorithm)
68
+ def match_body(content)
50
69
  score = Score.new(nil, nil)
51
- threshold = threshold_for(algorithm)
52
- direction = algorithm == :levenshtein ? method(:min) : method(:max)
53
-
70
+ threshold = 89.0
54
71
  catalogue.each do |license|
55
- direction.call(content, license, score, threshold, algorithm) unless license.deprecated_license_id?
72
+ next if license.deprecated_license_id?
73
+
74
+ percentage = content.similarity_score(content_for(license))
75
+ next if percentage < threshold
76
+ next if score.score >= percentage
77
+
78
+ score.update(percentage, license)
56
79
  end
57
80
  score&.item
58
81
  end
59
82
 
60
- def unknown(text)
61
- ::Spandx::Spdx::License.unknown(text)
62
- end
63
-
64
- def threshold_for(algorithm)
65
- {
66
- dice_coefficient: 89.0,
67
- jaro_winkler: 80.0,
68
- levenshtein: 80.0,
69
- }[algorithm.to_sym]
83
+ def content_for(license)
84
+ ::Spandx::Core::Content.new(Spandx.git[:spdx].read("text/#{license.id}.txt") || '')
70
85
  end
71
86
 
72
- def min(target, other, score, threshold, algorithm)
73
- percentage = target.similarity_score(other.content, algorithm: algorithm)
74
- return if percentage > threshold
75
- return if score.score > 0.0 && score.score < percentage
76
-
77
- score.update(percentage, other)
87
+ def unknown(text)
88
+ ::Spandx::Spdx::License.unknown(text)
78
89
  end
79
90
 
80
- def max(target, other, score, threshold, algorithm)
81
- percentage = target.similarity_score(other.content, algorithm: algorithm)
82
- return if percentage < threshold
83
- return if score.score >= percentage
84
-
85
- score.update(percentage, other)
91
+ def from_expression(content)
92
+ Spandx::Spdx::CompositeLicense
93
+ .from_expression(content.raw, catalogue)
86
94
  end
87
95
  end
88
96
  end
@@ -8,7 +8,12 @@ module Spandx
8
8
  def initialize(driver: Http.default_driver, retries: 3)
9
9
  @driver = driver
10
10
  @retries = retries
11
- @circuits = Hash.new { |hash, key| hash[key] = Circuit.new(key) }
11
+ semaphore = Mutex.new
12
+ @circuits = Hash.new do |hash, key|
13
+ semaphore.synchronize do
14
+ hash[key] = Circuit.new(key)
15
+ end
16
+ end
12
17
  end
13
18
 
14
19
  def get(uri, default: nil, escape: true)
@@ -22,7 +27,7 @@ module Spandx
22
27
  client.get(escape ? Addressable::URI.escape(uri) : uri)
23
28
  end
24
29
  end
25
- rescue *Net::Hippie::CONNECTION_ERRORS
30
+ rescue *Net::Hippie::CONNECTION_ERRORS, URI::InvalidURIError
26
31
  default
27
32
  end
28
33
 
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
module Spandx
  module Core
    # Sidecar index (`<data file>.idx`) for a data file.
    #
    # The index is a flat sequence of 32-bit little-endian unsigned integers;
    # entry N is the byte offset at which row N of the data file starts.
    # {#update!} sorts the data file and regenerates the index, and {#search}
    # performs a binary search over the rows using those offsets.
    class IndexFile
      # `Array#pack` / `String#unpack1` directive for one index entry.
      UINT_32_DIRECTIVE = 'V'
      # Size in bytes of one index entry.
      UINT_32_SIZE = 4

      # @return [Object] the data file this index describes.
      # @return [Pathname] location of the index file.
      attr_reader :data_file, :path

      # @param data_file [#absolute_path, #exist?, #open_file] indexed file.
      def initialize(data_file)
        @data_file = data_file
        @path = Pathname.new("#{data_file.absolute_path}.idx")
        @entries = []
      end

      # Yields the byte offset of every indexed row, in row order. Yields
      # nothing when the index file does not exist.
      #
      # @yieldparam position [Integer] byte offset of a row in the data file.
      # @return [Enumerator] when called without a block.
      def each
        return enum_for(:each) unless block_given?

        size.times do |n|
          yield position_for(n)
        end
      end

      # Binary search over the rows in the half-open range `min...max`.
      #
      # The block receives a row and returns the comparison of the wanted
      # term against it: zero for a match, positive when the wanted row lies
      # after the given one, negative when it lies before.
      #
      # @param min [Integer] first row number to consider.
      # @param max [Integer] row number one past the last row to consider.
      # @yieldparam row [Object] the row produced by `Relation#row`.
      # @return [Object, nil] the matching row, or nil when there is none.
      def search(min: 0, max: size)
        scan do |reader|
          until min >= max
            mid = mid_for(min, max)
            row = reader.row(mid)
            return unless row

            comparison = yield row
            return row if comparison.zero?

            comparison.positive? ? (min = mid + 1) : (max = mid)
          end
        end
      end

      # @return [Integer] number of entries in the index; 0 when the index
      #   file does not exist.
      def size
        path.exist? ? path.size / UINT_32_SIZE : 0
      end

      # Byte offset of the given row, read lazily from the index file and
      # cached afterwards.
      #
      # @param row_number [Integer] zero-based row number.
      # @return [Integer, nil] the offset, or nil when the row number is
      #   outside `0...size`.
      def position_for(row_number)
        # Valid rows are 0...size. Reading at `row_number == size` hits end
        # of file, where binread returns nil and unpacking would blow up.
        return if row_number.negative? || row_number >= size

        entries[row_number] ||= read_position(row_number)
      end

      # Sorts and de-duplicates the data file, then rewrites the index.
      # Does nothing when the data file does not exist.
      def update!
        return unless data_file.exist?

        sort(data_file)
        rebuild_index!
      end

      private

      attr_reader :entries

      # Opens the data file and yields a Relation bound to it and this index.
      def scan
        data_file.open_file(mode: 'rb') do |io|
          yield Relation.new(io, self)
        end
      end

      # Reads a single entry straight from the index file.
      def read_position(row_number)
        bytes = File.binread(path, UINT_32_SIZE, offset_for(row_number))
        bytes.unpack1(UINT_32_DIRECTIVE)
      end

      # Byte offset of an entry inside the index file.
      def offset_for(row_number)
        row_number * UINT_32_SIZE
      end

      # Rewrites the data file with its lines sorted and de-duplicated.
      def sort(data_file)
        data_file.absolute_path.write(data_file.absolute_path.readlines.sort.uniq.join)
      end

      # Writes one packed offset per data row into the index file.
      def rebuild_index!
        data_file.open_file do |data_io|
          File.open(path, mode: 'wb') do |index_io|
            lines_in(data_io).each do |pos|
              index_io.write([pos].pack(UINT_32_DIRECTIVE))
            end
          end
        end
      ensure
        # Offsets cached before the rebuild describe the old row order.
        @entries = []
      end

      # Collects the starting byte offset of every line in the IO. An empty
      # IO has no lines and therefore produces no offsets.
      def lines_in(io)
        positions = []
        io.seek(0)
        loop do
          start = io.pos
          break unless io.gets

          positions << start
        end
        positions
      end

      # Midpoint of `min...max`, rounded down.
      def mid_for(min, max)
        min + ((max - min) / 2)
      end
    end
  end
end