spandx 0.12.3 → 0.13.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +74 -25
- data/README.md +11 -7
- data/exe/spandx +1 -2
- data/ext/spandx/extconf.rb +5 -0
- data/ext/spandx/spandx.c +55 -0
- data/ext/spandx/spandx.h +6 -0
- data/lib/spandx.rb +6 -3
- data/lib/spandx/cli.rb +2 -0
- data/lib/spandx/cli/commands/build.rb +13 -2
- data/lib/spandx/cli/commands/scan.rb +11 -20
- data/lib/spandx/cli/main.rb +3 -2
- data/lib/spandx/core/cache.rb +38 -51
- data/lib/spandx/core/content.rb +5 -23
- data/lib/spandx/core/data_file.rb +66 -0
- data/lib/spandx/core/dependency.rb +47 -13
- data/lib/spandx/core/git.rb +8 -32
- data/lib/spandx/core/guess.rb +48 -40
- data/lib/spandx/core/http.rb +7 -2
- data/lib/spandx/core/index_file.rb +103 -0
- data/lib/spandx/core/license_plugin.rb +15 -4
- data/lib/spandx/core/parser.rb +10 -3
- data/lib/spandx/core/path_traversal.rb +35 -0
- data/lib/spandx/core/relation.rb +38 -0
- data/lib/spandx/core/report.rb +6 -12
- data/lib/spandx/core/spinner.rb +51 -0
- data/lib/spandx/dotnet/index.rb +21 -79
- data/lib/spandx/dotnet/parsers/csproj.rb +7 -7
- data/lib/spandx/dotnet/parsers/packages_config.rb +7 -7
- data/lib/spandx/dotnet/parsers/sln.rb +10 -13
- data/lib/spandx/dotnet/project_file.rb +3 -3
- data/lib/spandx/java/index.rb +5 -2
- data/lib/spandx/java/parsers/maven.rb +7 -7
- data/lib/spandx/js/parsers/npm.rb +6 -6
- data/lib/spandx/js/parsers/yarn.rb +7 -7
- data/lib/spandx/php/parsers/composer.rb +7 -7
- data/lib/spandx/python/index.rb +4 -33
- data/lib/spandx/python/parsers/pipfile_lock.rb +4 -4
- data/lib/spandx/python/pypi.rb +0 -2
- data/lib/spandx/python/source.rb +12 -0
- data/lib/spandx/ruby/parsers/gemfile_lock.rb +10 -9
- data/lib/spandx/spdx/catalogue.rb +5 -1
- data/lib/spandx/spdx/composite_license.rb +60 -0
- data/lib/spandx/spdx/expression.rb +114 -0
- data/lib/spandx/spdx/license.rb +4 -14
- data/lib/spandx/version.rb +1 -1
- data/spandx.gemspec +16 -10
- metadata +100 -30
- data/lib/spandx/core/null_gateway.rb +0 -11
- data/lib/spandx/core/table.rb +0 -29
- data/lib/spandx/core/thread_pool.rb +0 -38
data/lib/spandx/core/content.rb
CHANGED
@@ -13,30 +13,12 @@ module Spandx
|
|
13
13
|
@tokens ||= tokenize(canonicalize(raw)).to_set
|
14
14
|
end
|
15
15
|
|
16
|
-
def similar?(other,
|
17
|
-
|
18
|
-
when :dice_coefficient
|
19
|
-
similarity_score(other, algorithm: algorithm) > 89.0
|
20
|
-
when :levenshtein
|
21
|
-
similarity_score(other, algorithm: algorithm) < 3
|
22
|
-
when :jaro_winkler
|
23
|
-
similarity_score(other, algorithm: algorithm) > 89.0
|
24
|
-
end
|
16
|
+
def similar?(other, threshold: 89.0)
|
17
|
+
similarity_score(other) > threshold
|
25
18
|
end
|
26
19
|
|
27
|
-
def similarity_score(other
|
28
|
-
|
29
|
-
when :dice_coefficient
|
30
|
-
dice_coefficient(other)
|
31
|
-
when :levenshtein
|
32
|
-
require 'text'
|
33
|
-
|
34
|
-
Text::Levenshtein.distance(raw, other.raw, 100)
|
35
|
-
when :jaro_winkler
|
36
|
-
require 'jaro_winkler'
|
37
|
-
|
38
|
-
JaroWinkler.distance(raw, other.raw) * 100.0
|
39
|
-
end
|
20
|
+
def similarity_score(other)
|
21
|
+
dice_coefficient(other)
|
40
22
|
end
|
41
23
|
|
42
24
|
private
|
@@ -46,7 +28,7 @@ module Spandx
|
|
46
28
|
end
|
47
29
|
|
48
30
|
def tokenize(content)
|
49
|
-
content.to_s.scan(/[a-zA-Z]+/)
|
31
|
+
content.to_s.scan(/[a-zA-Z\d.]+/)
|
50
32
|
end
|
51
33
|
|
52
34
|
def blank?(content)
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Spandx
|
4
|
+
module Core
|
5
|
+
class DataFile
|
6
|
+
include Enumerable
|
7
|
+
|
8
|
+
attr_reader :absolute_path
|
9
|
+
|
10
|
+
def initialize(absolute_path)
|
11
|
+
@absolute_path = Pathname.new(absolute_path)
|
12
|
+
FileUtils.mkdir_p(@absolute_path.dirname)
|
13
|
+
end
|
14
|
+
|
15
|
+
def each
|
16
|
+
return unless exist?
|
17
|
+
|
18
|
+
open_file(mode: 'rb') do |io|
|
19
|
+
while (line = io.gets)
|
20
|
+
yield CsvParser.parse(line)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def search(name:, version:)
|
26
|
+
return if name.nil? || name.empty?
|
27
|
+
return if version.nil? || name.empty?
|
28
|
+
return unless absolute_path.exist?
|
29
|
+
|
30
|
+
term = "#{name}-#{version}"
|
31
|
+
index.search do |row|
|
32
|
+
term <=> "#{row[0]}-#{row[1]}"
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def insert(name, version, licenses)
|
37
|
+
return if [name, version].any? { |x| x.nil? || x.empty? }
|
38
|
+
|
39
|
+
open_file(mode: 'a') do |io|
|
40
|
+
io.write(to_csv([name, version, licenses.join('-|-')]))
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def exist?
|
45
|
+
absolute_path.exist?
|
46
|
+
end
|
47
|
+
|
48
|
+
def open_file(mode: 'rb')
|
49
|
+
absolute_path.open(mode) { |io| yield io }
|
50
|
+
rescue Errno::ENOENT => error
|
51
|
+
Spandx.logger.error(error)
|
52
|
+
nil
|
53
|
+
end
|
54
|
+
|
55
|
+
def index
|
56
|
+
@index ||= IndexFile.new(self)
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
def to_csv(array)
|
62
|
+
array.to_csv(force_quotes: true)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -3,46 +3,80 @@
|
|
3
3
|
module Spandx
|
4
4
|
module Core
|
5
5
|
class Dependency
|
6
|
-
|
6
|
+
PACKAGE_MANAGERS = {
|
7
|
+
Spandx::Dotnet::Parsers::Csproj => :nuget,
|
8
|
+
Spandx::Dotnet::Parsers::PackagesConfig => :nuget,
|
9
|
+
Spandx::Dotnet::Parsers::Sln => :nuget,
|
10
|
+
Spandx::Java::Parsers::Maven => :maven,
|
11
|
+
Spandx::Js::Parsers::Npm => :npm,
|
12
|
+
Spandx::Js::Parsers::Yarn => :yarn,
|
13
|
+
Spandx::Php::Parsers::Composer => :composer,
|
14
|
+
Spandx::Python::Parsers::PipfileLock => :pypi,
|
15
|
+
Spandx::Ruby::Parsers::GemfileLock => :rubygems,
|
16
|
+
}.freeze
|
17
|
+
attr_reader :path, :name, :version, :licenses, :meta
|
7
18
|
|
8
|
-
def initialize(
|
9
|
-
@
|
10
|
-
@name = name
|
11
|
-
@version = version
|
12
|
-
@licenses =
|
19
|
+
def initialize(name:, version:, path:, meta: {})
|
20
|
+
@path = Pathname.new(path).realpath
|
21
|
+
@name = name || @path.basename.to_s
|
22
|
+
@version = version || @path.mtime.to_i.to_s
|
23
|
+
@licenses = []
|
13
24
|
@meta = meta
|
14
25
|
end
|
15
26
|
|
16
|
-
def
|
17
|
-
|
27
|
+
def package_manager
|
28
|
+
PACKAGE_MANAGERS[Parser.for(path).class]
|
18
29
|
end
|
19
30
|
|
20
31
|
def <=>(other)
|
21
|
-
|
32
|
+
return 1 if other.nil?
|
33
|
+
|
34
|
+
score = (name <=> other.name)
|
35
|
+
score = score.zero? ? (version <=> other&.version) : score
|
36
|
+
score.zero? ? (path.to_s <=> other&.path.to_s) : score
|
22
37
|
end
|
23
38
|
|
24
39
|
def hash
|
25
40
|
to_s.hash
|
26
41
|
end
|
27
42
|
|
43
|
+
def ==(other)
|
44
|
+
eql?(other)
|
45
|
+
end
|
46
|
+
|
28
47
|
def eql?(other)
|
29
48
|
to_s == other.to_s
|
30
49
|
end
|
31
50
|
|
32
51
|
def to_s
|
33
|
-
@to_s ||= [name, version].compact.join(' ')
|
52
|
+
@to_s ||= [name, version, path].compact.join(' ')
|
34
53
|
end
|
35
54
|
|
36
55
|
def inspect
|
37
|
-
"
|
56
|
+
"#<#{self.class} name=#{name} version=#{version} path=#{relative_path}>"
|
38
57
|
end
|
39
58
|
|
40
59
|
def to_a
|
41
|
-
[name, version,
|
60
|
+
[name, version, license_expression, relative_path.to_s]
|
42
61
|
end
|
43
62
|
|
44
63
|
def to_h
|
45
|
-
{
|
64
|
+
{
|
65
|
+
name: name,
|
66
|
+
version: version,
|
67
|
+
licenses: license_expression,
|
68
|
+
path: relative_path.to_s
|
69
|
+
}
|
70
|
+
end
|
71
|
+
|
72
|
+
private
|
73
|
+
|
74
|
+
def relative_path(from: Pathname.pwd)
|
75
|
+
path.relative_path_from(from)
|
76
|
+
end
|
77
|
+
|
78
|
+
def license_expression
|
79
|
+
licenses.map(&:id).join(' AND ')
|
46
80
|
end
|
47
81
|
end
|
48
82
|
end
|
data/lib/spandx/core/git.rb
CHANGED
@@ -3,37 +3,21 @@
|
|
3
3
|
module Spandx
|
4
4
|
module Core
|
5
5
|
class Git
|
6
|
-
attr_reader :
|
6
|
+
attr_reader :root, :url
|
7
7
|
|
8
8
|
def initialize(url:)
|
9
9
|
@url = url
|
10
|
-
@
|
11
|
-
end
|
12
|
-
|
13
|
-
def update!
|
14
|
-
dotgit? ? pull! : clone!
|
15
|
-
end
|
16
|
-
|
17
|
-
def expand_path(relative_path)
|
18
|
-
File.join(path, relative_path)
|
10
|
+
@root = path_for(url)
|
19
11
|
end
|
20
12
|
|
21
13
|
def read(path)
|
22
|
-
|
14
|
+
full_path = File.join(root, path)
|
23
15
|
|
24
|
-
full_path = expand_path(path)
|
25
16
|
IO.read(full_path) if File.exist?(full_path)
|
26
17
|
end
|
27
18
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
full_path = expand_path(path)
|
32
|
-
return unless File.exist?(full_path)
|
33
|
-
|
34
|
-
File.open(full_path, mode) do |io|
|
35
|
-
yield io
|
36
|
-
end
|
19
|
+
def update!
|
20
|
+
dotgit? ? pull! : clone!
|
37
21
|
end
|
38
22
|
|
39
23
|
private
|
@@ -45,26 +29,18 @@ module Spandx
|
|
45
29
|
end
|
46
30
|
|
47
31
|
def dotgit?
|
48
|
-
File.directory?(File.join(
|
32
|
+
File.directory?(File.join(root, '.git'))
|
49
33
|
end
|
50
34
|
|
51
35
|
def clone!
|
52
|
-
system('git', 'clone', '--quiet', url,
|
36
|
+
system('git', 'clone', '--quiet', '--depth=1', '--single-branch', '--branch', 'master', url, root)
|
53
37
|
end
|
54
38
|
|
55
39
|
def pull!
|
56
|
-
|
40
|
+
Dir.chdir(root) do
|
57
41
|
system('git', 'pull', '--no-rebase', '--quiet', 'origin', 'master')
|
58
42
|
end
|
59
43
|
end
|
60
|
-
|
61
|
-
def within
|
62
|
-
Dir.chdir(path) do
|
63
|
-
yield
|
64
|
-
end
|
65
|
-
end
|
66
44
|
end
|
67
|
-
|
68
|
-
Database = Git
|
69
45
|
end
|
70
46
|
end
|
data/lib/spandx/core/guess.rb
CHANGED
@@ -9,80 +9,88 @@ module Spandx
|
|
9
9
|
@catalogue = catalogue
|
10
10
|
end
|
11
11
|
|
12
|
-
def license_for(raw
|
13
|
-
|
12
|
+
def license_for(raw)
|
13
|
+
case raw
|
14
|
+
when Hash
|
15
|
+
from_hash(raw)
|
16
|
+
when Array
|
17
|
+
from_array(raw)
|
18
|
+
else
|
19
|
+
from_string(raw)
|
20
|
+
end
|
14
21
|
end
|
15
22
|
|
16
23
|
private
|
17
24
|
|
18
|
-
def from_hash(hash
|
19
|
-
from_string(hash[:name]
|
20
|
-
from_url(hash[:url]
|
25
|
+
def from_hash(hash)
|
26
|
+
from_string(hash[:name]) ||
|
27
|
+
from_url(hash[:url]) ||
|
21
28
|
unknown(hash[:name] || hash[:url])
|
22
29
|
end
|
23
30
|
|
24
|
-
def
|
31
|
+
def from_array(array)
|
32
|
+
from_string(array.join(' AND '))
|
33
|
+
end
|
34
|
+
|
35
|
+
def from_string(raw)
|
36
|
+
return if raw.nil?
|
37
|
+
|
25
38
|
content = Content.new(raw)
|
26
39
|
|
27
40
|
catalogue[raw] ||
|
28
|
-
|
29
|
-
|
41
|
+
catalogue[raw.split(' ').join('-')] ||
|
42
|
+
match_name(content) ||
|
43
|
+
match_body(content) ||
|
30
44
|
unknown(raw)
|
31
45
|
end
|
32
46
|
|
33
|
-
def from_url(url
|
47
|
+
def from_url(url)
|
34
48
|
return if url.nil? || url.empty?
|
35
49
|
|
36
50
|
response = Spandx.http.get(url)
|
37
51
|
return unless Spandx.http.ok?(response)
|
38
52
|
|
39
|
-
license_for(response.body
|
53
|
+
license_for(response.body)
|
40
54
|
end
|
41
55
|
|
42
|
-
def match_name(content
|
56
|
+
def match_name(content)
|
57
|
+
return if content.tokens.size < 2 || content.tokens.size > 10
|
58
|
+
|
59
|
+
result = from_expression(content)
|
60
|
+
return result if result
|
61
|
+
|
62
|
+
threshold = 85.0
|
43
63
|
catalogue.find do |license|
|
44
|
-
|
45
|
-
score > 85
|
64
|
+
content.similar?(Content.new(license.name), threshold: threshold)
|
46
65
|
end
|
47
66
|
end
|
48
67
|
|
49
|
-
def match_body(content
|
68
|
+
def match_body(content)
|
50
69
|
score = Score.new(nil, nil)
|
51
|
-
threshold =
|
52
|
-
direction = algorithm == :levenshtein ? method(:min) : method(:max)
|
53
|
-
|
70
|
+
threshold = 89.0
|
54
71
|
catalogue.each do |license|
|
55
|
-
|
72
|
+
next if license.deprecated_license_id?
|
73
|
+
|
74
|
+
percentage = content.similarity_score(content_for(license))
|
75
|
+
next if percentage < threshold
|
76
|
+
next if score.score >= percentage
|
77
|
+
|
78
|
+
score.update(percentage, license)
|
56
79
|
end
|
57
80
|
score&.item
|
58
81
|
end
|
59
82
|
|
60
|
-
def
|
61
|
-
::Spandx::
|
62
|
-
end
|
63
|
-
|
64
|
-
def threshold_for(algorithm)
|
65
|
-
{
|
66
|
-
dice_coefficient: 89.0,
|
67
|
-
jaro_winkler: 80.0,
|
68
|
-
levenshtein: 80.0,
|
69
|
-
}[algorithm.to_sym]
|
83
|
+
def content_for(license)
|
84
|
+
::Spandx::Core::Content.new(Spandx.git[:spdx].read("text/#{license.id}.txt") || '')
|
70
85
|
end
|
71
86
|
|
72
|
-
def
|
73
|
-
|
74
|
-
return if percentage > threshold
|
75
|
-
return if score.score > 0.0 && score.score < percentage
|
76
|
-
|
77
|
-
score.update(percentage, other)
|
87
|
+
def unknown(text)
|
88
|
+
::Spandx::Spdx::License.unknown(text)
|
78
89
|
end
|
79
90
|
|
80
|
-
def
|
81
|
-
|
82
|
-
|
83
|
-
return if score.score >= percentage
|
84
|
-
|
85
|
-
score.update(percentage, other)
|
91
|
+
def from_expression(content)
|
92
|
+
Spandx::Spdx::CompositeLicense
|
93
|
+
.from_expression(content.raw, catalogue)
|
86
94
|
end
|
87
95
|
end
|
88
96
|
end
|
data/lib/spandx/core/http.rb
CHANGED
@@ -8,7 +8,12 @@ module Spandx
|
|
8
8
|
def initialize(driver: Http.default_driver, retries: 3)
|
9
9
|
@driver = driver
|
10
10
|
@retries = retries
|
11
|
-
|
11
|
+
semaphore = Mutex.new
|
12
|
+
@circuits = Hash.new do |hash, key|
|
13
|
+
semaphore.synchronize do
|
14
|
+
hash[key] = Circuit.new(key)
|
15
|
+
end
|
16
|
+
end
|
12
17
|
end
|
13
18
|
|
14
19
|
def get(uri, default: nil, escape: true)
|
@@ -22,7 +27,7 @@ module Spandx
|
|
22
27
|
client.get(escape ? Addressable::URI.escape(uri) : uri)
|
23
28
|
end
|
24
29
|
end
|
25
|
-
rescue *Net::Hippie::CONNECTION_ERRORS
|
30
|
+
rescue *Net::Hippie::CONNECTION_ERRORS, URI::InvalidURIError
|
26
31
|
default
|
27
32
|
end
|
28
33
|
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Spandx
|
4
|
+
module Core
|
5
|
+
class IndexFile
|
6
|
+
UINT_32_DIRECTIVE = 'V'
|
7
|
+
UINT_32_SIZE = 4
|
8
|
+
|
9
|
+
attr_reader :data_file, :path
|
10
|
+
|
11
|
+
def initialize(data_file)
|
12
|
+
@data_file = data_file
|
13
|
+
@path = Pathname.new("#{data_file.absolute_path}.idx")
|
14
|
+
@entries = size.positive? ? Array.new(size) : []
|
15
|
+
end
|
16
|
+
|
17
|
+
def each
|
18
|
+
total = path.size / UINT_32_SIZE
|
19
|
+
total.times do |n|
|
20
|
+
yield position_for(n)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def search(min: 0, max: size)
|
25
|
+
scan do |reader|
|
26
|
+
until min >= max
|
27
|
+
mid = mid_for(min, max)
|
28
|
+
row = reader.row(mid)
|
29
|
+
return unless row
|
30
|
+
|
31
|
+
comparison = yield row
|
32
|
+
return row if comparison.zero?
|
33
|
+
|
34
|
+
comparison.positive? ? (min = mid + 1) : (max = mid)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def size
|
40
|
+
path.exist? ? path.size / UINT_32_SIZE : 0
|
41
|
+
end
|
42
|
+
|
43
|
+
def position_for(row_number)
|
44
|
+
return if row_number > size
|
45
|
+
|
46
|
+
entry = entries[row_number]
|
47
|
+
return entry if entry
|
48
|
+
|
49
|
+
bytes = IO.binread(path, UINT_32_SIZE, offset_for(row_number))
|
50
|
+
entry = bytes.unpack1(UINT_32_DIRECTIVE)
|
51
|
+
entries[row_number] = entry
|
52
|
+
entry
|
53
|
+
end
|
54
|
+
|
55
|
+
def update!
|
56
|
+
return unless data_file.exist?
|
57
|
+
|
58
|
+
sort(data_file)
|
59
|
+
rebuild_index!
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
attr_reader :entries
|
65
|
+
|
66
|
+
def scan
|
67
|
+
data_file.open_file(mode: 'rb') do |io|
|
68
|
+
yield Relation.new(io, self)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def offset_for(row_number)
|
73
|
+
row_number * UINT_32_SIZE
|
74
|
+
end
|
75
|
+
|
76
|
+
def sort(data_file)
|
77
|
+
data_file.absolute_path.write(data_file.absolute_path.readlines.sort.uniq.join)
|
78
|
+
end
|
79
|
+
|
80
|
+
def rebuild_index!
|
81
|
+
data_file.open_file do |data_io|
|
82
|
+
File.open(path, mode: 'wb') do |index_io|
|
83
|
+
lines_in(data_io).each do |pos|
|
84
|
+
index_io.write([pos].pack(UINT_32_DIRECTIVE))
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def lines_in(io)
|
91
|
+
lines = [0]
|
92
|
+
io.seek(0)
|
93
|
+
lines << io.pos while io.gets
|
94
|
+
lines.pop if lines.size > 1
|
95
|
+
lines
|
96
|
+
end
|
97
|
+
|
98
|
+
def mid_for(min, max)
|
99
|
+
(max - min) == 1 ? min : (((max - min) / 2) + min)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|