spandx 0.12.3 → 0.13.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +74 -25
- data/README.md +11 -7
- data/exe/spandx +1 -2
- data/ext/spandx/extconf.rb +5 -0
- data/ext/spandx/spandx.c +55 -0
- data/ext/spandx/spandx.h +6 -0
- data/lib/spandx.rb +6 -3
- data/lib/spandx/cli.rb +2 -0
- data/lib/spandx/cli/commands/build.rb +13 -2
- data/lib/spandx/cli/commands/scan.rb +11 -20
- data/lib/spandx/cli/main.rb +3 -2
- data/lib/spandx/core/cache.rb +38 -51
- data/lib/spandx/core/content.rb +5 -23
- data/lib/spandx/core/data_file.rb +66 -0
- data/lib/spandx/core/dependency.rb +47 -13
- data/lib/spandx/core/git.rb +8 -32
- data/lib/spandx/core/guess.rb +48 -40
- data/lib/spandx/core/http.rb +7 -2
- data/lib/spandx/core/index_file.rb +103 -0
- data/lib/spandx/core/license_plugin.rb +15 -4
- data/lib/spandx/core/parser.rb +10 -3
- data/lib/spandx/core/path_traversal.rb +35 -0
- data/lib/spandx/core/relation.rb +38 -0
- data/lib/spandx/core/report.rb +6 -12
- data/lib/spandx/core/spinner.rb +51 -0
- data/lib/spandx/dotnet/index.rb +21 -79
- data/lib/spandx/dotnet/parsers/csproj.rb +7 -7
- data/lib/spandx/dotnet/parsers/packages_config.rb +7 -7
- data/lib/spandx/dotnet/parsers/sln.rb +10 -13
- data/lib/spandx/dotnet/project_file.rb +3 -3
- data/lib/spandx/java/index.rb +5 -2
- data/lib/spandx/java/parsers/maven.rb +7 -7
- data/lib/spandx/js/parsers/npm.rb +6 -6
- data/lib/spandx/js/parsers/yarn.rb +7 -7
- data/lib/spandx/php/parsers/composer.rb +7 -7
- data/lib/spandx/python/index.rb +4 -33
- data/lib/spandx/python/parsers/pipfile_lock.rb +4 -4
- data/lib/spandx/python/pypi.rb +0 -2
- data/lib/spandx/python/source.rb +12 -0
- data/lib/spandx/ruby/parsers/gemfile_lock.rb +10 -9
- data/lib/spandx/spdx/catalogue.rb +5 -1
- data/lib/spandx/spdx/composite_license.rb +60 -0
- data/lib/spandx/spdx/expression.rb +114 -0
- data/lib/spandx/spdx/license.rb +4 -14
- data/lib/spandx/version.rb +1 -1
- data/spandx.gemspec +16 -10
- metadata +100 -30
- data/lib/spandx/core/null_gateway.rb +0 -11
- data/lib/spandx/core/table.rb +0 -29
- data/lib/spandx/core/thread_pool.rb +0 -38
data/lib/spandx/core/content.rb
CHANGED
@@ -13,30 +13,12 @@ module Spandx
|
|
13
13
|
@tokens ||= tokenize(canonicalize(raw)).to_set
|
14
14
|
end
|
15
15
|
|
16
|
-
def similar?(other,
|
17
|
-
|
18
|
-
when :dice_coefficient
|
19
|
-
similarity_score(other, algorithm: algorithm) > 89.0
|
20
|
-
when :levenshtein
|
21
|
-
similarity_score(other, algorithm: algorithm) < 3
|
22
|
-
when :jaro_winkler
|
23
|
-
similarity_score(other, algorithm: algorithm) > 89.0
|
24
|
-
end
|
16
|
+
def similar?(other, threshold: 89.0)
|
17
|
+
similarity_score(other) > threshold
|
25
18
|
end
|
26
19
|
|
27
|
-
def similarity_score(other
|
28
|
-
|
29
|
-
when :dice_coefficient
|
30
|
-
dice_coefficient(other)
|
31
|
-
when :levenshtein
|
32
|
-
require 'text'
|
33
|
-
|
34
|
-
Text::Levenshtein.distance(raw, other.raw, 100)
|
35
|
-
when :jaro_winkler
|
36
|
-
require 'jaro_winkler'
|
37
|
-
|
38
|
-
JaroWinkler.distance(raw, other.raw) * 100.0
|
39
|
-
end
|
20
|
+
def similarity_score(other)
|
21
|
+
dice_coefficient(other)
|
40
22
|
end
|
41
23
|
|
42
24
|
private
|
@@ -46,7 +28,7 @@ module Spandx
|
|
46
28
|
end
|
47
29
|
|
48
30
|
def tokenize(content)
|
49
|
-
content.to_s.scan(/[a-zA-Z]+/)
|
31
|
+
content.to_s.scan(/[a-zA-Z\d.]+/)
|
50
32
|
end
|
51
33
|
|
52
34
|
def blank?(content)
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Spandx
  module Core
    # A line-oriented CSV file on disk. Each row is written by #insert as
    # `name, version, licenses`, with the licenses joined by the '-|-' separator.
    # Lookups are delegated to an IndexFile built for this data file.
    class DataFile
      include Enumerable

      attr_reader :absolute_path

      # @param absolute_path [String, Pathname] location of the data file.
      #   The parent directory is created immediately; the file itself is not.
      def initialize(absolute_path)
        @absolute_path = Pathname.new(absolute_path)
        FileUtils.mkdir_p(@absolute_path.dirname)
      end

      # Yields every row of the file, parsed by CsvParser, in file order.
      # Yields nothing when the file does not exist.
      #
      # @return [Enumerator] when called without a block
      def each
        return enum_for(:each) unless block_given?
        return unless exist?

        open_file(mode: 'rb') do |io|
          while (line = io.gets)
            yield CsvParser.parse(line)
          end
        end
      end

      # Looks up the row for a package through the index.
      #
      # @param name [String, nil] package name
      # @param version [String, nil] package version
      # @return the matching row, or nil when either argument is blank, the
      #   file is missing, or the index search finds nothing
      def search(name:, version:)
        return if name.nil? || name.empty?
        # The version guard must test the version itself; it previously
        # re-tested `name`, so an empty version string slipped through.
        return if version.nil? || version.empty?
        return unless exist?

        term = "#{name}-#{version}"
        index.search do |row|
          term <=> "#{row[0]}-#{row[1]}"
        end
      end

      # Appends one row to the file. Silently ignores rows whose name or
      # version is nil or empty.
      #
      # @param name [String]
      # @param version [String]
      # @param licenses [Array] joined with '-|-' into a single CSV field
      def insert(name, version, licenses)
        return if [name, version].any? { |x| x.nil? || x.empty? }

        open_file(mode: 'a') do |io|
          io.write(to_csv([name, version, licenses.join('-|-')]))
        end
      end

      # @return [Boolean] whether the data file is present on disk
      def exist?
        absolute_path.exist?
      end

      # Opens the data file and yields the IO to the block.
      # A missing file (Errno::ENOENT) is logged and nil is returned instead
      # of raising.
      #
      # @param mode [String] mode string passed to Pathname#open
      def open_file(mode: 'rb')
        absolute_path.open(mode) { |io| yield io }
      rescue Errno::ENOENT => error
        Spandx.logger.error(error)
        nil
      end

      # @return [IndexFile] the memoised index for this data file
      def index
        @index ||= IndexFile.new(self)
      end

      private

      # Serialises one row; every field is quoted.
      def to_csv(array)
        array.to_csv(force_quotes: true)
      end
    end
  end
end
|
@@ -3,46 +3,80 @@
|
|
3
3
|
module Spandx
  module Core
    # A single dependency discovered in a lockfile/manifest: its name, its
    # version, the file it was found in and the licenses attached to it.
    class Dependency
      # Maps each parser class to the package manager symbol it reports.
      PACKAGE_MANAGERS = {
        Spandx::Dotnet::Parsers::Csproj => :nuget,
        Spandx::Dotnet::Parsers::PackagesConfig => :nuget,
        Spandx::Dotnet::Parsers::Sln => :nuget,
        Spandx::Java::Parsers::Maven => :maven,
        Spandx::Js::Parsers::Npm => :npm,
        Spandx::Js::Parsers::Yarn => :yarn,
        Spandx::Php::Parsers::Composer => :composer,
        Spandx::Python::Parsers::PipfileLock => :pypi,
        Spandx::Ruby::Parsers::GemfileLock => :rubygems,
      }.freeze
      attr_reader :path, :name, :version, :licenses, :meta

      # @param name [String, nil] falls back to the basename of +path+
      # @param version [String, nil] falls back to the mtime of +path+
      #   (epoch seconds, as a string)
      # @param path [String, Pathname] file the dependency was found in; it is
      #   resolved with Pathname#realpath, so it must exist on disk
      # @param meta [Hash] stored as-is
      def initialize(name:, version:, path:, meta: {})
        @path = Pathname.new(path).realpath
        @name = name || @path.basename.to_s
        @version = version || @path.mtime.to_i.to_s
        @licenses = []
        @meta = meta
      end

      # @return [Symbol, nil] the PACKAGE_MANAGERS entry for the class of the
      #   parser that Parser.for returns for +path+
      def package_manager
        PACKAGE_MANAGERS[Parser.for(path).class]
      end

      # Orders by name, then version, then path. A nil +other+ sorts first.
      #
      # Array#<=> compares element by element and returns nil when a pair is
      # incomparable, instead of raising NoMethodError from `nil.zero?`.
      #
      # @return [Integer, nil]
      def <=>(other)
        return 1 if other.nil?

        [name, version, path.to_s] <=> [other.name, other.version, other.path.to_s]
      end

      # Hash and equality are all derived from #to_s so that they agree.
      def hash
        to_s.hash
      end

      def ==(other)
        eql?(other)
      end

      def eql?(other)
        to_s == other.to_s
      end

      # @return [String] "name version path", memoised
      def to_s
        @to_s ||= [name, version, path].compact.join(' ')
      end

      def inspect
        "#<#{self.class} name=#{name} version=#{version} path=#{relative_path}>"
      end

      # @return [Array] name, version, license expression, relative path
      def to_a
        [name, version, license_expression, relative_path.to_s]
      end

      # @return [Hash] the same four fields as #to_a, keyed by symbol
      def to_h
        {
          name: name,
          version: version,
          licenses: license_expression,
          path: relative_path.to_s
        }
      end

      private

      # @param from [Pathname] base directory; defaults to the current
      #   working directory at call time
      def relative_path(from: Pathname.pwd)
        path.relative_path_from(from)
      end

      # Joins the ids of all licenses with ' AND '.
      def license_expression
        licenses.map(&:id).join(' AND ')
      end
    end
  end
end
|
data/lib/spandx/core/git.rb
CHANGED
@@ -3,37 +3,21 @@
|
|
3
3
|
module Spandx
|
4
4
|
module Core
|
5
5
|
class Git
|
6
|
-
attr_reader :
|
6
|
+
attr_reader :root, :url
|
7
7
|
|
8
8
|
def initialize(url:)
|
9
9
|
@url = url
|
10
|
-
@
|
11
|
-
end
|
12
|
-
|
13
|
-
def update!
|
14
|
-
dotgit? ? pull! : clone!
|
15
|
-
end
|
16
|
-
|
17
|
-
def expand_path(relative_path)
|
18
|
-
File.join(path, relative_path)
|
10
|
+
@root = path_for(url)
|
19
11
|
end
|
20
12
|
|
21
13
|
# Reads a file from the checked-out repository.
#
# @param path [String] path relative to the repository +root+
# @return [String, nil] the file contents, or nil when +path+ does not name
#   a regular file under +root+
def read(path)
  full_path = File.join(root, path)

  # File.read never interprets a leading "|" as a command the way IO.read
  # does, and File.file? keeps directories from raising Errno::EISDIR.
  File.read(full_path) if File.file?(full_path)
end
|
27
18
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
full_path = expand_path(path)
|
32
|
-
return unless File.exist?(full_path)
|
33
|
-
|
34
|
-
File.open(full_path, mode) do |io|
|
35
|
-
yield io
|
36
|
-
end
|
19
|
+
def update!
|
20
|
+
dotgit? ? pull! : clone!
|
37
21
|
end
|
38
22
|
|
39
23
|
private
|
@@ -45,26 +29,18 @@ module Spandx
|
|
45
29
|
end
|
46
30
|
|
47
31
|
def dotgit?
|
48
|
-
File.directory?(File.join(
|
32
|
+
File.directory?(File.join(root, '.git'))
|
49
33
|
end
|
50
34
|
|
51
35
|
def clone!
|
52
|
-
system('git', 'clone', '--quiet', url,
|
36
|
+
system('git', 'clone', '--quiet', '--depth=1', '--single-branch', '--branch', 'master', url, root)
|
53
37
|
end
|
54
38
|
|
55
39
|
def pull!
|
56
|
-
|
40
|
+
Dir.chdir(root) do
|
57
41
|
system('git', 'pull', '--no-rebase', '--quiet', 'origin', 'master')
|
58
42
|
end
|
59
43
|
end
|
60
|
-
|
61
|
-
def within
|
62
|
-
Dir.chdir(path) do
|
63
|
-
yield
|
64
|
-
end
|
65
|
-
end
|
66
44
|
end
|
67
|
-
|
68
|
-
Database = Git
|
69
45
|
end
|
70
46
|
end
|
data/lib/spandx/core/guess.rb
CHANGED
@@ -9,80 +9,88 @@ module Spandx
|
|
9
9
|
@catalogue = catalogue
|
10
10
|
end
|
11
11
|
|
12
|
-
def license_for(raw
|
13
|
-
|
12
|
+
def license_for(raw)
|
13
|
+
case raw
|
14
|
+
when Hash
|
15
|
+
from_hash(raw)
|
16
|
+
when Array
|
17
|
+
from_array(raw)
|
18
|
+
else
|
19
|
+
from_string(raw)
|
20
|
+
end
|
14
21
|
end
|
15
22
|
|
16
23
|
private
|
17
24
|
|
18
|
-
def from_hash(hash
|
19
|
-
from_string(hash[:name]
|
20
|
-
from_url(hash[:url]
|
25
|
+
def from_hash(hash)
|
26
|
+
from_string(hash[:name]) ||
|
27
|
+
from_url(hash[:url]) ||
|
21
28
|
unknown(hash[:name] || hash[:url])
|
22
29
|
end
|
23
30
|
|
24
|
-
def
|
31
|
+
def from_array(array)
|
32
|
+
from_string(array.join(' AND '))
|
33
|
+
end
|
34
|
+
|
35
|
+
def from_string(raw)
|
36
|
+
return if raw.nil?
|
37
|
+
|
25
38
|
content = Content.new(raw)
|
26
39
|
|
27
40
|
catalogue[raw] ||
|
28
|
-
|
29
|
-
|
41
|
+
catalogue[raw.split(' ').join('-')] ||
|
42
|
+
match_name(content) ||
|
43
|
+
match_body(content) ||
|
30
44
|
unknown(raw)
|
31
45
|
end
|
32
46
|
|
33
|
-
def from_url(url
|
47
|
+
def from_url(url)
|
34
48
|
return if url.nil? || url.empty?
|
35
49
|
|
36
50
|
response = Spandx.http.get(url)
|
37
51
|
return unless Spandx.http.ok?(response)
|
38
52
|
|
39
|
-
license_for(response.body
|
53
|
+
license_for(response.body)
|
40
54
|
end
|
41
55
|
|
42
|
-
def match_name(content
|
56
|
+
def match_name(content)
|
57
|
+
return if content.tokens.size < 2 || content.tokens.size > 10
|
58
|
+
|
59
|
+
result = from_expression(content)
|
60
|
+
return result if result
|
61
|
+
|
62
|
+
threshold = 85.0
|
43
63
|
catalogue.find do |license|
|
44
|
-
|
45
|
-
score > 85
|
64
|
+
content.similar?(Content.new(license.name), threshold: threshold)
|
46
65
|
end
|
47
66
|
end
|
48
67
|
|
49
|
-
def match_body(content
|
68
|
+
def match_body(content)
|
50
69
|
score = Score.new(nil, nil)
|
51
|
-
threshold =
|
52
|
-
direction = algorithm == :levenshtein ? method(:min) : method(:max)
|
53
|
-
|
70
|
+
threshold = 89.0
|
54
71
|
catalogue.each do |license|
|
55
|
-
|
72
|
+
next if license.deprecated_license_id?
|
73
|
+
|
74
|
+
percentage = content.similarity_score(content_for(license))
|
75
|
+
next if percentage < threshold
|
76
|
+
next if score.score >= percentage
|
77
|
+
|
78
|
+
score.update(percentage, license)
|
56
79
|
end
|
57
80
|
score&.item
|
58
81
|
end
|
59
82
|
|
60
|
-
def
|
61
|
-
::Spandx::
|
62
|
-
end
|
63
|
-
|
64
|
-
def threshold_for(algorithm)
|
65
|
-
{
|
66
|
-
dice_coefficient: 89.0,
|
67
|
-
jaro_winkler: 80.0,
|
68
|
-
levenshtein: 80.0,
|
69
|
-
}[algorithm.to_sym]
|
83
|
+
def content_for(license)
|
84
|
+
::Spandx::Core::Content.new(Spandx.git[:spdx].read("text/#{license.id}.txt") || '')
|
70
85
|
end
|
71
86
|
|
72
|
-
def
|
73
|
-
|
74
|
-
return if percentage > threshold
|
75
|
-
return if score.score > 0.0 && score.score < percentage
|
76
|
-
|
77
|
-
score.update(percentage, other)
|
87
|
+
def unknown(text)
|
88
|
+
::Spandx::Spdx::License.unknown(text)
|
78
89
|
end
|
79
90
|
|
80
|
-
def
|
81
|
-
|
82
|
-
|
83
|
-
return if score.score >= percentage
|
84
|
-
|
85
|
-
score.update(percentage, other)
|
91
|
+
def from_expression(content)
|
92
|
+
Spandx::Spdx::CompositeLicense
|
93
|
+
.from_expression(content.raw, catalogue)
|
86
94
|
end
|
87
95
|
end
|
88
96
|
end
|
data/lib/spandx/core/http.rb
CHANGED
@@ -8,7 +8,12 @@ module Spandx
|
|
8
8
|
def initialize(driver: Http.default_driver, retries: 3)
|
9
9
|
@driver = driver
|
10
10
|
@retries = retries
|
11
|
-
|
11
|
+
semaphore = Mutex.new
|
12
|
+
@circuits = Hash.new do |hash, key|
|
13
|
+
semaphore.synchronize do
|
14
|
+
hash[key] = Circuit.new(key)
|
15
|
+
end
|
16
|
+
end
|
12
17
|
end
|
13
18
|
|
14
19
|
def get(uri, default: nil, escape: true)
|
@@ -22,7 +27,7 @@ module Spandx
|
|
22
27
|
client.get(escape ? Addressable::URI.escape(uri) : uri)
|
23
28
|
end
|
24
29
|
end
|
25
|
-
rescue *Net::Hippie::CONNECTION_ERRORS
|
30
|
+
rescue *Net::Hippie::CONNECTION_ERRORS, URI::InvalidURIError
|
26
31
|
default
|
27
32
|
end
|
28
33
|
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Spandx
  module Core
    # Sidecar index ("<data file>.idx") for a data file. It stores the byte
    # offset of every line of the data file as a packed unsigned 32-bit
    # little-endian integer, so a row can be located without scanning the
    # whole file.
    class IndexFile
      UINT_32_DIRECTIVE = 'V'
      UINT_32_SIZE = 4

      attr_reader :data_file, :path

      # @param data_file [#absolute_path, #exist?, #open_file] the file to index
      def initialize(data_file)
        @data_file = data_file
        @path = Pathname.new("#{data_file.absolute_path}.idx")
        @entries = Array.new(size)
      end

      # Yields the byte offset of every indexed row, in index order.
      # Yields nothing when the index file does not exist.
      #
      # @return [Enumerator] when called without a block
      def each
        return enum_for(:each) unless block_given?

        size.times do |n|
          yield position_for(n)
        end
      end

      # Binary search over the rows of the data file.
      #
      # The block receives a row and returns the comparison of the wanted
      # term against it: zero for a match, positive to continue in the upper
      # half, otherwise the lower half.
      #
      # @return the matching row, or nil when nothing matches
      def search(min: 0, max: size)
        scan do |reader|
          until min >= max
            mid = mid_for(min, max)
            row = reader.row(mid)
            return unless row

            comparison = yield row
            return row if comparison.zero?

            comparison.positive? ? (min = mid + 1) : (max = mid)
          end
        end
      end

      # @return [Integer] number of offsets stored in the index file,
      #   0 when the index file does not exist
      def size
        path.exist? ? path.size / UINT_32_SIZE : 0
      end

      # @param row_number [Integer] zero-based row number
      # @return [Integer, nil] byte offset of the row in the data file, or
      #   nil when +row_number+ is outside 0...size
      def position_for(row_number)
        # Valid rows are 0...size; `row_number == size` would read at EOF,
        # where IO.binread returns nil.
        return if row_number.negative? || row_number >= size

        entry = entries[row_number]
        return entry if entry

        bytes = IO.binread(path, UINT_32_SIZE, offset_for(row_number))
        entry = bytes.unpack1(UINT_32_DIRECTIVE)
        entries[row_number] = entry
        entry
      end

      # Sorts and de-duplicates the data file, then rewrites the index.
      # Does nothing when the data file does not exist.
      def update!
        return unless data_file.exist?

        sort(data_file)
        rebuild_index!
        # Offsets memoised before the rewrite no longer match the file.
        @entries = Array.new(size)
      end

      private

      attr_reader :entries

      # Opens the data file and yields a Relation reader bound to this index.
      def scan
        data_file.open_file(mode: 'rb') do |io|
          yield Relation.new(io, self)
        end
      end

      # Byte offset inside the index file of the entry for +row_number+.
      def offset_for(row_number)
        row_number * UINT_32_SIZE
      end

      # Rewrites the data file with its lines sorted and duplicates removed.
      def sort(data_file)
        data_file.absolute_path.write(data_file.absolute_path.readlines.sort.uniq.join)
      end

      # Writes one packed offset per data-file line to the index file.
      def rebuild_index!
        data_file.open_file do |data_io|
          File.open(path, mode: 'wb') do |index_io|
            lines_in(data_io).each do |pos|
              index_io.write([pos].pack(UINT_32_DIRECTIVE))
            end
          end
        end
      end

      # @return [Array<Integer>] starting byte offset of every line in +io+;
      #   empty for an empty file
      def lines_in(io)
        positions = []
        io.seek(0)
        until io.eof?
          positions << io.pos
          io.gets
        end
        positions
      end

      # Midpoint of the half-open range min...max.
      def mid_for(min, max)
        (max - min) == 1 ? min : (((max - min) / 2) + min)
      end
    end
  end
end
|