spandx 0.11.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +20 -2
  3. data/README.md +59 -2
  4. data/exe/spandx +3 -4
  5. data/lib/spandx.rb +13 -32
  6. data/lib/spandx/cli.rb +1 -30
  7. data/lib/spandx/cli/commands/build.rb +41 -0
  8. data/lib/spandx/cli/commands/pull.rb +21 -0
  9. data/lib/spandx/cli/commands/scan.rb +17 -2
  10. data/lib/spandx/cli/main.rb +54 -0
  11. data/lib/spandx/core/cache.rb +3 -3
  12. data/lib/spandx/core/circuit.rb +34 -0
  13. data/lib/spandx/core/dependency.rb +32 -7
  14. data/lib/spandx/core/gateway.rb +19 -0
  15. data/lib/spandx/core/{database.rb → git.rb} +7 -2
  16. data/lib/spandx/core/guess.rb +42 -4
  17. data/lib/spandx/core/http.rb +30 -5
  18. data/lib/spandx/core/license_plugin.rb +54 -0
  19. data/lib/spandx/core/null_gateway.rb +11 -0
  20. data/lib/spandx/core/parser.rb +8 -25
  21. data/lib/spandx/core/plugin.rb +15 -0
  22. data/lib/spandx/core/registerable.rb +27 -0
  23. data/lib/spandx/core/report.rb +30 -6
  24. data/lib/spandx/core/table.rb +29 -0
  25. data/lib/spandx/dotnet/index.rb +10 -5
  26. data/lib/spandx/dotnet/nuget_gateway.rb +20 -31
  27. data/lib/spandx/dotnet/parsers/csproj.rb +3 -12
  28. data/lib/spandx/dotnet/parsers/packages_config.rb +2 -10
  29. data/lib/spandx/dotnet/parsers/sln.rb +2 -2
  30. data/lib/spandx/java/gateway.rb +37 -0
  31. data/lib/spandx/java/index.rb +84 -2
  32. data/lib/spandx/java/metadata.rb +6 -3
  33. data/lib/spandx/java/parsers/maven.rb +11 -21
  34. data/lib/spandx/js/parsers/npm.rb +39 -0
  35. data/lib/spandx/js/parsers/yarn.rb +30 -0
  36. data/lib/spandx/js/yarn_lock.rb +67 -0
  37. data/lib/spandx/js/yarn_pkg.rb +59 -0
  38. data/lib/spandx/php/packagist_gateway.rb +25 -0
  39. data/lib/spandx/php/parsers/composer.rb +33 -0
  40. data/lib/spandx/python/index.rb +78 -0
  41. data/lib/spandx/python/parsers/pipfile_lock.rb +12 -16
  42. data/lib/spandx/python/pypi.rb +91 -8
  43. data/lib/spandx/python/source.rb +5 -1
  44. data/lib/spandx/{rubygems → ruby}/gateway.rb +8 -9
  45. data/lib/spandx/{rubygems → ruby}/parsers/gemfile_lock.rb +14 -16
  46. data/lib/spandx/spdx/catalogue.rb +1 -1
  47. data/lib/spandx/spdx/license.rb +12 -2
  48. data/lib/spandx/version.rb +1 -1
  49. data/spandx.gemspec +4 -1
  50. metadata +66 -10
  51. data/lib/spandx/cli/command.rb +0 -65
  52. data/lib/spandx/cli/commands/index.rb +0 -36
  53. data/lib/spandx/cli/commands/index/build.rb +0 -32
  54. data/lib/spandx/cli/commands/index/update.rb +0 -27
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spandx
4
+ module Core
5
+ class Table
6
+ def initialize
7
+ @rows = []
8
+ @max_justification = 0
9
+ yield self
10
+ end
11
+
12
+ def <<(item)
13
+ row = item.to_a
14
+ new_max = row[0].size
15
+ @max_justification = new_max + 1 if new_max > @max_justification
16
+ @rows << row
17
+ end
18
+
19
+ def to_s
20
+ @rows.map do |row|
21
+ row.each.with_index.map do |cell, index|
22
+ justification = index.zero? ? @max_justification : 15
23
+ Array(cell).join(', ').ljust(justification, ' ')
24
+ end.join
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -4,10 +4,11 @@ module Spandx
4
4
  module Dotnet
5
5
  class Index
6
6
  DEFAULT_DIR = File.expand_path(File.join(Dir.home, '.local', 'share', 'spandx'))
7
- attr_reader :directory
7
+ attr_reader :directory, :name
8
8
 
9
9
  def initialize(directory: DEFAULT_DIR)
10
10
  @directory = directory ? File.expand_path(directory) : DEFAULT_DIR
11
+ @name = 'nuget'
11
12
  end
12
13
 
13
14
  def licenses_for(name:, version:)
@@ -19,7 +20,8 @@ module Spandx
19
20
  end
20
21
 
21
22
  def update!(catalogue:, output: StringIO.new)
22
- insert_latest(Spandx::Dotnet::NugetGateway.new(catalogue: catalogue)) do |page|
23
+ catalogue.version
24
+ insert_latest(Spandx::Dotnet::NugetGateway.new) do |page|
23
25
  output.puts "Checkpoint #{page}"
24
26
  checkpoint!(page)
25
27
  end
@@ -29,14 +31,17 @@ module Spandx
29
31
  private
30
32
 
31
33
  def files(pattern)
32
- Dir.glob(pattern, base: directory).sort.each do |file|
34
+ Dir.glob(File.join(directory, pattern)).sort.each do |file|
33
35
  fullpath = File.join(directory, file)
34
- yield fullpath unless File.directory?(fullpath)
36
+ next if File.directory?(fullpath)
37
+ next unless File.exist?(fullpath)
38
+
39
+ yield fullpath
35
40
  end
36
41
  end
37
42
 
38
43
  def sort_index!
39
- files('**/*') do |path|
44
+ files('**/nuget') do |path|
40
45
  next if File.extname(path) == '.checkpoints'
41
46
 
42
47
  IO.write(path, IO.readlines(path).sort.join)
@@ -5,23 +5,17 @@ module Spandx
5
5
  # https://api.nuget.org/v3-flatcontainer/#{name}/#{version}/#{name}.nuspec
6
6
  # https://api.nuget.org/v3-flatcontainer/#{package.name}/index.json
7
7
  # https://docs.microsoft.com/en-us/nuget/api/package-base-address-resource
8
- class NugetGateway
9
- attr_reader :host
10
-
11
- def initialize(http: Spandx.http, catalogue:)
8
+ class NugetGateway < ::Spandx::Core::Gateway
9
+ def initialize(http: Spandx.http)
12
10
  @http = http
13
- @guess = Core::Guess.new(catalogue)
14
- @host = 'api.nuget.org'
15
11
  end
16
12
 
17
- def licenses_for(name, version)
18
- found = cache.licenses_for(name: name, version: version)
19
- return found if found.any?
20
-
21
- document = nuspec_for(name, version)
13
+ def licenses_for(dependency)
14
+ extract_licenses_from(nuspec_for(dependency.name, dependency.version))
15
+ end
22
16
 
23
- extract_licenses_from(document) ||
24
- guess_licenses_from(document)
17
+ def matches?(dependency)
18
+ dependency.package_manager == :nuget
25
19
  end
26
20
 
27
21
  def each(start_page: 0)
@@ -34,21 +28,17 @@ module Spandx
34
28
 
35
29
  private
36
30
 
37
- attr_reader :http, :guess
38
-
39
- def cache
40
- @cache ||= ::Spandx::Core::Cache.new(:nuget, url: 'https://github.com/mokhan/spandx-index.git')
41
- end
31
+ attr_reader :http
42
32
 
43
33
  def each_page(start_page:)
44
- url = "https://#{host}/v3/catalog0/index.json"
34
+ url = 'https://api.nuget.org/v3/catalog0/index.json'
45
35
  items_from(fetch_json(url))
46
36
  .find_all { |page| page_number_from(page['@id']) >= start_page }
47
37
  .each { |page| yield fetch_json(page['@id']) }
48
38
  end
49
39
 
50
40
  def nuspec_url_for(name, version)
51
- "https://#{host}/v3-flatcontainer/#{name}/#{version}/#{name}.nuspec"
41
+ "https://api.nuget.org/v3-flatcontainer/#{name}/#{version}/#{name}.nuspec"
52
42
  end
53
43
 
54
44
  def nuspec_for(name, version)
@@ -62,20 +52,19 @@ module Spandx
62
52
  # TODO: Fix parsing https://github.com/NuGet/Home/wiki/Packaging-License-within-the-nupkg#license
63
53
  def extract_licenses_from(document)
64
54
  licenses = document.search('//package/metadata/license')
65
- licenses.map(&:text) if licenses.any?
66
- end
67
-
68
- def guess_licenses_from(document)
69
- document
70
- .search('//package/metadata/licenseUrl')
71
- .map { |node| guess_license_for(node.text) }
72
- .compact
55
+ if licenses.any?
56
+ licenses.map(&:text)
57
+ else
58
+ document
59
+ .search('//package/metadata/licenseUrl')
60
+ .map { |node| download_license(node.text) }
61
+ .compact
62
+ end
73
63
  end
74
64
 
75
- def guess_license_for(url)
65
+ def download_license(url)
76
66
  response = http.get(url)
77
-
78
- guess.license_for(response.body) if http.ok?(response)
67
+ http.ok?(response) ? response.body : url
79
68
  end
80
69
 
81
70
  def fetch_json(url)
@@ -4,7 +4,7 @@ module Spandx
4
4
  module Dotnet
5
5
  module Parsers
6
6
  class Csproj < ::Spandx::Core::Parser
7
- def self.matches?(filename)
7
+ def matches?(filename)
8
8
  ['.csproj', '.props'].include?(File.extname(filename))
9
9
  end
10
10
 
@@ -19,21 +19,12 @@ module Spandx
19
19
 
20
20
  def map_from(package_reference)
21
21
  ::Spandx::Core::Dependency.new(
22
+ package_manager: :nuget,
22
23
  name: package_reference.name,
23
24
  version: package_reference.version,
24
- licenses: licenses_for(package_reference)
25
+ meta: package_reference
25
26
  )
26
27
  end
27
-
28
- def licenses_for(package_reference)
29
- nuget
30
- .licenses_for(package_reference.name, package_reference.version)
31
- .map { |x| catalogue[x] }
32
- end
33
-
34
- def nuget
35
- @nuget ||= NugetGateway.new(catalogue: catalogue)
36
- end
37
28
  end
38
29
  end
39
30
  end
@@ -4,7 +4,7 @@ module Spandx
4
4
  module Dotnet
5
5
  module Parsers
6
6
  class PackagesConfig < ::Spandx::Core::Parser
7
- def self.matches?(filename)
7
+ def matches?(filename)
8
8
  filename.match?(/packages\.config/)
9
9
  end
10
10
 
@@ -19,20 +19,12 @@ module Spandx
19
19
  def map_from(node)
20
20
  name = attribute_for('id', node)
21
21
  version = attribute_for('version', node)
22
- ::Spandx::Core::Dependency.new(
23
- name: name,
24
- version: version,
25
- licenses: nuget.licenses_for(name, version).map { |x| catalogue[x] }
26
- )
22
+ ::Spandx::Core::Dependency.new(package_manager: :nuget, name: name, version: version)
27
23
  end
28
24
 
29
25
  def attribute_for(key, node)
30
26
  node.attribute(key)&.value&.strip || node.at_xpath("./#{key}")&.content&.strip
31
27
  end
32
-
33
- def nuget
34
- @nuget ||= NugetGateway.new(catalogue: catalogue)
35
- end
36
28
  end
37
29
  end
38
30
  end
@@ -4,14 +4,14 @@ module Spandx
4
4
  module Dotnet
5
5
  module Parsers
6
6
  class Sln < ::Spandx::Core::Parser
7
- def self.matches?(filename)
7
+ def matches?(filename)
8
8
  filename.match?(/.*\.sln/)
9
9
  end
10
10
 
11
11
  def parse(file_path)
12
12
  project_paths_from(file_path).map do |path|
13
13
  ::Spandx::Core::Parser
14
- .for(path, catalogue: catalogue)
14
+ .for(path)
15
15
  .parse(path)
16
16
  end.flatten
17
17
  end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spandx
4
+ module Java
5
+ class Gateway < ::Spandx::Core::Gateway
6
+ DEFAULT_SOURCE = 'https://repo.maven.apache.org/maven2'
7
+
8
+ attr_reader :http
9
+
10
+ def initialize(http: Spandx.http)
11
+ @http = http
12
+ end
13
+
14
+ def matches?(dependency)
15
+ dependency.package_manager == :maven
16
+ end
17
+
18
+ def licenses_for(dependency)
19
+ group_id, artifact_id = dependency.name.split(':')
20
+ metadata_for(
21
+ group_id: group_id,
22
+ artifact_id: artifact_id,
23
+ version: dependency.version
24
+ ).licenses
25
+ end
26
+
27
+ def metadata_for(group_id:, artifact_id:, version:)
28
+ ::Spandx::Java::Metadata.new(
29
+ artifact_id: artifact_id,
30
+ group_id: group_id,
31
+ version: version,
32
+ source: DEFAULT_SOURCE
33
+ )
34
+ end
35
+ end
36
+ end
37
+ end
@@ -1,13 +1,95 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'tempfile'
4
+
3
5
  module Spandx
4
6
  module Java
5
7
  class Index
6
- def initialize(directory:)
8
+ include Enumerable
9
+
10
+ attr_reader :name, :source
11
+
12
+ def initialize(directory:, source: 'https://repo.maven.apache.org/maven2')
7
13
  @directory = directory
14
+ @source = source
15
+ @name = 'maven'
16
+ end
17
+
18
+ def update!(catalogue:, output:)
19
+ each do |metadata|
20
+ name = "#{metadata.group_id}:#{metadata.artifact_id}:#{metadata.version}"
21
+ output.puts [name, metadata.licenses_from(catalogue)].inspect
22
+ end
23
+ end
24
+
25
+ def each
26
+ each_index_url do |url|
27
+ each_record_from("#{source}/.index/#{url}") do |record|
28
+ group_id, artifact_id, version = record['u'].split('|')
29
+ yield Metadata.new(artifact_id: artifact_id, group_id: group_id, version: version)
30
+ end
31
+ end
32
+ end
33
+
34
+ private
35
+
36
+ def each_record(io, record = {})
37
+ until io.eof?
38
+ field_count = io.read(4).unpack1('N').to_i # read 4 bytes for field count
39
+ field_count.times do |_n|
40
+ io.read(1) # flags
41
+ key = read_key(io)
42
+ value = read_value(io)
43
+ record[key] = value
44
+ end
45
+ yield record
46
+ end
47
+ end
48
+
49
+ def read_key(io)
50
+ length = io.read(2).unpack1('n').to_i # unsigned 16 bit int
51
+ io.read(length)
52
+ end
53
+
54
+ def read_value(io)
55
+ length = io.read(4).unpack1('N').to_i
56
+ io.read(length)
8
57
  end
9
58
 
10
- def update!(catalogue:, output:); end
59
+ def each_record_from(url)
60
+ stream_from(url) do |io|
61
+ # read version
62
+ io.read(1)
63
+ # read timestamp
64
+ io.read(8)
65
+ # read records
66
+ each_record(io) do |x|
67
+ yield x
68
+ end
69
+ end
70
+ end
71
+
72
+ def each_index_url
73
+ html = Nokogiri::HTML(http.get("#{source}/.index/").body)
74
+ html.css('a[href*="nexus-maven-repository-index"]').each do |anchor|
75
+ url = anchor['href']
76
+ yield url if url.match(/\d+\.gz$/)
77
+ end
78
+ end
79
+
80
+ def stream_from(url, path: Tempfile.new.path)
81
+ return unless system("curl --progress-bar \"#{url}\" > #{path}", exception: true)
82
+
83
+ Zlib::GzipReader.open(path) do |gz|
84
+ yield gz
85
+ end
86
+ ensure
87
+ File.unlink(path) if File.exist?(path)
88
+ end
89
+
90
+ def http
91
+ Spandx.http
92
+ end
11
93
  end
12
94
  end
13
95
  end
@@ -3,15 +3,18 @@
3
3
  module Spandx
4
4
  module Java
5
5
  class Metadata
6
- attr_reader :artifact_id, :group_id, :version
6
+ attr_reader :artifact_id, :group_id, :version, :source
7
7
 
8
- def initialize(artifact_id:, group_id:, version:)
8
+ def initialize(artifact_id:, group_id:, version:, source: 'https://repo.maven.apache.org/maven2')
9
9
  @artifact_id = artifact_id
10
10
  @group_id = group_id.tr('.', '/')
11
11
  @version = version
12
+ @source = source
12
13
  end
13
14
 
14
15
  def licenses
16
+ return [] unless pom
17
+
15
18
  pom.search('//licenses/license').map do |node|
16
19
  {
17
20
  name: node.at_xpath('./name').text,
@@ -28,7 +31,7 @@ module Spandx
28
31
 
29
32
  def spec_url
30
33
  [
31
- 'https://repo.maven.apache.org/maven2',
34
+ source,
32
35
  group_id,
33
36
  artifact_id,
34
37
  version,
@@ -4,39 +4,29 @@ module Spandx
4
4
  module Java
5
5
  module Parsers
6
6
  class Maven < ::Spandx::Core::Parser
7
- def self.matches?(filename)
7
+ def matches?(filename)
8
8
  File.basename(filename) == 'pom.xml'
9
9
  end
10
10
 
11
11
  def parse(filename)
12
12
  document = Nokogiri.XML(IO.read(filename)).tap(&:remove_namespaces!)
13
13
  document.search('//project/dependencies/dependency').map do |node|
14
- metadata = metadata_for(node)
15
- ::Spandx::Core::Dependency.new(
16
- name: metadata.artifact_id,
17
- version: metadata.version,
18
- licenses: metadata.licenses.map { |x| search_catalogue_for(x) }.compact
19
- )
14
+ map_from(node)
20
15
  end
21
16
  end
22
17
 
23
18
  private
24
19
 
25
- def metadata_for(node)
26
- ::Spandx::Java::Metadata.new(
27
- artifact_id: node.at_xpath('./artifactId').text,
28
- group_id: node.at_xpath('./groupId').text,
29
- version: node.at_xpath('./version').text
30
- )
31
- end
32
-
33
- def search_catalogue_for(license_hash)
34
- name = ::Spandx::Core::Content.new(license_hash[:name])
20
+ def map_from(node)
21
+ artifact_id = node.at_xpath('./artifactId').text
22
+ group_id = node.at_xpath('./groupId').text
23
+ version = node.at_xpath('./version').text
35
24
 
36
- catalogue.find do |license|
37
- score = name.similarity_score(::Spandx::Core::Content.new(license.name))
38
- score > 85
39
- end
25
+ ::Spandx::Core::Dependency.new(
26
+ package_manager: :maven,
27
+ name: "#{group_id}:#{artifact_id}",
28
+ version: version
29
+ )
40
30
  end
41
31
  end
42
32
  end