spandx 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4da284f37427fcbe7d30317385ddf6e6eab0100046ccb8c0987bd4031072ad45
4
- data.tar.gz: 9c14d614550b6f34ad01e85a47ee4445281c94455b1902dd340ac47ec995a529
3
+ metadata.gz: 4e7053e8bfb9564f28b5ebe1b4b2250966eaf94f0edc22503e76d6d2fca76427
4
+ data.tar.gz: fb273d38a70bd4fa58d674c1a4f9032e76c0a1967c6d1390e78aa7671b37c3ca
5
5
  SHA512:
6
- metadata.gz: b12af7d885681938adc39cd0ae9d2b4987eccd87e95d200ced84c8d8715101f6d99e32498e62f567b7ad35035ac1570071418636e844b3bdfccfb35ddbeafb6f
7
- data.tar.gz: 5bb060dc2fb56a1bc980b6bca8e65c9f7fd68f8b4aa414fd2edfe9ba974ea23b01721a7c24f6755b26fe3596798c2fe3ff917a23687a1359cda17ab3bbb8696d
6
+ metadata.gz: f71bcfd8fd4fc3e0ce4081db8f397f3051ff58a1e76e5c214c410487b913163b584547b5a179d5140a45fb81be061676d3b65d52ad31ca7ae09389c69710d40a
7
+ data.tar.gz: c56ea13c20421a44e2dc29a33420187c1dbe017b0c02d38c10aa3d65cd6ddd5764ccbca3f1f9e65662b41bb3bd02bb322aea60f560a85551f5d8116ac288f792
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- Version 0.1.5
1
+ Version 0.1.6
2
2
 
3
3
  # Changelog
4
4
 
@@ -8,8 +8,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
8
8
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
9
9
 
10
10
  ## [Unreleased]
11
+ ### Added
11
12
  - nil
12
13
 
14
+ ## [0.1.6] - 2020-01-27
15
+ ### Added
16
+ - Scan csproj files that depend on other project files
17
+ - Replace licensee dependency with simple tokenizer
18
+ - Fetch license data from git clone of SPDX license list data
19
+
13
20
  ## [0.1.5] - 2020-01-23
14
21
  ### Added
15
22
  - Exclude `nil` licenses from report
@@ -36,7 +43,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
36
43
  ### Added
37
44
  - Provide ruby API to the latest SPDX catalogue.
38
45
 
39
- [Unreleased]: https://github.com/mokhan/spandx/compare/v0.1.5...HEAD
46
+ [Unreleased]: https://github.com/mokhan/spandx/compare/v0.1.6...HEAD
47
+ [0.1.6]: https://github.com/mokhan/spandx/compare/v0.1.5...v0.1.6
40
48
  [0.1.5]: https://github.com/mokhan/spandx/compare/v0.1.4...v0.1.5
41
49
  [0.1.4]: https://github.com/mokhan/spandx/compare/v0.1.3...v0.1.4
42
50
  [0.1.3]: https://github.com/mokhan/spandx/compare/v0.1.2...v0.1.3
@@ -18,17 +18,25 @@ module Spandx
18
18
 
19
19
  def each
20
20
  licenses.each do |license|
21
- yield license if present?(license.id)
21
+ yield license
22
22
  end
23
23
  end
24
24
 
25
25
  class << self
26
26
  def latest(gateway: ::Spandx::Gateways::Spdx.new)
27
- gateway.fetch
27
+ new(gateway.fetch)
28
+ end
29
+
30
+ def from_json(json)
31
+ new(JSON.parse(json, symbolize_names: true))
28
32
  end
29
33
 
30
34
  def from_file(path)
31
- new(JSON.parse(IO.read(path), symbolize_names: true))
35
+ from_json(IO.read(path))
36
+ end
37
+
38
+ def from_git
39
+ from_json(Spandx.db.read('json/licenses.json'))
32
40
  end
33
41
 
34
42
  def empty
@@ -41,11 +49,7 @@ module Spandx
41
49
  attr_reader :catalogue
42
50
 
43
51
  def licenses
44
- @licenses ||= identity_map.values
45
- end
46
-
47
- def map_from(license_hash)
48
- License.new(license_hash)
52
+ @licenses ||= identity_map.values.sort
49
53
  end
50
54
 
51
55
  def present?(item)
@@ -55,7 +59,7 @@ module Spandx
55
59
  def identity_map
56
60
  @identity_map ||=
57
61
  catalogue.fetch(:licenses, []).each_with_object({}) do |hash, memo|
58
- license = map_from(hash)
62
+ license = License.new(hash)
59
63
  memo[license.id] = license if present?(license.id)
60
64
  end
61
65
  end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spandx
4
+ class Content
5
+ attr_reader :tokens, :threshold
6
+
7
+ def initialize(content, threshold: 89.0)
8
+ @threshold = threshold
9
+ @tokens = tokenize(canonicalize(content)).to_set
10
+ end
11
+
12
+ def similar?(other)
13
+ similarity_score(other) > threshold
14
+ end
15
+
16
+ # https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#Ruby
17
+ def similarity_score(other)
18
+ overlap = (tokens & other.tokens).size
19
+ total = tokens.size + other.tokens.size
20
+ 100.0 * (overlap * 2.0 / total)
21
+ end
22
+
23
+ private
24
+
25
+ def canonicalize(content)
26
+ content&.downcase
27
+ end
28
+
29
+ def tokenize(content)
30
+ content.to_s.scan(/[a-zA-Z]+/)
31
+ end
32
+
33
+ def blank?(content)
34
+ content.nil? || content.chomp.strip.empty?
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spandx
4
+ class Database
5
+ attr_reader :path, :url
6
+
7
+ def initialize(url:)
8
+ @url = url
9
+ @path = path_for(url)
10
+ end
11
+
12
+ def update!
13
+ dotgit? ? pull! : clone!
14
+ end
15
+
16
+ def read(file)
17
+ IO.read(File.join(path, file))
18
+ end
19
+
20
+ private
21
+
22
+ def path_for(url)
23
+ uri = URI.parse(url)
24
+ name = uri.path.gsub(/\.git$/, '')
25
+ File.expand_path(File.join(Dir.home, '.local', 'share', name))
26
+ end
27
+
28
+ def dotgit?
29
+ File.directory?(File.join(path, '.git'))
30
+ end
31
+
32
+ def clone!
33
+ system('git', 'clone', '--quiet', url, path)
34
+ end
35
+
36
+ def pull!
37
+ within do
38
+ system('git', 'pull', '--no-rebase', '--quiet', 'origin', 'master')
39
+ end
40
+ end
41
+
42
+ def within
43
+ Dir.chdir(path) do
44
+ yield
45
+ end
46
+ end
47
+ end
48
+ end
@@ -6,8 +6,9 @@ module Spandx
6
6
  # https://api.nuget.org/v3-flatcontainer/#{package.name}/index.json
7
7
  # https://docs.microsoft.com/en-us/nuget/api/package-base-address-resource
8
8
  class Nuget
9
- def initialize(http: Spandx.http)
9
+ def initialize(http: Spandx.http, catalogue:)
10
10
  @http = http
11
+ @catalogue = catalogue
11
12
  end
12
13
 
13
14
  def licenses_for(name, version)
@@ -19,7 +20,7 @@ module Spandx
19
20
 
20
21
  private
21
22
 
22
- attr_reader :http
23
+ attr_reader :http, :catalogue
23
24
 
24
25
  def nuspec_url_for(name, version)
25
26
  "https://api.nuget.org/v3-flatcontainer/#{name}/#{version}/#{name}.nuspec"
@@ -29,26 +30,20 @@ module Spandx
29
30
  from_xml(http.get(nuspec_url_for(name, version)).body)
30
31
  end
31
32
 
32
- def guess_license_in(content)
33
- Licensee::ProjectFiles::LicenseFile.new(content).license.key.upcase
34
- end
35
-
36
33
  def from_xml(xml)
37
34
  Nokogiri::XML(xml).tap(&:remove_namespaces!)
38
35
  end
39
36
 
40
37
  def exact_licenses_from(document)
41
- if (licenses = document.search('//package/metadata/license')).any?
42
- return licenses.map(&:text)
43
- end
44
-
45
- nil
38
+ licenses = document.search('//package/metadata/license')
39
+ licenses.map(&:text) if licenses.any?
46
40
  end
47
41
 
48
42
  def guess_licenses_from(document)
43
+ guess = Guess.new(catalogue)
49
44
  document
50
45
  .search('//package/metadata/licenseUrl')
51
- .map { |node| guess_license_in(Spandx.http.get(node.text).body) }
46
+ .map { |node| guess.license_for(http.get(node.text).body) }
52
47
  end
53
48
  end
54
49
  end
@@ -5,7 +5,7 @@ module Spandx
5
5
  class Spdx
6
6
  URL = 'https://spdx.org/licenses/licenses.json'
7
7
 
8
- def fetch(url: URL, http: Spandx.http, default: Catalogue.empty)
8
+ def fetch(url: URL, http: Spandx.http, default: {})
9
9
  response = http.get(url, default: default)
10
10
  http.ok?(response) ? parse(response.body) : default
11
11
  end
@@ -13,7 +13,7 @@ module Spandx
13
13
  private
14
14
 
15
15
  def parse(json)
16
- Catalogue.new(JSON.parse(json, symbolize_names: true))
16
+ JSON.parse(json, symbolize_names: true)
17
17
  end
18
18
  end
19
19
  end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spandx
4
+ class Guess
5
+ class Score
6
+ include Comparable
7
+
8
+ attr_reader :score, :item
9
+
10
+ def initialize(score, item)
11
+ @score = score
12
+ @item = item
13
+ end
14
+
15
+ def <=>(other)
16
+ score <=> other.score
17
+ end
18
+
19
+ def to_s
20
+ "#{score}: #{item}"
21
+ end
22
+ end
23
+
24
+ attr_reader :catalogue
25
+
26
+ def initialize(catalogue)
27
+ @catalogue = catalogue
28
+ end
29
+
30
+ def license_for(raw_content)
31
+ content = Content.new(raw_content)
32
+
33
+ max_score = nil
34
+ catalogue.each do |license|
35
+ next if license.deprecated_license_id?
36
+
37
+ percentage = content.similarity_score(license.content)
38
+ if max_score.nil? || percentage > max_score.score
39
+ max_score = Score.new(percentage, license)
40
+ end
41
+ end
42
+ max_score.item.id
43
+ end
44
+ end
45
+ end
@@ -59,5 +59,17 @@ module Spandx
59
59
  def reference_number=(value)
60
60
  attributes[:referenceNumber] = value
61
61
  end
62
+
63
+ def content
64
+ @content ||= Content.new(Spandx.db.read("text/#{id}.txt"))
65
+ end
66
+
67
+ def <=>(other)
68
+ id <=> other.id
69
+ end
70
+
71
+ def to_s
72
+ id
73
+ end
62
74
  end
63
75
  end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spandx
4
+ module Parsers
5
+ class Csproj
6
+ PackageReference = Struct.new(:name, :version, keyword_init: true)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spandx
4
+ module Parsers
5
+ class Csproj
6
+ class ProjectFile
7
+ attr_reader :catalogue, :document, :nuget
8
+
9
+ def initialize(path)
10
+ @path = path
11
+ @dir = File.dirname(path)
12
+ @document = Nokogiri::XML(IO.read(path))
13
+ end
14
+
15
+ def package_references
16
+ other = project_references.map(&:package_references).flatten
17
+ other + document.search('//PackageReference').map do |node|
18
+ PackageReference.new(
19
+ name: attribute_for('Include', node),
20
+ version: attribute_for('Version', node)
21
+ )
22
+ end
23
+ end
24
+
25
+ private
26
+
27
+ def project_references
28
+ document.search('//ProjectReference').map do |node|
29
+ relative_project_path = node.attribute('Include').value.strip.tr('\\', '/')
30
+ absolute_project_path = File.expand_path(File.join(@dir, relative_project_path))
31
+ self.class.new(absolute_project_path)
32
+ end
33
+ end
34
+
35
+ def attribute_for(key, node)
36
+ node.attribute(key)&.value&.strip ||
37
+ node.at_xpath("./#{key}")&.content&.strip
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -8,32 +8,33 @@ module Spandx
8
8
  end
9
9
 
10
10
  def parse(lockfile)
11
- document = from_xml(IO.read(lockfile))
12
- document.search('//PackageReference').map do |node|
13
- name = attribute_for('Include', node)
14
- version = attribute_for('Version', node)
15
- Dependency.new(
16
- name: name,
17
- version: version,
18
- licenses: nuget.licenses_for(name, version).map { |x| catalogue[x] }
19
- )
20
- end
11
+ ProjectFile
12
+ .new(lockfile)
13
+ .package_references
14
+ .map { |x| map_from(x) }
21
15
  end
22
16
 
23
17
  private
24
18
 
25
- def from_xml(xml)
26
- Nokogiri::XML(xml)
19
+ def map_from(package_reference)
20
+ Dependency.new(
21
+ name: package_reference.name,
22
+ version: package_reference.version,
23
+ licenses: licenses_for(package_reference)
24
+ )
27
25
  end
28
26
 
29
- def attribute_for(key, node)
30
- node.attribute(key)&.value&.strip ||
31
- node.at_xpath("./#{key}")&.content&.strip
27
+ def licenses_for(package_reference)
28
+ nuget
29
+ .licenses_for(package_reference.name, package_reference.version)
30
+ .map { |x| catalogue[x] }
32
31
  end
33
32
 
34
33
  def nuget
35
- @nuget ||= Gateways::Nuget.new
34
+ @nuget ||= Gateways::Nuget.new(catalogue: catalogue)
36
35
  end
37
36
  end
38
37
  end
39
38
  end
39
+ require 'spandx/parsers/csproj/package_reference'
40
+ require 'spandx/parsers/csproj/project_file'
@@ -30,7 +30,7 @@ module Spandx
30
30
  end
31
31
 
32
32
  def nuget
33
- @nuget ||= Gateways::Nuget.new
33
+ @nuget ||= Gateways::Nuget.new(catalogue: catalogue)
34
34
  end
35
35
  end
36
36
  end
@@ -15,7 +15,7 @@ module Spandx
15
15
  end
16
16
 
17
17
  class << self
18
- def for(path, catalogue: Spandx::Catalogue.latest)
18
+ def for(path, catalogue: Spandx::Catalogue.from_git)
19
19
  result = ::Spandx::Parsers::Base.find do |x|
20
20
  x.matches?(File.basename(path))
21
21
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spandx
4
- VERSION = '0.1.5'
4
+ VERSION = '0.1.6'
5
5
  end
data/lib/spandx.rb CHANGED
@@ -3,18 +3,20 @@
3
3
  require 'bundler'
4
4
  require 'forwardable'
5
5
  require 'json'
6
- require 'licensee'
7
6
  require 'net/hippie'
8
7
  require 'nokogiri'
9
8
  require 'pathname'
10
9
 
11
10
  require 'spandx/catalogue'
11
+ require 'spandx/content'
12
+ require 'spandx/database'
12
13
  require 'spandx/dependency'
13
14
  require 'spandx/gateways/http'
14
15
  require 'spandx/gateways/nuget'
15
16
  require 'spandx/gateways/pypi'
16
17
  require 'spandx/gateways/rubygems'
17
18
  require 'spandx/gateways/spdx'
19
+ require 'spandx/guess'
18
20
  require 'spandx/license'
19
21
  require 'spandx/parsers'
20
22
  require 'spandx/report'
@@ -31,5 +33,9 @@ module Spandx
31
33
  def http
32
34
  @http ||= Spandx::Gateways::Http.new
33
35
  end
36
+
37
+ def db
38
+ @db ||= Spandx::Database.new(url: 'https://github.com/spdx/license-list-data.git').tap(&:update!)
39
+ end
34
40
  end
35
41
  end
data/spandx.gemspec CHANGED
@@ -29,8 +29,8 @@ Gem::Specification.new do |spec|
29
29
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
30
30
  spec.require_paths = ['lib']
31
31
 
32
+ spec.required_ruby_version = '>= 2.5.0'
32
33
  spec.add_dependency 'bundler', '>= 1.16', '< 3.0.0'
33
- spec.add_dependency 'licensee', '~> 9.13'
34
34
  spec.add_dependency 'net-hippie', '~> 0.3'
35
35
  spec.add_dependency 'nokogiri', '~> 1.10'
36
36
  spec.add_dependency 'thor', '~> 0.1'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spandx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - mo khan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-01-23 00:00:00.000000000 Z
11
+ date: 2020-01-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -30,20 +30,6 @@ dependencies:
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
32
  version: 3.0.0
33
- - !ruby/object:Gem::Dependency
34
- name: licensee
35
- requirement: !ruby/object:Gem::Requirement
36
- requirements:
37
- - - "~>"
38
- - !ruby/object:Gem::Version
39
- version: '9.13'
40
- type: :runtime
41
- prerelease: false
42
- version_requirements: !ruby/object:Gem::Requirement
43
- requirements:
44
- - - "~>"
45
- - !ruby/object:Gem::Version
46
- version: '9.13'
47
33
  - !ruby/object:Gem::Dependency
48
34
  name: net-hippie
49
35
  requirement: !ruby/object:Gem::Requirement
@@ -201,16 +187,21 @@ files:
201
187
  - lib/spandx/cli.rb
202
188
  - lib/spandx/command.rb
203
189
  - lib/spandx/commands/scan.rb
190
+ - lib/spandx/content.rb
191
+ - lib/spandx/database.rb
204
192
  - lib/spandx/dependency.rb
205
193
  - lib/spandx/gateways/http.rb
206
194
  - lib/spandx/gateways/nuget.rb
207
195
  - lib/spandx/gateways/pypi.rb
208
196
  - lib/spandx/gateways/rubygems.rb
209
197
  - lib/spandx/gateways/spdx.rb
198
+ - lib/spandx/guess.rb
210
199
  - lib/spandx/license.rb
211
200
  - lib/spandx/parsers.rb
212
201
  - lib/spandx/parsers/base.rb
213
202
  - lib/spandx/parsers/csproj.rb
203
+ - lib/spandx/parsers/csproj/package_reference.rb
204
+ - lib/spandx/parsers/csproj/project_file.rb
214
205
  - lib/spandx/parsers/gemfile_lock.rb
215
206
  - lib/spandx/parsers/packages_config.rb
216
207
  - lib/spandx/parsers/pipfile_lock.rb
@@ -232,7 +223,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
232
223
  requirements:
233
224
  - - ">="
234
225
  - !ruby/object:Gem::Version
235
- version: '0'
226
+ version: 2.5.0
236
227
  required_rubygems_version: !ruby/object:Gem::Requirement
237
228
  requirements:
238
229
  - - ">="