licensee 9.13.2 → 9.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d931f50190ecf7abb790530607bd57eb31c7190926a394bca7aa9ec0550cfba8
4
- data.tar.gz: f119b575b2ff9538133a587ef3a23638756fac47e30c40b442ddad679bb62036
3
+ metadata.gz: 37e4eac01861b05d0484cc0e81e7c6865386004e2529c2c74e3be8d9e350c14b
4
+ data.tar.gz: c506aa44124763dd74b0fa5771fe0c0f750e43a39020dc90d03a716f97861633
5
5
  SHA512:
6
- metadata.gz: 4b423e68fb6496eefc0f4259fac2539f34430a13e1eb6d3758a6876c604fc40e5a763a04836025070410c082c2a516b28988ceeb46ed2b2a06276b318b9d0fb6
7
- data.tar.gz: f0e150efc09980729793f86bbfcff323617349f0cae92ff2d4ebace2e29dac96e980deacc224b6ab08a21e3224f77b95541bdfaab9fa016ba81a6f94c3fdcce7
6
+ metadata.gz: df862cae20c0deeaf1caa8c74456603aa91aae1d180103c415885343a0af3ccd6b4bd6d683c1af351b038e50225488d86e83ad031bb061ebf381da38b9ac1537
7
+ data.tar.gz: be9e4697ce9f1ca2f410cc7bcb3ef952435f75470d8f69e1236a2aa66f6104a4f6446c185f50117278f2a2c06e455bb68aa5436a9af494a761eb852030c2aa04
@@ -112,32 +112,27 @@ module Licensee
112
112
  @wordset ||= content_normalized&.scan(%r{(?:[\w\/](?:'s|(?<=s)')?)+})&.to_set
113
113
  end
114
114
 
115
- # Number of characteres in the normalized content
115
+ # Number of characters in the normalized content
116
116
  def length
117
117
  return 0 unless content_normalized
118
118
 
119
119
  content_normalized.length
120
120
  end
121
121
 
122
- # Number of characters that could be added/removed to still be
123
- # considered a potential match
124
- def max_delta
125
- @max_delta ||= fields_normalized.size * 10 +
126
- (length * Licensee.inverse_confidence_threshold).to_i
127
- end
128
-
129
122
  # Given another license or project file, calculates the difference in length
130
123
  def length_delta(other)
131
124
  (length - other.length).abs
132
125
  end
133
126
 
134
127
  # Given another license or project file, calculates the similarity
135
- # as a percentage of words in common
128
+ # as a percentage of words in common, minus a tiny penalty that
129
+ # increases with size difference between licenses so that false
130
+ # positives for long licenses are ruled out by this score alone.
136
131
  def similarity(other)
137
132
  overlap = (wordset_fieldless & other.wordset).size
138
133
  total = wordset_fieldless.size + other.wordset.size -
139
134
  fields_normalized_set.size
140
- 100.0 * (overlap * 2.0 / total)
135
+ (overlap * 200.0) / (total + length_delta(other) / 10)
141
136
  end
142
137
 
143
138
  # SHA1 of the normalized content
@@ -12,6 +12,7 @@ module Licensee
12
12
  autoload :Exact, 'licensee/matchers/exact'
13
13
  autoload :Gemspec, 'licensee/matchers/gemspec'
14
14
  autoload :NpmBower, 'licensee/matchers/npm_bower'
15
+ autoload :NuGet, 'licensee/matchers/nuget'
15
16
  autoload :Package, 'licensee/matchers/package'
16
17
  autoload :Reference, 'licensee/matchers/reference'
17
18
  autoload :Spdx, 'licensee/matchers/spdx'
@@ -26,7 +26,7 @@ module Licensee
26
26
  if license.creative_commons? && file.potential_false_positive?
27
27
  false
28
28
  else
29
- license.wordset && license.length_delta(file) <= license.max_delta
29
+ license.wordset
30
30
  end
31
31
  end
32
32
  end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Licensee
4
+ module Matchers
5
+ class NuGet < Licensee::Matchers::Package
6
+ # While we could parse the nuspec file, prefer a lenient regex for speed and security.
7
+ # Moar parsing moar problems.
8
+ LICENSE_REGEX = %r{
9
+ <license\s*type\s*=\s*["']expression["']\s*>([a-z\-0-9\. +()]+)<\/license\s*>
10
+ }ix.freeze
11
+
12
+ LICENSE_URL_REGEX = %r{<licenseUrl>\s*(.*)\s*<\/licenseUrl>}i.freeze
13
+
14
+ NUGET_REGEX = %r{https?:\/\/licenses.nuget.org\/(.*)}i.freeze
15
+ OPENSOURCE_REGEX = %r{https?:\/\/(?:www\.)?opensource.org\/licenses\/(.*)}i.freeze
16
+ SPDX_REGEX = %r{https?:\/\/(?:www\.)?spdx.org\/licenses\/(.*?)(?:\.html|\.txt)?$}i.freeze
17
+ APACHE_REGEX = %r{https?:\/\/(?:www\.)?apache.org\/licenses\/(.*?)(?:\.html|\.txt)?$}i.freeze
18
+
19
+ private
20
+
21
+ def license_from_first_capture(url, pattern)
22
+ match = url.match(pattern)
23
+ match[1].downcase if match && match[1]
24
+ end
25
+
26
+ def license_from_url(url)
27
+ license_from_first_capture(url, NUGET_REGEX) ||
28
+ license_from_first_capture(url, OPENSOURCE_REGEX) ||
29
+ license_from_first_capture(url, SPDX_REGEX) ||
30
+ license_from_first_capture(url, APACHE_REGEX)&.gsub('license', 'apache')
31
+ end
32
+
33
+ def license_property
34
+ # Prefer the explicit <license type="expression"> element
35
+ match = @file.content.match LICENSE_REGEX
36
+ return match[1].downcase if match && match[1]
37
+
38
+ url_match = @file.content.match LICENSE_URL_REGEX
39
+ license_from_url(url_match[1]) if url_match && url_match[1]
40
+ end
41
+ end
42
+ end
43
+ end
@@ -7,7 +7,8 @@ module Licensee
7
7
  MATCHERS_EXTENSIONS = {
8
8
  '.gemspec' => [Matchers::Gemspec],
9
9
  '.json' => [Matchers::NpmBower],
10
- '.cabal' => [Matchers::Cabal]
10
+ '.cabal' => [Matchers::Cabal],
11
+ '.nuspec' => [Matchers::NuGet]
11
12
  }.freeze
12
13
 
13
14
  # Hash of Filename => [possible matchers]
@@ -33,7 +34,7 @@ module Licensee
33
34
  end
34
35
 
35
36
  def self.name_score(filename)
36
- return 1.0 if ['.gemspec', '.cabal'].include?(File.extname(filename))
37
+ return 1.0 if ['.gemspec', '.cabal', '.nuspec'].include?(File.extname(filename))
37
38
 
38
39
  FILENAMES_SCORES[filename] || 0.0
39
40
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Licensee
4
- VERSION = '9.13.2'
4
+ VERSION = '9.14.0'
5
5
  end
@@ -55,21 +55,13 @@ RSpec.describe Licensee::ContentHelper do
55
55
  expect(subject.length).to be(135)
56
56
  end
57
57
 
58
- context 'a very long license' do
59
- let(:content) { 'license' * 1000 }
60
-
61
- it 'returns the max delta' do
62
- expect(subject.max_delta).to be(140)
63
- end
64
- end
65
-
66
58
  it 'knows the length delta' do
67
59
  expect(subject.length_delta(mit)).to be(885)
68
60
  expect(subject.length_delta(subject)).to be(0)
69
61
  end
70
62
 
71
63
  it 'knows the similarity' do
72
- expect(subject.similarity(mit)).to be_within(1).of(11)
64
+ expect(subject.similarity(mit)).to be_within(1).of(6)
73
65
  expect(subject.similarity(subject)).to be(100.0)
74
66
  end
75
67
 
@@ -6,6 +6,7 @@ RSpec.describe Licensee::Matchers::Dice do
6
6
  let(:mit) { Licensee::License.find('mit') }
7
7
  let(:gpl) { Licensee::License.find('gpl-3.0') }
8
8
  let(:agpl) { Licensee::License.find('agpl-3.0') }
9
+ let(:lgpl) { Licensee::License.find('lgpl-2.1') }
9
10
  let(:cc_by) { Licensee::License.find('cc-by-4.0') }
10
11
  let(:cc_by_sa) { Licensee::License.find('cc-by-sa-4.0') }
11
12
  let(:content) { sub_copyright_info(gpl) }
@@ -19,18 +20,10 @@ RSpec.describe Licensee::Matchers::Dice do
19
20
  expect(subject.match).to eql(gpl)
20
21
  end
21
22
 
22
- it 'builds a list of potential licenses' do
23
- expect(subject.potential_matches).to eql([agpl, gpl])
24
- end
25
-
26
23
  it 'sorts licenses by similarity' do
27
24
  expect(subject.matches_by_similarity[0]).to eql([gpl, 100.0])
28
- expect(subject.matches_by_similarity[1]).to eql([agpl, 95.6842105263158])
29
- end
30
-
31
- it 'returns a list of licenses above the confidence threshold' do
32
- expect(subject.matches_by_similarity[0]).to eql([gpl, 100.0])
33
- expect(subject.matches_by_similarity[1]).to eql([agpl, 95.6842105263158])
25
+ expect(subject.matches_by_similarity[1]).to eql([agpl, 95.28301886792453])
26
+ expect(subject.matches_by_similarity[2]).to eql([lgpl, 39.33253873659118])
34
27
  end
35
28
 
36
29
  it 'returns the match confidence' do
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Licensee::Matchers::NuGet do
4
+ subject { described_class.new(file) }
5
+
6
+ let(:content) { '<license type="expression">mit</license>' }
7
+ let(:file) { Licensee::ProjectFiles::LicenseFile.new(content, 'foo.nuspec') }
8
+ let(:mit) { Licensee::License.find('mit') }
9
+ let(:apache2) { Licensee::License.find('apache-2.0') }
10
+ let(:other) { Licensee::License.find('other') }
11
+
12
+ it 'matches' do
13
+ expect(subject.match).to eql(mit)
14
+ end
15
+
16
+ it 'has a confidence' do
17
+ expect(subject.confidence).to be(90)
18
+ end
19
+
20
+ {
21
+ 'double quotes' => '<license type="expression">mit</license>',
22
+ 'single quotes' => "<license type='expression'>mit</license>",
23
+ 'whitespace' => '<license type = "expression" >mit</license >',
24
+ 'leading whitespace' => ' <license type="expression">mit</license>'
25
+ }.each do |description, license_declaration|
26
+ context "with a #{description} license element" do
27
+ let(:content) { license_declaration }
28
+
29
+ it 'matches' do
30
+ expect(subject.match).to eql(mit)
31
+ end
32
+ end
33
+ end
34
+
35
+ context 'no license field' do
36
+ let(:content) { '<file>wrongelement</file>' }
37
+
38
+ it 'returns nil' do
39
+ expect(subject.match).to be_nil
40
+ end
41
+ end
42
+
43
+ context 'an unknown license' do
44
+ let(:content) { '<license type="expression">foo</license>' }
45
+
46
+ it 'returns other' do
47
+ expect(subject.match).to eql(other)
48
+ end
49
+ end
50
+
51
+ context 'a license expression' do
52
+ let(:content) { '<license type="expression">BSD-2-Clause OR MIT</license>' }
53
+
54
+ it 'returns other' do
55
+ expect(subject.match).to eql(other)
56
+ end
57
+ end
58
+
59
+ {
60
+ 'nuget' => '<licenseUrl>https://licenses.nuget.org/Apache-2.0</licenseUrl>',
61
+ 'nuget (http)' => '<licenseUrl>http://licenses.nuget.org/Apache-2.0</licenseUrl>',
62
+ 'opensource' => '<licenseUrl>https://opensource.org/licenses/Apache-2.0</licenseUrl>',
63
+ 'opensource (www)' => '<licenseUrl>http://www.opensource.org/licenses/Apache-2.0</licenseUrl>',
64
+ 'spdx' => '<licenseUrl>https://spdx.org/licenses/Apache-2.0</licenseUrl>',
65
+ 'spdx (www)' => '<licenseUrl>http://www.spdx.org/licenses/Apache-2.0</licenseUrl>',
66
+ 'spdx (html)' => '<licenseUrl>https://spdx.org/licenses/Apache-2.0.html</licenseUrl>',
67
+ 'spdx (txt)' => '<licenseUrl>https://spdx.org/licenses/Apache-2.0.txt</licenseUrl>'
68
+ }.each do |description, license_declaration|
69
+ context "with a #{description} licenseUrl element containing SPDX" do
70
+ let(:content) { license_declaration }
71
+
72
+ it 'matches' do
73
+ expect(subject.match).to eql(apache2)
74
+ end
75
+ end
76
+ end
77
+
78
+ {
79
+ '2.0 (https)' => '<licenseUrl>https://apache.org/licenses/LICENSE-2.0</licenseUrl>',
80
+ '2.0 (http/www)' => '<licenseUrl>http://www.apache.org/licenses/LICENSE-2.0</licenseUrl>',
81
+ '2.0 (txt)' => '<licenseUrl>https://apache.org/licenses/LICENSE-2.0.txt</licenseUrl>'
82
+ }.each do |description, license_declaration|
83
+ context "with an apache.org #{description} licenseUrl element" do
84
+ let(:content) { license_declaration }
85
+
86
+ it 'matches' do
87
+ expect(subject.match).to eql(apache2)
88
+ end
89
+ end
90
+ end
91
+ end
@@ -71,5 +71,13 @@ RSpec.describe Licensee::ProjectFiles::PackageManagerFile do
71
71
  expect(possible_matchers).to eql([Licensee::Matchers::Cran])
72
72
  end
73
73
  end
74
+
75
+ context 'with nuspec file' do
76
+ let(:filename) { 'foo.nuspec' }
77
+
78
+ it 'returns the NuGet matcher' do
79
+ expect(possible_matchers).to eql([Licensee::Matchers::NuGet])
80
+ end
81
+ end
74
82
  end
75
83
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: licensee
3
3
  version: !ruby/object:Gem::Version
4
- version: 9.13.2
4
+ version: 9.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-28 00:00:00.000000000 Z
11
+ date: 2020-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv
@@ -237,6 +237,7 @@ files:
237
237
  - lib/licensee/matchers/gemspec.rb
238
238
  - lib/licensee/matchers/matcher.rb
239
239
  - lib/licensee/matchers/npm_bower.rb
240
+ - lib/licensee/matchers/nuget.rb
240
241
  - lib/licensee/matchers/package.rb
241
242
  - lib/licensee/matchers/reference.rb
242
243
  - lib/licensee/matchers/spdx.rb
@@ -323,6 +324,7 @@ files:
323
324
  - spec/licensee/matchers/gemspec_matcher_spec.rb
324
325
  - spec/licensee/matchers/matcher_spec.rb
325
326
  - spec/licensee/matchers/npm_bower_matcher_spec.rb
327
+ - spec/licensee/matchers/nu_get_matcher_spec.rb
326
328
  - spec/licensee/matchers/package_matcher_spec.rb
327
329
  - spec/licensee/matchers/reference_matcher_spec.rb
328
330
  - spec/licensee/matchers/spdx_matcher_spec.rb
@@ -398,7 +400,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
398
400
  - !ruby/object:Gem::Version
399
401
  version: '0'
400
402
  requirements: []
401
- rubygems_version: 3.0.3
403
+ rubygems_version: 3.1.3
402
404
  signing_key:
403
405
  specification_version: 4
404
406
  summary: A Ruby Gem to detect open source project licenses