licensee 9.13.2 → 9.14.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d931f50190ecf7abb790530607bd57eb31c7190926a394bca7aa9ec0550cfba8
4
- data.tar.gz: f119b575b2ff9538133a587ef3a23638756fac47e30c40b442ddad679bb62036
3
+ metadata.gz: 37e4eac01861b05d0484cc0e81e7c6865386004e2529c2c74e3be8d9e350c14b
4
+ data.tar.gz: c506aa44124763dd74b0fa5771fe0c0f750e43a39020dc90d03a716f97861633
5
5
  SHA512:
6
- metadata.gz: 4b423e68fb6496eefc0f4259fac2539f34430a13e1eb6d3758a6876c604fc40e5a763a04836025070410c082c2a516b28988ceeb46ed2b2a06276b318b9d0fb6
7
- data.tar.gz: f0e150efc09980729793f86bbfcff323617349f0cae92ff2d4ebace2e29dac96e980deacc224b6ab08a21e3224f77b95541bdfaab9fa016ba81a6f94c3fdcce7
6
+ metadata.gz: df862cae20c0deeaf1caa8c74456603aa91aae1d180103c415885343a0af3ccd6b4bd6d683c1af351b038e50225488d86e83ad031bb061ebf381da38b9ac1537
7
+ data.tar.gz: be9e4697ce9f1ca2f410cc7bcb3ef952435f75470d8f69e1236a2aa66f6104a4f6446c185f50117278f2a2c06e455bb68aa5436a9af494a761eb852030c2aa04
@@ -112,32 +112,27 @@ module Licensee
112
112
  @wordset ||= content_normalized&.scan(%r{(?:[\w\/](?:'s|(?<=s)')?)+})&.to_set
113
113
  end
114
114
 
115
- # Number of characteres in the normalized content
115
+ # Number of characters in the normalized content
116
116
  def length
117
117
  return 0 unless content_normalized
118
118
 
119
119
  content_normalized.length
120
120
  end
121
121
 
122
- # Number of characters that could be added/removed to still be
123
- # considered a potential match
124
- def max_delta
125
- @max_delta ||= fields_normalized.size * 10 +
126
- (length * Licensee.inverse_confidence_threshold).to_i
127
- end
128
-
129
122
  # Given another license or project file, calculates the difference in length
130
123
  def length_delta(other)
131
124
  (length - other.length).abs
132
125
  end
133
126
 
134
127
  # Given another license or project file, calculates the similarity
135
- # as a percentage of words in common
128
+ # as a percentage of words in common, minus a tiny penalty that
129
+ # increases with size difference between licenses so that false
130
+ # positives for long licenses are ruled out by this score alone.
136
131
  def similarity(other)
137
132
  overlap = (wordset_fieldless & other.wordset).size
138
133
  total = wordset_fieldless.size + other.wordset.size -
139
134
  fields_normalized_set.size
140
- 100.0 * (overlap * 2.0 / total)
135
+ (overlap * 200.0) / (total + length_delta(other) / 10)
141
136
  end
142
137
 
143
138
  # SHA1 of the normalized content
@@ -12,6 +12,7 @@ module Licensee
12
12
  autoload :Exact, 'licensee/matchers/exact'
13
13
  autoload :Gemspec, 'licensee/matchers/gemspec'
14
14
  autoload :NpmBower, 'licensee/matchers/npm_bower'
15
+ autoload :NuGet, 'licensee/matchers/nuget'
15
16
  autoload :Package, 'licensee/matchers/package'
16
17
  autoload :Reference, 'licensee/matchers/reference'
17
18
  autoload :Spdx, 'licensee/matchers/spdx'
@@ -26,7 +26,7 @@ module Licensee
26
26
  if license.creative_commons? && file.potential_false_positive?
27
27
  false
28
28
  else
29
- license.wordset && license.length_delta(file) <= license.max_delta
29
+ license.wordset
30
30
  end
31
31
  end
32
32
  end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Licensee
4
+ module Matchers
5
+ class NuGet < Licensee::Matchers::Package
6
+ # While we could parse the nuspec file, prefer a lenient regex for speed and security.
7
+ # Moar parsing moar problems.
8
+ LICENSE_REGEX = %r{
9
+ <license\s*type\s*=\s*["']expression["']\s*>([a-z\-0-9\. +()]+)<\/license\s*>
10
+ }ix.freeze
11
+
12
+ LICENSE_URL_REGEX = %r{<licenseUrl>\s*(.*)\s*<\/licenseUrl>}i.freeze
13
+
14
+ NUGET_REGEX = %r{https?:\/\/licenses.nuget.org\/(.*)}i.freeze
15
+ OPENSOURCE_REGEX = %r{https?:\/\/(?:www\.)?opensource.org\/licenses\/(.*)}i.freeze
16
+ SPDX_REGEX = %r{https?:\/\/(?:www\.)?spdx.org\/licenses\/(.*?)(?:\.html|\.txt)?$}i.freeze
17
+ APACHE_REGEX = %r{https?:\/\/(?:www\.)?apache.org\/licenses\/(.*?)(?:\.html|\.txt)?$}i.freeze
18
+
19
+ private
20
+
21
+ def license_from_first_capture(url, pattern)
22
+ match = url.match(pattern)
23
+ match[1].downcase if match && match[1]
24
+ end
25
+
26
+ def license_from_url(url)
27
+ license_from_first_capture(url, NUGET_REGEX) ||
28
+ license_from_first_capture(url, OPENSOURCE_REGEX) ||
29
+ license_from_first_capture(url, SPDX_REGEX) ||
30
+ license_from_first_capture(url, APACHE_REGEX)&.gsub('license', 'apache')
31
+ end
32
+
33
+ def license_property
34
+ # Prefer the explicit <license type="expression"> element
35
+ match = @file.content.match LICENSE_REGEX
36
+ return match[1].downcase if match && match[1]
37
+
38
+ url_match = @file.content.match LICENSE_URL_REGEX
39
+ license_from_url(url_match[1]) if url_match && url_match[1]
40
+ end
41
+ end
42
+ end
43
+ end
@@ -7,7 +7,8 @@ module Licensee
7
7
  MATCHERS_EXTENSIONS = {
8
8
  '.gemspec' => [Matchers::Gemspec],
9
9
  '.json' => [Matchers::NpmBower],
10
- '.cabal' => [Matchers::Cabal]
10
+ '.cabal' => [Matchers::Cabal],
11
+ '.nuspec' => [Matchers::NuGet]
11
12
  }.freeze
12
13
 
13
14
  # Hash of Filename => [possible matchers]
@@ -33,7 +34,7 @@ module Licensee
33
34
  end
34
35
 
35
36
  def self.name_score(filename)
36
- return 1.0 if ['.gemspec', '.cabal'].include?(File.extname(filename))
37
+ return 1.0 if ['.gemspec', '.cabal', '.nuspec'].include?(File.extname(filename))
37
38
 
38
39
  FILENAMES_SCORES[filename] || 0.0
39
40
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Licensee
4
- VERSION = '9.13.2'
4
+ VERSION = '9.14.0'
5
5
  end
@@ -55,21 +55,13 @@ RSpec.describe Licensee::ContentHelper do
55
55
  expect(subject.length).to be(135)
56
56
  end
57
57
 
58
- context 'a very long license' do
59
- let(:content) { 'license' * 1000 }
60
-
61
- it 'returns the max delta' do
62
- expect(subject.max_delta).to be(140)
63
- end
64
- end
65
-
66
58
  it 'knows the length delta' do
67
59
  expect(subject.length_delta(mit)).to be(885)
68
60
  expect(subject.length_delta(subject)).to be(0)
69
61
  end
70
62
 
71
63
  it 'knows the similarity' do
72
- expect(subject.similarity(mit)).to be_within(1).of(11)
64
+ expect(subject.similarity(mit)).to be_within(1).of(6)
73
65
  expect(subject.similarity(subject)).to be(100.0)
74
66
  end
75
67
 
@@ -6,6 +6,7 @@ RSpec.describe Licensee::Matchers::Dice do
6
6
  let(:mit) { Licensee::License.find('mit') }
7
7
  let(:gpl) { Licensee::License.find('gpl-3.0') }
8
8
  let(:agpl) { Licensee::License.find('agpl-3.0') }
9
+ let(:lgpl) { Licensee::License.find('lgpl-2.1') }
9
10
  let(:cc_by) { Licensee::License.find('cc-by-4.0') }
10
11
  let(:cc_by_sa) { Licensee::License.find('cc-by-sa-4.0') }
11
12
  let(:content) { sub_copyright_info(gpl) }
@@ -19,18 +20,10 @@ RSpec.describe Licensee::Matchers::Dice do
19
20
  expect(subject.match).to eql(gpl)
20
21
  end
21
22
 
22
- it 'builds a list of potential licenses' do
23
- expect(subject.potential_matches).to eql([agpl, gpl])
24
- end
25
-
26
23
  it 'sorts licenses by similarity' do
27
24
  expect(subject.matches_by_similarity[0]).to eql([gpl, 100.0])
28
- expect(subject.matches_by_similarity[1]).to eql([agpl, 95.6842105263158])
29
- end
30
-
31
- it 'returns a list of licenses above the confidence threshold' do
32
- expect(subject.matches_by_similarity[0]).to eql([gpl, 100.0])
33
- expect(subject.matches_by_similarity[1]).to eql([agpl, 95.6842105263158])
25
+ expect(subject.matches_by_similarity[1]).to eql([agpl, 95.28301886792453])
26
+ expect(subject.matches_by_similarity[2]).to eql([lgpl, 39.33253873659118])
34
27
  end
35
28
 
36
29
  it 'returns the match confidence' do
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Licensee::Matchers::NuGet do
4
+ subject { described_class.new(file) }
5
+
6
+ let(:content) { '<license type="expression">mit</license>' }
7
+ let(:file) { Licensee::ProjectFiles::LicenseFile.new(content, 'foo.nuspec') }
8
+ let(:mit) { Licensee::License.find('mit') }
9
+ let(:apache2) { Licensee::License.find('apache-2.0') }
10
+ let(:other) { Licensee::License.find('other') }
11
+
12
+ it 'matches' do
13
+ expect(subject.match).to eql(mit)
14
+ end
15
+
16
+ it 'has a confidence' do
17
+ expect(subject.confidence).to be(90)
18
+ end
19
+
20
+ {
21
+ 'double quotes' => '<license type="expression">mit</license>',
22
+ 'single quotes' => "<license type='expression'>mit</license>",
23
+ 'whitespace' => '<license type = "expression" >mit</license >',
24
+ 'leading whitespace' => ' <license type="expression">mit</license>'
25
+ }.each do |description, license_declaration|
26
+ context "with a #{description} license element" do
27
+ let(:content) { license_declaration }
28
+
29
+ it 'matches' do
30
+ expect(subject.match).to eql(mit)
31
+ end
32
+ end
33
+ end
34
+
35
+ context 'no license field' do
36
+ let(:content) { '<file>wrongelement</file>' }
37
+
38
+ it 'returns nil' do
39
+ expect(subject.match).to be_nil
40
+ end
41
+ end
42
+
43
+ context 'an unknown license' do
44
+ let(:content) { '<license type="expression">foo</license>' }
45
+
46
+ it 'returns other' do
47
+ expect(subject.match).to eql(other)
48
+ end
49
+ end
50
+
51
+ context 'a license expression' do
52
+ let(:content) { '<license type="expression">BSD-2-Clause OR MIT</license>' }
53
+
54
+ it 'returns other' do
55
+ expect(subject.match).to eql(other)
56
+ end
57
+ end
58
+
59
+ {
60
+ 'nuget' => '<licenseUrl>https://licenses.nuget.org/Apache-2.0</licenseUrl>',
61
+ 'nuget (http)' => '<licenseUrl>http://licenses.nuget.org/Apache-2.0</licenseUrl>',
62
+ 'opensource' => '<licenseUrl>https://opensource.org/licenses/Apache-2.0</licenseUrl>',
63
+ 'opensource (www)' => '<licenseUrl>http://www.opensource.org/licenses/Apache-2.0</licenseUrl>',
64
+ 'spdx' => '<licenseUrl>https://spdx.org/licenses/Apache-2.0</licenseUrl>',
65
+ 'spdx (www)' => '<licenseUrl>http://www.spdx.org/licenses/Apache-2.0</licenseUrl>',
66
+ 'spdx (html)' => '<licenseUrl>https://spdx.org/licenses/Apache-2.0.html</licenseUrl>',
67
+ 'spdx (txt)' => '<licenseUrl>https://spdx.org/licenses/Apache-2.0.txt</licenseUrl>'
68
+ }.each do |description, license_declaration|
69
+ context "with a #{description} licenseUrl element containing SPDX" do
70
+ let(:content) { license_declaration }
71
+
72
+ it 'matches' do
73
+ expect(subject.match).to eql(apache2)
74
+ end
75
+ end
76
+ end
77
+
78
+ {
79
+ '2.0 (https)' => '<licenseUrl>https://apache.org/licenses/LICENSE-2.0</licenseUrl>',
80
+ '2.0 (http/www)' => '<licenseUrl>http://www.apache.org/licenses/LICENSE-2.0</licenseUrl>',
81
+ '2.0 (txt)' => '<licenseUrl>https://apache.org/licenses/LICENSE-2.0.txt</licenseUrl>'
82
+ }.each do |description, license_declaration|
83
+ context "with an apache.org #{description} licenseUrl element" do
84
+ let(:content) { license_declaration }
85
+
86
+ it 'matches' do
87
+ expect(subject.match).to eql(apache2)
88
+ end
89
+ end
90
+ end
91
+ end
@@ -71,5 +71,13 @@ RSpec.describe Licensee::ProjectFiles::PackageManagerFile do
71
71
  expect(possible_matchers).to eql([Licensee::Matchers::Cran])
72
72
  end
73
73
  end
74
+
75
+ context 'with nuspec file' do
76
+ let(:filename) { 'foo.nuspec' }
77
+
78
+ it 'returns the NuGet matcher' do
79
+ expect(possible_matchers).to eql([Licensee::Matchers::NuGet])
80
+ end
81
+ end
74
82
  end
75
83
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: licensee
3
3
  version: !ruby/object:Gem::Version
4
- version: 9.13.2
4
+ version: 9.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-28 00:00:00.000000000 Z
11
+ date: 2020-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv
@@ -237,6 +237,7 @@ files:
237
237
  - lib/licensee/matchers/gemspec.rb
238
238
  - lib/licensee/matchers/matcher.rb
239
239
  - lib/licensee/matchers/npm_bower.rb
240
+ - lib/licensee/matchers/nuget.rb
240
241
  - lib/licensee/matchers/package.rb
241
242
  - lib/licensee/matchers/reference.rb
242
243
  - lib/licensee/matchers/spdx.rb
@@ -323,6 +324,7 @@ files:
323
324
  - spec/licensee/matchers/gemspec_matcher_spec.rb
324
325
  - spec/licensee/matchers/matcher_spec.rb
325
326
  - spec/licensee/matchers/npm_bower_matcher_spec.rb
327
+ - spec/licensee/matchers/nu_get_matcher_spec.rb
326
328
  - spec/licensee/matchers/package_matcher_spec.rb
327
329
  - spec/licensee/matchers/reference_matcher_spec.rb
328
330
  - spec/licensee/matchers/spdx_matcher_spec.rb
@@ -398,7 +400,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
398
400
  - !ruby/object:Gem::Version
399
401
  version: '0'
400
402
  requirements: []
401
- rubygems_version: 3.0.3
403
+ rubygems_version: 3.1.3
402
404
  signing_key:
403
405
  specification_version: 4
404
406
  summary: A Ruby Gem to detect open source project licenses