licensee 9.19.0 → 9.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/licensee/content_helper/constants.rb +3 -1
- data/lib/licensee/content_helper/normalization_methods.rb +15 -3
- data/lib/licensee/content_helper/similarity_methods.rb +15 -1
- data/lib/licensee/content_helper.rb +19 -2
- data/lib/licensee/matchers/cabal.rb +5 -2
- data/lib/licensee/matchers/dice.rb +12 -2
- data/lib/licensee/matchers/gemspec.rb +0 -8
- data/lib/licensee/matchers/matcher.rb +3 -3
- data/lib/licensee/matchers/package.rb +1 -1
- data/lib/licensee/project_files/project_file.rb +1 -1
- data/lib/licensee/projects/project.rb +2 -2
- data/lib/licensee/version.rb +1 -1
- data/spec/fixture_spec.rb +2 -2
- data/spec/fixtures/bsd-3-linebreak-owner/LICENSE +30 -0
- data/spec/fixtures/bsd-3-multilinecopyright/LICENSE +27 -0
- data/spec/fixtures/detect.json +1 -1
- data/spec/fixtures/fixtures.yml +19 -11
- data/spec/fixtures/license-hashes.json +4 -4
- data/spec/licensee/content_helper_spec.rb +68 -4
- data/spec/licensee/hash_helper_spec.rb +6 -4
- data/spec/licensee/license_spec.rb +4 -0
- data/spec/licensee/matchers/cabal_matcher_spec.rb +36 -0
- data/spec/licensee/matchers/copyright_matcher_spec.rb +11 -0
- data/spec/licensee/matchers/dice_matcher_spec.rb +14 -1
- data/spec/licensee/matchers/matcher_spec.rb +10 -0
- data/spec/licensee/matchers/package_matcher_spec.rb +8 -0
- data/spec/licensee/matchers/reference_matcher_spec.rb +4 -0
- data/spec/licensee/project_files/project_file_spec.rb +16 -0
- data/spec/licensee/projects/project_spec.rb +15 -0
- data/spec/licensee/rule_spec.rb +4 -0
- data/spec/licensee_spec.rb +8 -0
- data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +1 -1
- data/vendor/choosealicense.com/_licenses/zlib.txt +1 -1
- data/vendor/license-list-XML/src/0BSD.xml +1 -1
- data/vendor/license-list-XML/src/AFL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/AGPL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/Apache-2.0.xml +1 -2
- data/vendor/license-list-XML/src/Artistic-2.0.xml +1 -1
- data/vendor/license-list-XML/src/BSD-2-Clause-Patent.xml +1 -1
- data/vendor/license-list-XML/src/BSD-2-Clause.xml +1 -1
- data/vendor/license-list-XML/src/BSD-3-Clause.xml +1 -1
- data/vendor/license-list-XML/src/BSD-4-Clause.xml +1 -1
- data/vendor/license-list-XML/src/BSL-1.0.xml +1 -1
- data/vendor/license-list-XML/src/ECL-2.0.xml +1 -1
- data/vendor/license-list-XML/src/EPL-1.0.xml +1 -1
- data/vendor/license-list-XML/src/EPL-2.0.xml +1 -1
- data/vendor/license-list-XML/src/EUPL-1.1.xml +1 -1
- data/vendor/license-list-XML/src/EUPL-1.2.xml +1 -1
- data/vendor/license-list-XML/src/GPL-2.0.xml +6 -3
- data/vendor/license-list-XML/src/GPL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/ISC.xml +1 -1
- data/vendor/license-list-XML/src/LGPL-2.1.xml +6 -3
- data/vendor/license-list-XML/src/LGPL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/LPPL-1.3c.xml +2 -2
- data/vendor/license-list-XML/src/MIT.xml +23 -20
- data/vendor/license-list-XML/src/MPL-2.0.xml +1 -1
- data/vendor/license-list-XML/src/MS-PL.xml +1 -1
- data/vendor/license-list-XML/src/MS-RL.xml +1 -1
- data/vendor/license-list-XML/src/NCSA.xml +1 -1
- data/vendor/license-list-XML/src/OFL-1.1.xml +1 -1
- data/vendor/license-list-XML/src/OSL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/PostgreSQL.xml +1 -1
- data/vendor/license-list-XML/src/UPL-1.0.xml +1 -1
- data/vendor/license-list-XML/src/Zlib.xml +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5c28e30b13570d0619bc9ec837c115c8184e3ecd3a46f17a5a3146b425c75262
|
|
4
|
+
data.tar.gz: 79fdbe8f8702fa484280083757db94ec93ac7106f3adb5b54aa3287ad7edbc45
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f761cf9ba0303f36693af8d2b91acf2c64b35daf7a2409e7ba7c463a8bba3eea1c7f16a8657e7af47694bbcb640d46507bf341ce62198241d66066e1a0fc8991
|
|
7
|
+
data.tar.gz: 66599cd5fda2597a8d063c547383a259b16b1d8854e7c88057096a2b054193683d3ff2ffd7b47738f30d81d49a68371eee8025fd139187cf9fd8a2bb71b68bc6
|
|
@@ -24,6 +24,8 @@ module Licensee
|
|
|
24
24
|
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
|
25
25
|
cc_dedication: /The\s+text\s+of\s+the\s+Creative\s+Commons.*?Public\s+Domain\s+Dedication\./im,
|
|
26
26
|
cc_wiki: /wiki\.creativecommons\.org/i,
|
|
27
|
+
cc_preamble: /creative\s+commons\s+corporation.*?(?=by\s+exercising\s+the\s+licensed\s+rights)/im,
|
|
28
|
+
cc_notice: /creative\s+commons\s+is\s+not\s+a\s+party\s+to\s+its\s+public\s+licenses\..*\z/im,
|
|
27
29
|
cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
|
|
28
30
|
cc0_info: /For more information, please see\s*\S+zero\S+/im,
|
|
29
31
|
cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
|
|
@@ -81,7 +83,7 @@ module Licensee
|
|
|
81
83
|
'sub license' => 'sublicense',
|
|
82
84
|
'utilisation' => 'utilization',
|
|
83
85
|
'whilst' => 'while',
|
|
84
|
-
'wilful' => '
|
|
86
|
+
'wilful' => 'willful',
|
|
85
87
|
'non-commercial' => 'noncommercial',
|
|
86
88
|
'per cent' => 'percent',
|
|
87
89
|
'copyright owner' => 'copyright holder'
|
|
@@ -65,8 +65,13 @@ module Licensee
|
|
|
65
65
|
end
|
|
66
66
|
|
|
67
67
|
def strip_copyright
|
|
68
|
-
|
|
69
|
-
|
|
68
|
+
copyright_notice_regex = Matchers::Copyright::MAIN_LINE_REGEX
|
|
69
|
+
copyright_regex = Regexp.union(Matchers::Copyright::REGEX, ContentHelper::REGEXES[:all_rights_reserved])
|
|
70
|
+
# Strip opening paragraph only when "All rights reserved." is present — confirms attribution, not license text.
|
|
71
|
+
strip(/\A.*?(?=\n\n)/m) if (p = _content[/\A.*?(?=\n\n)/m]) &&
|
|
72
|
+
p =~ copyright_notice_regex && /all rights reserved/i.match?(p)
|
|
73
|
+
# Strip any remaining copyright lines (e.g. when no blank line is present)
|
|
74
|
+
strip(copyright_regex) while _content =~ copyright_regex
|
|
70
75
|
end
|
|
71
76
|
|
|
72
77
|
def strip_cc0_optional
|
|
@@ -82,6 +87,8 @@ module Licensee
|
|
|
82
87
|
|
|
83
88
|
strip(ContentHelper::REGEXES[:cc_dedication])
|
|
84
89
|
strip(ContentHelper::REGEXES[:cc_wiki])
|
|
90
|
+
strip(ContentHelper::REGEXES[:cc_preamble]) if _content.include? 'creative commons corporation'
|
|
91
|
+
strip(ContentHelper::REGEXES[:cc_notice]) if _content.include? 'creative commons is not a party'
|
|
85
92
|
end
|
|
86
93
|
|
|
87
94
|
def strip_unlicense_optional
|
|
@@ -125,7 +132,12 @@ module Licensee
|
|
|
125
132
|
end
|
|
126
133
|
|
|
127
134
|
def normalize_spelling
|
|
128
|
-
|
|
135
|
+
# Use flexible whitespace between words so that line-wrapped content
|
|
136
|
+
# (e.g. "copyright\nowner") is still normalized correctly.
|
|
137
|
+
ContentHelper::VARIETAL_WORDS.each do |phrase, replacement|
|
|
138
|
+
pattern = phrase.split.map { |w| Regexp.escape(w) }.join('\s+')
|
|
139
|
+
@_content = _content.gsub(/\b#{pattern}\b/, replacement)
|
|
140
|
+
end
|
|
129
141
|
end
|
|
130
142
|
|
|
131
143
|
def normalize_bullets
|
|
@@ -7,12 +7,26 @@ module Licensee
|
|
|
7
7
|
# Given another license or project file, calculates the similarity
|
|
8
8
|
# as a percentage of words in common, minus a tiny penalty that
|
|
9
9
|
# increases with size difference between licenses so that false
|
|
10
|
-
# positives for long
|
|
10
|
+
# positives for long licenses are ruled out by this score alone.
|
|
11
11
|
def similarity(other)
|
|
12
12
|
overlap = (wordset_fieldless & other.wordset).size
|
|
13
13
|
(overlap * 200.0) / similarity_denominator(other)
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
+
# Given another license or project file, calculates the Dice coefficient
|
|
17
|
+
# over bigrams (consecutive word pairs). Unlike wordset similarity this
|
|
18
|
+
# is sensitive to word order, making it resistant to adversarial scrambling
|
|
19
|
+
# where all the correct words appear but in the wrong sequence.
|
|
20
|
+
def bigram_similarity(other)
|
|
21
|
+
my_bigrams = bigrams
|
|
22
|
+
other_bigrams = other.bigrams
|
|
23
|
+
total = my_bigrams.size + other_bigrams.size
|
|
24
|
+
return 0.0 if total.zero?
|
|
25
|
+
|
|
26
|
+
overlap = (my_bigrams & other_bigrams).size
|
|
27
|
+
(overlap * 200.0) / total
|
|
28
|
+
end
|
|
29
|
+
|
|
16
30
|
private
|
|
17
31
|
|
|
18
32
|
def wordset_fieldless
|
|
@@ -14,7 +14,18 @@ module Licensee
|
|
|
14
14
|
|
|
15
15
|
# A set of each word in the license, without duplicates
|
|
16
16
|
def wordset
|
|
17
|
-
@wordset ||=
|
|
17
|
+
@wordset ||= words&.to_set
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# A set of consecutive word pairs (bigrams) in the license, without duplicates.
|
|
21
|
+
# Unlike wordset, bigrams are order-sensitive, making similarity scores
|
|
22
|
+
# robust against adversarial word scrambling (see GitHub issue #602).
|
|
23
|
+
def bigrams
|
|
24
|
+
@bigrams ||= if words.nil? || words.length < 2
|
|
25
|
+
Set.new
|
|
26
|
+
else
|
|
27
|
+
words.each_cons(2).to_set { |a, b| "#{a} #{b}" }
|
|
28
|
+
end
|
|
18
29
|
end
|
|
19
30
|
|
|
20
31
|
# Number of characters in the normalized content
|
|
@@ -72,7 +83,7 @@ module Licensee
|
|
|
72
83
|
|
|
73
84
|
def self.title_regex
|
|
74
85
|
@title_regex ||= begin
|
|
75
|
-
licenses = Licensee::License.all(hidden: true,
|
|
86
|
+
licenses = Licensee::License.all(hidden: true, pseudo: false)
|
|
76
87
|
titles = licenses.map(&:title_regex)
|
|
77
88
|
|
|
78
89
|
# Title regex must include the version to support matching within
|
|
@@ -90,6 +101,12 @@ module Licensee
|
|
|
90
101
|
|
|
91
102
|
private
|
|
92
103
|
|
|
104
|
+
# Ordered array of words extracted from the normalized content.
|
|
105
|
+
# Memoized so that both wordset and bigrams share the same scan result.
|
|
106
|
+
def words
|
|
107
|
+
@words ||= content_normalized&.scan(%r{(?:[\w/-](?:'s|(?<=s)')?)+})
|
|
108
|
+
end
|
|
109
|
+
|
|
93
110
|
def _content
|
|
94
111
|
@_content ||= content.to_s.dup.strip
|
|
95
112
|
end
|
|
@@ -6,7 +6,8 @@ module Licensee
|
|
|
6
6
|
class Cabal < Licensee::Matchers::Package
|
|
7
7
|
# While we could parse the cabal file, prefer
|
|
8
8
|
# a lenient regex for speed and security. Moar parsing moar problems.
|
|
9
|
-
|
|
9
|
+
# The "+" suffix is the pre-SPDX Cabal notation for "or-later" (e.g. GPL-2+).
|
|
10
|
+
LICENSE_REGEX = /^\s*license\s*:\s*([a-z\-0-9.+]+)\s*$/ix
|
|
10
11
|
LICENSE_CONVERSIONS = {
|
|
11
12
|
'GPL-2' => 'GPL-2.0',
|
|
12
13
|
'GPL-3' => 'GPL-3.0',
|
|
@@ -24,7 +25,9 @@ module Licensee
|
|
|
24
25
|
end
|
|
25
26
|
|
|
26
27
|
def spdx_name(cabal_name)
|
|
27
|
-
|
|
28
|
+
# Strip pre-SPDX "or-later" suffix (+) before looking up conversions
|
|
29
|
+
normalized = cabal_name.chomp('+')
|
|
30
|
+
LICENSE_CONVERSIONS[normalized] || normalized
|
|
28
31
|
end
|
|
29
32
|
end
|
|
30
33
|
end
|
|
@@ -43,8 +43,9 @@ module Licensee
|
|
|
43
43
|
alias licenses_by_similarity matches_by_similarity
|
|
44
44
|
|
|
45
45
|
def matches
|
|
46
|
-
@matches ||= matches_by_similarity.select do |
|
|
47
|
-
similarity >= minimum_confidence
|
|
46
|
+
@matches ||= matches_by_similarity.select do |license, similarity|
|
|
47
|
+
similarity >= minimum_confidence &&
|
|
48
|
+
license.bigram_similarity(file) >= minimum_bigram_confidence
|
|
48
49
|
end
|
|
49
50
|
end
|
|
50
51
|
|
|
@@ -58,6 +59,15 @@ module Licensee
|
|
|
58
59
|
def minimum_confidence
|
|
59
60
|
Licensee.confidence_threshold
|
|
60
61
|
end
|
|
62
|
+
|
|
63
|
+
# A floor for bigram similarity, used to reject adversarially scrambled
|
|
64
|
+
# content that achieves high wordset similarity by including all the right
|
|
65
|
+
# words in the wrong order. Set to half the wordset threshold so that any
|
|
66
|
+
# genuine license match (which typically scores 90%+ on bigrams) passes,
|
|
67
|
+
# while scrambled content (which scores near 0%) is rejected.
|
|
68
|
+
def minimum_bigram_confidence
|
|
69
|
+
Licensee.confidence_threshold / 2.0
|
|
70
|
+
end
|
|
61
71
|
end
|
|
62
72
|
end
|
|
63
73
|
end
|
|
@@ -13,10 +13,6 @@ module Licensee
|
|
|
13
13
|
# non-value groups
|
|
14
14
|
ARRAY_REGEX = /\s*\[#{VALUE_REGEX}(?:,#{VALUE_REGEX})*\]\s*/i
|
|
15
15
|
|
|
16
|
-
DECLARATION_REGEX = /
|
|
17
|
-
^\s*[a-z0-9_]+\.([a-z0-9_]+)\s*=#{VALUE_REGEX}$
|
|
18
|
-
/ix
|
|
19
|
-
|
|
20
16
|
LICENSE_REGEX = /
|
|
21
17
|
^\s*[a-z0-9_]+\.license\s*=#{VALUE_REGEX}$
|
|
22
18
|
/ix
|
|
@@ -45,10 +41,6 @@ module Licensee
|
|
|
45
41
|
match = @file.content.match LICENSE_ARRAY_REGEX
|
|
46
42
|
match.captures.compact.map(&:downcase) if match
|
|
47
43
|
end
|
|
48
|
-
|
|
49
|
-
def declarations
|
|
50
|
-
@declarations ||= @file.content.match DECLARATION_REGEX
|
|
51
|
-
end
|
|
52
44
|
end
|
|
53
45
|
end
|
|
54
46
|
end
|
|
@@ -19,17 +19,17 @@ module Licensee
|
|
|
19
19
|
end
|
|
20
20
|
|
|
21
21
|
def match
|
|
22
|
-
raise
|
|
22
|
+
raise NotImplementedError, "#{self.class}#match is not implemented"
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
def confidence
|
|
26
|
-
raise
|
|
26
|
+
raise NotImplementedError, "#{self.class}#confidence is not implemented"
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
private
|
|
30
30
|
|
|
31
31
|
def potential_matches
|
|
32
|
-
@potential_matches ||= Licensee.licenses(hidden: true,
|
|
32
|
+
@potential_matches ||= Licensee.licenses(hidden: true, pseudo: false)
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
end
|
|
@@ -152,11 +152,11 @@ module Licensee
|
|
|
152
152
|
end
|
|
153
153
|
|
|
154
154
|
def files
|
|
155
|
-
raise
|
|
155
|
+
raise NotImplementedError, "#{self.class}#files is not implemented"
|
|
156
156
|
end
|
|
157
157
|
|
|
158
158
|
def load_file(_file)
|
|
159
|
-
raise
|
|
159
|
+
raise NotImplementedError, "#{self.class}#load_file is not implemented"
|
|
160
160
|
end
|
|
161
161
|
end
|
|
162
162
|
end
|
data/lib/licensee/version.rb
CHANGED
data/spec/fixture_spec.rb
CHANGED
|
@@ -18,9 +18,9 @@ RSpec.describe Fixture do
|
|
|
18
18
|
Licensee::License.find('none')
|
|
19
19
|
end
|
|
20
20
|
|
|
21
|
-
it 'has an expected license in fixtures
|
|
21
|
+
it 'has an expected license in fixtures.yml' do
|
|
22
22
|
msg = +'Expected an entry in `'
|
|
23
|
-
msg << fixture_path('fixtures
|
|
23
|
+
msg << fixture_path('fixtures.yml')
|
|
24
24
|
msg << "` for the `#{fixture}` fixture. Please run "
|
|
25
25
|
msg << 'script/dump-fixture-licenses and confirm the output.'
|
|
26
26
|
expect(fixture_licenses).to have_key(fixture), msg
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
Copyright (c) 2023, Karl Pettersson
|
|
2
|
+
|
|
3
|
+
All rights reserved.
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
* Redistributions of source code must retain the above copyright
|
|
9
|
+
notice, this list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
* Redistributions in binary form must reproduce the above
|
|
12
|
+
copyright notice, this list of conditions and the following
|
|
13
|
+
disclaimer in the documentation and/or other materials provided
|
|
14
|
+
with the distribution.
|
|
15
|
+
|
|
16
|
+
* Neither the name of Karl Pettersson nor the names of other
|
|
17
|
+
contributors may be used to endorse or promote products derived
|
|
18
|
+
from this software without specific prior written permission.
|
|
19
|
+
|
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
21
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
22
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
23
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
24
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
25
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
26
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
27
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
28
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
29
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2016-2026 by University of Kassel and Fraunhofer Institute for Energy Economics
|
|
4
|
+
and Energy System Technology (IEE) Kassel and individual contributors (see AUTHORS file for details).
|
|
5
|
+
All rights reserved.
|
|
6
|
+
|
|
7
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted
|
|
8
|
+
provided that the following conditions are met:
|
|
9
|
+
|
|
10
|
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions
|
|
11
|
+
and the following disclaimer.
|
|
12
|
+
|
|
13
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of
|
|
14
|
+
conditions and the following disclaimer in the documentation and/or other materials provided
|
|
15
|
+
with the distribution.
|
|
16
|
+
|
|
17
|
+
3. Neither the name of the copyright holder nor the names of its contributors may be used to
|
|
18
|
+
endorse or promote products derived from this software without specific prior written permission.
|
|
19
|
+
|
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
|
|
21
|
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
|
23
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
25
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
|
26
|
+
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
|
|
27
|
+
WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/spec/fixtures/detect.json
CHANGED
|
@@ -93,7 +93,7 @@
|
|
|
93
93
|
},
|
|
94
94
|
{
|
|
95
95
|
"filename": "licensee.gemspec",
|
|
96
|
-
"content": "# frozen_string_literal: true\n\nrequire File.expand_path('lib/licensee/version', __dir__)\n\nGem::Specification.new do |gem|\n gem.name = 'licensee'\n gem.version = Licensee::VERSION\n\n gem.summary = 'A Ruby Gem to detect open source project licenses'\n gem.description = <<-DESC\n Licensee automates the process of reading LICENSE files and\n compares their contents to known licenses using a fancy maths.\n DESC\n\n gem.authors = ['Ben Balter']\n gem.email = 'ben.balter@github.com'\n gem.homepage = 'https://github.com/
|
|
96
|
+
"content": "# frozen_string_literal: true\n\nrequire File.expand_path('lib/licensee/version', __dir__)\n\nGem::Specification.new do |gem|\n gem.name = 'licensee'\n gem.version = Licensee::VERSION\n\n gem.summary = 'A Ruby Gem to detect open source project licenses'\n gem.description = <<-DESC\n Licensee automates the process of reading LICENSE files and\n compares their contents to known licenses using a fancy maths.\n DESC\n\n gem.authors = ['Ben Balter']\n gem.email = 'ben.balter@github.com'\n gem.homepage = 'https://github.com/licensee/licensee'\n gem.license = 'MIT'\n gem.metadata['rubygems_mfa_required'] = 'true'\n\n gem.bindir = 'bin'\n gem.executables << 'licensee'\n\n gem.add_dependency('dotenv', '>= 2', '< 4')\n gem.add_dependency('octokit', '>= 4.20', '< 11.0')\n gem.add_dependency('reverse_markdown', '>= 1', '< 4')\n gem.add_dependency('rugged', '>= 0.24', '<2.0')\n gem.add_dependency('thor', '>= 0.19', '< 2.0')\n\n gem.add_development_dependency('gem-release', '~> 2.0')\n gem.add_development_dependency('mustache', '>= 0.9', '< 2.0')\n gem.add_development_dependency('pry', '~> 0.9')\n gem.add_development_dependency('rspec', '~> 3.5')\n gem.add_development_dependency('rubocop', '~> 1.0')\n gem.add_development_dependency('rubocop-performance', '~> 1.5')\n gem.add_development_dependency('rubocop-rspec', '~> 3.0')\n gem.add_development_dependency('simplecov', '~> 0.16')\n gem.add_development_dependency('webmock', '~> 3.1')\n\n gem.required_ruby_version = '>= 3.2'\n\n # ensure the gem is built out of versioned files\n gem.files = Dir[\n '{bin,lib,man,test,vendor,spec}/**/*',\n 'README*', 'LICENSE*'\n ] & `git ls-files -z`.split(\"\\0\")\nend\n",
|
|
97
97
|
"content_hash": null,
|
|
98
98
|
"content_normalized": null,
|
|
99
99
|
"matcher": {
|
data/spec/fixtures/fixtures.yml
CHANGED
|
@@ -7,7 +7,7 @@ agpl-3.0_markdown:
|
|
|
7
7
|
apache-2.0_markdown:
|
|
8
8
|
key: apache-2.0
|
|
9
9
|
matcher: dice
|
|
10
|
-
hash:
|
|
10
|
+
hash: 0b6213bf4bf883f67e804a39c23f92cb63c70f1d
|
|
11
11
|
apache-with-readme-notice:
|
|
12
12
|
key: apache-2.0
|
|
13
13
|
matcher: exact
|
|
@@ -28,10 +28,18 @@ bsd-3-authorowner:
|
|
|
28
28
|
key: bsd-3-clause
|
|
29
29
|
matcher: dice
|
|
30
30
|
hash: 2e6f215833d1a3d10e6194d479dbb2b4be2f64d7
|
|
31
|
+
bsd-3-multilinecopyright:
|
|
32
|
+
key: bsd-3-clause
|
|
33
|
+
matcher: exact
|
|
34
|
+
hash: a961b19cc6921d510e29a13b0ba1a826fcffe41c
|
|
31
35
|
bsd-3-clause_markdown:
|
|
32
36
|
key: bsd-3-clause
|
|
33
37
|
matcher: dice
|
|
34
38
|
hash: 2449fc8ece2fa342f2e82bbbf86f01d19329a531
|
|
39
|
+
bsd-3-linebreak-owner:
|
|
40
|
+
key: bsd-3-clause
|
|
41
|
+
matcher: dice
|
|
42
|
+
hash: 9a1ab486a9182629581b5598b415df85b48eb008
|
|
35
43
|
bsd-3-lists:
|
|
36
44
|
key: bsd-3-clause
|
|
37
45
|
matcher:
|
|
@@ -54,28 +62,28 @@ case-sensitive:
|
|
|
54
62
|
hash: da39a3ee5e6b4b0d3255bfef95601890afd80709
|
|
55
63
|
cc-by-4.0_markdown:
|
|
56
64
|
key: cc-by-4.0
|
|
57
|
-
matcher:
|
|
58
|
-
hash:
|
|
65
|
+
matcher: exact
|
|
66
|
+
hash: f2a70fcab522bfb2fbcdefb47b94d2a928e22091
|
|
59
67
|
cc-by-nc-sa:
|
|
60
68
|
key: other
|
|
61
69
|
matcher:
|
|
62
|
-
hash:
|
|
70
|
+
hash: ec364756344b00a9f6a59cb239a16634f7b40770
|
|
63
71
|
cc-by-nd:
|
|
64
72
|
key: other
|
|
65
73
|
matcher:
|
|
66
|
-
hash:
|
|
74
|
+
hash: b3cd0e6254ff1116ad09860ec03ea871cff30f64
|
|
67
75
|
cc-by-sa-4.0_markdown:
|
|
68
76
|
key: cc-by-sa-4.0
|
|
69
|
-
matcher:
|
|
70
|
-
hash:
|
|
77
|
+
matcher: exact
|
|
78
|
+
hash: 33464647bebf0a285d9df642ccb07c8e26b3e268
|
|
71
79
|
cc-by-sa-mdlinks:
|
|
72
80
|
key: cc-by-sa-4.0
|
|
73
|
-
matcher:
|
|
74
|
-
hash:
|
|
81
|
+
matcher: exact
|
|
82
|
+
hash: a81c2a3a07b59f58b3c09387874724670122fc90
|
|
75
83
|
cc-by-sa-nocclicensor:
|
|
76
84
|
key: cc-by-sa-4.0
|
|
77
85
|
matcher: dice
|
|
78
|
-
hash:
|
|
86
|
+
hash: 6fc6425689d3156be764c8e765ae2553a54e3589
|
|
79
87
|
cc0-1.0_markdown:
|
|
80
88
|
key: cc0-1.0
|
|
81
89
|
matcher: dice
|
|
@@ -111,7 +119,7 @@ epl-1.0_markdown:
|
|
|
111
119
|
eupl-cal2017:
|
|
112
120
|
key: eupl-1.2
|
|
113
121
|
matcher: exact
|
|
114
|
-
hash:
|
|
122
|
+
hash: d8debddd73476c481fc6ceed37c75f0ae97a6e81
|
|
115
123
|
fcpl-modified-mpl:
|
|
116
124
|
key: other
|
|
117
125
|
matcher:
|
|
@@ -11,8 +11,8 @@
|
|
|
11
11
|
"bsd-3-clause-clear": "0fcdb12c4060ce8f406e17bc67787e50a9b36a61",
|
|
12
12
|
"bsd-4-clause": "3b2917580b2b6f13efaaea37546b8b7a53716a30",
|
|
13
13
|
"bsl-1.0": "27e28f20b57048cf04be07e1532b6fb501a0753b",
|
|
14
|
-
"cc-by-4.0": "
|
|
15
|
-
"cc-by-sa-4.0": "
|
|
14
|
+
"cc-by-4.0": "e8c6c40ba40ff2a44f19d74987731e98facc1451",
|
|
15
|
+
"cc-by-sa-4.0": "35168cd69d6ef5b9dd81f7793898fad53c1798ec",
|
|
16
16
|
"cc0-1.0": "34dbb82be40b15f7c521d4f2d1a36ebe76246936",
|
|
17
17
|
"cecill-2.1": "ea372810464d71db27e62ad499628991ea2818cf",
|
|
18
18
|
"cern-ohl-p-2.0": "f10b4b8d75502ab65a7bdbe1d616e5eb8d157aed",
|
|
@@ -21,8 +21,8 @@
|
|
|
21
21
|
"ecl-2.0": "296976ce9e84ba380866e4519b68a779c2059b3a",
|
|
22
22
|
"epl-1.0": "5e3cb10996b4ba2821d04d5c99a912c924b3bdcb",
|
|
23
23
|
"epl-2.0": "e2f3e266432478d9248422228a75a404cce1c43c",
|
|
24
|
-
"eupl-1.1": "
|
|
25
|
-
"eupl-1.2": "
|
|
24
|
+
"eupl-1.1": "2e384f67f0cb5adb7f63470c5dcea0280873f2b3",
|
|
25
|
+
"eupl-1.2": "169fa5fdd2118679d1453414d0a5d28b2a5fcdc4",
|
|
26
26
|
"gfdl-1.3": "164a858691ea0a6fb0dd06c5ca00e5dd7620eef8",
|
|
27
27
|
"gpl-2.0": "32108116603c30687d8d0d2f77f140fb6ecea082",
|
|
28
28
|
"gpl-3.0": "7d4cdf499d39e2e1ce27b2878e22872f0f5a74dd",
|
|
@@ -13,10 +13,6 @@ class ContentHelperTestHelper
|
|
|
13
13
|
def filename
|
|
14
14
|
@data[:filename]
|
|
15
15
|
end
|
|
16
|
-
|
|
17
|
-
def spdx_id
|
|
18
|
-
@data[:spdx_id]
|
|
19
|
-
end
|
|
20
16
|
end
|
|
21
17
|
|
|
22
18
|
RSpec.describe Licensee::ContentHelper do
|
|
@@ -57,10 +53,41 @@ RSpec.describe Licensee::ContentHelper do
|
|
|
57
53
|
)
|
|
58
54
|
end
|
|
59
55
|
|
|
56
|
+
def expected_bigrams
|
|
57
|
+
Set.new(
|
|
58
|
+
[
|
|
59
|
+
'the made', 'made up', 'up license', 'license this', 'this license',
|
|
60
|
+
'license provided', 'provided as', 'as is\'', 'is\' please', 'please respect',
|
|
61
|
+
'respect the', 'the contributors\'', 'contributors\' wishes', 'wishes when',
|
|
62
|
+
'when implementing', 'implementing the', 'the license\'s', 'license\'s software'
|
|
63
|
+
]
|
|
64
|
+
)
|
|
65
|
+
end
|
|
66
|
+
|
|
60
67
|
it 'creates the wordset' do
|
|
61
68
|
expect(helper.wordset).to eql(expected_wordset)
|
|
62
69
|
end
|
|
63
70
|
|
|
71
|
+
it 'creates bigrams' do
|
|
72
|
+
expect(helper.bigrams).to eql(expected_bigrams)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
it 'returns empty set for content with fewer than two words' do
|
|
76
|
+
single_word = ContentHelperTestHelper.new('word', filename: 'LICENSE')
|
|
77
|
+
expect(single_word.bigrams).to eql(Set.new)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
it 'calculates bigram_similarity for exact content' do
|
|
81
|
+
expect(mit.bigram_similarity(mit)).to eq(100.0)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
it 'calculates bigram_similarity near zero for scrambled wordset' do
|
|
85
|
+
# All unique words from MIT sorted alphabetically: same wordset, different order.
|
|
86
|
+
sorted_words = mit.content_normalized.scan(%r{(?:[\w/-](?:'s|(?<=s)')?)+}).uniq.sort
|
|
87
|
+
scrambled = ContentHelperTestHelper.new(sorted_words.join(' '), filename: 'LICENSE')
|
|
88
|
+
expect(mit.bigram_similarity(scrambled)).to be < 5.0
|
|
89
|
+
end
|
|
90
|
+
|
|
64
91
|
it 'knows the length' do
|
|
65
92
|
expect(helper.length).to be(135)
|
|
66
93
|
end
|
|
@@ -275,6 +302,14 @@ RSpec.describe Licensee::ContentHelper do
|
|
|
275
302
|
end
|
|
276
303
|
end
|
|
277
304
|
|
|
305
|
+
context 'when normalizing wilful to willful' do
|
|
306
|
+
let(:content) { 'wilful misconduct' }
|
|
307
|
+
|
|
308
|
+
it 'normalizes wilful to willful' do
|
|
309
|
+
expect(helper.content_normalized).to eql('willful misconduct')
|
|
310
|
+
end
|
|
311
|
+
end
|
|
312
|
+
|
|
278
313
|
Licensee::License.all(hidden: true).each do |license|
|
|
279
314
|
context "with the #{license.name} license" do
|
|
280
315
|
let(:stripped_content) { helper.content_without_title_and_version }
|
|
@@ -334,6 +369,35 @@ RSpec.describe Licensee::ContentHelper do
|
|
|
334
369
|
expect(normalized_content).to eql('foo')
|
|
335
370
|
end
|
|
336
371
|
end
|
|
372
|
+
|
|
373
|
+
context 'with a multi-line copyright holder name followed by All rights reserved' do
|
|
374
|
+
let(:content) do
|
|
375
|
+
"Copyright (c) 2020 by Corporation Name and\n" \
|
|
376
|
+
"its Subsidiaries (see AUTHORS).\n" \
|
|
377
|
+
"All rights reserved.\n\n" \
|
|
378
|
+
'Foo'
|
|
379
|
+
end
|
|
380
|
+
|
|
381
|
+
it 'strips the wrapped copyright holder continuation and all rights reserved' do
|
|
382
|
+
expect(normalized_content).to eql('foo')
|
|
383
|
+
end
|
|
384
|
+
end
|
|
385
|
+
|
|
386
|
+
context 'with a multi-line copyright holder name without All rights reserved' do
|
|
387
|
+
let(:content) do
|
|
388
|
+
"Copyright (c) 2020 by Corporation Name and\n" \
|
|
389
|
+
"its Subsidiaries (see AUTHORS).\n\n" \
|
|
390
|
+
'Foo'
|
|
391
|
+
end
|
|
392
|
+
|
|
393
|
+
it 'leaves the ambiguous continuation' do
|
|
394
|
+
expect(normalized_content).to include('subsidiaries')
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
it 'strips the copyright notice line' do
|
|
398
|
+
expect(normalized_content).not_to include('copyright (c) 2020 by corporation name and')
|
|
399
|
+
end
|
|
400
|
+
end
|
|
337
401
|
end
|
|
338
402
|
|
|
339
403
|
context 'when matching title regex' do
|
|
@@ -21,13 +21,15 @@ class HashHelperSpecFixture
|
|
|
21
21
|
Licensee::Rule.all
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
def baz
|
|
25
|
-
'baz'
|
|
26
|
-
end
|
|
27
|
-
|
|
28
24
|
def nil_value
|
|
29
25
|
nil
|
|
30
26
|
end
|
|
27
|
+
|
|
28
|
+
# Method not listed in HASH_METHODS; used to ensure HashHelper#to_h
|
|
29
|
+
# does not expose arbitrary instance methods.
|
|
30
|
+
def baz
|
|
31
|
+
'not included'
|
|
32
|
+
end
|
|
31
33
|
end
|
|
32
34
|
|
|
33
35
|
RSpec.describe Licensee::HashHelper do
|
|
@@ -204,6 +204,10 @@ RSpec.describe Licensee::License do
|
|
|
204
204
|
expect(mit.key).to eql('mit')
|
|
205
205
|
end
|
|
206
206
|
|
|
207
|
+
it 'has a useful inspect string' do
|
|
208
|
+
expect(mit.inspect).to eql('#<Licensee::License key=mit>')
|
|
209
|
+
end
|
|
210
|
+
|
|
207
211
|
it 'exposes the SPDX ID' do
|
|
208
212
|
expect(gpl.spdx_id).to eql('GPL-3.0')
|
|
209
213
|
end
|
|
@@ -122,6 +122,42 @@ RSpec.describe Licensee::Matchers::Cabal do
|
|
|
122
122
|
end
|
|
123
123
|
end
|
|
124
124
|
|
|
125
|
+
context 'with pre-SPDX "or-later" (+) suffix' do
|
|
126
|
+
let(:content) { "license: #{cabal_license}" }
|
|
127
|
+
|
|
128
|
+
context 'with GPL-2+' do
|
|
129
|
+
let(:cabal_license) { 'GPL-2+' }
|
|
130
|
+
|
|
131
|
+
it 'returns GPL-2.0' do
|
|
132
|
+
expect(matcher.match).to eql(Licensee::License.find('GPL-2.0'))
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
context 'with GPL-3+' do
|
|
137
|
+
let(:cabal_license) { 'GPL-3+' }
|
|
138
|
+
|
|
139
|
+
it 'returns GPL-3.0' do
|
|
140
|
+
expect(matcher.match).to eql(Licensee::License.find('GPL-3.0'))
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
context 'with LGPL-3+' do
|
|
145
|
+
let(:cabal_license) { 'LGPL-3+' }
|
|
146
|
+
|
|
147
|
+
it 'returns LGPL-3.0' do
|
|
148
|
+
expect(matcher.match).to eql(Licensee::License.find('LGPL-3.0'))
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
context 'with AGPL-3+' do
|
|
153
|
+
let(:cabal_license) { 'AGPL-3+' }
|
|
154
|
+
|
|
155
|
+
it 'returns AGPL-3.0' do
|
|
156
|
+
expect(matcher.match).to eql(Licensee::License.find('AGPL-3.0'))
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
125
161
|
context 'with no license field' do
|
|
126
162
|
let(:content) { 'foo: bar' }
|
|
127
163
|
|
|
@@ -57,4 +57,15 @@ RSpec.describe Licensee::Matchers::Copyright do
|
|
|
57
57
|
expect(matcher.match).to be_nil
|
|
58
58
|
end
|
|
59
59
|
end
|
|
60
|
+
|
|
61
|
+
context 'with encoding-incompatible content' do
|
|
62
|
+
# A string with non-ASCII bytes in an encoding incompatible with the
|
|
63
|
+
# UTF-8 copyright regex triggers Encoding::CompatibilityError
|
|
64
|
+
let(:raw_content) { (+"\xC2\xA9 2015 Ben Balter").force_encoding('EUC-KR') }
|
|
65
|
+
|
|
66
|
+
it 'returns nil gracefully' do
|
|
67
|
+
allow(file).to receive(:content).and_return(raw_content)
|
|
68
|
+
expect(matcher.match).to be_nil
|
|
69
|
+
end
|
|
70
|
+
end
|
|
60
71
|
end
|