licensee 5.0.0b4 → 5.0.0b5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ddb84b445d32f602815a66e09512dd9fd917bbb
4
- data.tar.gz: 44813cf43899909185a60b9bd3ae10fc5a5fa594
3
+ metadata.gz: 3141e0f1cc301cf2aebb6f835212533eafe27316
4
+ data.tar.gz: 3fac37012f662fd0520b99106793d3b43bafce61
5
5
  SHA512:
6
- metadata.gz: dd4d1ee0285d9425546a5dfa41d746b9db3a00903dcf9a5ea34ddea9d4eed4c979783bf9920d3db9756822a6d4caff44ec1a316ae12b606c6788ecb712552c12
7
- data.tar.gz: c3acb3e8ed77664b816ef5523d2cea79c236edd6b6b25bef1a4cc1100af077418c10b5ff792a5ab7041ab03275e91fa4348546710380576a3b204ef72deeab45
6
+ metadata.gz: bc48939cb0de92f1305b7e341b01da174c298b262ec58a1f6a7ed66298a1d4f68ecfbb1dc870e64cab51bb75298b643a66770c3590b666d2c3cdf7a8881049d3
7
+ data.tar.gz: 8f8350d224fa1cb4f18661a3a42c876e2dd450670de6f3888b8cbeef6a9a8ffdbe7ab635b8702b27b179b92115cf7defe32436b51bb7ee25a74ea7d8b440f474
@@ -7,8 +7,12 @@ Licensee.package_manager_files = true
7
7
  project = Licensee::Project.new(path)
8
8
  license = project.matched_file
9
9
 
10
+ if project.license_file
11
+ puts "License file: #{project.license_file.path}"
12
+ puts "Attribution: #{project.license_file.attribution}" if project.license_file.attribution
13
+ end
14
+
10
15
  if license
11
- puts "Matched file: #{license.path}"
12
16
  puts "License: #{license.match ? license.match.meta['title'] : 'no license'}"
13
17
  puts "Confidence: #{license.confidence}%"
14
18
  puts "Method: #{license.matcher.class}"
@@ -4,6 +4,7 @@ require 'rugged'
4
4
  require 'levenshtein'
5
5
 
6
6
  require_relative "licensee/version"
7
+ require_relative "licensee/content_helper"
7
8
  require_relative "licensee/license"
8
9
  require_relative "licensee/project"
9
10
  require_relative "licensee/project_file"
@@ -39,11 +40,6 @@ class Licensee
39
40
  Licensee::Project.new(path).license
40
41
  end
41
42
 
42
- # Diffs the project license and the known license
43
- def diff(path)
44
- Licensee::Project.new(path).license_file.diff
45
- end
46
-
47
43
  # Array of matchers to use, in order of preference
48
44
  # The order should be descending order of anticipated speed to match
49
45
  def matchers
@@ -0,0 +1,13 @@
1
+ class Licensee
2
+ module ContentHelper
3
+ def normalize_content(content)
4
+ return unless content
5
+ content = content.downcase
6
+ content = content.gsub(/\A[[:space:]]+/, '')
7
+ content = content.gsub(/[[:space:]]+\z/, '')
8
+ content = content.gsub(/^#{CopyrightMatcher::REGEX}$/i, '')
9
+ content = content.gsub(/[[:space:]]+/, ' ')
10
+ content.squeeze(' ').strip
11
+ end
12
+ end
13
+ end
@@ -39,6 +39,8 @@ class Licensee
39
39
 
40
40
  HIDDEN_LICENSES = %w[other no-license]
41
41
 
42
+ include Licensee::ContentHelper
43
+
42
44
  def initialize(key)
43
45
  @key=key.downcase
44
46
  end
@@ -76,6 +78,14 @@ class Licensee
76
78
  meta.nil? ? key.capitalize : meta["title"]
77
79
  end
78
80
 
81
+ def nickname
82
+ meta["nickname"] if meta
83
+ end
84
+
85
+ def name_without_version
86
+ /(.+?)(( v?\d\.\d)|$)/.match(name)[1]
87
+ end
88
+
79
89
  def featured?
80
90
  !!(meta["featured"] if meta)
81
91
  end
@@ -95,7 +105,7 @@ class Licensee
95
105
 
96
106
  # License body with all whitespace replaced with a single space
97
107
  def body_normalized
98
- @content_normalized ||= body.to_s.downcase.gsub(/\s+/, " ").strip
108
+ @body_normalized ||= normalize_content(body)
99
109
  end
100
110
 
101
111
  # Git-computed hash signature for the license file
@@ -118,6 +128,14 @@ class Licensee
118
128
  other != nil && key == other.key
119
129
  end
120
130
 
131
+ def body_includes_name?
132
+ @body_includes_name ||= body_normalized.include?(name_without_version.downcase)
133
+ end
134
+
135
+ def body_includes_nickname?
136
+ @body_includes_nickname ||= !!(nickname && body_normalized.include?(nickname.downcase))
137
+ end
138
+
121
139
  private
122
140
 
123
141
  def parts
@@ -2,10 +2,11 @@
2
2
  class Licensee
3
3
  class CopyrightMatcher < Matcher
4
4
 
5
- REGEX = /\ACopyright (©|\(c\)|\xC2\xA9)? ?\d{4}.*?\n?\z/i
5
+ REGEX = /\s*Copyright (©|\(c\)|\xC2\xA9)? ?(\d{4}|\[year\])(.*)?\s*/i
6
6
 
7
7
  def match
8
- no_license if file.content.strip =~ REGEX
8
+ # Note: must use content, and not content_normalized here
9
+ no_license if file.content.strip =~ /\A#{REGEX}\z/i
9
10
  rescue
10
11
  nil
11
12
  end
@@ -2,7 +2,7 @@ class Licensee
2
2
  class GitMatcher < Matcher
3
3
 
4
4
  def match
5
- match_info[0] unless match_info.nil?
5
+ match_info[0] if match_info && match_info[1] >= Licensee.confidence_threshold
6
6
  end
7
7
 
8
8
  def confidence
@@ -19,7 +19,7 @@ class Licensee
19
19
  def match_info
20
20
  @match_info ||= begin
21
21
  match = matches.max_by { |license, similarity| similarity }
22
- match if match && match[1] > Licensee.confidence_threshold
22
+ match if match
23
23
  end
24
24
  end
25
25
  end
@@ -4,6 +4,13 @@ class Licensee
4
4
  # Return the first potential license that is more similar than the confidence threshold
5
5
  def match
6
6
  @match ||= potential_licenses.find do |license|
7
+
8
+ # If we know the license text contains the license name or nickname,
9
+ # bail early unless the file we're checking contains it.
10
+ # Guards against OSL & AFL confusion. See https://github.com/benbalter/licensee/issues/50
11
+ next if license.body_includes_name? && !includes_license_name?(license)
12
+ next if license.body_includes_nickname? && !includes_license_nickname?(license)
13
+
7
14
  similarity(license) >= Licensee.confidence_threshold
8
15
  end
9
16
  end
@@ -12,7 +19,11 @@ class Licensee
12
19
  # Difference in lengths cannot exceed the file's length * the confidence threshold / 100
13
20
  def potential_licenses
14
21
  @potential_licenses ||= begin
15
- Licensee.licenses(:hidden => true).select { |license| length_delta(license) <= max_delta }.sort_by { |l| length_delta(l) }
22
+ licenses = Licensee.licenses(:hidden => true)
23
+ licenses = licenses.select do |license|
24
+ license.body_normalized && length_delta(license) <= max_delta
25
+ end
26
+ licenses.sort_by { |l| length_delta(l) }
16
27
  end
17
28
  end
18
29
 
@@ -51,5 +62,13 @@ class Licensee
51
62
  def distance(license)
52
63
  Levenshtein.distance(license.body_normalized, file.content_normalized).to_f
53
64
  end
65
+
66
+ def includes_license_name?(license)
67
+ file.content_normalized.include?(license.name_without_version.downcase)
68
+ end
69
+
70
+ def includes_license_nickname?(license)
71
+ license.nickname && file.content_normalized.include?(license.nickname.downcase)
72
+ end
54
73
  end
55
74
  end
@@ -7,6 +7,8 @@ class Licensee
7
7
  attr_reader :blob, :path
8
8
  alias_method :filename, :path
9
9
 
10
+ include Licensee::ContentHelper
11
+
10
12
  def initialize(blob, path)
11
13
  @blob = blob
12
14
  @path = path
@@ -21,7 +23,7 @@ class Licensee
21
23
 
22
24
  # File content with all whitespace replaced with a single space
23
25
  def content_normalized
24
- @content_normalized ||= content.downcase.gsub(/\s+/, " ").strip
26
+ @content_normalized ||= normalize_content(content)
25
27
  end
26
28
 
27
29
  # Determines which matching strategy to use, returns an instance of that matcher
@@ -46,12 +48,6 @@ class Licensee
46
48
  0
47
49
  end
48
50
 
49
- # Comptutes a diff between known license and project license
50
- def diff(options={})
51
- options = options.merge(:reverse => true)
52
- blob.diff(match.body, options).to_s if match
53
- end
54
-
55
51
  def license_score
56
52
  self.class.license_score(filename)
57
53
  end
@@ -60,6 +56,12 @@ class Licensee
60
56
  license_score != 0.0
61
57
  end
62
58
 
59
+ def attribution
60
+ return nil unless license?
61
+ matches = /^#{CopyrightMatcher::REGEX}$/i.match(content)
62
+ matches[0].strip if matches
63
+ end
64
+
63
65
  def package_score
64
66
  return 1.0 if filename =~ /[a-zA-Z0-9\-_]+\.gemspec/
65
67
  return 1.0 if filename =~ /package\.json/
@@ -1,3 +1,3 @@
1
1
  class Licensee
2
- VERSION = "5.0.0b4"
2
+ VERSION = "5.0.0b5"
3
3
  end
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2015 Ben Balter
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -22,8 +22,8 @@ end
22
22
  FakeBlob = Licensee::FilesystemRepository::Blob
23
23
 
24
24
  def chaos_monkey(string)
25
- Random.rand(5).times do
26
- string[Random.rand(string.length)] = SecureRandom.base64(Random.rand(5))
25
+ Random.rand(3).times do
26
+ string[Random.rand(string.length)] = SecureRandom.base64(Random.rand(3))
27
27
  end
28
28
  string
29
29
  end
@@ -39,15 +39,28 @@ def verify_license_file(license, chaos = false, wrap=false)
39
39
  license_file = Licensee::ProjectFile.new(blob, "LICENSE")
40
40
 
41
41
  actual = license_file.match
42
- assert actual, "No match for #{expected}. Here's the test text:\n#{text}"
42
+ msg = "No match for #{expected}."
43
+
44
+ unless actual
45
+ Licensee.matchers.each do |matcher|
46
+ matcher = matcher.new(license_file)
47
+ msg << "#{matcher.class}: #{matcher.confidence}% #{matcher.match.inspect}\n"
48
+ end
49
+ msg << "Here's the test text:\n#{text}"
50
+ end
51
+
52
+ assert actual, msg
43
53
  assert_equal expected, actual.key, "expeceted #{expected} but got #{actual.key} for .match. Confidence: #{license_file.confidence}. Method: #{license_file.matcher.class}"
44
54
  end
45
55
 
46
56
  def wrap(text, line_width=80)
47
57
  text = text.clone
58
+ copyright = /^#{Licensee::CopyrightMatcher::REGEX}$/i.match(text)
59
+ text.gsub! /^#{Licensee::CopyrightMatcher::REGEX}$/i, '[COPYRIGHT]' if copyright
48
60
  text.gsub! /([^\n])\n([^\n])/, '\1 \2'
49
61
  text = text.split("\n").collect do |line|
50
62
  line.length > line_width ? line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip : line
51
63
  end * "\n"
64
+ text.gsub! "[COPYRIGHT]", "\n#{copyright}\n" if copyright
52
65
  text.strip
53
66
  end
@@ -12,10 +12,6 @@ class TestLicensee < Minitest::Test
12
12
  assert_equal "mit", Licensee.license(fixture_path("licenses.git")).key
13
13
  end
14
14
 
15
- should "diff a license" do
16
- Licensee.diff(fixture_path("licenses.git"))
17
- end
18
-
19
15
  context "confidence threshold" do
20
16
  should "return the confidence threshold" do
21
17
  assert_equal 90, Licensee.confidence_threshold
@@ -6,7 +6,7 @@ class TestLicenseeBin < Minitest::Test
6
6
  Dir.chdir root
7
7
  stdout,stderr,status = Open3.capture3("#{root}/bin/licensee")
8
8
  assert stdout.include?("License: MIT"), "expected #{stdout} to include `License: MIT`"
9
- assert stdout.include?("Matched file: LICENSE.md"), "expected #{stdout} to include `Matched file: LICENSE.md`"
9
+ assert stdout.include?("License file: LICENSE.md"), "expected #{stdout} to include `Matched file: LICENSE.md`"
10
10
  assert_equal 0, status
11
11
  end
12
12
  end
@@ -0,0 +1,36 @@
1
+ require 'helper'
2
+
3
+ class TestHelper
4
+ include Licensee::ContentHelper
5
+ end
6
+
7
+ class TestLicenseeContentHelper < Minitest::Test
8
+
9
+ def setup
10
+ @helper = TestHelper.new
11
+ end
12
+
13
+ def normalize(content)
14
+ @helper.normalize_content(content)
15
+ end
16
+
17
+ should "downcase content" do
18
+ assert_equal "foo", normalize("Foo")
19
+ end
20
+
21
+ should "strip leading whitespace" do
22
+ assert_equal "foo", normalize("\n Foo")
23
+ end
24
+
25
+ should "strip trailing whitespace" do
26
+ assert_equal "foo", normalize("Foo \n ")
27
+ end
28
+
29
+ should "strip double spaces" do
30
+ assert_equal "foo bar", normalize("Foo bar")
31
+ end
32
+
33
+ should "strip copyrights" do
34
+ assert_equal "foo", normalize("Copyright (c) 2015 Ben Balter\nFoo")
35
+ end
36
+ end
@@ -15,4 +15,5 @@ class TestLicenseeGitMatcher < Minitest::Test
15
15
  should "know the match confidence" do
16
16
  assert_equal 94, Licensee::GitMatcher.new(@mit).confidence
17
17
  end
18
+
18
19
  end
@@ -18,13 +18,13 @@ class TestLicenseeLevenshteinMatcher < Minitest::Test
18
18
  end
19
19
 
20
20
  should "calculate max delta" do
21
- assert_equal 964.8000000000001, Licensee::LevenshteinMatcher.new(@mit).max_delta
21
+ assert_equal 937.8000000000001, Licensee::LevenshteinMatcher.new(@mit).max_delta
22
22
  end
23
23
 
24
24
  should "calculate length delta" do
25
25
  isc = Licensee::License.find("isc")
26
- assert_equal 2, Licensee::LevenshteinMatcher.new(@mit).length_delta(Licensee::License.find("mit"))
27
- assert_equal 334, Licensee::LevenshteinMatcher.new(@mit).length_delta(isc)
26
+ assert_equal 0.0, Licensee::LevenshteinMatcher.new(@mit).length_delta(Licensee::License.find("mit"))
27
+ assert_equal 347.0, Licensee::LevenshteinMatcher.new(@mit).length_delta(isc)
28
28
  end
29
29
 
30
30
  should "round up potential licenses" do
@@ -30,6 +30,11 @@ class TestLicenseeLicense < Minitest::Test
30
30
  assert_equal "MIT License", @license.name
31
31
  end
32
32
 
33
+ should "know the license nickname" do
34
+ expected = "GNU Affero GPL v3.0"
35
+ assert_equal expected, Licensee::License.find("agpl-3.0").nickname
36
+ end
37
+
33
38
  should "know the license ID" do
34
39
  assert_equal "mit", @license.key
35
40
  end
@@ -98,6 +103,32 @@ class TestLicenseeLicense < Minitest::Test
98
103
  refute license.featured?
99
104
  end
100
105
 
106
+ describe "name without version" do
107
+ should "strip the version from the license name" do
108
+ expected = "GNU Affero General Public License"
109
+ assert_equal expected, Licensee::License.find("agpl-3.0").name_without_version
110
+ expected = "GNU General Public License"
111
+ assert_equal expected, Licensee::License.find("gpl-2.0").name_without_version
112
+ assert_equal expected, Licensee::License.find("gpl-3.0").name_without_version
113
+ end
114
+
115
+ should "know if the license contains the name without version" do
116
+ refute Licensee::License.find("cc0-1.0").body_includes_name?
117
+ assert Licensee::License.find("agpl-3.0").body_includes_name?
118
+ end
119
+
120
+ should "know if the license contains the nickname" do
121
+ refute Licensee::License.find("mit").body_includes_nickname?
122
+ assert Licensee::License.find("apache-2.0").body_includes_nickname?
123
+ end
124
+
125
+ Licensee.licenses.each do |license|
126
+ should "strip the version number from the #{license.name} license" do
127
+ assert license.name_without_version
128
+ end
129
+ end
130
+ end
131
+
101
132
  describe "class methods" do
102
133
  should "know license names" do
103
134
  assert_equal Array, Licensee::License.keys.class
@@ -68,6 +68,10 @@ class TestLicenseeProject < Minitest::Test
68
68
  end
69
69
  end
70
70
 
71
+ should "detect the MIT license even with the title removed" do
72
+ verify_license_file fixture_path("mit-without-title/mit.txt")
73
+ end
74
+
71
75
  describe "packages" do
72
76
 
73
77
  def setup
@@ -22,13 +22,12 @@ class TestLicenseeProjectFile < Minitest::Test
22
22
  assert_equal "LICENSE", @file.path
23
23
  end
24
24
 
25
- should "diff the file" do
26
- expected = "-Copyright (c) [year] [fullname]\n+Copyright (c) 2014 Ben Balter"
27
- assert @file.diff.include?(expected)
25
+ should "calculate confidence" do
26
+ assert_equal 100, @file.confidence
28
27
  end
29
28
 
30
- should "calculate confidence" do
31
- assert_equal 94, @file.confidence
29
+ should "parse the attribution" do
30
+ assert_equal "Copyright (c) 2014 Ben Balter", @file.attribution
32
31
  end
33
32
 
34
33
  context "license filename scoring" do
@@ -1,15 +1,17 @@
1
1
  require 'helper'
2
2
 
3
3
  class TestLicenseeVendor < Minitest::Test
4
- Licensee::License.send(:license_files).shuffle.each do |license|
5
4
 
5
+ SKIP = %[wtfpl no-license]
6
+
7
+ Licensee::License.send(:license_files).shuffle.each do |license|
6
8
  should "detect the #{license} license" do
7
9
  verify_license_file(license)
8
10
  end
9
11
 
10
12
  context "when modified" do
11
13
  should "detect the #{license} license" do
12
- verify_license_file(license, true) unless license =~ /no-license\.txt$/
14
+ verify_license_file(license, true) unless SKIP.include?(File.basename(license, ".txt"))
13
15
  end
14
16
  end
15
17
 
@@ -20,7 +22,7 @@ class TestLicenseeVendor < Minitest::Test
20
22
 
21
23
  context "when modified" do
22
24
  should "detect the #{license} license" do
23
- verify_license_file(license, true, 50) unless license =~ /no-license\.txt$/
25
+ verify_license_file(license, true, 50) unless SKIP.include?(File.basename(license, ".txt"))
24
26
  end
25
27
  end
26
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: licensee
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.0b4
4
+ version: 5.0.0b5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-01 00:00:00.000000000 Z
11
+ date: 2015-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rugged
@@ -107,6 +107,7 @@ files:
107
107
  - Rakefile
108
108
  - bin/licensee
109
109
  - lib/licensee.rb
110
+ - lib/licensee/content_helper.rb
110
111
  - lib/licensee/filesystem_repository.rb
111
112
  - lib/licensee/license.rb
112
113
  - lib/licensee/matcher.rb
@@ -151,6 +152,7 @@ files:
151
152
  - test/fixtures/licenses.git/objects/pack/pack-4a7088171ae3ca900f010a4be6f1c2c96490c338.pack
152
153
  - test/fixtures/licenses.git/packed-refs
153
154
  - test/fixtures/licenses.git/refs/heads/master
155
+ - test/fixtures/mit-without-title/mit.txt
154
156
  - test/fixtures/named-license-file-prefix.git/HEAD
155
157
  - test/fixtures/named-license-file-prefix.git/config
156
158
  - test/fixtures/named-license-file-prefix.git/objects/64/3983d3f82ecc2a7d8e4227946220ebffd477d2
@@ -177,6 +179,7 @@ files:
177
179
  - test/helper.rb
178
180
  - test/test_licensee.rb
179
181
  - test/test_licensee_bin.rb
182
+ - test/test_licensee_content_helper.rb
180
183
  - test/test_licensee_copyright_matcher.rb
181
184
  - test/test_licensee_exact_matcher.rb
182
185
  - test/test_licensee_gemspec_matcher.rb