licensee 5.0.0b4 → 5.0.0b5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ddb84b445d32f602815a66e09512dd9fd917bbb
4
- data.tar.gz: 44813cf43899909185a60b9bd3ae10fc5a5fa594
3
+ metadata.gz: 3141e0f1cc301cf2aebb6f835212533eafe27316
4
+ data.tar.gz: 3fac37012f662fd0520b99106793d3b43bafce61
5
5
  SHA512:
6
- metadata.gz: dd4d1ee0285d9425546a5dfa41d746b9db3a00903dcf9a5ea34ddea9d4eed4c979783bf9920d3db9756822a6d4caff44ec1a316ae12b606c6788ecb712552c12
7
- data.tar.gz: c3acb3e8ed77664b816ef5523d2cea79c236edd6b6b25bef1a4cc1100af077418c10b5ff792a5ab7041ab03275e91fa4348546710380576a3b204ef72deeab45
6
+ metadata.gz: bc48939cb0de92f1305b7e341b01da174c298b262ec58a1f6a7ed66298a1d4f68ecfbb1dc870e64cab51bb75298b643a66770c3590b666d2c3cdf7a8881049d3
7
+ data.tar.gz: 8f8350d224fa1cb4f18661a3a42c876e2dd450670de6f3888b8cbeef6a9a8ffdbe7ab635b8702b27b179b92115cf7defe32436b51bb7ee25a74ea7d8b440f474
@@ -7,8 +7,12 @@ Licensee.package_manager_files = true
7
7
  project = Licensee::Project.new(path)
8
8
  license = project.matched_file
9
9
 
10
+ if project.license_file
11
+ puts "License file: #{project.license_file.path}"
12
+ puts "Attribution: #{project.license_file.attribution}" if project.license_file.attribution
13
+ end
14
+
10
15
  if license
11
- puts "Matched file: #{license.path}"
12
16
  puts "License: #{license.match ? license.match.meta['title'] : 'no license'}"
13
17
  puts "Confidence: #{license.confidence}%"
14
18
  puts "Method: #{license.matcher.class}"
@@ -4,6 +4,7 @@ require 'rugged'
4
4
  require 'levenshtein'
5
5
 
6
6
  require_relative "licensee/version"
7
+ require_relative "licensee/content_helper"
7
8
  require_relative "licensee/license"
8
9
  require_relative "licensee/project"
9
10
  require_relative "licensee/project_file"
@@ -39,11 +40,6 @@ class Licensee
39
40
  Licensee::Project.new(path).license
40
41
  end
41
42
 
42
- # Diffs the project license and the known license
43
- def diff(path)
44
- Licensee::Project.new(path).license_file.diff
45
- end
46
-
47
43
  # Array of matchers to use, in order of preference
48
44
  # The order should be descending order of anticipated speed to match
49
45
  def matchers
@@ -0,0 +1,13 @@
1
+ class Licensee
2
+ module ContentHelper
3
+ def normalize_content(content)
4
+ return unless content
5
+ content = content.downcase
6
+ content = content.gsub(/\A[[:space:]]+/, '')
7
+ content = content.gsub(/[[:space:]]+\z/, '')
8
+ content = content.gsub(/^#{CopyrightMatcher::REGEX}$/i, '')
9
+ content = content.gsub(/[[:space:]]+/, ' ')
10
+ content.squeeze(' ').strip
11
+ end
12
+ end
13
+ end
@@ -39,6 +39,8 @@ class Licensee
39
39
 
40
40
  HIDDEN_LICENSES = %w[other no-license]
41
41
 
42
+ include Licensee::ContentHelper
43
+
42
44
  def initialize(key)
43
45
  @key=key.downcase
44
46
  end
@@ -76,6 +78,14 @@ class Licensee
76
78
  meta.nil? ? key.capitalize : meta["title"]
77
79
  end
78
80
 
81
+ def nickname
82
+ meta["nickname"] if meta
83
+ end
84
+
85
+ def name_without_version
86
+ /(.+?)(( v?\d\.\d)|$)/.match(name)[1]
87
+ end
88
+
79
89
  def featured?
80
90
  !!(meta["featured"] if meta)
81
91
  end
@@ -95,7 +105,7 @@ class Licensee
95
105
 
96
106
  # License body with all whitespace replaced with a single space
97
107
  def body_normalized
98
- @content_normalized ||= body.to_s.downcase.gsub(/\s+/, " ").strip
108
+ @body_normalized ||= normalize_content(body)
99
109
  end
100
110
 
101
111
  # Git-computed hash signature for the license file
@@ -118,6 +128,14 @@ class Licensee
118
128
  other != nil && key == other.key
119
129
  end
120
130
 
131
+ def body_includes_name?
132
+ @body_includes_name ||= body_normalized.include?(name_without_version.downcase)
133
+ end
134
+
135
+ def body_includes_nickname?
136
+ @body_includes_nickname ||= !!(nickname && body_normalized.include?(nickname.downcase))
137
+ end
138
+
121
139
  private
122
140
 
123
141
  def parts
@@ -2,10 +2,11 @@
2
2
  class Licensee
3
3
  class CopyrightMatcher < Matcher
4
4
 
5
- REGEX = /\ACopyright (©|\(c\)|\xC2\xA9)? ?\d{4}.*?\n?\z/i
5
+ REGEX = /\s*Copyright (©|\(c\)|\xC2\xA9)? ?(\d{4}|\[year\])(.*)?\s*/i
6
6
 
7
7
  def match
8
- no_license if file.content.strip =~ REGEX
8
+ # Note: must use content, and not content_normalized here
9
+ no_license if file.content.strip =~ /\A#{REGEX}\z/i
9
10
  rescue
10
11
  nil
11
12
  end
@@ -2,7 +2,7 @@ class Licensee
2
2
  class GitMatcher < Matcher
3
3
 
4
4
  def match
5
- match_info[0] unless match_info.nil?
5
+ match_info[0] if match_info && match_info[1] >= Licensee.confidence_threshold
6
6
  end
7
7
 
8
8
  def confidence
@@ -19,7 +19,7 @@ class Licensee
19
19
  def match_info
20
20
  @match_info ||= begin
21
21
  match = matches.max_by { |license, similarity| similarity }
22
- match if match && match[1] > Licensee.confidence_threshold
22
+ match if match
23
23
  end
24
24
  end
25
25
  end
@@ -4,6 +4,13 @@ class Licensee
4
4
  # Return the first potential license that is more similar than the confidence threshold
5
5
  def match
6
6
  @match ||= potential_licenses.find do |license|
7
+
8
+ # If we know the license text contains the license name or nickname,
9
+ # bail early unless the file we're checking contains it.
10
+ # Guards against OSL & AFL confusion. See https://github.com/benbalter/licensee/issues/50
11
+ next if license.body_includes_name? && !includes_license_name?(license)
12
+ next if license.body_includes_nickname? && !includes_license_nickname?(license)
13
+
7
14
  similarity(license) >= Licensee.confidence_threshold
8
15
  end
9
16
  end
@@ -12,7 +19,11 @@ class Licensee
12
19
  # Difference in lengths cannot exceed the file's length * the confidence threshold / 100
13
20
  def potential_licenses
14
21
  @potential_licenses ||= begin
15
- Licensee.licenses(:hidden => true).select { |license| length_delta(license) <= max_delta }.sort_by { |l| length_delta(l) }
22
+ licenses = Licensee.licenses(:hidden => true)
23
+ licenses = licenses.select do |license|
24
+ license.body_normalized && length_delta(license) <= max_delta
25
+ end
26
+ licenses.sort_by { |l| length_delta(l) }
16
27
  end
17
28
  end
18
29
 
@@ -51,5 +62,13 @@ class Licensee
51
62
  def distance(license)
52
63
  Levenshtein.distance(license.body_normalized, file.content_normalized).to_f
53
64
  end
65
+
66
+ def includes_license_name?(license)
67
+ file.content_normalized.include?(license.name_without_version.downcase)
68
+ end
69
+
70
+ def includes_license_nickname?(license)
71
+ license.nickname && file.content_normalized.include?(license.nickname.downcase)
72
+ end
54
73
  end
55
74
  end
@@ -7,6 +7,8 @@ class Licensee
7
7
  attr_reader :blob, :path
8
8
  alias_method :filename, :path
9
9
 
10
+ include Licensee::ContentHelper
11
+
10
12
  def initialize(blob, path)
11
13
  @blob = blob
12
14
  @path = path
@@ -21,7 +23,7 @@ class Licensee
21
23
 
22
24
  # File content with all whitespace replaced with a single space
23
25
  def content_normalized
24
- @content_normalized ||= content.downcase.gsub(/\s+/, " ").strip
26
+ @content_normalized ||= normalize_content(content)
25
27
  end
26
28
 
27
29
  # Determines which matching strategy to use, returns an instance of that matcher
@@ -46,12 +48,6 @@ class Licensee
46
48
  0
47
49
  end
48
50
 
49
- # Computes a diff between known license and project license
50
- def diff(options={})
51
- options = options.merge(:reverse => true)
52
- blob.diff(match.body, options).to_s if match
53
- end
54
-
55
51
  def license_score
56
52
  self.class.license_score(filename)
57
53
  end
@@ -60,6 +56,12 @@ class Licensee
60
56
  license_score != 0.0
61
57
  end
62
58
 
59
+ def attribution
60
+ return nil unless license?
61
+ matches = /^#{CopyrightMatcher::REGEX}$/i.match(content)
62
+ matches[0].strip if matches
63
+ end
64
+
63
65
  def package_score
64
66
  return 1.0 if filename =~ /[a-zA-Z0-9\-_]+\.gemspec/
65
67
  return 1.0 if filename =~ /package\.json/
@@ -1,3 +1,3 @@
1
1
  class Licensee
2
- VERSION = "5.0.0b4"
2
+ VERSION = "5.0.0b5"
3
3
  end
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2015 Ben Balter
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -22,8 +22,8 @@ end
22
22
  FakeBlob = Licensee::FilesystemRepository::Blob
23
23
 
24
24
  def chaos_monkey(string)
25
- Random.rand(5).times do
26
- string[Random.rand(string.length)] = SecureRandom.base64(Random.rand(5))
25
+ Random.rand(3).times do
26
+ string[Random.rand(string.length)] = SecureRandom.base64(Random.rand(3))
27
27
  end
28
28
  string
29
29
  end
@@ -39,15 +39,28 @@ def verify_license_file(license, chaos = false, wrap=false)
39
39
  license_file = Licensee::ProjectFile.new(blob, "LICENSE")
40
40
 
41
41
  actual = license_file.match
42
- assert actual, "No match for #{expected}. Here's the test text:\n#{text}"
42
+ msg = "No match for #{expected}."
43
+
44
+ unless actual
45
+ Licensee.matchers.each do |matcher|
46
+ matcher = matcher.new(license_file)
47
+ msg << "#{matcher.class}: #{matcher.confidence}% #{matcher.match.inspect}\n"
48
+ end
49
+ msg << "Here's the test text:\n#{text}"
50
+ end
51
+
52
+ assert actual, msg
43
53
  assert_equal expected, actual.key, "expeceted #{expected} but got #{actual.key} for .match. Confidence: #{license_file.confidence}. Method: #{license_file.matcher.class}"
44
54
  end
45
55
 
46
56
  def wrap(text, line_width=80)
47
57
  text = text.clone
58
+ copyright = /^#{Licensee::CopyrightMatcher::REGEX}$/i.match(text)
59
+ text.gsub! /^#{Licensee::CopyrightMatcher::REGEX}$/i, '[COPYRIGHT]' if copyright
48
60
  text.gsub! /([^\n])\n([^\n])/, '\1 \2'
49
61
  text = text.split("\n").collect do |line|
50
62
  line.length > line_width ? line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip : line
51
63
  end * "\n"
64
+ text.gsub! "[COPYRIGHT]", "\n#{copyright}\n" if copyright
52
65
  text.strip
53
66
  end
@@ -12,10 +12,6 @@ class TestLicensee < Minitest::Test
12
12
  assert_equal "mit", Licensee.license(fixture_path("licenses.git")).key
13
13
  end
14
14
 
15
- should "diff a license" do
16
- Licensee.diff(fixture_path("licenses.git"))
17
- end
18
-
19
15
  context "confidence threshold" do
20
16
  should "return the confidence threshold" do
21
17
  assert_equal 90, Licensee.confidence_threshold
@@ -6,7 +6,7 @@ class TestLicenseeBin < Minitest::Test
6
6
  Dir.chdir root
7
7
  stdout,stderr,status = Open3.capture3("#{root}/bin/licensee")
8
8
  assert stdout.include?("License: MIT"), "expected #{stdout} to include `License: MIT`"
9
- assert stdout.include?("Matched file: LICENSE.md"), "expected #{stdout} to include `Matched file: LICENSE.md`"
9
+ assert stdout.include?("License file: LICENSE.md"), "expected #{stdout} to include `Matched file: LICENSE.md`"
10
10
  assert_equal 0, status
11
11
  end
12
12
  end
@@ -0,0 +1,36 @@
1
+ require 'helper'
2
+
3
+ class TestHelper
4
+ include Licensee::ContentHelper
5
+ end
6
+
7
+ class TestLicenseeContentHelper < Minitest::Test
8
+
9
+ def setup
10
+ @helper = TestHelper.new
11
+ end
12
+
13
+ def normalize(content)
14
+ @helper.normalize_content(content)
15
+ end
16
+
17
+ should "downcase content" do
18
+ assert_equal "foo", normalize("Foo")
19
+ end
20
+
21
+ should "strip leading whitespace" do
22
+ assert_equal "foo", normalize("\n Foo")
23
+ end
24
+
25
+ should "strip trailing whitespace" do
26
+ assert_equal "foo", normalize("Foo \n ")
27
+ end
28
+
29
+ should "strip double spaces" do
30
+ assert_equal "foo bar", normalize("Foo bar")
31
+ end
32
+
33
+ should "strip copyrights" do
34
+ assert_equal "foo", normalize("Copyright (c) 2015 Ben Balter\nFoo")
35
+ end
36
+ end
@@ -15,4 +15,5 @@ class TestLicenseeGitMatcher < Minitest::Test
15
15
  should "know the match confidence" do
16
16
  assert_equal 94, Licensee::GitMatcher.new(@mit).confidence
17
17
  end
18
+
18
19
  end
@@ -18,13 +18,13 @@ class TestLicenseeLevenshteinMatcher < Minitest::Test
18
18
  end
19
19
 
20
20
  should "calculate max delta" do
21
- assert_equal 964.8000000000001, Licensee::LevenshteinMatcher.new(@mit).max_delta
21
+ assert_equal 937.8000000000001, Licensee::LevenshteinMatcher.new(@mit).max_delta
22
22
  end
23
23
 
24
24
  should "calculate length delta" do
25
25
  isc = Licensee::License.find("isc")
26
- assert_equal 2, Licensee::LevenshteinMatcher.new(@mit).length_delta(Licensee::License.find("mit"))
27
- assert_equal 334, Licensee::LevenshteinMatcher.new(@mit).length_delta(isc)
26
+ assert_equal 0.0, Licensee::LevenshteinMatcher.new(@mit).length_delta(Licensee::License.find("mit"))
27
+ assert_equal 347.0, Licensee::LevenshteinMatcher.new(@mit).length_delta(isc)
28
28
  end
29
29
 
30
30
  should "round up potential licenses" do
@@ -30,6 +30,11 @@ class TestLicenseeLicense < Minitest::Test
30
30
  assert_equal "MIT License", @license.name
31
31
  end
32
32
 
33
+ should "know the license nickname" do
34
+ expected = "GNU Affero GPL v3.0"
35
+ assert_equal expected, Licensee::License.find("agpl-3.0").nickname
36
+ end
37
+
33
38
  should "know the license ID" do
34
39
  assert_equal "mit", @license.key
35
40
  end
@@ -98,6 +103,32 @@ class TestLicenseeLicense < Minitest::Test
98
103
  refute license.featured?
99
104
  end
100
105
 
106
+ describe "name without version" do
107
+ should "strip the version from the license name" do
108
+ expected = "GNU Affero General Public License"
109
+ assert_equal expected, Licensee::License.find("agpl-3.0").name_without_version
110
+ expected = "GNU General Public License"
111
+ assert_equal expected, Licensee::License.find("gpl-2.0").name_without_version
112
+ assert_equal expected, Licensee::License.find("gpl-3.0").name_without_version
113
+ end
114
+
115
+ should "know if the license contains the name without version" do
116
+ refute Licensee::License.find("cc0-1.0").body_includes_name?
117
+ assert Licensee::License.find("agpl-3.0").body_includes_name?
118
+ end
119
+
120
+ should "know if the license contains the nickname" do
121
+ refute Licensee::License.find("mit").body_includes_nickname?
122
+ assert Licensee::License.find("apache-2.0").body_includes_nickname?
123
+ end
124
+
125
+ Licensee.licenses.each do |license|
126
+ should "strip the version number from the #{license.name} license" do
127
+ assert license.name_without_version
128
+ end
129
+ end
130
+ end
131
+
101
132
  describe "class methods" do
102
133
  should "know license names" do
103
134
  assert_equal Array, Licensee::License.keys.class
@@ -68,6 +68,10 @@ class TestLicenseeProject < Minitest::Test
68
68
  end
69
69
  end
70
70
 
71
+ should "detect the MIT license even with the title removed" do
72
+ verify_license_file fixture_path("mit-without-title/mit.txt")
73
+ end
74
+
71
75
  describe "packages" do
72
76
 
73
77
  def setup
@@ -22,13 +22,12 @@ class TestLicenseeProjectFile < Minitest::Test
22
22
  assert_equal "LICENSE", @file.path
23
23
  end
24
24
 
25
- should "diff the file" do
26
- expected = "-Copyright (c) [year] [fullname]\n+Copyright (c) 2014 Ben Balter"
27
- assert @file.diff.include?(expected)
25
+ should "calculate confidence" do
26
+ assert_equal 100, @file.confidence
28
27
  end
29
28
 
30
- should "calculate confidence" do
31
- assert_equal 94, @file.confidence
29
+ should "parse the attribution" do
30
+ assert_equal "Copyright (c) 2014 Ben Balter", @file.attribution
32
31
  end
33
32
 
34
33
  context "license filename scoring" do
@@ -1,15 +1,17 @@
1
1
  require 'helper'
2
2
 
3
3
  class TestLicenseeVendor < Minitest::Test
4
- Licensee::License.send(:license_files).shuffle.each do |license|
5
4
 
5
+ SKIP = %[wtfpl no-license]
6
+
7
+ Licensee::License.send(:license_files).shuffle.each do |license|
6
8
  should "detect the #{license} license" do
7
9
  verify_license_file(license)
8
10
  end
9
11
 
10
12
  context "when modified" do
11
13
  should "detect the #{license} license" do
12
- verify_license_file(license, true) unless license =~ /no-license\.txt$/
14
+ verify_license_file(license, true) unless SKIP.include?(File.basename(license, ".txt"))
13
15
  end
14
16
  end
15
17
 
@@ -20,7 +22,7 @@ class TestLicenseeVendor < Minitest::Test
20
22
 
21
23
  context "when modified" do
22
24
  should "detect the #{license} license" do
23
- verify_license_file(license, true, 50) unless license =~ /no-license\.txt$/
25
+ verify_license_file(license, true, 50) unless SKIP.include?(File.basename(license, ".txt"))
24
26
  end
25
27
  end
26
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: licensee
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.0b4
4
+ version: 5.0.0b5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-01 00:00:00.000000000 Z
11
+ date: 2015-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rugged
@@ -107,6 +107,7 @@ files:
107
107
  - Rakefile
108
108
  - bin/licensee
109
109
  - lib/licensee.rb
110
+ - lib/licensee/content_helper.rb
110
111
  - lib/licensee/filesystem_repository.rb
111
112
  - lib/licensee/license.rb
112
113
  - lib/licensee/matcher.rb
@@ -151,6 +152,7 @@ files:
151
152
  - test/fixtures/licenses.git/objects/pack/pack-4a7088171ae3ca900f010a4be6f1c2c96490c338.pack
152
153
  - test/fixtures/licenses.git/packed-refs
153
154
  - test/fixtures/licenses.git/refs/heads/master
155
+ - test/fixtures/mit-without-title/mit.txt
154
156
  - test/fixtures/named-license-file-prefix.git/HEAD
155
157
  - test/fixtures/named-license-file-prefix.git/config
156
158
  - test/fixtures/named-license-file-prefix.git/objects/64/3983d3f82ecc2a7d8e4227946220ebffd477d2
@@ -177,6 +179,7 @@ files:
177
179
  - test/helper.rb
178
180
  - test/test_licensee.rb
179
181
  - test/test_licensee_bin.rb
182
+ - test/test_licensee_content_helper.rb
180
183
  - test/test_licensee_copyright_matcher.rb
181
184
  - test/test_licensee_exact_matcher.rb
182
185
  - test/test_licensee_gemspec_matcher.rb