licensee 4.3.3 → 4.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.md +1 -1
- data/README.md +6 -4
- data/lib/licensee.rb +2 -1
- data/lib/licensee/license.rb +2 -0
- data/lib/licensee/license_file.rb +6 -0
- data/lib/licensee/matchers/copyright_matcher.rb +20 -0
- data/lib/licensee/matchers/git_matcher.rb +1 -5
- data/lib/licensee/version.rb +1 -1
- data/test/functions.rb +4 -2
- data/test/test_licensee.rb +1 -1
- data/test/test_licensee_copyright_matcher.rb +39 -0
- data/test/test_licensee_licenses.rb +2 -2
- data/test/test_licensee_vendor.rb +2 -2
- data/vendor/choosealicense.com/_licenses/no-license.txt +28 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e696930375e6996477c17001e3ce366ee37159fe
|
4
|
+
data.tar.gz: 55aef0447384d6af3763ecfbab6ffa9c961fe08a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4a0c18e5a8edbccf0e3eaf4137d8664cb6ccf179f0a1a2944cca00f1a8b48aa37e66276a1b357e8c4358179b258079e37aab893bdb53c9101d8a8a28922a552
|
7
|
+
data.tar.gz: d4a51dcc5f97b286c9920f4295acdb9b973259a396b1b6dedfca09e9700890019935b5dd9cfbbb1a0a2feae13755159a7d723eeecf923cd22d0c311d431c7d8a
|
data/LICENSE.md
CHANGED
data/README.md
CHANGED
@@ -12,13 +12,15 @@
|
|
12
12
|
|
13
13
|
## The solution
|
14
14
|
|
15
|
-
Licensee automates the process of reading `LICENSE` files and compares their contents to known licenses using a several strategies (which we call "Matchers"):
|
15
|
+
Licensee automates the process of reading `LICENSE` files and compares their contents to known licenses using several strategies (which we call "Matchers"). It attempts to determine a project's license in the following order:
|
16
16
|
|
17
|
-
|
17
|
+
1. If the license file has an explicit copyright notice, and nothing more (e.g., `Copyright (c) 2015 Ben Balter`), we'll assume the author intends to retain all rights, and thus the project isn't licensed.
|
18
18
|
|
19
|
-
|
19
|
+
2. If the license is an exact match to a known license. Licenses like GPL don't have a copyright notice that needs to be changed in the license itself, so if we strip away whitespace, we might get lucky, and direct string comparison in Ruby is cheap.
|
20
20
|
|
21
|
-
|
21
|
+
3. If 90% of the lines match a known license. We use Git's internal change calculation method. To calculate diffs, Git hashes each line of both files, and compares the hashes to tell the percent changed. This method is fast, but is done on a line-by-line basis, so if the license is wrapped differently, or has extra words inserted, it's not going to match the license.
|
22
|
+
|
23
|
+
4. If we still can't match the license, we use a fancy math thing called the [Levenshtein distance algorithm](https://en.wikipedia.org/wiki/Levenshtein_distance), which, while very slow, is really good at calculating the similarity between two strings. By calculating the percent changed from the known license to the license file, you can tell, e.g., that a given license is 90% similar to the MIT license, with the remaining 10% likely representing the copyright line being properly adapted to the project.
|
22
24
|
|
23
25
|
Licensee will even diff the distributed license with the original, so you can see exactly what, if anything, has been changed.
|
24
26
|
|
data/lib/licensee.rb
CHANGED
@@ -10,6 +10,7 @@ require_relative "licensee/license_file"
|
|
10
10
|
require_relative "licensee/project"
|
11
11
|
require_relative "licensee/matcher"
|
12
12
|
require_relative "licensee/matchers/exact_matcher"
|
13
|
+
require_relative "licensee/matchers/copyright_matcher"
|
13
14
|
require_relative "licensee/matchers/git_matcher"
|
14
15
|
require_relative "licensee/matchers/levenshtein_matcher"
|
15
16
|
|
@@ -41,7 +42,7 @@ class Licensee
|
|
41
42
|
# Array of matchers to use, in order of preference
|
42
43
|
# The order should be descending order of anticipated speed to match
|
43
44
|
def matchers
|
44
|
-
[Licensee::ExactMatcher, Licensee::GitMatcher, Licensee::LevenshteinMatcher]
|
45
|
+
[Licensee::CopyrightMatcher, Licensee::ExactMatcher, Licensee::GitMatcher, Licensee::LevenshteinMatcher]
|
45
46
|
end
|
46
47
|
end
|
47
48
|
end
|
data/lib/licensee/license.rb
CHANGED
@@ -4,7 +4,13 @@ class Licensee
|
|
4
4
|
|
5
5
|
def initialize(blob)
|
6
6
|
@blob = blob
|
7
|
+
end
|
8
|
+
|
9
|
+
def similarity(other)
|
7
10
|
blob.hashsig(Rugged::Blob::HashSignature::WHITESPACE_SMART)
|
11
|
+
other.hashsig ? blob.similarity(other.hashsig) : 0
|
12
|
+
rescue Rugged::InvalidError
|
13
|
+
0
|
8
14
|
end
|
9
15
|
|
10
16
|
# Raw file contents
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class Licensee
|
2
|
+
class CopyrightMatcher < Matcher
|
3
|
+
|
4
|
+
REGEX = /\A(Copyright|Copyright ©|Copyright \(c\)) \d{4}.*?\n?\z/i
|
5
|
+
|
6
|
+
def match
|
7
|
+
no_license if file.content.strip =~ REGEX
|
8
|
+
end
|
9
|
+
|
10
|
+
def confidence
|
11
|
+
100
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def no_license
|
17
|
+
@no_license ||= Licensee::Licenses.find("no-license")
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -12,11 +12,7 @@ class Licensee
|
|
12
12
|
private
|
13
13
|
|
14
14
|
def matches
|
15
|
-
@matches ||= Licensee.licenses.map { |l| [l, similarity(l)] }.select { |l,sim| sim > 0 }
|
16
|
-
end
|
17
|
-
|
18
|
-
def similarity(other)
|
19
|
-
file.blob.similarity(other.hashsig)
|
15
|
+
@matches ||= Licensee.licenses.map { |l| [l, file.similarity(l)] }.select { |l,sim| sim > 0 }
|
20
16
|
end
|
21
17
|
|
22
18
|
# Pulled out for easier testing
|
data/lib/licensee/version.rb
CHANGED
data/test/functions.rb
CHANGED
@@ -30,16 +30,18 @@ class FakeBlob
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def similarity(other)
|
33
|
-
Rugged::Blob::HashSignature.compare(self.hashsig, other)
|
33
|
+
self.hashsig ? Rugged::Blob::HashSignature.compare(self.hashsig, other) : 0
|
34
34
|
end
|
35
35
|
|
36
36
|
def hashsig(options = 0)
|
37
37
|
@hashsig ||= Rugged::Blob::HashSignature.new(content, options)
|
38
|
+
rescue Rugged::InvalidError
|
39
|
+
nil
|
38
40
|
end
|
39
41
|
end
|
40
42
|
|
41
43
|
def chaos_monkey(string)
|
42
|
-
Random.rand(
|
44
|
+
Random.rand(5).times do
|
43
45
|
string[Random.rand(string.length)] = SecureRandom.base64(Random.rand(10))
|
44
46
|
end
|
45
47
|
string
|
data/test/test_licensee.rb
CHANGED
@@ -3,7 +3,7 @@ require 'helper'
|
|
3
3
|
class TestLicensee < Minitest::Test
|
4
4
|
should "know the licenses" do
|
5
5
|
assert_equal Array, Licensee.licenses.class
|
6
|
-
assert_equal
|
6
|
+
assert_equal 16, Licensee.licenses.size
|
7
7
|
assert_equal Licensee::License, Licensee.licenses.first.class
|
8
8
|
end
|
9
9
|
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestLicenseeCopyrightMatcher < Minitest::Test
|
4
|
+
|
5
|
+
def setup
|
6
|
+
text = "Copyright 2015 Ben Balter"
|
7
|
+
blob = FakeBlob.new(text)
|
8
|
+
@file = Licensee::LicenseFile.new(blob)
|
9
|
+
end
|
10
|
+
|
11
|
+
should "match the license" do
|
12
|
+
assert_equal "no-license", Licensee::CopyrightMatcher.match(@file).key
|
13
|
+
end
|
14
|
+
|
15
|
+
should "know the match confidence" do
|
16
|
+
assert_equal 100, Licensee::CopyrightMatcher.new(@file).confidence
|
17
|
+
end
|
18
|
+
|
19
|
+
should "match Copyright (C) copyright notices" do
|
20
|
+
text = "Copyright (C) 2015 Ben Balter"
|
21
|
+
blob = FakeBlob.new(text)
|
22
|
+
file = Licensee::LicenseFile.new(blob)
|
23
|
+
assert_equal "no-license", Licensee::CopyrightMatcher.match(file).key
|
24
|
+
end
|
25
|
+
|
26
|
+
should "match Copyright © copyright notices" do
|
27
|
+
text = "copyright © 2015 Ben Balter"
|
28
|
+
blob = FakeBlob.new(text)
|
29
|
+
file = Licensee::LicenseFile.new(blob)
|
30
|
+
assert_equal "no-license", Licensee::CopyrightMatcher.match(file).key
|
31
|
+
end
|
32
|
+
|
33
|
+
should "not false positive" do
|
34
|
+
text = File.open(Licensee::Licenses.find("mit").path).read.split("---").last
|
35
|
+
blob = FakeBlob.new(text)
|
36
|
+
file = Licensee::LicenseFile.new(blob)
|
37
|
+
assert_equal nil, Licensee::CopyrightMatcher.match(file)
|
38
|
+
end
|
39
|
+
end
|
@@ -4,12 +4,12 @@ class TestLicenseeLicenses < Minitest::Test
|
|
4
4
|
|
5
5
|
should "know license names" do
|
6
6
|
assert_equal Array, Licensee::Licenses.keys.class
|
7
|
-
assert_equal
|
7
|
+
assert_equal 16, Licensee::Licenses.keys.size
|
8
8
|
end
|
9
9
|
|
10
10
|
should "load the licenses" do
|
11
11
|
assert_equal Array, Licensee::Licenses.list.class
|
12
|
-
assert_equal
|
12
|
+
assert_equal 16, Licensee::Licenses.list.size
|
13
13
|
assert_equal Licensee::License, Licensee::Licenses.list.first.class
|
14
14
|
end
|
15
15
|
|
@@ -11,7 +11,7 @@ class TestLicenseeVendor < Minitest::Test
|
|
11
11
|
should "detect each vendored license when modified" do
|
12
12
|
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
|
13
13
|
licenses.each do |license|
|
14
|
-
verify_license_file(license, true)
|
14
|
+
verify_license_file(license, true) unless license =~ /no-license\.txt$/
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
@@ -25,7 +25,7 @@ class TestLicenseeVendor < Minitest::Test
|
|
25
25
|
should "detect each vendored license with different line lengths when modified" do
|
26
26
|
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
|
27
27
|
licenses.each do |license|
|
28
|
-
verify_license_file(license, true, 50)
|
28
|
+
verify_license_file(license, true, 50) unless license =~ /no-license\.txt$/
|
29
29
|
end
|
30
30
|
end
|
31
31
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
---
|
2
|
+
layout: license
|
3
|
+
permalink: /licenses/no-license/
|
4
|
+
category: No License
|
5
|
+
class: license-types
|
6
|
+
title: No License
|
7
|
+
|
8
|
+
description: You retain all rights and do not permit distribution, reproduction, or derivative works. You may grant some rights in cases where you publish your source code to a site that requires accepting terms of service. For example, publishing code in a public repository on GitHub requires that you allow others to view and fork your code.
|
9
|
+
|
10
|
+
note: This option may be subject to the Terms Of Use of the site where you publish your source code.
|
11
|
+
|
12
|
+
how: Simply do nothing, though including a copyright notice is recommended.
|
13
|
+
|
14
|
+
required:
|
15
|
+
- include-copyright
|
16
|
+
|
17
|
+
permitted:
|
18
|
+
- commercial-use
|
19
|
+
- private-use
|
20
|
+
|
21
|
+
forbidden:
|
22
|
+
- modifications
|
23
|
+
- distribution
|
24
|
+
- sublicense
|
25
|
+
|
26
|
+
---
|
27
|
+
|
28
|
+
Copyright [year] [fullname]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: licensee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rugged
|
@@ -111,6 +111,7 @@ files:
|
|
111
111
|
- lib/licensee/license_file.rb
|
112
112
|
- lib/licensee/licenses.rb
|
113
113
|
- lib/licensee/matcher.rb
|
114
|
+
- lib/licensee/matchers/copyright_matcher.rb
|
114
115
|
- lib/licensee/matchers/exact_matcher.rb
|
115
116
|
- lib/licensee/matchers/git_matcher.rb
|
116
117
|
- lib/licensee/matchers/levenshtein_matcher.rb
|
@@ -160,6 +161,7 @@ files:
|
|
160
161
|
- test/helper.rb
|
161
162
|
- test/test_licensee.rb
|
162
163
|
- test/test_licensee_bin.rb
|
164
|
+
- test/test_licensee_copyright_matcher.rb
|
163
165
|
- test/test_licensee_exact_matcher.rb
|
164
166
|
- test/test_licensee_git_matcher.rb
|
165
167
|
- test/test_licensee_levenshtein_matcher.rb
|
@@ -183,6 +185,7 @@ files:
|
|
183
185
|
- vendor/choosealicense.com/_licenses/lgpl-3.0.txt
|
184
186
|
- vendor/choosealicense.com/_licenses/mit.txt
|
185
187
|
- vendor/choosealicense.com/_licenses/mpl-2.0.txt
|
188
|
+
- vendor/choosealicense.com/_licenses/no-license.txt
|
186
189
|
- vendor/choosealicense.com/_licenses/unlicense.txt
|
187
190
|
homepage: http://github.com/benbalter/licensee
|
188
191
|
licenses:
|