licensee 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -9
- data/bin/licensee +1 -0
- data/lib/licensee.rb +28 -5
- data/lib/licensee/license.rb +16 -8
- data/lib/licensee/license_file.rb +21 -25
- data/lib/licensee/licenses.rb +18 -11
- data/lib/licensee/matcher.rb +28 -0
- data/lib/licensee/matchers/exact_matcher.rb +11 -0
- data/lib/licensee/matchers/git_matcher.rb +30 -0
- data/lib/licensee/matchers/levenshtein_matcher.rb +55 -0
- data/lib/licensee/project.rb +10 -7
- data/lib/licensee/version.rb +1 -1
- data/test/functions.rb +18 -9
- data/test/test_licensee.rb +4 -0
- data/test/test_licensee_exact_matcher.rb +18 -0
- data/test/test_licensee_git_matcher.rb +18 -0
- data/test/test_licensee_levenshtein_matcher.rb +34 -0
- data/test/test_licensee_license.rb +0 -1
- data/test/test_licensee_license_file.rb +4 -11
- data/test/test_licensee_licenses.rb +2 -2
- data/test/test_licensee_matcher.rb +7 -0
- data/test/test_licensee_vendor.rb +15 -1
- metadata +24 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 839782ad5361d8938500dd5938eb33e38a821d9a
|
4
|
+
data.tar.gz: a688eb2acdf6a1da457cdcf4037a346e4b20691c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61151787e52a75c131b53b6ffdc38e8e8c7140546837e39bc3586bf87928d43b12505ee5f983b853520c6f65d1c742c0b3c381200df95b824a8a957bb031576a
|
7
|
+
data.tar.gz: 9bfe7533e62e998eb1406c663ba93bf2db81f85a92d274f1efb5f2d389f6e6e029e88b64ba94955d4dc322545254506287734d767b37ae70b40840238083cf7d
|
data/README.md
CHANGED
@@ -12,9 +12,13 @@
|
|
12
12
|
|
13
13
|
## The solution
|
14
14
|
|
15
|
-
Licensee automates the process of reading `LICENSE` files and compares their contents to known licenses using a
|
15
|
+
Licensee automates the process of reading `LICENSE` files and compares their contents to known licenses using several strategies (which we call "Matchers"):
|
16
16
|
|
17
|
-
|
17
|
+
First, we look to see if the license is an exact match. Licenses like GPL don't have a copyright notice that needs to be changed in the license itself, so if we strip away whitespace, we might get lucky, and direct string comparison is cheap.
|
18
|
+
|
19
|
+
Next, we look to Git's internal change calculation method, which is fast, but is done on a line-by-line basis, so if the license is wrapped differently, or has extra words inserted, it's not going to match the license.
|
20
|
+
|
21
|
+
Finally, if we still can't match the license, we use a fancy math thing called the [Levenshtein distance algorithm](https://en.wikipedia.org/wiki/Levenshtein_distance), which, while slow, is really good at calculating the similarity between a known license and an unknown license. By calculating the percent changed from the known license, you can tell, e.g., that a given license is 98% similar to the MIT license, that 2% likely representing the copyright line being properly adapted to the project.
|
18
22
|
|
19
23
|
Licensee will even diff the distributed license with the original, so you can see exactly what, if anything, has been changed.
|
20
24
|
|
@@ -41,13 +45,6 @@ license.meta["description"]
|
|
41
45
|
|
42
46
|
license.meta["permitted"]
|
43
47
|
=> ["commercial-use","modifications","distribution","sublicense","private-use"]
|
44
|
-
|
45
|
-
# Getting all matches
|
46
|
-
Licensee.matches "/path/to/a/project"
|
47
|
-
=> [#<Licensee::License name="MIT" match=0.9842154131847726>,
|
48
|
-
#<Licensee::License name="unlicense" match=0.4326833797585887>,
|
49
|
-
...
|
50
|
-
#<Licensee::License name="no-license" match=0.0232126276694522>]
|
51
48
|
```
|
52
49
|
|
53
50
|
## Diffing
|
@@ -72,6 +69,7 @@ You'll get an output that looks like:
|
|
72
69
|
```
|
73
70
|
License: MIT
|
74
71
|
Confidence: 98.42%
|
72
|
+
Matcher: Licensee::GitMatcher
|
75
73
|
```
|
76
74
|
|
77
75
|
## What it looks at
|
data/bin/licensee
CHANGED
data/lib/licensee.rb
CHANGED
@@ -1,19 +1,42 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'rugged'
|
3
|
+
require 'levenshtein'
|
3
4
|
|
4
5
|
require_relative "licensee/license"
|
5
6
|
require_relative "licensee/licenses"
|
6
7
|
require_relative "licensee/license_file"
|
7
8
|
require_relative "licensee/project"
|
9
|
+
require_relative "licensee/matcher"
|
10
|
+
require_relative "licensee/matchers/exact_matcher"
|
11
|
+
require_relative "licensee/matchers/git_matcher"
|
12
|
+
require_relative "licensee/matchers/levenshtein_matcher"
|
8
13
|
|
9
14
|
class Licensee
|
15
|
+
|
16
|
+
# Over what percent is a match considered a match
|
10
17
|
CONFIDENCE_THRESHOLD = 90
|
11
18
|
|
12
|
-
|
13
|
-
|
14
|
-
|
19
|
+
class << self
|
20
|
+
|
21
|
+
# Returns an array of Licensee::License instances
|
22
|
+
def licenses
|
23
|
+
@licenses ||= Licensee::Licenses.list
|
24
|
+
end
|
25
|
+
|
26
|
+
# Returns the license for a given git repo
|
27
|
+
def license(path)
|
28
|
+
Licensee::Project.new(path).license
|
29
|
+
end
|
30
|
+
|
31
|
+
# Diffs the project license and the known license
|
32
|
+
def diff(path)
|
33
|
+
Licensee::Project.new(path).license_file.diff
|
34
|
+
end
|
15
35
|
|
16
|
-
|
17
|
-
|
36
|
+
# Array of matchers to use, in order of preference
|
37
|
+
# The order should be descending order of anticipated speed to match
|
38
|
+
def matchers
|
39
|
+
[Licensee::ExactMatcher, Licensee::GitMatcher, Licensee::LevenshteinMatcher]
|
40
|
+
end
|
18
41
|
end
|
19
42
|
end
|
data/lib/licensee/license.rb
CHANGED
@@ -6,34 +6,36 @@ class Licensee
|
|
6
6
|
@name=name.downcase
|
7
7
|
end
|
8
8
|
|
9
|
+
# Path to vendored license file on disk
|
9
10
|
def path
|
10
11
|
@path ||= File.expand_path "#{@name}.txt", Licensee::Licenses.base
|
11
12
|
end
|
12
13
|
|
14
|
+
# Raw content of license file, including YAML front matter
|
13
15
|
def content
|
14
16
|
@content ||= File.open(path).read
|
15
17
|
end
|
16
18
|
|
17
|
-
|
18
|
-
@parts ||= content.match(/^(---\n.*\n---)?(.*)/m).to_a
|
19
|
-
end
|
20
|
-
|
19
|
+
# License metadata from YAML front matter
|
21
20
|
def meta
|
22
21
|
@meta ||= front_matter = YAML.load(parts[1]) if parts[1]
|
23
22
|
rescue
|
24
23
|
nil
|
25
24
|
end
|
26
25
|
|
27
|
-
|
28
|
-
@length ||= body.length
|
29
|
-
end
|
30
|
-
|
26
|
+
# The license body (i.e., contents minus front matter)
|
31
27
|
def body
|
32
28
|
@body ||= parts[2]
|
33
29
|
end
|
34
30
|
alias_method :to_s, :body
|
35
31
|
alias_method :text, :body
|
36
32
|
|
33
|
+
# License body, downcased, with each run of whitespace collapsed to a single space
|
34
|
+
def body_normalized
|
35
|
+
@content_normalized ||= body.downcase.gsub(/\s+/, " ").strip
|
36
|
+
end
|
37
|
+
|
38
|
+
# Git-computed hash signature for the license file
|
37
39
|
def hashsig
|
38
40
|
@hashsig ||= Rugged::Blob::HashSignature.new(
|
39
41
|
body, Rugged::Blob::HashSignature::WHITESPACE_SMART)
|
@@ -42,5 +44,11 @@ class Licensee
|
|
42
44
|
def inspect
|
43
45
|
"#<Licensee::License name=\"#{name}\">"
|
44
46
|
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def parts
|
51
|
+
@parts ||= content.match(/^(---\n.*\n---)?(.*)/m).to_a
|
52
|
+
end
|
45
53
|
end
|
46
54
|
end
|
@@ -7,43 +7,39 @@ class Licensee
|
|
7
7
|
blob.hashsig(Rugged::Blob::HashSignature::WHITESPACE_SMART)
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
10
|
+
# Raw file contents
|
11
|
+
def content
|
12
|
+
@contents ||= begin
|
13
|
+
blob.content
|
14
|
+
end
|
12
15
|
end
|
13
|
-
alias_method :to_s, :
|
14
|
-
alias_method :
|
16
|
+
alias_method :to_s, :content
|
17
|
+
alias_method :contents, :content
|
15
18
|
|
16
|
-
|
17
|
-
|
19
|
+
# File content, downcased, with each run of whitespace collapsed to a single space
|
20
|
+
def content_normalized
|
21
|
+
@content_normalized ||= content.downcase.gsub(/\s+/, " ").strip
|
18
22
|
end
|
19
23
|
|
20
|
-
|
21
|
-
|
24
|
+
# Computes a diff between known license and project license
|
25
|
+
def diff(options={})
|
26
|
+
options = options.merge(:reverse => true)
|
27
|
+
blob.diff(match.body, options).to_s if match
|
22
28
|
end
|
23
29
|
|
24
|
-
|
25
|
-
|
30
|
+
# Determines which matching strategy to use, returns an instance of that matcher
|
31
|
+
def matcher
|
32
|
+
@matcher ||= Licensee.matchers.map { |m| m.new(self) }.find { |m| m.match }
|
26
33
|
end
|
27
34
|
|
35
|
+
# Returns a Licensee::License instance of the matched license
|
28
36
|
def match
|
29
|
-
|
37
|
+
@match ||= matcher.match if matcher
|
30
38
|
end
|
31
39
|
|
40
|
+
# Returns the percent confidence in the match
|
32
41
|
def confidence
|
33
|
-
|
34
|
-
end
|
35
|
-
alias_method :similarity, :confidence
|
36
|
-
|
37
|
-
def diff(options={})
|
38
|
-
options = options.merge(:reverse => true)
|
39
|
-
blob.diff(match.body, options).to_s if match
|
40
|
-
end
|
41
|
-
|
42
|
-
private
|
43
|
-
|
44
|
-
# Pulled out for easier testing
|
45
|
-
def calculate_similarity(other)
|
46
|
-
blob.similarity(other.hashsig)
|
42
|
+
@condience ||= matcher.confidence if matcher
|
47
43
|
end
|
48
44
|
end
|
49
45
|
end
|
data/lib/licensee/licenses.rb
CHANGED
@@ -1,15 +1,8 @@
|
|
1
1
|
class Licensee
|
2
2
|
class Licenses
|
3
3
|
class << self
|
4
|
-
def names
|
5
|
-
@names ||= begin
|
6
|
-
names = Dir.entries(base)
|
7
|
-
names.map! { |l| File.basename(l, ".txt").downcase }
|
8
|
-
names.reject! { |l| l =~ /^\./ || l.nil? }
|
9
|
-
names
|
10
|
-
end
|
11
|
-
end
|
12
4
|
|
5
|
+
# Returns an array of Licensee::License instances
|
13
6
|
def list
|
14
7
|
@licenses ||= begin
|
15
8
|
licenses = []
|
@@ -18,14 +11,28 @@ class Licensee
|
|
18
11
|
end
|
19
12
|
end
|
20
13
|
|
14
|
+
# Given a license name, attempt to return a matching Licensee::License instance
|
15
|
+
def find(name)
|
16
|
+
list.find { |l| l.name.downcase == name.downcase }
|
17
|
+
end
|
18
|
+
|
19
|
+
# Path to vendored licenses
|
21
20
|
def base
|
22
21
|
@base ||= File.expand_path "../../vendor/choosealicense.com/_licenses", File.dirname(__FILE__)
|
23
22
|
end
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
private
|
25
|
+
|
26
|
+
# Returns a list of potential license names, as vendored
|
27
|
+
def names
|
28
|
+
@names ||= begin
|
29
|
+
names = Dir.entries(base)
|
30
|
+
names.map! { |l| File.basename(l, ".txt").downcase }
|
31
|
+
names.reject! { |l| l =~ /^\./ || l.nil? }
|
32
|
+
names
|
33
|
+
end
|
28
34
|
end
|
35
|
+
|
29
36
|
end
|
30
37
|
end
|
31
38
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Abstract class to describe different matching strategies
|
2
|
+
# Must respond to:
|
3
|
+
# - match
|
4
|
+
# - confidence
|
5
|
+
#
|
6
|
+
# Can assume file will be a Licensee::LicenseFile instance
|
7
|
+
class Licensee
|
8
|
+
class Matcher
|
9
|
+
attr_reader :file
|
10
|
+
|
11
|
+
def self.match(file)
|
12
|
+
self.new(file).match
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(file)
|
16
|
+
@file = file
|
17
|
+
end
|
18
|
+
|
19
|
+
def match
|
20
|
+
nil
|
21
|
+
end
|
22
|
+
|
23
|
+
def confidence
|
24
|
+
0
|
25
|
+
end
|
26
|
+
alias_method :similarity, :confidence
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class Licensee
|
2
|
+
class GitMatcher < Matcher
|
3
|
+
|
4
|
+
def match
|
5
|
+
match_info[0] unless match_info.nil?
|
6
|
+
end
|
7
|
+
|
8
|
+
def confidence
|
9
|
+
match_info[1] unless match_info.nil?
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def matches
|
15
|
+
@matches ||= Licensee.licenses.map { |l| [l, similarity(l)] }.select { |l,sim| sim > 0 }
|
16
|
+
end
|
17
|
+
|
18
|
+
def similarity(other)
|
19
|
+
file.blob.similarity(other.hashsig)
|
20
|
+
end
|
21
|
+
|
22
|
+
# Pulled out for easier testing
|
23
|
+
def match_info
|
24
|
+
@match_info ||= begin
|
25
|
+
match = matches.max_by { |license, similarity| similarity }
|
26
|
+
match if match && match[1] > Licensee::CONFIDENCE_THRESHOLD
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
class Licensee
|
2
|
+
class LevenshteinMatcher < Matcher
|
3
|
+
|
4
|
+
# Return the first potential license that is more similar than the confidence threshold
|
5
|
+
def match
|
6
|
+
@match ||= potential_licenses.find do |license|
|
7
|
+
similarity(license) >= Licensee::CONFIDENCE_THRESHOLD
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
# Sort all licenses, in ascending order, by difference in length to the file
|
12
|
+
# Difference in lengths cannot exceed the file's length * the confidence threshold / 100
|
13
|
+
def potential_licenses
|
14
|
+
@potential_licenses ||= begin
|
15
|
+
Licensee.licenses.select { |license| length_delta(license) <= max_delta }.sort_by { |l| length_delta(l) }
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Calculate the difference between the file length and a given license's length
|
20
|
+
def length_delta(license)
|
21
|
+
(file_length - license.body_normalized.length).abs
|
22
|
+
end
|
23
|
+
|
24
|
+
# Maximum possible difference between file length and license length
|
25
|
+
# for a license to be a potential license to be matched
|
26
|
+
def max_delta
|
27
|
+
@max_delta ||= (file_length * (Licensee::CONFIDENCE_THRESHOLD.to_f / 100.to_f ))
|
28
|
+
end
|
29
|
+
|
30
|
+
# Confidence that the matched license is a match
|
31
|
+
def confidence
|
32
|
+
@confidence ||= match ? similarity(match) : 0
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
# Length of the file, normalized to strip whitespace
|
38
|
+
def file_length
|
39
|
+
@file_length ||= file.content_normalized.length.to_f
|
40
|
+
end
|
41
|
+
|
42
|
+
# Calculate percent similarity between file and potential license
|
43
|
+
def similarity(license)
|
44
|
+
100 * (file_length - distance(license)) / file_length
|
45
|
+
end
|
46
|
+
|
47
|
+
# Calculate the levenshtein distance between file and license
|
48
|
+
# Note: We used content/body normalized because white space and capitalization
|
49
|
+
# isn't legally significant in this context. Fewer characters lets levenshtein
|
50
|
+
# work faster. As long as they both undergo the same transformation, should match.
|
51
|
+
def distance(license)
|
52
|
+
Levenshtein.distance(license.body_normalized, file.content_normalized).to_f
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/licensee/project.rb
CHANGED
@@ -2,7 +2,8 @@ class Licensee
|
|
2
2
|
class Project
|
3
3
|
attr_reader :repository
|
4
4
|
|
5
|
-
|
5
|
+
# Array of file names to look for potential license files, in order
|
6
|
+
LICENSE_FILENAMES = %w[
|
6
7
|
LICENSE
|
7
8
|
LICENSE.txt
|
8
9
|
LICENSE.md
|
@@ -10,6 +11,10 @@ class Licensee
|
|
10
11
|
COPYING
|
11
12
|
]
|
12
13
|
|
14
|
+
# Initializes a new project
|
15
|
+
#
|
16
|
+
# path_or_repo path to git repo or Rugged::Repository instance
|
17
|
+
# revision - revision ref, if any
|
13
18
|
def initialize(path_or_repo, revision = nil)
|
14
19
|
if path_or_repo.kind_of? Rugged::Repository
|
15
20
|
@repository = path_or_repo
|
@@ -20,22 +25,20 @@ class Licensee
|
|
20
25
|
@revision = revision
|
21
26
|
end
|
22
27
|
|
28
|
+
# Detects the license file, if any
|
29
|
+
# Returns a Licensee::LicenseFile instance
|
23
30
|
def license_file
|
24
31
|
return @license_file if defined? @license_file
|
25
32
|
|
26
33
|
commit = @revision ? @repository.lookup(@revision) : @repository.last_commit
|
27
|
-
license_blob = commit.tree.each_blob { |blob| break blob if
|
28
|
-
|
34
|
+
license_blob = commit.tree.each_blob { |blob| break blob if LICENSE_FILENAMES.include? blob[:name] }
|
29
35
|
|
30
36
|
@license_file = if license_blob
|
31
37
|
LicenseFile.new(@repository.lookup(license_blob[:oid]))
|
32
38
|
end
|
33
39
|
end
|
34
40
|
|
35
|
-
|
36
|
-
@matches ||= license_file.matches if license_file
|
37
|
-
end
|
38
|
-
|
41
|
+
# Returns the matching Licensee::License instance if a license can be detected
|
39
42
|
def license
|
40
43
|
@license ||= license_file.match if license_file
|
41
44
|
end
|
data/lib/licensee/version.rb
CHANGED
data/test/functions.rb
CHANGED
@@ -39,23 +39,32 @@ class FakeBlob
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def chaos_monkey(string)
|
42
|
-
|
43
|
-
|
44
|
-
Random.rand(5).times do
|
45
|
-
lines[Random.rand(lines.size)] = SecureRandom.base64(Random.rand(80)) + "\n"
|
42
|
+
Random.rand(7).times do
|
43
|
+
string[Random.rand(string.length)] = SecureRandom.base64(Random.rand(10))
|
46
44
|
end
|
47
|
-
|
48
|
-
lines.join('')
|
45
|
+
string
|
49
46
|
end
|
50
47
|
|
51
|
-
def verify_license_file(license, chaos = false)
|
48
|
+
def verify_license_file(license, chaos = false, wrap=false)
|
52
49
|
expected = File.basename(license, ".txt")
|
53
50
|
|
54
51
|
text = license_from_path(license)
|
55
|
-
|
52
|
+
text = chaos_monkey(text) if chaos
|
53
|
+
text = wrap(text, wrap) if wrap
|
54
|
+
|
55
|
+
blob = FakeBlob.new(text)
|
56
56
|
license_file = Licensee::LicenseFile.new(blob)
|
57
57
|
|
58
58
|
actual = license_file.match
|
59
59
|
assert actual, "No match for #{expected}."
|
60
|
-
assert_equal expected, actual.name, "expeceted #{expected} but got #{actual.name} for .match.
|
60
|
+
assert_equal expected, actual.name, "expeceted #{expected} but got #{actual.name} for .match. Confidence: #{license_file.confidence}. Method: #{license_file.matcher.class}"
|
61
|
+
end
|
62
|
+
|
63
|
+
def wrap(text, line_width=80)
|
64
|
+
text = text.clone
|
65
|
+
text.gsub! /([^\n])\n([^\n])/, '\1 \2'
|
66
|
+
text = text.split("\n").collect do |line|
|
67
|
+
line.length > line_width ? line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip : line
|
68
|
+
end * "\n"
|
69
|
+
text.strip
|
61
70
|
end
|
data/test/test_licensee.rb
CHANGED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestLicenseeExactMatcher < Minitest::Test
|
4
|
+
|
5
|
+
def setup
|
6
|
+
text = File.open(Licensee::Licenses.find("mit").path).read.split("---").last
|
7
|
+
blob = FakeBlob.new(text)
|
8
|
+
@mit = Licensee::LicenseFile.new(blob)
|
9
|
+
end
|
10
|
+
|
11
|
+
should "match the license" do
|
12
|
+
assert_equal "mit", Licensee::ExactMatcher.match(@mit).name
|
13
|
+
end
|
14
|
+
|
15
|
+
should "know the match confidence" do
|
16
|
+
assert_equal 100, Licensee::ExactMatcher.new(@mit).confidence
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestLicenseeGitMatcher < Minitest::Test
|
4
|
+
|
5
|
+
def setup
|
6
|
+
text = license_from_path( Licensee::Licenses.find("mit").path )
|
7
|
+
blob = FakeBlob.new(text)
|
8
|
+
@mit = Licensee::LicenseFile.new(blob)
|
9
|
+
end
|
10
|
+
|
11
|
+
should "match the license" do
|
12
|
+
assert_equal "mit", Licensee::GitMatcher.match(@mit).name
|
13
|
+
end
|
14
|
+
|
15
|
+
should "know the match confidence" do
|
16
|
+
assert_equal 94, Licensee::GitMatcher.new(@mit).confidence
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestLicenseeLevenshteinMatcher < Minitest::Test
|
4
|
+
|
5
|
+
def setup
|
6
|
+
text = license_from_path( Licensee::Licenses.find("mit").path )
|
7
|
+
blob = FakeBlob.new(text)
|
8
|
+
@mit = Licensee::LicenseFile.new(blob)
|
9
|
+
end
|
10
|
+
|
11
|
+
should "match the license" do
|
12
|
+
assert_equal "mit", Licensee::LevenshteinMatcher.match(@mit).name
|
13
|
+
end
|
14
|
+
|
15
|
+
should "know the match confidence" do
|
16
|
+
matcher = Licensee::LevenshteinMatcher.new(@mit)
|
17
|
+
assert matcher.confidence > 98, "#{matcher.confidence} < 98"
|
18
|
+
end
|
19
|
+
|
20
|
+
should "calculate max delta" do
|
21
|
+
assert_equal 964.8000000000001, Licensee::LevenshteinMatcher.new(@mit).max_delta
|
22
|
+
end
|
23
|
+
|
24
|
+
should "calculate length delta" do
|
25
|
+
isc = Licensee::Licenses.find("isc")
|
26
|
+
assert_equal 2, Licensee::LevenshteinMatcher.new(@mit).length_delta(Licensee::Licenses.find("mit"))
|
27
|
+
assert_equal 334, Licensee::LevenshteinMatcher.new(@mit).length_delta(isc)
|
28
|
+
end
|
29
|
+
|
30
|
+
should "round up potential licenses" do
|
31
|
+
assert_equal 5, Licensee::LevenshteinMatcher.new(@mit).potential_licenses.size
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -14,17 +14,6 @@ class TestLicenseeLicenseFile < Minitest::Test
|
|
14
14
|
assert @file.contents =~ /MIT/
|
15
15
|
end
|
16
16
|
|
17
|
-
should "known the file length" do
|
18
|
-
assert_equal 1077, @file.length
|
19
|
-
end
|
20
|
-
|
21
|
-
should "calculate similiarty" do
|
22
|
-
actual = @file.send(:calculate_similarity, @mit)
|
23
|
-
assert actual > Licensee::CONFIDENCE_THRESHOLD, "expected #{actual} to be > 90% for MIT"
|
24
|
-
actual = @file.send(:calculate_similarity, @gpl)
|
25
|
-
assert actual < 1, "expected #{actual} to be < 1% for GPL"
|
26
|
-
end
|
27
|
-
|
28
17
|
should "match the license" do
|
29
18
|
assert_equal "mit", @file.match.name
|
30
19
|
end
|
@@ -33,4 +22,8 @@ class TestLicenseeLicenseFile < Minitest::Test
|
|
33
22
|
expected = "-Copyright (c) [year] [fullname]\n+Copyright (c) 2014 Ben Balter"
|
34
23
|
assert @file.diff.include?(expected)
|
35
24
|
end
|
25
|
+
|
26
|
+
should "calculate confidence" do
|
27
|
+
assert_equal 94, @file.confidence
|
28
|
+
end
|
36
29
|
end
|
@@ -3,8 +3,8 @@ require 'helper'
|
|
3
3
|
class TestLicenseeLicenses < Minitest::Test
|
4
4
|
|
5
5
|
should "know license names" do
|
6
|
-
assert_equal Array, Licensee::Licenses.names.class
|
7
|
-
assert_equal 15, Licensee::Licenses.names.size
|
6
|
+
assert_equal Array, Licensee::Licenses.send(:names).class
|
7
|
+
assert_equal 15, Licensee::Licenses.send(:names).size
|
8
8
|
end
|
9
9
|
|
10
10
|
should "load the licenses" do
|
@@ -8,10 +8,24 @@ class TestLicenseeVendor < Minitest::Test
|
|
8
8
|
end
|
9
9
|
end
|
10
10
|
|
11
|
-
should "detect each vendored license" do
|
11
|
+
should "detect each vendored license when modified" do
|
12
12
|
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
|
13
13
|
licenses.each do |license|
|
14
14
|
verify_license_file(license, true)
|
15
15
|
end
|
16
16
|
end
|
17
|
+
|
18
|
+
should "detect each vendored license with different line lengths" do
|
19
|
+
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
|
20
|
+
licenses.each do |license|
|
21
|
+
verify_license_file(license, false, 50)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
should "detect each vendored license with different line lengths when modified" do
|
26
|
+
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
|
27
|
+
licenses.each do |license|
|
28
|
+
verify_license_file(license, true, 50)
|
29
|
+
end
|
30
|
+
end
|
17
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: licensee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rugged
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.21.1b2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: levenshtein-ffi
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: pry
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -96,6 +110,10 @@ files:
|
|
96
110
|
- lib/licensee/license.rb
|
97
111
|
- lib/licensee/license_file.rb
|
98
112
|
- lib/licensee/licenses.rb
|
113
|
+
- lib/licensee/matcher.rb
|
114
|
+
- lib/licensee/matchers/exact_matcher.rb
|
115
|
+
- lib/licensee/matchers/git_matcher.rb
|
116
|
+
- lib/licensee/matchers/levenshtein_matcher.rb
|
99
117
|
- lib/licensee/project.rb
|
100
118
|
- lib/licensee/version.rb
|
101
119
|
- test/fixtures/licenses.git/HEAD
|
@@ -108,9 +126,13 @@ files:
|
|
108
126
|
- test/helper.rb
|
109
127
|
- test/test_licensee.rb
|
110
128
|
- test/test_licensee_bin.rb
|
129
|
+
- test/test_licensee_exact_matcher.rb
|
130
|
+
- test/test_licensee_git_matcher.rb
|
131
|
+
- test/test_licensee_levenshtein_matcher.rb
|
111
132
|
- test/test_licensee_license.rb
|
112
133
|
- test/test_licensee_license_file.rb
|
113
134
|
- test/test_licensee_licenses.rb
|
135
|
+
- test/test_licensee_matcher.rb
|
114
136
|
- test/test_licensee_project.rb
|
115
137
|
- test/test_licensee_vendor.rb
|
116
138
|
- vendor/choosealicense.com/_licenses/agpl-3.0.txt
|