licensee 2.0.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -9
- data/bin/licensee +1 -0
- data/lib/licensee.rb +28 -5
- data/lib/licensee/license.rb +16 -8
- data/lib/licensee/license_file.rb +21 -25
- data/lib/licensee/licenses.rb +18 -11
- data/lib/licensee/matcher.rb +28 -0
- data/lib/licensee/matchers/exact_matcher.rb +11 -0
- data/lib/licensee/matchers/git_matcher.rb +30 -0
- data/lib/licensee/matchers/levenshtein_matcher.rb +55 -0
- data/lib/licensee/project.rb +10 -7
- data/lib/licensee/version.rb +1 -1
- data/test/functions.rb +18 -9
- data/test/test_licensee.rb +4 -0
- data/test/test_licensee_exact_matcher.rb +18 -0
- data/test/test_licensee_git_matcher.rb +18 -0
- data/test/test_licensee_levenshtein_matcher.rb +34 -0
- data/test/test_licensee_license.rb +0 -1
- data/test/test_licensee_license_file.rb +4 -11
- data/test/test_licensee_licenses.rb +2 -2
- data/test/test_licensee_matcher.rb +7 -0
- data/test/test_licensee_vendor.rb +15 -1
- metadata +24 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 839782ad5361d8938500dd5938eb33e38a821d9a
|
4
|
+
data.tar.gz: a688eb2acdf6a1da457cdcf4037a346e4b20691c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 61151787e52a75c131b53b6ffdc38e8e8c7140546837e39bc3586bf87928d43b12505ee5f983b853520c6f65d1c742c0b3c381200df95b824a8a957bb031576a
|
7
|
+
data.tar.gz: 9bfe7533e62e998eb1406c663ba93bf2db81f85a92d274f1efb5f2d389f6e6e029e88b64ba94955d4dc322545254506287734d767b37ae70b40840238083cf7d
|
data/README.md
CHANGED
@@ -12,9 +12,13 @@
|
|
12
12
|
|
13
13
|
## The solution
|
14
14
|
|
15
|
-
Licensee automates the process of reading `LICENSE` files and compares their contents to known licenses using a
|
15
|
+
Licensee automates the process of reading `LICENSE` files and compares their contents to known licenses using several strategies (which we call "Matchers"):
|
16
16
|
|
17
|
-
|
17
|
+
First, we look to see if the license is an exact match. Licenses like GPL don't have a copyright notice that needs to be changed in the license itself, so if we strip away whitespace, we might get lucky, and direct string comparison is cheap.
|
18
|
+
|
19
|
+
Next, we look to Git's internal change calculation method, which is fast, but is done on a line-by-line basis, so if the license is wrapped differently, or has extra words inserted, it's not going to match the license.
|
20
|
+
|
21
|
+
Finally, if we still can't match the license, we use a fancy math thing called the [Levenshtein distance algorithm](https://en.wikipedia.org/wiki/Levenshtein_distance), which, while slow, is really good at calculating the similarity between a known license and an unknown license. By calculating the percent changed from the known license, you can tell, e.g., that a given license is 98% similar to the MIT license, that 2% likely representing the copyright line being properly adapted to the project.
|
18
22
|
|
19
23
|
Licensee will even diff the distributed license with the original, so you can see exactly what, if anything, has been changed.
|
20
24
|
|
@@ -41,13 +45,6 @@ license.meta["description"]
|
|
41
45
|
|
42
46
|
license.meta["permitted"]
|
43
47
|
=> ["commercial-use","modifications","distribution","sublicense","private-use"]
|
44
|
-
|
45
|
-
# Getting all matches
|
46
|
-
Licensee.matches "/path/to/a/project"
|
47
|
-
=> [#<Licensee::License name="MIT" match=0.9842154131847726>,
|
48
|
-
#<Licensee::License name="unlicense" match=0.4326833797585887>,
|
49
|
-
...
|
50
|
-
#<Licensee::License name="no-license" match=0.0232126276694522>]
|
51
48
|
```
|
52
49
|
|
53
50
|
## Diffing
|
@@ -72,6 +69,7 @@ You'll get an output that looks like:
|
|
72
69
|
```
|
73
70
|
License: MIT
|
74
71
|
Confidence: 98.42%
|
72
|
+
Matcher: Licensee::GitMatcher
|
75
73
|
```
|
76
74
|
|
77
75
|
## What it looks at
|
data/bin/licensee
CHANGED
data/lib/licensee.rb
CHANGED
@@ -1,19 +1,42 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'rugged'
|
3
|
+
require 'levenshtein'
|
3
4
|
|
4
5
|
require_relative "licensee/license"
|
5
6
|
require_relative "licensee/licenses"
|
6
7
|
require_relative "licensee/license_file"
|
7
8
|
require_relative "licensee/project"
|
9
|
+
require_relative "licensee/matcher"
|
10
|
+
require_relative "licensee/matchers/exact_matcher"
|
11
|
+
require_relative "licensee/matchers/git_matcher"
|
12
|
+
require_relative "licensee/matchers/levenshtein_matcher"
|
8
13
|
|
9
14
|
class Licensee
|
15
|
+
|
16
|
+
# Over what percent is a match considered a match
|
10
17
|
CONFIDENCE_THRESHOLD = 90
|
11
18
|
|
12
|
-
|
13
|
-
|
14
|
-
|
19
|
+
class << self
|
20
|
+
|
21
|
+
# Returns an array of Licensee::License instances
|
22
|
+
def licenses
|
23
|
+
@licenses ||= Licensee::Licenses.list
|
24
|
+
end
|
25
|
+
|
26
|
+
# Returns the license for a given git repo
|
27
|
+
def license(path)
|
28
|
+
Licensee::Project.new(path).license
|
29
|
+
end
|
30
|
+
|
31
|
+
# Diffs the project license and the known license
|
32
|
+
def diff(path)
|
33
|
+
Licensee::Project.new(path).license_file.diff
|
34
|
+
end
|
15
35
|
|
16
|
-
|
17
|
-
|
36
|
+
# Array of matchers to use, in order of preference
|
37
|
+
# The order should be descending order of anticipated speed to match
|
38
|
+
def matchers
|
39
|
+
[Licensee::ExactMatcher, Licensee::GitMatcher, Licensee::LevenshteinMatcher]
|
40
|
+
end
|
18
41
|
end
|
19
42
|
end
|
data/lib/licensee/license.rb
CHANGED
@@ -6,34 +6,36 @@ class Licensee
|
|
6
6
|
@name=name.downcase
|
7
7
|
end
|
8
8
|
|
9
|
+
# Path to vendored license file on disk
|
9
10
|
def path
|
10
11
|
@path ||= File.expand_path "#{@name}.txt", Licensee::Licenses.base
|
11
12
|
end
|
12
13
|
|
14
|
+
# Raw content of license file, including YAML front matter
|
13
15
|
def content
|
14
16
|
@content ||= File.open(path).read
|
15
17
|
end
|
16
18
|
|
17
|
-
|
18
|
-
@parts ||= content.match(/^(---\n.*\n---)?(.*)/m).to_a
|
19
|
-
end
|
20
|
-
|
19
|
+
# License metadata from YAML front matter
|
21
20
|
def meta
|
22
21
|
@meta ||= front_matter = YAML.load(parts[1]) if parts[1]
|
23
22
|
rescue
|
24
23
|
nil
|
25
24
|
end
|
26
25
|
|
27
|
-
|
28
|
-
@length ||= body.length
|
29
|
-
end
|
30
|
-
|
26
|
+
# The license body (e.g., contents - frontmatter)
|
31
27
|
def body
|
32
28
|
@body ||= parts[2]
|
33
29
|
end
|
34
30
|
alias_method :to_s, :body
|
35
31
|
alias_method :text, :body
|
36
32
|
|
33
|
+
# License body with all whitespace replaced with a single space
|
34
|
+
def body_normalized
|
35
|
+
@content_normalized ||= body.downcase.gsub(/\s+/, " ").strip
|
36
|
+
end
|
37
|
+
|
38
|
+
# Git-computed hash signature for the license file
|
37
39
|
def hashsig
|
38
40
|
@hashsig ||= Rugged::Blob::HashSignature.new(
|
39
41
|
body, Rugged::Blob::HashSignature::WHITESPACE_SMART)
|
@@ -42,5 +44,11 @@ class Licensee
|
|
42
44
|
def inspect
|
43
45
|
"#<Licensee::License name=\"#{name}\">"
|
44
46
|
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def parts
|
51
|
+
@parts ||= content.match(/^(---\n.*\n---)?(.*)/m).to_a
|
52
|
+
end
|
45
53
|
end
|
46
54
|
end
|
@@ -7,43 +7,39 @@ class Licensee
|
|
7
7
|
blob.hashsig(Rugged::Blob::HashSignature::WHITESPACE_SMART)
|
8
8
|
end
|
9
9
|
|
10
|
-
|
11
|
-
|
10
|
+
# Raw file contents
|
11
|
+
def content
|
12
|
+
@contents ||= begin
|
13
|
+
blob.content
|
14
|
+
end
|
12
15
|
end
|
13
|
-
alias_method :to_s, :
|
14
|
-
alias_method :
|
16
|
+
alias_method :to_s, :content
|
17
|
+
alias_method :contents, :content
|
15
18
|
|
16
|
-
|
17
|
-
|
19
|
+
# File content with all whitespace replaced with a single space
|
20
|
+
def content_normalized
|
21
|
+
@content_normalized ||= content.downcase.gsub(/\s+/, " ").strip
|
18
22
|
end
|
19
23
|
|
20
|
-
|
21
|
-
|
24
|
+
# Computes a diff between known license and project license
|
25
|
+
def diff(options={})
|
26
|
+
options = options.merge(:reverse => true)
|
27
|
+
blob.diff(match.body, options).to_s if match
|
22
28
|
end
|
23
29
|
|
24
|
-
|
25
|
-
|
30
|
+
# Determines which matching strategy to use, returns an instance of that matcher
|
31
|
+
def matcher
|
32
|
+
@matcher ||= Licensee.matchers.map { |m| m.new(self) }.find { |m| m.match }
|
26
33
|
end
|
27
34
|
|
35
|
+
# Returns a Licensee::License instance of the matched license
|
28
36
|
def match
|
29
|
-
|
37
|
+
@match ||= matcher.match if matcher
|
30
38
|
end
|
31
39
|
|
40
|
+
# Returns the percent confident with the match
|
32
41
|
def confidence
|
33
|
-
|
34
|
-
end
|
35
|
-
alias_method :similarity, :confidence
|
36
|
-
|
37
|
-
def diff(options={})
|
38
|
-
options = options.merge(:reverse => true)
|
39
|
-
blob.diff(match.body, options).to_s if match
|
40
|
-
end
|
41
|
-
|
42
|
-
private
|
43
|
-
|
44
|
-
# Pulled out for easier testing
|
45
|
-
def calculate_similarity(other)
|
46
|
-
blob.similarity(other.hashsig)
|
42
|
+
@condience ||= matcher.confidence if matcher
|
47
43
|
end
|
48
44
|
end
|
49
45
|
end
|
data/lib/licensee/licenses.rb
CHANGED
@@ -1,15 +1,8 @@
|
|
1
1
|
class Licensee
|
2
2
|
class Licenses
|
3
3
|
class << self
|
4
|
-
def names
|
5
|
-
@names ||= begin
|
6
|
-
names = Dir.entries(base)
|
7
|
-
names.map! { |l| File.basename(l, ".txt").downcase }
|
8
|
-
names.reject! { |l| l =~ /^\./ || l.nil? }
|
9
|
-
names
|
10
|
-
end
|
11
|
-
end
|
12
4
|
|
5
|
+
# Returns an array of Licensee::License instances
|
13
6
|
def list
|
14
7
|
@licenses ||= begin
|
15
8
|
licenses = []
|
@@ -18,14 +11,28 @@ class Licensee
|
|
18
11
|
end
|
19
12
|
end
|
20
13
|
|
14
|
+
# Given a license name, attempt to return a matching Licensee::License instance
|
15
|
+
def find(name)
|
16
|
+
list.find { |l| l.name.downcase == name.downcase }
|
17
|
+
end
|
18
|
+
|
19
|
+
# Path to vendored licenses
|
21
20
|
def base
|
22
21
|
@base ||= File.expand_path "../../vendor/choosealicense.com/_licenses", File.dirname(__FILE__)
|
23
22
|
end
|
24
23
|
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
private
|
25
|
+
|
26
|
+
# Returns a list of potential license names, as vendored
|
27
|
+
def names
|
28
|
+
@names ||= begin
|
29
|
+
names = Dir.entries(base)
|
30
|
+
names.map! { |l| File.basename(l, ".txt").downcase }
|
31
|
+
names.reject! { |l| l =~ /^\./ || l.nil? }
|
32
|
+
names
|
33
|
+
end
|
28
34
|
end
|
35
|
+
|
29
36
|
end
|
30
37
|
end
|
31
38
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Abstract class to describe different matching strategies
|
2
|
+
# Must respond to:
|
3
|
+
# - match
|
4
|
+
# - confidence
|
5
|
+
#
|
6
|
+
# Can assume file will be a Licensee::LicenseFile instance
|
7
|
+
class Licensee
|
8
|
+
class Matcher
|
9
|
+
attr_reader :file
|
10
|
+
|
11
|
+
def self.match(file)
|
12
|
+
self.new(file).match
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(file)
|
16
|
+
@file = file
|
17
|
+
end
|
18
|
+
|
19
|
+
def match
|
20
|
+
nil
|
21
|
+
end
|
22
|
+
|
23
|
+
def confidence
|
24
|
+
0
|
25
|
+
end
|
26
|
+
alias_method :similarity, :confidence
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class Licensee
|
2
|
+
class GitMatcher < Matcher
|
3
|
+
|
4
|
+
def match
|
5
|
+
match_info[0] unless match_info.nil?
|
6
|
+
end
|
7
|
+
|
8
|
+
def confidence
|
9
|
+
match_info[1] unless match_info.nil?
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def matches
|
15
|
+
@matches ||= Licensee.licenses.map { |l| [l, similarity(l)] }.select { |l,sim| sim > 0 }
|
16
|
+
end
|
17
|
+
|
18
|
+
def similarity(other)
|
19
|
+
file.blob.similarity(other.hashsig)
|
20
|
+
end
|
21
|
+
|
22
|
+
# Pulled out for easier testing
|
23
|
+
def match_info
|
24
|
+
@match_info ||= begin
|
25
|
+
match = matches.max_by { |license, similarity| similarity }
|
26
|
+
match if match && match[1] > Licensee::CONFIDENCE_THRESHOLD
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
class Licensee
|
2
|
+
class LevenshteinMatcher < Matcher
|
3
|
+
|
4
|
+
# Return the first potential license that is more similar than the confidence threshold
|
5
|
+
def match
|
6
|
+
@match ||= potential_licenses.find do |license|
|
7
|
+
similarity(license) >= Licensee::CONFIDENCE_THRESHOLD
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
# Sort all licenses, in ascending order, by difference in length to the file
|
12
|
+
# Difference in lengths cannot exceed the file's length * the confidence threshold / 100
|
13
|
+
def potential_licenses
|
14
|
+
@potential_licenses ||= begin
|
15
|
+
Licensee.licenses.select { |license| length_delta(license) <= max_delta }.sort_by { |l| length_delta(l) }
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Calculate the difference between the file length and a given license's length
|
20
|
+
def length_delta(license)
|
21
|
+
(file_length - license.body_normalized.length).abs
|
22
|
+
end
|
23
|
+
|
24
|
+
# Maximum possible difference between file length and license length
|
25
|
+
# for a license to be a potential license to be matched
|
26
|
+
def max_delta
|
27
|
+
@max_delta ||= (file_length * (Licensee::CONFIDENCE_THRESHOLD.to_f / 100.to_f ))
|
28
|
+
end
|
29
|
+
|
30
|
+
# Confidence that the matched license is a match
|
31
|
+
def confidence
|
32
|
+
@confidence ||= match ? similarity(match) : 0
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
# Length of the file, normalized to strip whitespace
|
38
|
+
def file_length
|
39
|
+
@file_length ||= file.content_normalized.length.to_f
|
40
|
+
end
|
41
|
+
|
42
|
+
# Calculate percent changed between file and potential license
|
43
|
+
def similarity(license)
|
44
|
+
100 * (file_length - distance(license)) / file_length
|
45
|
+
end
|
46
|
+
|
47
|
+
# Calculate the levenshtein distance between file and license
|
48
|
+
# Note: We used content/body normalized because white space and capitalization
|
49
|
+
# isn't legally significant in this context. Fewer characters lets levenshtein
|
50
|
+
# work faster. As long as they both undergo the same transformation, should match.
|
51
|
+
def distance(license)
|
52
|
+
Levenshtein.distance(license.body_normalized, file.content_normalized).to_f
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/licensee/project.rb
CHANGED
@@ -2,7 +2,8 @@ class Licensee
|
|
2
2
|
class Project
|
3
3
|
attr_reader :repository
|
4
4
|
|
5
|
-
|
5
|
+
# Array of file names to look for potential license files, in order
|
6
|
+
LICENSE_FILENAMES = %w[
|
6
7
|
LICENSE
|
7
8
|
LICENSE.txt
|
8
9
|
LICENSE.md
|
@@ -10,6 +11,10 @@ class Licensee
|
|
10
11
|
COPYING
|
11
12
|
]
|
12
13
|
|
14
|
+
# Initializes a new project
|
15
|
+
#
|
16
|
+
# path_or_repo path to git repo or Rugged::Repository instance
|
17
|
+
# revision - revision ref, if any
|
13
18
|
def initialize(path_or_repo, revision = nil)
|
14
19
|
if path_or_repo.kind_of? Rugged::Repository
|
15
20
|
@repository = path_or_repo
|
@@ -20,22 +25,20 @@ class Licensee
|
|
20
25
|
@revision = revision
|
21
26
|
end
|
22
27
|
|
28
|
+
# Detects the license file, if any
|
29
|
+
# Returns a Licensee::LicenseFile instance
|
23
30
|
def license_file
|
24
31
|
return @license_file if defined? @license_file
|
25
32
|
|
26
33
|
commit = @revision ? @repository.lookup(@revision) : @repository.last_commit
|
27
|
-
license_blob = commit.tree.each_blob { |blob| break blob if
|
28
|
-
|
34
|
+
license_blob = commit.tree.each_blob { |blob| break blob if LICENSE_FILENAMES.include? blob[:name] }
|
29
35
|
|
30
36
|
@license_file = if license_blob
|
31
37
|
LicenseFile.new(@repository.lookup(license_blob[:oid]))
|
32
38
|
end
|
33
39
|
end
|
34
40
|
|
35
|
-
|
36
|
-
@matches ||= license_file.matches if license_file
|
37
|
-
end
|
38
|
-
|
41
|
+
# Returns the matching Licensee::License instance if a license can be detected
|
39
42
|
def license
|
40
43
|
@license ||= license_file.match if license_file
|
41
44
|
end
|
data/lib/licensee/version.rb
CHANGED
data/test/functions.rb
CHANGED
@@ -39,23 +39,32 @@ class FakeBlob
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def chaos_monkey(string)
|
42
|
-
|
43
|
-
|
44
|
-
Random.rand(5).times do
|
45
|
-
lines[Random.rand(lines.size)] = SecureRandom.base64(Random.rand(80)) + "\n"
|
42
|
+
Random.rand(7).times do
|
43
|
+
string[Random.rand(string.length)] = SecureRandom.base64(Random.rand(10))
|
46
44
|
end
|
47
|
-
|
48
|
-
lines.join('')
|
45
|
+
string
|
49
46
|
end
|
50
47
|
|
51
|
-
def verify_license_file(license, chaos = false)
|
48
|
+
def verify_license_file(license, chaos = false, wrap=false)
|
52
49
|
expected = File.basename(license, ".txt")
|
53
50
|
|
54
51
|
text = license_from_path(license)
|
55
|
-
|
52
|
+
text = chaos_monkey(text) if chaos
|
53
|
+
text = wrap(text, wrap) if wrap
|
54
|
+
|
55
|
+
blob = FakeBlob.new(text)
|
56
56
|
license_file = Licensee::LicenseFile.new(blob)
|
57
57
|
|
58
58
|
actual = license_file.match
|
59
59
|
assert actual, "No match for #{expected}."
|
60
|
-
assert_equal expected, actual.name, "expeceted #{expected} but got #{actual.name} for .match.
|
60
|
+
assert_equal expected, actual.name, "expeceted #{expected} but got #{actual.name} for .match. Confidence: #{license_file.confidence}. Method: #{license_file.matcher.class}"
|
61
|
+
end
|
62
|
+
|
63
|
+
def wrap(text, line_width=80)
|
64
|
+
text = text.clone
|
65
|
+
text.gsub! /([^\n])\n([^\n])/, '\1 \2'
|
66
|
+
text = text.split("\n").collect do |line|
|
67
|
+
line.length > line_width ? line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip : line
|
68
|
+
end * "\n"
|
69
|
+
text.strip
|
61
70
|
end
|
data/test/test_licensee.rb
CHANGED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestLicenseeExactMatcher < Minitest::Test
|
4
|
+
|
5
|
+
def setup
|
6
|
+
text = File.open(Licensee::Licenses.find("mit").path).read.split("---").last
|
7
|
+
blob = FakeBlob.new(text)
|
8
|
+
@mit = Licensee::LicenseFile.new(blob)
|
9
|
+
end
|
10
|
+
|
11
|
+
should "match the license" do
|
12
|
+
assert_equal "mit", Licensee::ExactMatcher.match(@mit).name
|
13
|
+
end
|
14
|
+
|
15
|
+
should "know the match confidence" do
|
16
|
+
assert_equal 100, Licensee::ExactMatcher.new(@mit).confidence
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestLicenseeGitMatcher < Minitest::Test
|
4
|
+
|
5
|
+
def setup
|
6
|
+
text = license_from_path( Licensee::Licenses.find("mit").path )
|
7
|
+
blob = FakeBlob.new(text)
|
8
|
+
@mit = Licensee::LicenseFile.new(blob)
|
9
|
+
end
|
10
|
+
|
11
|
+
should "match the license" do
|
12
|
+
assert_equal "mit", Licensee::GitMatcher.match(@mit).name
|
13
|
+
end
|
14
|
+
|
15
|
+
should "know the match confidence" do
|
16
|
+
assert_equal 94, Licensee::GitMatcher.new(@mit).confidence
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class TestLicenseeLevenshteinMatcher < Minitest::Test
|
4
|
+
|
5
|
+
def setup
|
6
|
+
text = license_from_path( Licensee::Licenses.find("mit").path )
|
7
|
+
blob = FakeBlob.new(text)
|
8
|
+
@mit = Licensee::LicenseFile.new(blob)
|
9
|
+
end
|
10
|
+
|
11
|
+
should "match the license" do
|
12
|
+
assert_equal "mit", Licensee::LevenshteinMatcher.match(@mit).name
|
13
|
+
end
|
14
|
+
|
15
|
+
should "know the match confidence" do
|
16
|
+
matcher = Licensee::LevenshteinMatcher.new(@mit)
|
17
|
+
assert matcher.confidence > 98, "#{matcher.confidence} < 98"
|
18
|
+
end
|
19
|
+
|
20
|
+
should "calculate max delta" do
|
21
|
+
assert_equal 964.8000000000001, Licensee::LevenshteinMatcher.new(@mit).max_delta
|
22
|
+
end
|
23
|
+
|
24
|
+
should "calculate length delta" do
|
25
|
+
isc = Licensee::Licenses.find("isc")
|
26
|
+
assert_equal 2, Licensee::LevenshteinMatcher.new(@mit).length_delta(Licensee::Licenses.find("mit"))
|
27
|
+
assert_equal 334, Licensee::LevenshteinMatcher.new(@mit).length_delta(isc)
|
28
|
+
end
|
29
|
+
|
30
|
+
should "round up potential licenses" do
|
31
|
+
assert_equal 5, Licensee::LevenshteinMatcher.new(@mit).potential_licenses.size
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -14,17 +14,6 @@ class TestLicenseeLicenseFile < Minitest::Test
|
|
14
14
|
assert @file.contents =~ /MIT/
|
15
15
|
end
|
16
16
|
|
17
|
-
should "known the file length" do
|
18
|
-
assert_equal 1077, @file.length
|
19
|
-
end
|
20
|
-
|
21
|
-
should "calculate similiarty" do
|
22
|
-
actual = @file.send(:calculate_similarity, @mit)
|
23
|
-
assert actual > Licensee::CONFIDENCE_THRESHOLD, "expected #{actual} to be > 90% for MIT"
|
24
|
-
actual = @file.send(:calculate_similarity, @gpl)
|
25
|
-
assert actual < 1, "expected #{actual} to be < 1% for GPL"
|
26
|
-
end
|
27
|
-
|
28
17
|
should "match the license" do
|
29
18
|
assert_equal "mit", @file.match.name
|
30
19
|
end
|
@@ -33,4 +22,8 @@ class TestLicenseeLicenseFile < Minitest::Test
|
|
33
22
|
expected = "-Copyright (c) [year] [fullname]\n+Copyright (c) 2014 Ben Balter"
|
34
23
|
assert @file.diff.include?(expected)
|
35
24
|
end
|
25
|
+
|
26
|
+
should "calculate confidence" do
|
27
|
+
assert_equal 94, @file.confidence
|
28
|
+
end
|
36
29
|
end
|
@@ -3,8 +3,8 @@ require 'helper'
|
|
3
3
|
class TestLicenseeLicenses < Minitest::Test
|
4
4
|
|
5
5
|
should "know license names" do
|
6
|
-
assert_equal Array, Licensee::Licenses.names.class
|
7
|
-
assert_equal 15, Licensee::Licenses.names.size
|
6
|
+
assert_equal Array, Licensee::Licenses.send(:names).class
|
7
|
+
assert_equal 15, Licensee::Licenses.send(:names).size
|
8
8
|
end
|
9
9
|
|
10
10
|
should "load the licenses" do
|
@@ -8,10 +8,24 @@ class TestLicenseeVendor < Minitest::Test
|
|
8
8
|
end
|
9
9
|
end
|
10
10
|
|
11
|
-
should "detect each vendored license" do
|
11
|
+
should "detect each vendored license when modified" do
|
12
12
|
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
|
13
13
|
licenses.each do |license|
|
14
14
|
verify_license_file(license, true)
|
15
15
|
end
|
16
16
|
end
|
17
|
+
|
18
|
+
should "detect each vendored license with different line lengths" do
|
19
|
+
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
|
20
|
+
licenses.each do |license|
|
21
|
+
verify_license_file(license, false, 50)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
should "detect each vendored license with different line lengths when modified" do
|
26
|
+
licenses = Dir["#{Licensee::Licenses.base}/*"].shuffle
|
27
|
+
licenses.each do |license|
|
28
|
+
verify_license_file(license, true, 50)
|
29
|
+
end
|
30
|
+
end
|
17
31
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: licensee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 3.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rugged
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.21.1b2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: levenshtein-ffi
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: pry
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -96,6 +110,10 @@ files:
|
|
96
110
|
- lib/licensee/license.rb
|
97
111
|
- lib/licensee/license_file.rb
|
98
112
|
- lib/licensee/licenses.rb
|
113
|
+
- lib/licensee/matcher.rb
|
114
|
+
- lib/licensee/matchers/exact_matcher.rb
|
115
|
+
- lib/licensee/matchers/git_matcher.rb
|
116
|
+
- lib/licensee/matchers/levenshtein_matcher.rb
|
99
117
|
- lib/licensee/project.rb
|
100
118
|
- lib/licensee/version.rb
|
101
119
|
- test/fixtures/licenses.git/HEAD
|
@@ -108,9 +126,13 @@ files:
|
|
108
126
|
- test/helper.rb
|
109
127
|
- test/test_licensee.rb
|
110
128
|
- test/test_licensee_bin.rb
|
129
|
+
- test/test_licensee_exact_matcher.rb
|
130
|
+
- test/test_licensee_git_matcher.rb
|
131
|
+
- test/test_licensee_levenshtein_matcher.rb
|
111
132
|
- test/test_licensee_license.rb
|
112
133
|
- test/test_licensee_license_file.rb
|
113
134
|
- test/test_licensee_licenses.rb
|
135
|
+
- test/test_licensee_matcher.rb
|
114
136
|
- test/test_licensee_project.rb
|
115
137
|
- test/test_licensee_vendor.rb
|
116
138
|
- vendor/choosealicense.com/_licenses/agpl-3.0.txt
|