licensee 5.0.0 → 6.0.0b1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -50
- data/bin/licensee +7 -8
- data/lib/licensee.rb +9 -33
- data/lib/licensee/content_helper.rb +7 -8
- data/lib/licensee/license.rb +5 -28
- data/lib/licensee/matchers/copyright_matcher.rb +17 -16
- data/lib/licensee/matchers/dice_matcher.rb +65 -0
- data/lib/licensee/matchers/exact_matcher.rb +12 -6
- data/lib/licensee/matchers/gemspec_matcher.rb +11 -11
- data/lib/licensee/matchers/npm_bower_matcher.rb +10 -10
- data/lib/licensee/matchers/package_matcher.rb +11 -10
- data/lib/licensee/project.rb +96 -30
- data/lib/licensee/project_file.rb +57 -77
- data/lib/licensee/version.rb +1 -1
- data/licensee.gemspec +26 -0
- data/test/fixtures/npm.git/HEAD +1 -0
- data/test/fixtures/npm.git/config +4 -0
- data/test/fixtures/npm.git/objects/info/packs +2 -0
- data/test/fixtures/npm.git/objects/pack/pack-03c0879445cabcc37f91d97c7955465adef26f4a.idx +0 -0
- data/test/fixtures/npm.git/objects/pack/pack-03c0879445cabcc37f91d97c7955465adef26f4a.pack +0 -0
- data/test/fixtures/npm.git/packed-refs +2 -0
- data/test/functions.rb +4 -15
- data/test/test_licensee.rb +1 -13
- data/test/test_licensee_copyright_matcher.rb +19 -28
- data/test/test_licensee_dice_matcher.rb +21 -0
- data/test/test_licensee_exact_matcher.rb +4 -6
- data/test/test_licensee_gemspec_matcher.rb +3 -11
- data/test/test_licensee_license.rb +2 -12
- data/test/test_licensee_npm_bower_matcher.rb +10 -16
- data/test/test_licensee_project.rb +24 -35
- data/test/test_licensee_project_file.rb +5 -10
- data/vendor/choosealicense.com/_licenses/afl-3.0.txt +69 -0
- data/vendor/choosealicense.com/_licenses/isc.txt +2 -2
- metadata +14 -26
- data/lib/licensee/filesystem_repository.rb +0 -38
- data/lib/licensee/matcher.rb +0 -32
- data/lib/licensee/matchers/git_matcher.rb +0 -27
- data/lib/licensee/matchers/levenshtein_matcher.rb +0 -75
- data/test/test_licensee_content_helper.rb +0 -40
- data/test/test_licensee_git_matcher.rb +0 -19
- data/test/test_licensee_levenshtein_matcher.rb +0 -34
- data/test/test_licensee_matcher.rb +0 -7
@@ -1,16 +1,16 @@
|
|
1
1
|
class Licensee
|
2
|
-
|
2
|
+
module Matchers
|
3
|
+
class Gemspec < Package
|
4
|
+
# We definitely don't want to be evaling arbitrary Gemspec files
|
5
|
+
# While not 100% accurate, use some lenient regex to try to grep the
|
6
|
+
# license declaration from the Gemspec as a string, if any
|
7
|
+
LICENSE_REGEX = /^\s*[a-z0-9_]+\.license\s*\=\s*[\'\"]([a-z\-0-9\.]+)[\'\"]\s*$/i
|
3
8
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
private
|
10
|
-
|
11
|
-
def license_property
|
12
|
-
match = file.content.match LICENSE_REGEX
|
13
|
-
match[1].downcase if match && match[1]
|
9
|
+
private
|
10
|
+
def license_property
|
11
|
+
match = @file.content.match LICENSE_REGEX
|
12
|
+
match[1].downcase if match && match[1]
|
13
|
+
end
|
14
14
|
end
|
15
15
|
end
|
16
16
|
end
|
@@ -1,15 +1,15 @@
|
|
1
1
|
class Licensee
|
2
|
-
|
2
|
+
module Matchers
|
3
|
+
class NpmBower < Package
|
4
|
+
# While we could parse the package.json or bower.json file, prefer
|
5
|
+
# a lenient regex for speed and security. Moar parsing moar problems.
|
6
|
+
LICENSE_REGEX = /\s*[\"\']license[\"\']\s*\:\s*[\'\"]([a-z\-0-9\.]+)[\'\"],?\s*/i
|
3
7
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
def license_property
|
11
|
-
match = file.content.match LICENSE_REGEX
|
12
|
-
match[1].downcase if match && match[1]
|
8
|
+
private
|
9
|
+
def license_property
|
10
|
+
match = @file.content.match LICENSE_REGEX
|
11
|
+
match[1].downcase if match && match[1]
|
12
|
+
end
|
13
13
|
end
|
14
14
|
end
|
15
15
|
end
|
@@ -1,16 +1,17 @@
|
|
1
1
|
class Licensee
|
2
|
-
|
2
|
+
module Matchers
|
3
|
+
class Package
|
4
|
+
def initialize(file)
|
5
|
+
@file = file
|
6
|
+
end
|
3
7
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def confidence
|
9
|
-
90
|
10
|
-
end
|
8
|
+
def match
|
9
|
+
Licensee.licenses(:hidden => true).find { |l| l.key == license_property }
|
10
|
+
end
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
def confidence
|
13
|
+
90
|
14
|
+
end
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
data/lib/licensee/project.rb
CHANGED
@@ -1,59 +1,125 @@
|
|
1
|
+
require 'rugged'
|
2
|
+
|
1
3
|
class Licensee
|
4
|
+
private
|
2
5
|
class Project
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
#
|
7
|
-
# path_or_repo path to git repo or Rugged::Repository instance
|
8
|
-
# revision - revision ref, if any
|
9
|
-
def initialize(path_or_repo, revision = nil)
|
10
|
-
if path_or_repo.kind_of? Rugged::Repository
|
11
|
-
@repository = path_or_repo
|
12
|
-
else
|
13
|
-
begin
|
14
|
-
@repository = Rugged::Repository.new(path_or_repo)
|
15
|
-
rescue Rugged::RepositoryError
|
16
|
-
raise if revision
|
17
|
-
@repository = FilesystemRepository.new(path_or_repo)
|
18
|
-
end
|
19
|
-
end
|
6
|
+
def initialize(detect_packages)
|
7
|
+
@detect_packages = detect_packages
|
8
|
+
end
|
20
9
|
|
21
|
-
|
10
|
+
def detect_packages?
|
11
|
+
@detect_packages
|
22
12
|
end
|
23
13
|
|
24
14
|
# Returns the matching Licensee::License instance if a license can be detected
|
25
15
|
def license
|
26
|
-
@license ||= matched_file
|
16
|
+
@license ||= matched_file && matched_file.license
|
17
|
+
end
|
18
|
+
|
19
|
+
def matched_file
|
20
|
+
@matched_file ||= (license_file || package_file)
|
27
21
|
end
|
28
22
|
|
29
23
|
def license_file
|
30
24
|
return @license_file if defined? @license_file
|
31
|
-
@license_file =
|
25
|
+
@license_file = begin
|
26
|
+
content, name = find_file { |name| LicenseFile.name_score(name) }
|
27
|
+
if content && name
|
28
|
+
LicenseFile.new(content, name)
|
29
|
+
end
|
30
|
+
end
|
32
31
|
end
|
33
32
|
|
34
33
|
def package_file
|
35
|
-
return unless
|
34
|
+
return unless detect_packages?
|
36
35
|
return @package_file if defined? @package_file
|
37
|
-
@package_file =
|
36
|
+
@package_file = begin
|
37
|
+
content, name = find_file { |name| PackageInfo.name_score(name) }
|
38
|
+
if content && name
|
39
|
+
PackageInfo.new(content, name)
|
40
|
+
end
|
41
|
+
end
|
38
42
|
end
|
43
|
+
end
|
39
44
|
|
40
|
-
|
41
|
-
|
42
|
-
|
45
|
+
public
|
46
|
+
|
47
|
+
# Git-based project
|
48
|
+
#
|
49
|
+
# analyze a given git repository for license information
|
50
|
+
class GitProject < Project
|
51
|
+
attr_reader :repository, :revision
|
52
|
+
|
53
|
+
class InvalidRepository < ArgumentError; end
|
54
|
+
|
55
|
+
def initialize(repo, revision: nil, detect_packages: false)
|
56
|
+
if repo.kind_of? Rugged::Repository
|
57
|
+
@repository = repo
|
58
|
+
else
|
59
|
+
@repository = Rugged::Repository.new(repo)
|
60
|
+
end
|
61
|
+
|
62
|
+
@revision = revision
|
63
|
+
super(detect_packages)
|
64
|
+
rescue Rugged::RepositoryError
|
65
|
+
raise InvalidRepository
|
43
66
|
end
|
44
67
|
|
45
68
|
private
|
46
|
-
|
47
69
|
def commit
|
48
70
|
@commit ||= revision ? repository.lookup(revision) : repository.last_commit
|
49
71
|
end
|
50
72
|
|
51
|
-
|
52
|
-
|
73
|
+
MAX_LICENSE_SIZE = 64 * 1024
|
74
|
+
|
75
|
+
def load_blob_data(oid)
|
76
|
+
data, _ = Rugged::Blob.to_buffer(repository, oid, MAX_LICENSE_SIZE)
|
77
|
+
data
|
78
|
+
end
|
79
|
+
|
80
|
+
def find_file
|
81
|
+
files = commit.tree.map do |entry|
|
82
|
+
next unless entry[:type] == :blob
|
83
|
+
if (score = yield entry[:name]) > 0
|
84
|
+
{ :name => entry[:name], :oid => entry[:oid], :score => score }
|
85
|
+
end
|
86
|
+
end.compact
|
87
|
+
|
88
|
+
return if files.empty?
|
89
|
+
files.sort! { |a, b| b[:score] <=> a[:score] }
|
90
|
+
|
91
|
+
f = files.first
|
92
|
+
[load_blob_data(f[:oid]), f[:name]]
|
53
93
|
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Filesystem-based project
|
97
|
+
#
|
98
|
+
# Analyze a folder on the filesystem for license information
|
99
|
+
class FSProject < Project
|
100
|
+
attr_reader :path
|
101
|
+
|
102
|
+
def initialize(path, detect_packages: false)
|
103
|
+
@path = path
|
104
|
+
super(detect_packages)
|
105
|
+
end
|
106
|
+
|
107
|
+
private
|
108
|
+
def find_file
|
109
|
+
files = []
|
110
|
+
|
111
|
+
Dir.foreach(path) do |file|
|
112
|
+
next unless ::File.file?(::File.join(path, file))
|
113
|
+
if (score = yield file) > 0
|
114
|
+
files.push({ :name => file, :score => score })
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
return if files.empty?
|
119
|
+
files.sort! { |a, b| b[:score] <=> a[:score] }
|
54
120
|
|
55
|
-
|
56
|
-
|
121
|
+
f = files.first
|
122
|
+
[::File.read(::File.join(path, f[:name])), f[:name]]
|
57
123
|
end
|
58
124
|
end
|
59
125
|
end
|
@@ -1,91 +1,51 @@
|
|
1
|
+
# encoding=utf-8
|
1
2
|
class Licensee
|
2
|
-
class
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def initialize(blob, path)
|
13
|
-
@blob = blob
|
14
|
-
@path = path
|
15
|
-
end
|
16
|
-
|
17
|
-
# Raw file contents
|
18
|
-
def content
|
19
|
-
@contents ||= blob.content.force_encoding("UTF-8")
|
20
|
-
end
|
21
|
-
alias_method :to_s, :content
|
22
|
-
alias_method :contents, :content
|
23
|
-
|
24
|
-
# File content with all whitespace replaced with a single space
|
25
|
-
def content_normalized
|
26
|
-
@content_normalized ||= normalize_content(content)
|
27
|
-
end
|
28
|
-
|
29
|
-
# Determines which matching strategy to use, returns an instance of that matcher
|
30
|
-
def matcher
|
31
|
-
return @matcher if defined? @matcher
|
32
|
-
@matcher = Licensee.matchers.map { |m| m.new(self) }.find { |m| m.match }
|
33
|
-
end
|
3
|
+
class Project
|
4
|
+
private
|
5
|
+
class File
|
6
|
+
attr_reader :content, :filename
|
7
|
+
|
8
|
+
def initialize(content, filename = nil)
|
9
|
+
@content = content
|
10
|
+
@content.force_encoding(Encoding::UTF_8)
|
11
|
+
@filename = filename
|
12
|
+
end
|
34
13
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
end
|
14
|
+
def matcher
|
15
|
+
@matcher ||= possible_matchers.map { |m| m.new(self) }.find { |m| m.match }
|
16
|
+
end
|
39
17
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
18
|
+
# Returns the percent confidence of the match
|
19
|
+
def confidence
|
20
|
+
matcher && matcher.confidence
|
21
|
+
end
|
44
22
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
rescue Rugged::InvalidError
|
49
|
-
0
|
50
|
-
end
|
23
|
+
def license
|
24
|
+
matcher && matcher.match
|
25
|
+
end
|
51
26
|
|
52
|
-
|
53
|
-
|
27
|
+
alias_method :match, :license
|
28
|
+
alias_method :path, :filename
|
54
29
|
end
|
55
30
|
|
56
|
-
|
57
|
-
|
58
|
-
|
31
|
+
public
|
32
|
+
class LicenseFile < File
|
33
|
+
include Licensee::ContentHelper
|
59
34
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
matches[0].strip if matches
|
64
|
-
end
|
35
|
+
def possible_matchers
|
36
|
+
[Matchers::Copyright, Matchers::Exact, Matchers::Dice]
|
37
|
+
end
|
65
38
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
return 0.75 if filename =~ /bower.json/
|
70
|
-
return 0.0
|
71
|
-
end
|
39
|
+
def wordset
|
40
|
+
@wordset ||= create_word_set(content)
|
41
|
+
end
|
72
42
|
|
73
|
-
|
74
|
-
|
75
|
-
|
43
|
+
def attribution
|
44
|
+
matches = /^#{Matchers::Copyright::REGEX}$/i.match(content)
|
45
|
+
matches[0].strip if matches
|
46
|
+
end
|
76
47
|
|
77
|
-
|
78
|
-
# Scores a given file as a potential license
|
79
|
-
#
|
80
|
-
# filename - (string) the name of the file to score
|
81
|
-
#
|
82
|
-
# Returns 1.0 if the file is definitely a license file (e.g, LICENSE)
|
83
|
-
# Returns 0.9 if the file is almost certainly a license file (e.g., LICENSE.md)
|
84
|
-
# Returns 0.8 if the file is probably a license file (e.g., COPYING, COPYING.md)
|
85
|
-
# Returns 0.7 if the file is potentially a license file (e.g., LICENSE.php)
|
86
|
-
# Returns 0.5 if the file is likely a license file (MIT-LICENSE)
|
87
|
-
# Returns 0.0 if the file is definitely not a license file (e.g., index.php)
|
88
|
-
def license_score(filename)
|
48
|
+
def self.name_score(filename)
|
89
49
|
return 1.0 if filename =~ /\A(un)?licen[sc]e\z/i
|
90
50
|
return 0.9 if filename =~ /\A(un)?licen[sc]e\.(md|markdown|txt)\z/i
|
91
51
|
return 0.8 if filename =~ /\Acopy(ing|right)(\.[^.]+)?\z/i
|
@@ -94,5 +54,25 @@ class Licensee
|
|
94
54
|
return 0.0
|
95
55
|
end
|
96
56
|
end
|
57
|
+
|
58
|
+
class PackageInfo < File
|
59
|
+
def possible_matchers
|
60
|
+
case ::File.extname(filename)
|
61
|
+
when ".gemspec"
|
62
|
+
[Matchers::Gemspec]
|
63
|
+
when ".json"
|
64
|
+
[Matchers::NpmBower]
|
65
|
+
else
|
66
|
+
[]
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def self.name_score(filename)
|
71
|
+
return 1.0 if ::File.extname(filename) == ".gemspec"
|
72
|
+
return 1.0 if filename == "package.json"
|
73
|
+
return 0.75 if filename == "bower.json"
|
74
|
+
return 0.0
|
75
|
+
end
|
76
|
+
end
|
97
77
|
end
|
98
78
|
end
|
data/lib/licensee/version.rb
CHANGED
data/licensee.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require File.expand_path("../lib/licensee/version", __FILE__)
|
2
|
+
|
3
|
+
Gem::Specification.new do |gem|
|
4
|
+
gem.name = 'licensee'
|
5
|
+
gem.version = Licensee::VERSION
|
6
|
+
|
7
|
+
gem.summary = "A Ruby Gem to detect under what license a project is distributed"
|
8
|
+
gem.description = "Licensee automates the process of reading LICENSE files and compares their contents to known licenses using a fancy math thing called Rabin-Karp rolling-hashes."
|
9
|
+
|
10
|
+
gem.authors = ['Ben Balter']
|
11
|
+
gem.email = 'ben.balter@github.com'
|
12
|
+
gem.homepage = 'http://github.com/benbalter/licensee'
|
13
|
+
gem.license = "MIT"
|
14
|
+
|
15
|
+
gem.bindir = 'bin'
|
16
|
+
gem.executables << 'licensee'
|
17
|
+
|
18
|
+
gem.add_dependency('rugged', '~> 0.23')
|
19
|
+
gem.add_development_dependency('pry', '~> 0.9')
|
20
|
+
gem.add_development_dependency('shoulda', '~> 3.5')
|
21
|
+
gem.add_development_dependency('rake', '~> 10.3')
|
22
|
+
gem.add_development_dependency('ruby-prof', '~> 0.15')
|
23
|
+
|
24
|
+
# ensure the gem is built out of versioned files
|
25
|
+
gem.files = Dir['Rakefile', '{bin,lib,man,test,vendor,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files -z`.split("\0")
|
26
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
ref: refs/heads/master
|
Binary file
|
Binary file
|