licensee 5.0.0 → 6.0.0b1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +15 -50
- data/bin/licensee +7 -8
- data/lib/licensee.rb +9 -33
- data/lib/licensee/content_helper.rb +7 -8
- data/lib/licensee/license.rb +5 -28
- data/lib/licensee/matchers/copyright_matcher.rb +17 -16
- data/lib/licensee/matchers/dice_matcher.rb +65 -0
- data/lib/licensee/matchers/exact_matcher.rb +12 -6
- data/lib/licensee/matchers/gemspec_matcher.rb +11 -11
- data/lib/licensee/matchers/npm_bower_matcher.rb +10 -10
- data/lib/licensee/matchers/package_matcher.rb +11 -10
- data/lib/licensee/project.rb +96 -30
- data/lib/licensee/project_file.rb +57 -77
- data/lib/licensee/version.rb +1 -1
- data/licensee.gemspec +26 -0
- data/test/fixtures/npm.git/HEAD +1 -0
- data/test/fixtures/npm.git/config +4 -0
- data/test/fixtures/npm.git/objects/info/packs +2 -0
- data/test/fixtures/npm.git/objects/pack/pack-03c0879445cabcc37f91d97c7955465adef26f4a.idx +0 -0
- data/test/fixtures/npm.git/objects/pack/pack-03c0879445cabcc37f91d97c7955465adef26f4a.pack +0 -0
- data/test/fixtures/npm.git/packed-refs +2 -0
- data/test/functions.rb +4 -15
- data/test/test_licensee.rb +1 -13
- data/test/test_licensee_copyright_matcher.rb +19 -28
- data/test/test_licensee_dice_matcher.rb +21 -0
- data/test/test_licensee_exact_matcher.rb +4 -6
- data/test/test_licensee_gemspec_matcher.rb +3 -11
- data/test/test_licensee_license.rb +2 -12
- data/test/test_licensee_npm_bower_matcher.rb +10 -16
- data/test/test_licensee_project.rb +24 -35
- data/test/test_licensee_project_file.rb +5 -10
- data/vendor/choosealicense.com/_licenses/afl-3.0.txt +69 -0
- data/vendor/choosealicense.com/_licenses/isc.txt +2 -2
- metadata +14 -26
- data/lib/licensee/filesystem_repository.rb +0 -38
- data/lib/licensee/matcher.rb +0 -32
- data/lib/licensee/matchers/git_matcher.rb +0 -27
- data/lib/licensee/matchers/levenshtein_matcher.rb +0 -75
- data/test/test_licensee_content_helper.rb +0 -40
- data/test/test_licensee_git_matcher.rb +0 -19
- data/test/test_licensee_levenshtein_matcher.rb +0 -34
- data/test/test_licensee_matcher.rb +0 -7
@@ -1,16 +1,16 @@
|
|
1
1
|
class Licensee
|
2
|
-
|
2
|
+
module Matchers
|
3
|
+
class Gemspec < Package
|
4
|
+
# We definitely don't want to be evaling arbitrary Gemspec files
|
5
|
+
# While not 100% accurate, use some lenient regex to try to grep the
|
6
|
+
# license declaration from the Gemspec as a string, if any
|
7
|
+
LICENSE_REGEX = /^\s*[a-z0-9_]+\.license\s*\=\s*[\'\"]([a-z\-0-9\.]+)[\'\"]\s*$/i
|
3
8
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
private
|
10
|
-
|
11
|
-
def license_property
|
12
|
-
match = file.content.match LICENSE_REGEX
|
13
|
-
match[1].downcase if match && match[1]
|
9
|
+
private
|
10
|
+
def license_property
|
11
|
+
match = @file.content.match LICENSE_REGEX
|
12
|
+
match[1].downcase if match && match[1]
|
13
|
+
end
|
14
14
|
end
|
15
15
|
end
|
16
16
|
end
|
@@ -1,15 +1,15 @@
|
|
1
1
|
class Licensee
|
2
|
-
|
2
|
+
module Matchers
|
3
|
+
class NpmBower < Package
|
4
|
+
# While we could parse the package.json or bower.json file, prefer
|
5
|
+
# a lenient regex for speed and security. Moar parsing moar problems.
|
6
|
+
LICENSE_REGEX = /\s*[\"\']license[\"\']\s*\:\s*[\'\"]([a-z\-0-9\.]+)[\'\"],?\s*/i
|
3
7
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
def license_property
|
11
|
-
match = file.content.match LICENSE_REGEX
|
12
|
-
match[1].downcase if match && match[1]
|
8
|
+
private
|
9
|
+
def license_property
|
10
|
+
match = @file.content.match LICENSE_REGEX
|
11
|
+
match[1].downcase if match && match[1]
|
12
|
+
end
|
13
13
|
end
|
14
14
|
end
|
15
15
|
end
|
@@ -1,16 +1,17 @@
|
|
1
1
|
class Licensee
|
2
|
-
|
2
|
+
module Matchers
|
3
|
+
class Package
|
4
|
+
def initialize(file)
|
5
|
+
@file = file
|
6
|
+
end
|
3
7
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def confidence
|
9
|
-
90
|
10
|
-
end
|
8
|
+
def match
|
9
|
+
Licensee.licenses(:hidden => true).find { |l| l.key == license_property }
|
10
|
+
end
|
11
11
|
|
12
|
-
|
13
|
-
|
12
|
+
def confidence
|
13
|
+
90
|
14
|
+
end
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
data/lib/licensee/project.rb
CHANGED
@@ -1,59 +1,125 @@
|
|
1
|
+
require 'rugged'
|
2
|
+
|
1
3
|
class Licensee
|
4
|
+
private
|
2
5
|
class Project
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
#
|
7
|
-
# path_or_repo - path to git repo or Rugged::Repository instance
|
8
|
-
# revision - revision ref, if any
|
9
|
-
def initialize(path_or_repo, revision = nil)
|
10
|
-
if path_or_repo.kind_of? Rugged::Repository
|
11
|
-
@repository = path_or_repo
|
12
|
-
else
|
13
|
-
begin
|
14
|
-
@repository = Rugged::Repository.new(path_or_repo)
|
15
|
-
rescue Rugged::RepositoryError
|
16
|
-
raise if revision
|
17
|
-
@repository = FilesystemRepository.new(path_or_repo)
|
18
|
-
end
|
19
|
-
end
|
6
|
+
def initialize(detect_packages)
|
7
|
+
@detect_packages = detect_packages
|
8
|
+
end
|
20
9
|
|
21
|
-
|
10
|
+
def detect_packages?
|
11
|
+
@detect_packages
|
22
12
|
end
|
23
13
|
|
24
14
|
# Returns the matching Licensee::License instance if a license can be detected
|
25
15
|
def license
|
26
|
-
@license ||= matched_file
|
16
|
+
@license ||= matched_file && matched_file.license
|
17
|
+
end
|
18
|
+
|
19
|
+
def matched_file
|
20
|
+
@matched_file ||= (license_file || package_file)
|
27
21
|
end
|
28
22
|
|
29
23
|
def license_file
|
30
24
|
return @license_file if defined? @license_file
|
31
|
-
@license_file =
|
25
|
+
@license_file = begin
|
26
|
+
content, name = find_file { |name| LicenseFile.name_score(name) }
|
27
|
+
if content && name
|
28
|
+
LicenseFile.new(content, name)
|
29
|
+
end
|
30
|
+
end
|
32
31
|
end
|
33
32
|
|
34
33
|
def package_file
|
35
|
-
return unless
|
34
|
+
return unless detect_packages?
|
36
35
|
return @package_file if defined? @package_file
|
37
|
-
@package_file =
|
36
|
+
@package_file = begin
|
37
|
+
content, name = find_file { |name| PackageInfo.name_score(name) }
|
38
|
+
if content && name
|
39
|
+
PackageInfo.new(content, name)
|
40
|
+
end
|
41
|
+
end
|
38
42
|
end
|
43
|
+
end
|
39
44
|
|
40
|
-
|
41
|
-
|
42
|
-
|
45
|
+
public
|
46
|
+
|
47
|
+
# Git-based project
|
48
|
+
#
|
49
|
+
# Analyze a given git repository for license information
|
50
|
+
class GitProject < Project
|
51
|
+
attr_reader :repository, :revision
|
52
|
+
|
53
|
+
class InvalidRepository < ArgumentError; end
|
54
|
+
|
55
|
+
def initialize(repo, revision: nil, detect_packages: false)
|
56
|
+
if repo.kind_of? Rugged::Repository
|
57
|
+
@repository = repo
|
58
|
+
else
|
59
|
+
@repository = Rugged::Repository.new(repo)
|
60
|
+
end
|
61
|
+
|
62
|
+
@revision = revision
|
63
|
+
super(detect_packages)
|
64
|
+
rescue Rugged::RepositoryError
|
65
|
+
raise InvalidRepository
|
43
66
|
end
|
44
67
|
|
45
68
|
private
|
46
|
-
|
47
69
|
def commit
|
48
70
|
@commit ||= revision ? repository.lookup(revision) : repository.last_commit
|
49
71
|
end
|
50
72
|
|
51
|
-
|
52
|
-
|
73
|
+
MAX_LICENSE_SIZE = 64 * 1024
|
74
|
+
|
75
|
+
def load_blob_data(oid)
|
76
|
+
data, _ = Rugged::Blob.to_buffer(repository, oid, MAX_LICENSE_SIZE)
|
77
|
+
data
|
78
|
+
end
|
79
|
+
|
80
|
+
def find_file
|
81
|
+
files = commit.tree.map do |entry|
|
82
|
+
next unless entry[:type] == :blob
|
83
|
+
if (score = yield entry[:name]) > 0
|
84
|
+
{ :name => entry[:name], :oid => entry[:oid], :score => score }
|
85
|
+
end
|
86
|
+
end.compact
|
87
|
+
|
88
|
+
return if files.empty?
|
89
|
+
files.sort! { |a, b| b[:score] <=> a[:score] }
|
90
|
+
|
91
|
+
f = files.first
|
92
|
+
[load_blob_data(f[:oid]), f[:name]]
|
53
93
|
end
|
94
|
+
end
|
95
|
+
|
96
|
+
# Filesystem-based project
|
97
|
+
#
|
98
|
+
# Analyze a folder on the filesystem for license information
|
99
|
+
class FSProject < Project
|
100
|
+
attr_reader :path
|
101
|
+
|
102
|
+
def initialize(path, detect_packages: false)
|
103
|
+
@path = path
|
104
|
+
super(detect_packages)
|
105
|
+
end
|
106
|
+
|
107
|
+
private
|
108
|
+
def find_file
|
109
|
+
files = []
|
110
|
+
|
111
|
+
Dir.foreach(path) do |file|
|
112
|
+
next unless ::File.file?(::File.join(path, file))
|
113
|
+
if (score = yield file) > 0
|
114
|
+
files.push({ :name => file, :score => score })
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
return if files.empty?
|
119
|
+
files.sort! { |a, b| b[:score] <=> a[:score] }
|
54
120
|
|
55
|
-
|
56
|
-
|
121
|
+
f = files.first
|
122
|
+
[::File.read(::File.join(path, f[:name])), f[:name]]
|
57
123
|
end
|
58
124
|
end
|
59
125
|
end
|
@@ -1,91 +1,51 @@
|
|
1
|
+
# encoding=utf-8
|
1
2
|
class Licensee
|
2
|
-
class
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
def initialize(blob, path)
|
13
|
-
@blob = blob
|
14
|
-
@path = path
|
15
|
-
end
|
16
|
-
|
17
|
-
# Raw file contents
|
18
|
-
def content
|
19
|
-
@contents ||= blob.content.force_encoding("UTF-8")
|
20
|
-
end
|
21
|
-
alias_method :to_s, :content
|
22
|
-
alias_method :contents, :content
|
23
|
-
|
24
|
-
# File content with all whitespace replaced with a single space
|
25
|
-
def content_normalized
|
26
|
-
@content_normalized ||= normalize_content(content)
|
27
|
-
end
|
28
|
-
|
29
|
-
# Determines which matching strategy to use, returns an instance of that matcher
|
30
|
-
def matcher
|
31
|
-
return @matcher if defined? @matcher
|
32
|
-
@matcher = Licensee.matchers.map { |m| m.new(self) }.find { |m| m.match }
|
33
|
-
end
|
3
|
+
class Project
|
4
|
+
private
|
5
|
+
class File
|
6
|
+
attr_reader :content, :filename
|
7
|
+
|
8
|
+
def initialize(content, filename = nil)
|
9
|
+
@content = content
|
10
|
+
@content.force_encoding(Encoding::UTF_8)
|
11
|
+
@filename = filename
|
12
|
+
end
|
34
13
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
end
|
14
|
+
def matcher
|
15
|
+
@matcher ||= possible_matchers.map { |m| m.new(self) }.find { |m| m.match }
|
16
|
+
end
|
39
17
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
18
|
+
# Returns the percent confidence of the match
|
19
|
+
def confidence
|
20
|
+
matcher && matcher.confidence
|
21
|
+
end
|
44
22
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
rescue Rugged::InvalidError
|
49
|
-
0
|
50
|
-
end
|
23
|
+
def license
|
24
|
+
matcher && matcher.match
|
25
|
+
end
|
51
26
|
|
52
|
-
|
53
|
-
|
27
|
+
alias_method :match, :license
|
28
|
+
alias_method :path, :filename
|
54
29
|
end
|
55
30
|
|
56
|
-
|
57
|
-
|
58
|
-
|
31
|
+
public
|
32
|
+
class LicenseFile < File
|
33
|
+
include Licensee::ContentHelper
|
59
34
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
matches[0].strip if matches
|
64
|
-
end
|
35
|
+
def possible_matchers
|
36
|
+
[Matchers::Copyright, Matchers::Exact, Matchers::Dice]
|
37
|
+
end
|
65
38
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
return 0.75 if filename =~ /bower.json/
|
70
|
-
return 0.0
|
71
|
-
end
|
39
|
+
def wordset
|
40
|
+
@wordset ||= create_word_set(content)
|
41
|
+
end
|
72
42
|
|
73
|
-
|
74
|
-
|
75
|
-
|
43
|
+
def attribution
|
44
|
+
matches = /^#{Matchers::Copyright::REGEX}$/i.match(content)
|
45
|
+
matches[0].strip if matches
|
46
|
+
end
|
76
47
|
|
77
|
-
|
78
|
-
# Scores a given file as a potential license
|
79
|
-
#
|
80
|
-
# filename - (string) the name of the file to score
|
81
|
-
#
|
82
|
-
# Returns 1.0 if the file is definitely a license file (e.g, LICENSE)
|
83
|
-
# Returns 0.9 if the file is almost certainly a license file (e.g., LICENSE.md)
|
84
|
-
# Returns 0.8 if the file is probably a license file (e.g., COPYING, COPYING.md)
|
85
|
-
# Returns 0.7 if the file is potentially a license file (e.g., LICENSE.php)
|
86
|
-
# Returns 0.5 if the file is likely a license file (MIT-LICENSE)
|
87
|
-
# Returns 0.0 if the file is definitely not a license file (e.g., index.php)
|
88
|
-
def license_score(filename)
|
48
|
+
def self.name_score(filename)
|
89
49
|
return 1.0 if filename =~ /\A(un)?licen[sc]e\z/i
|
90
50
|
return 0.9 if filename =~ /\A(un)?licen[sc]e\.(md|markdown|txt)\z/i
|
91
51
|
return 0.8 if filename =~ /\Acopy(ing|right)(\.[^.]+)?\z/i
|
@@ -94,5 +54,25 @@ class Licensee
|
|
94
54
|
return 0.0
|
95
55
|
end
|
96
56
|
end
|
57
|
+
|
58
|
+
class PackageInfo < File
|
59
|
+
def possible_matchers
|
60
|
+
case ::File.extname(filename)
|
61
|
+
when ".gemspec"
|
62
|
+
[Matchers::Gemspec]
|
63
|
+
when ".json"
|
64
|
+
[Matchers::NpmBower]
|
65
|
+
else
|
66
|
+
[]
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def self.name_score(filename)
|
71
|
+
return 1.0 if ::File.extname(filename) == ".gemspec"
|
72
|
+
return 1.0 if filename == "package.json"
|
73
|
+
return 0.75 if filename == "bower.json"
|
74
|
+
return 0.0
|
75
|
+
end
|
76
|
+
end
|
97
77
|
end
|
98
78
|
end
|
data/lib/licensee/version.rb
CHANGED
data/licensee.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require File.expand_path("../lib/licensee/version", __FILE__)
|
2
|
+
|
3
|
+
Gem::Specification.new do |gem|
|
4
|
+
gem.name = 'licensee'
|
5
|
+
gem.version = Licensee::VERSION
|
6
|
+
|
7
|
+
gem.summary = "A Ruby Gem to detect under what license a project is distributed"
|
8
|
+
gem.description = "Licensee automates the process of reading LICENSE files and compares their contents to known licenses using a fancy math thing called Rabin-Karp rolling-hashes."
|
9
|
+
|
10
|
+
gem.authors = ['Ben Balter']
|
11
|
+
gem.email = 'ben.balter@github.com'
|
12
|
+
gem.homepage = 'http://github.com/benbalter/licensee'
|
13
|
+
gem.license = "MIT"
|
14
|
+
|
15
|
+
gem.bindir = 'bin'
|
16
|
+
gem.executables << 'licensee'
|
17
|
+
|
18
|
+
gem.add_dependency('rugged', '~> 0.23')
|
19
|
+
gem.add_development_dependency('pry', '~> 0.9')
|
20
|
+
gem.add_development_dependency('shoulda', '~> 3.5')
|
21
|
+
gem.add_development_dependency('rake', '~> 10.3')
|
22
|
+
gem.add_development_dependency('ruby-prof', '~> 0.15')
|
23
|
+
|
24
|
+
# ensure the gem is built out of versioned files
|
25
|
+
gem.files = Dir['Rakefile', '{bin,lib,man,test,vendor,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files -z`.split("\0")
|
26
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
ref: refs/heads/master
|
Binary file
|
Binary file
|