licensee 5.0.0 → 6.0.0b1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +15 -50
  3. data/bin/licensee +7 -8
  4. data/lib/licensee.rb +9 -33
  5. data/lib/licensee/content_helper.rb +7 -8
  6. data/lib/licensee/license.rb +5 -28
  7. data/lib/licensee/matchers/copyright_matcher.rb +17 -16
  8. data/lib/licensee/matchers/dice_matcher.rb +65 -0
  9. data/lib/licensee/matchers/exact_matcher.rb +12 -6
  10. data/lib/licensee/matchers/gemspec_matcher.rb +11 -11
  11. data/lib/licensee/matchers/npm_bower_matcher.rb +10 -10
  12. data/lib/licensee/matchers/package_matcher.rb +11 -10
  13. data/lib/licensee/project.rb +96 -30
  14. data/lib/licensee/project_file.rb +57 -77
  15. data/lib/licensee/version.rb +1 -1
  16. data/licensee.gemspec +26 -0
  17. data/test/fixtures/npm.git/HEAD +1 -0
  18. data/test/fixtures/npm.git/config +4 -0
  19. data/test/fixtures/npm.git/objects/info/packs +2 -0
  20. data/test/fixtures/npm.git/objects/pack/pack-03c0879445cabcc37f91d97c7955465adef26f4a.idx +0 -0
  21. data/test/fixtures/npm.git/objects/pack/pack-03c0879445cabcc37f91d97c7955465adef26f4a.pack +0 -0
  22. data/test/fixtures/npm.git/packed-refs +2 -0
  23. data/test/functions.rb +4 -15
  24. data/test/test_licensee.rb +1 -13
  25. data/test/test_licensee_copyright_matcher.rb +19 -28
  26. data/test/test_licensee_dice_matcher.rb +21 -0
  27. data/test/test_licensee_exact_matcher.rb +4 -6
  28. data/test/test_licensee_gemspec_matcher.rb +3 -11
  29. data/test/test_licensee_license.rb +2 -12
  30. data/test/test_licensee_npm_bower_matcher.rb +10 -16
  31. data/test/test_licensee_project.rb +24 -35
  32. data/test/test_licensee_project_file.rb +5 -10
  33. data/vendor/choosealicense.com/_licenses/afl-3.0.txt +69 -0
  34. data/vendor/choosealicense.com/_licenses/isc.txt +2 -2
  35. metadata +14 -26
  36. data/lib/licensee/filesystem_repository.rb +0 -38
  37. data/lib/licensee/matcher.rb +0 -32
  38. data/lib/licensee/matchers/git_matcher.rb +0 -27
  39. data/lib/licensee/matchers/levenshtein_matcher.rb +0 -75
  40. data/test/test_licensee_content_helper.rb +0 -40
  41. data/test/test_licensee_git_matcher.rb +0 -19
  42. data/test/test_licensee_levenshtein_matcher.rb +0 -34
  43. data/test/test_licensee_matcher.rb +0 -7
@@ -1,16 +1,16 @@
1
1
  class Licensee
2
- class GemspecMatcher < PackageMatcher
2
+ module Matchers
3
+ class Gemspec < Package
4
+ # We definitely don't want to be evaling arbitrary Gemspec files
5
+ # While not 100% accurate, use some lenient regex to try to grep the
6
+ # license declaration from the Gemspec as a string, if any
7
+ LICENSE_REGEX = /^\s*[a-z0-9_]+\.license\s*\=\s*[\'\"]([a-z\-0-9\.]+)[\'\"]\s*$/i
3
8
 
4
- # We definitely don't want to be evaling arbitrary Gemspec files
5
- # While not 100% accurate, use some lenient regex to try to grep the
6
- # license declaration from the Gemspec as a string, if any
7
- LICENSE_REGEX = /^\s*[a-z0-9_]+\.license\s*\=\s*[\'\"]([a-z\-0-9\.]+)[\'\"]\s*$/i
8
-
9
- private
10
-
11
- def license_property
12
- match = file.content.match LICENSE_REGEX
13
- match[1].downcase if match && match[1]
9
+ private
10
+ def license_property
11
+ match = @file.content.match LICENSE_REGEX
12
+ match[1].downcase if match && match[1]
13
+ end
14
14
  end
15
15
  end
16
16
  end
@@ -1,15 +1,15 @@
1
1
  class Licensee
2
- class NpmBowerMatcher < PackageMatcher
2
+ module Matchers
3
+ class NpmBower < Package
4
+ # While we could parse the package.json or bower.json file, prefer
5
+ # a lenient regex for speed and security. Moar parsing moar problems.
6
+ LICENSE_REGEX = /\s*[\"\']license[\"\']\s*\:\s*[\'\"]([a-z\-0-9\.]+)[\'\"],?\s*/i
3
7
 
4
- # While we could parse the package.json or bower.json file, prefer
5
- # a lenient regex for speed and security. Moar parsing moar problems.
6
- LICENSE_REGEX = /\s*[\"\']license[\"\']\s*\:\s*[\'\"]([a-z\-0-9\.]+)[\'\"],?\s*/i
7
-
8
- private
9
-
10
- def license_property
11
- match = file.content.match LICENSE_REGEX
12
- match[1].downcase if match && match[1]
8
+ private
9
+ def license_property
10
+ match = @file.content.match LICENSE_REGEX
11
+ match[1].downcase if match && match[1]
12
+ end
13
13
  end
14
14
  end
15
15
  end
@@ -1,16 +1,17 @@
1
1
  class Licensee
2
- class PackageMatcher < Matcher
2
+ module Matchers
3
+ class Package
4
+ def initialize(file)
5
+ @file = file
6
+ end
3
7
 
4
- def match
5
- Licensee.licenses(:hidden => true).find { |l| l.key == license_property } if file.package?
6
- end
7
-
8
- def confidence
9
- 90
10
- end
8
+ def match
9
+ Licensee.licenses(:hidden => true).find { |l| l.key == license_property }
10
+ end
11
11
 
12
- def self.package_manager?
13
- true
12
+ def confidence
13
+ 90
14
+ end
14
15
  end
15
16
  end
16
17
  end
@@ -1,59 +1,125 @@
1
+ require 'rugged'
2
+
1
3
  class Licensee
4
+ private
2
5
  class Project
3
- attr_reader :repository, :revision
4
-
5
- # Initializes a new project
6
- #
7
- # path_or_repo path to git repo or Rugged::Repository instance
8
- # revision - revision ref, if any
9
- def initialize(path_or_repo, revision = nil)
10
- if path_or_repo.kind_of? Rugged::Repository
11
- @repository = path_or_repo
12
- else
13
- begin
14
- @repository = Rugged::Repository.new(path_or_repo)
15
- rescue Rugged::RepositoryError
16
- raise if revision
17
- @repository = FilesystemRepository.new(path_or_repo)
18
- end
19
- end
6
+ def initialize(detect_packages)
7
+ @detect_packages = detect_packages
8
+ end
20
9
 
21
- @revision = revision
10
+ def detect_packages?
11
+ @detect_packages
22
12
  end
23
13
 
24
14
  # Returns the matching Licensee::License instance if a license can be detected
25
15
  def license
26
- @license ||= matched_file.match if matched_file
16
+ @license ||= matched_file && matched_file.license
17
+ end
18
+
19
+ def matched_file
20
+ @matched_file ||= (license_file || package_file)
27
21
  end
28
22
 
29
23
  def license_file
30
24
  return @license_file if defined? @license_file
31
- @license_file = files.select { |f| f.license? }.sort_by { |f| f.license_score }.last
25
+ @license_file = begin
26
+ content, name = find_file { |name| LicenseFile.name_score(name) }
27
+ if content && name
28
+ LicenseFile.new(content, name)
29
+ end
30
+ end
32
31
  end
33
32
 
34
33
  def package_file
35
- return unless Licensee.package_manager_files?
34
+ return unless detect_packages?
36
35
  return @package_file if defined? @package_file
37
- @package_file = files.select { |f| f.package? }.sort_by { |f| f.package_score }.last
36
+ @package_file = begin
37
+ content, name = find_file { |name| PackageInfo.name_score(name) }
38
+ if content && name
39
+ PackageInfo.new(content, name)
40
+ end
41
+ end
38
42
  end
43
+ end
39
44
 
40
- def matched_file
41
- return license_file if license_file && license_file.match
42
- return package_file if package_file && package_file.match
45
+ public
46
+
47
+ # Git-based project
48
+ #
49
+ # Analyze a given git repository for license information
50
+ class GitProject < Project
51
+ attr_reader :repository, :revision
52
+
53
+ class InvalidRepository < ArgumentError; end
54
+
55
+ def initialize(repo, revision: nil, detect_packages: false)
56
+ if repo.kind_of? Rugged::Repository
57
+ @repository = repo
58
+ else
59
+ @repository = Rugged::Repository.new(repo)
60
+ end
61
+
62
+ @revision = revision
63
+ super(detect_packages)
64
+ rescue Rugged::RepositoryError
65
+ raise InvalidRepository
43
66
  end
44
67
 
45
68
  private
46
-
47
69
  def commit
48
70
  @commit ||= revision ? repository.lookup(revision) : repository.last_commit
49
71
  end
50
72
 
51
- def tree
52
- @tree ||= commit.tree.select { |blob| blob[:type] == :blob }
73
+ MAX_LICENSE_SIZE = 64 * 1024
74
+
75
+ def load_blob_data(oid)
76
+ data, _ = Rugged::Blob.to_buffer(repository, oid, MAX_LICENSE_SIZE)
77
+ data
78
+ end
79
+
80
+ def find_file
81
+ files = commit.tree.map do |entry|
82
+ next unless entry[:type] == :blob
83
+ if (score = yield entry[:name]) > 0
84
+ { :name => entry[:name], :oid => entry[:oid], :score => score }
85
+ end
86
+ end.compact
87
+
88
+ return if files.empty?
89
+ files.sort! { |a, b| b[:score] <=> a[:score] }
90
+
91
+ f = files.first
92
+ [load_blob_data(f[:oid]), f[:name]]
53
93
  end
94
+ end
95
+
96
+ # Filesystem-based project
97
+ #
98
+ # Analyze a folder on the filesystem for license information
99
+ class FSProject < Project
100
+ attr_reader :path
101
+
102
+ def initialize(path, detect_packages: false)
103
+ @path = path
104
+ super(detect_packages)
105
+ end
106
+
107
+ private
108
+ def find_file
109
+ files = []
110
+
111
+ Dir.foreach(path) do |file|
112
+ next unless ::File.file?(::File.join(path, file))
113
+ if (score = yield file) > 0
114
+ files.push({ :name => file, :score => score })
115
+ end
116
+ end
117
+
118
+ return if files.empty?
119
+ files.sort! { |a, b| b[:score] <=> a[:score] }
54
120
 
55
- def files
56
- @files ||= tree.map { |blob| ProjectFile.new(repository.lookup(blob[:oid]), blob[:name]) }
121
+ f = files.first
122
+ [::File.read(::File.join(path, f[:name])), f[:name]]
57
123
  end
58
124
  end
59
125
  end
@@ -1,91 +1,51 @@
1
+ # encoding=utf-8
1
2
  class Licensee
2
- class ProjectFile
3
-
4
- # Note: File can be a license file (e.g., `LICENSE.txt`)
5
- # or a package manager file (e.g, `package.json`)
6
-
7
- attr_reader :blob, :path
8
- alias_method :filename, :path
9
-
10
- include Licensee::ContentHelper
11
-
12
- def initialize(blob, path)
13
- @blob = blob
14
- @path = path
15
- end
16
-
17
- # Raw file contents
18
- def content
19
- @contents ||= blob.content.force_encoding("UTF-8")
20
- end
21
- alias_method :to_s, :content
22
- alias_method :contents, :content
23
-
24
- # File content with all whitespace replaced with a single space
25
- def content_normalized
26
- @content_normalized ||= normalize_content(content)
27
- end
28
-
29
- # Determines which matching strategy to use, returns an instance of that matcher
30
- def matcher
31
- return @matcher if defined? @matcher
32
- @matcher = Licensee.matchers.map { |m| m.new(self) }.find { |m| m.match }
33
- end
3
+ class Project
4
+ private
5
+ class File
6
+ attr_reader :content, :filename
7
+
8
+ def initialize(content, filename = nil)
9
+ @content = content
10
+ @content.force_encoding(Encoding::UTF_8)
11
+ @filename = filename
12
+ end
34
13
 
35
- # Returns a Licensee::License instance of the matched license
36
- def match
37
- @match ||= matcher.match if matcher
38
- end
14
+ def matcher
15
+ @matcher ||= possible_matchers.map { |m| m.new(self) }.find { |m| m.match }
16
+ end
39
17
 
40
- # Returns the confidence of the match, as a percentage
41
- def confidence
42
- @condience ||= matcher.confidence if matcher
43
- end
18
+ # Returns the confidence of the match, as a percentage
19
+ def confidence
20
+ matcher && matcher.confidence
21
+ end
44
22
 
45
- def similarity(other)
46
- blob.hashsig(Rugged::Blob::HashSignature::WHITESPACE_SMART)
47
- other.hashsig && blob.hashsig ? blob.similarity(other.hashsig) : 0
48
- rescue Rugged::InvalidError
49
- 0
50
- end
23
+ def license
24
+ matcher && matcher.match
25
+ end
51
26
 
52
- def license_score
53
- self.class.license_score(filename)
27
+ alias_method :match, :license
28
+ alias_method :path, :filename
54
29
  end
55
30
 
56
- def license?
57
- license_score != 0.0
58
- end
31
+ public
32
+ class LicenseFile < File
33
+ include Licensee::ContentHelper
59
34
 
60
- def attribution
61
- return nil unless license?
62
- matches = /^#{CopyrightMatcher::REGEX}$/i.match(content)
63
- matches[0].strip if matches
64
- end
35
+ def possible_matchers
36
+ [Matchers::Copyright, Matchers::Exact, Matchers::Dice]
37
+ end
65
38
 
66
- def package_score
67
- return 1.0 if filename =~ /[a-zA-Z0-9\-_]+\.gemspec/
68
- return 1.0 if filename =~ /package\.json/
69
- return 0.75 if filename =~ /bower.json/
70
- return 0.0
71
- end
39
+ def wordset
40
+ @wordset ||= create_word_set(content)
41
+ end
72
42
 
73
- def package?
74
- Licensee.package_manager_files? && package_score != 0.0
75
- end
43
+ def attribution
44
+ matches = /^#{Matchers::Copyright::REGEX}$/i.match(content)
45
+ matches[0].strip if matches
46
+ end
76
47
 
77
- class << self
78
- # Scores a given file as a potential license
79
- #
80
- # filename - (string) the name of the file to score
81
- #
82
- # Returns 1.0 if the file is definitely a license file (e.g, LICENSE)
83
- # Returns 0.9 if the file is almost certainly a license file (e.g., LICENSE.md)
84
- # Returns 0.8 if the file is probably a license file (e.g., COPYING, COPYING.md)
85
- # Returns 0.7 if the file is potentially a license file (e.g., LICENSE.php)
86
- # Returns 0.5 if the file is likely a license file (MIT-LICENSE)
87
- # Returns 0.0 if the file is definitely not a license file (e.g., index.php)
88
- def license_score(filename)
48
+ def self.name_score(filename)
89
49
  return 1.0 if filename =~ /\A(un)?licen[sc]e\z/i
90
50
  return 0.9 if filename =~ /\A(un)?licen[sc]e\.(md|markdown|txt)\z/i
91
51
  return 0.8 if filename =~ /\Acopy(ing|right)(\.[^.]+)?\z/i
@@ -94,5 +54,25 @@ class Licensee
94
54
  return 0.0
95
55
  end
96
56
  end
57
+
58
+ class PackageInfo < File
59
+ def possible_matchers
60
+ case ::File.extname(filename)
61
+ when ".gemspec"
62
+ [Matchers::Gemspec]
63
+ when ".json"
64
+ [Matchers::NpmBower]
65
+ else
66
+ []
67
+ end
68
+ end
69
+
70
+ def self.name_score(filename)
71
+ return 1.0 if ::File.extname(filename) == ".gemspec"
72
+ return 1.0 if filename == "package.json"
73
+ return 0.75 if filename == "bower.json"
74
+ return 0.0
75
+ end
76
+ end
97
77
  end
98
78
  end
@@ -1,3 +1,3 @@
1
1
  class Licensee
2
- VERSION = "5.0.0"
2
+ VERSION = "6.0.0b1"
3
3
  end
data/licensee.gemspec ADDED
@@ -0,0 +1,26 @@
1
+ require File.expand_path("../lib/licensee/version", __FILE__)
2
+
3
+ Gem::Specification.new do |gem|
4
+ gem.name = 'licensee'
5
+ gem.version = Licensee::VERSION
6
+
7
+ gem.summary = "A Ruby Gem to detect under what license a project is distributed"
8
+ gem.description = "Licensee automates the process of reading LICENSE files and compares their contents to known licenses using a fancy math thing called Rabin-Karp rolling-hashes."
9
+
10
+ gem.authors = ['Ben Balter']
11
+ gem.email = 'ben.balter@github.com'
12
+ gem.homepage = 'http://github.com/benbalter/licensee'
13
+ gem.license = "MIT"
14
+
15
+ gem.bindir = 'bin'
16
+ gem.executables << 'licensee'
17
+
18
+ gem.add_dependency('rugged', '~> 0.23')
19
+ gem.add_development_dependency('pry', '~> 0.9')
20
+ gem.add_development_dependency('shoulda', '~> 3.5')
21
+ gem.add_development_dependency('rake', '~> 10.3')
22
+ gem.add_development_dependency('ruby-prof', '~> 0.15')
23
+
24
+ # ensure the gem is built out of versioned files
25
+ gem.files = Dir['Rakefile', '{bin,lib,man,test,vendor,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files -z`.split("\0")
26
+ end
@@ -0,0 +1 @@
1
+ ref: refs/heads/master
@@ -0,0 +1,4 @@
1
+ [core]
2
+ repositoryformatversion = 0
3
+ filemode = true
4
+ bare = true
@@ -0,0 +1,2 @@
1
+ P pack-03c0879445cabcc37f91d97c7955465adef26f4a.pack
2
+
@@ -0,0 +1,2 @@
1
+ # pack-refs with: peeled fully-peeled
2
+ 0a3da38fcc340e45990520699bc3ec5570e60c11 refs/heads/master