licensee 9.10.0 → 9.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/licensee +5 -4
- data/lib/licensee.rb +4 -2
- data/lib/licensee/commands/detect.rb +10 -5
- data/lib/licensee/commands/diff.rb +7 -8
- data/lib/licensee/commands/license_path.rb +2 -0
- data/lib/licensee/commands/version.rb +2 -0
- data/lib/licensee/content_helper.rb +234 -85
- data/lib/licensee/hash_helper.rb +7 -5
- data/lib/licensee/license.rb +32 -25
- data/lib/licensee/license_field.rb +4 -1
- data/lib/licensee/license_meta.rb +3 -0
- data/lib/licensee/license_rules.rb +2 -0
- data/lib/licensee/matchers.rb +2 -0
- data/lib/licensee/matchers/cabal.rb +16 -2
- data/lib/licensee/matchers/cargo.rb +3 -1
- data/lib/licensee/matchers/copyright.rb +6 -4
- data/lib/licensee/matchers/cran.rb +7 -3
- data/lib/licensee/matchers/dice.rb +6 -4
- data/lib/licensee/matchers/dist_zilla.rb +3 -1
- data/lib/licensee/matchers/exact.rb +3 -0
- data/lib/licensee/matchers/gemspec.rb +8 -5
- data/lib/licensee/matchers/matcher.rb +3 -1
- data/lib/licensee/matchers/npm_bower.rb +3 -1
- data/lib/licensee/matchers/package.rb +3 -0
- data/lib/licensee/matchers/reference.rb +3 -1
- data/lib/licensee/matchers/spdx.rb +3 -1
- data/lib/licensee/project_files.rb +2 -0
- data/lib/licensee/project_files/license_file.rb +13 -10
- data/lib/licensee/project_files/package_manager_file.rb +3 -0
- data/lib/licensee/project_files/project_file.rb +12 -4
- data/lib/licensee/project_files/readme_file.rb +5 -3
- data/lib/licensee/projects.rb +2 -0
- data/lib/licensee/projects/fs_project.rb +3 -0
- data/lib/licensee/projects/git_project.rb +19 -11
- data/lib/licensee/projects/github_project.rb +6 -1
- data/lib/licensee/projects/project.rb +16 -5
- data/lib/licensee/rule.rb +2 -0
- data/lib/licensee/version.rb +3 -1
- data/licensee.gemspec +47 -0
- data/spec/bin_spec.rb +3 -1
- data/spec/fixture_spec.rb +46 -0
- data/spec/fixtures/bsd-3-noendorseslash/LICENSE +30 -0
- data/spec/fixtures/cc0-cal2013/LICENSE +116 -0
- data/spec/fixtures/cc0-cc/LICENSE +121 -0
- data/spec/fixtures/detect.json +9 -7
- data/spec/fixtures/fixtures.yml +130 -0
- data/spec/fixtures/html/license.html +262 -0
- data/spec/fixtures/license-hashes.json +41 -0
- data/spec/fixtures/mit-optional/LICENSE.txt +21 -0
- data/spec/fixtures/multiple-arrs/LICENSE +30 -0
- data/spec/fixtures/unlicense-noinfo/LICENSE +22 -0
- data/spec/integration_spec.rb +68 -2
- data/spec/licensee/commands/detect_spec.rb +10 -6
- data/spec/licensee/commands/license_path_spec.rb +3 -1
- data/spec/licensee/commands/version_spec.rb +3 -1
- data/spec/licensee/content_helper_spec.rb +184 -67
- data/spec/licensee/hash_helper_spec.rb +3 -1
- data/spec/licensee/license_field_spec.rb +5 -3
- data/spec/licensee/license_meta_spec.rb +16 -12
- data/spec/licensee/license_rules_spec.rb +6 -2
- data/spec/licensee/license_spec.rb +62 -37
- data/spec/licensee/matchers/cabal_matcher_spec.rb +97 -2
- data/spec/licensee/matchers/cargo_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/copyright_matcher_spec.rb +7 -5
- data/spec/licensee/matchers/cran_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/dice_matcher_spec.rb +15 -12
- data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/exact_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/gemspec_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/matcher_spec.rb +6 -2
- data/spec/licensee/matchers/npm_bower_matcher_spec.rb +5 -3
- data/spec/licensee/matchers/package_matcher_spec.rb +6 -2
- data/spec/licensee/matchers/reference_matcher_spec.rb +4 -2
- data/spec/licensee/matchers/spdx_matcher_spec.rb +5 -2
- data/spec/licensee/project_files/license_file_spec.rb +20 -18
- data/spec/licensee/project_files/package_info_spec.rb +5 -1
- data/spec/licensee/project_files/project_file_spec.rb +8 -2
- data/spec/licensee/project_files/readme_file_spec.rb +4 -1
- data/spec/licensee/project_spec.rb +24 -17
- data/spec/licensee/projects/git_project_spec.rb +23 -0
- data/spec/licensee/projects/github_project_spec.rb +8 -5
- data/spec/licensee/rule_spec.rb +6 -3
- data/spec/licensee_spec.rb +12 -9
- data/spec/spec_helper.rb +28 -9
- data/spec/vendored_license_spec.rb +29 -10
- data/vendor/choosealicense.com/_data/meta.yml +0 -4
- data/vendor/choosealicense.com/_data/rules.yml +3 -0
- data/vendor/choosealicense.com/_licenses/0bsd.txt +39 -0
- data/vendor/choosealicense.com/_licenses/afl-3.0.txt +7 -6
- data/vendor/choosealicense.com/_licenses/agpl-3.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/apache-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/artistic-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/bsd-2-clause.txt +8 -6
- data/vendor/choosealicense.com/_licenses/bsd-3-clause-clear.txt +2 -2
- data/vendor/choosealicense.com/_licenses/bsd-3-clause.txt +12 -10
- data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +61 -0
- data/vendor/choosealicense.com/_licenses/bsl-1.0.txt +5 -2
- data/vendor/choosealicense.com/_licenses/cc-by-4.0.txt +3 -1
- data/vendor/choosealicense.com/_licenses/cc-by-sa-4.0.txt +3 -1
- data/vendor/choosealicense.com/_licenses/cc0-1.0.txt +113 -105
- data/vendor/choosealicense.com/_licenses/cecill-2.1.txt +579 -0
- data/vendor/choosealicense.com/_licenses/ecl-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/epl-1.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/epl-2.0.txt +3 -4
- data/vendor/choosealicense.com/_licenses/eupl-1.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/eupl-1.2.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/isc.txt +0 -1
- data/vendor/choosealicense.com/_licenses/lgpl-2.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/lgpl-3.0.txt +1 -3
- data/vendor/choosealicense.com/_licenses/lppl-1.3c.txt +1 -2
- data/vendor/choosealicense.com/_licenses/mit.txt +1 -2
- data/vendor/choosealicense.com/_licenses/mpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-pl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-rl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ncsa.txt +21 -22
- data/vendor/choosealicense.com/_licenses/odbl-1.0.txt +573 -0
- data/vendor/choosealicense.com/_licenses/ofl-1.1.txt +4 -2
- data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/postgresql.txt +4 -5
- data/vendor/choosealicense.com/_licenses/unlicense.txt +1 -2
- data/vendor/choosealicense.com/_licenses/upl-1.0.txt +4 -5
- data/vendor/choosealicense.com/_licenses/vim.txt +111 -0
- data/vendor/choosealicense.com/_licenses/wtfpl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/zlib.txt +4 -2
- metadata +77 -19
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bd74e74f07c0dc4111b3cbbebf62ae3ab140a788ed511491b5995d45f2371de0
|
|
4
|
+
data.tar.gz: 90561b8b85ded55614a88e38b0469e80d110bd113e3a6ddfee9bdd840dec237a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bc64456f4f05411ab8152ce3ade984aa52851f39399d0bffe1e6c51fa4f9de6b00fcd84008bed0d0b71b0569a9799ba0b539ce6323fee55b4436c583ae4ce92c
|
|
7
|
+
data.tar.gz: d797f174a17e2f91eb8f4e7c9eff9b3d4631f58531d4d57cc9da98e75e062345a52354a9ca3a8e1f991cb70a92b91e7b600814dddca345c3528742011e92c8ed
|
data/bin/licensee
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
2
3
|
|
|
3
4
|
require 'dotenv/load'
|
|
4
5
|
require 'thor'
|
|
@@ -15,9 +16,9 @@ class LicenseeCLI < Thor
|
|
|
15
16
|
|
|
16
17
|
def path
|
|
17
18
|
@path ||= if !options[:remote] || args.first =~ %r{^https://}
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
args.first || Dir.pwd
|
|
20
|
+
else
|
|
21
|
+
"https://github.com/#{args.first}"
|
|
21
22
|
end
|
|
22
23
|
end
|
|
23
24
|
|
|
@@ -32,6 +33,6 @@ class LicenseeCLI < Thor
|
|
|
32
33
|
end
|
|
33
34
|
|
|
34
35
|
commands_dir = File.expand_path '../lib/licensee/commands/', __dir__
|
|
35
|
-
Dir["#{commands_dir}/*.rb"].each { |c| require(c) }
|
|
36
|
+
Dir["#{commands_dir}/*.rb"].sort.each { |c| require(c) }
|
|
36
37
|
|
|
37
38
|
LicenseeCLI.start(ARGV)
|
data/lib/licensee.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require_relative 'licensee/version'
|
|
2
4
|
require 'forwardable'
|
|
3
5
|
require 'pathname'
|
|
@@ -19,7 +21,7 @@ module Licensee
|
|
|
19
21
|
CONFIDENCE_THRESHOLD = 98
|
|
20
22
|
|
|
21
23
|
# Base domain from which to build license URLs
|
|
22
|
-
DOMAIN = 'http://choosealicense.com'
|
|
24
|
+
DOMAIN = 'http://choosealicense.com'
|
|
23
25
|
|
|
24
26
|
class << self
|
|
25
27
|
attr_writer :confidence_threshold
|
|
@@ -49,7 +51,7 @@ module Licensee
|
|
|
49
51
|
end
|
|
50
52
|
|
|
51
53
|
# Inverse of the confidence threshold, represented as a float
|
|
52
|
-
# By default this will be 0.
|
|
54
|
+
# By default this will be 0.02
|
|
53
55
|
def inverse_confidence_threshold
|
|
54
56
|
@inverse_confidence_threshold ||=
|
|
55
57
|
(1 - Licensee.confidence_threshold / 100.0).round(2)
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
class LicenseeCLI < Thor
|
|
2
4
|
# Methods to call when displaying information about ProjectFiles
|
|
3
5
|
MATCHED_FILE_METHODS = %i[
|
|
@@ -21,11 +23,11 @@ class LicenseeCLI < Thor
|
|
|
21
23
|
|
|
22
24
|
rows = []
|
|
23
25
|
rows << if project.license
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
['License:', project.license.spdx_id]
|
|
27
|
+
elsif !project.licenses.empty?
|
|
28
|
+
['Licenses:', project.licenses.map(&:spdx_id)]
|
|
29
|
+
else
|
|
30
|
+
['License:', set_color('None', :red)]
|
|
29
31
|
end
|
|
30
32
|
|
|
31
33
|
unless project.matched_files.empty?
|
|
@@ -40,8 +42,10 @@ class LicenseeCLI < Thor
|
|
|
40
42
|
|
|
41
43
|
MATCHED_FILE_METHODS.each do |method|
|
|
42
44
|
next unless matched_file.respond_to? method
|
|
45
|
+
|
|
43
46
|
value = matched_file.public_send method
|
|
44
47
|
next if value.nil?
|
|
48
|
+
|
|
45
49
|
rows << [humanize(method, :method), humanize(value, method)]
|
|
46
50
|
end
|
|
47
51
|
print_table rows, indent: 2
|
|
@@ -51,6 +55,7 @@ class LicenseeCLI < Thor
|
|
|
51
55
|
|
|
52
56
|
licenses = licenses_by_similarity(matched_file)
|
|
53
57
|
next if licenses.empty?
|
|
58
|
+
|
|
54
59
|
say ' Closest non-matching licenses:'
|
|
55
60
|
rows = licenses[0...3].map do |license, similarity|
|
|
56
61
|
spdx_id = license.meta['spdx-id']
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'tmpdir'
|
|
2
4
|
|
|
3
5
|
class LicenseeCLI < Thor
|
|
@@ -39,26 +41,23 @@ class LicenseeCLI < Thor
|
|
|
39
41
|
|
|
40
42
|
def license_to_diff
|
|
41
43
|
return options[:license_to_diff] if options[:license_to_diff]
|
|
42
|
-
return project.license_file if remote?
|
|
44
|
+
return project.license_file if remote? || STDIN.tty? && project.license_file
|
|
43
45
|
|
|
44
46
|
@license_to_diff ||= begin
|
|
45
|
-
if STDIN.tty?
|
|
46
|
-
error 'You must pipe license contents to the command via STDIN'
|
|
47
|
-
exit 1
|
|
48
|
-
end
|
|
49
|
-
|
|
50
47
|
Licensee::ProjectFiles::LicenseFile.new(STDIN.read, 'LICENSE')
|
|
51
48
|
end
|
|
52
49
|
end
|
|
53
50
|
|
|
54
51
|
def expected_license
|
|
55
|
-
|
|
52
|
+
if options[:license]
|
|
53
|
+
@expected_license ||= Licensee::License.find options[:license]
|
|
54
|
+
end
|
|
56
55
|
return @expected_license if @expected_license
|
|
57
56
|
|
|
58
57
|
if options[:license]
|
|
59
58
|
error "#{options[:license]} is not a valid license"
|
|
60
59
|
else
|
|
61
|
-
error '
|
|
60
|
+
error 'Usage: provide a license to diff against with --license (spdx name)'
|
|
62
61
|
end
|
|
63
62
|
|
|
64
63
|
error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
|
|
@@ -1,38 +1,129 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'set'
|
|
2
4
|
require 'digest'
|
|
3
5
|
|
|
4
6
|
module Licensee
|
|
5
7
|
module ContentHelper
|
|
6
8
|
DIGEST = Digest::SHA1
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
9
|
+
START_REGEX = /\A\s*/.freeze
|
|
10
|
+
END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i.freeze
|
|
11
|
+
REGEXES = {
|
|
12
|
+
hrs: /^\s*[=\-\*]{3,}\s*$/,
|
|
13
|
+
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
|
|
14
|
+
whitespace: /\s+/,
|
|
15
|
+
markdown_headings: /#{START_REGEX}#+/,
|
|
16
|
+
version: /#{START_REGEX}version.*$/i,
|
|
17
|
+
span_markup: /[_*~]+(.*?)[_*~]+/,
|
|
18
|
+
link_markup: /\[(.+?)\]\(.+?\)/,
|
|
19
|
+
block_markup: /^\s*>/,
|
|
20
|
+
border_markup: /^[\*-](.*?)[\*-]$/,
|
|
21
|
+
comment_markup: %r{^\s*?[/\*]{1,2}},
|
|
22
|
+
url: %r{#{START_REGEX}https?://[^ ]+\n},
|
|
23
|
+
bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
|
|
24
|
+
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
|
25
|
+
quote_begin: /[`'"‘“]/,
|
|
26
|
+
quote_end: /[`'"’”]/,
|
|
27
|
+
cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
|
|
28
|
+
cc0_info: /For more information, please see\s*\S+zero\S+/im,
|
|
29
|
+
cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
|
|
30
|
+
unlicense_info: /For more information, please.*\S+unlicense\S+/im,
|
|
31
|
+
mit_optional: /\(including the next paragraph\)/i
|
|
32
|
+
}.freeze
|
|
33
|
+
NORMALIZATIONS = {
|
|
34
|
+
lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
|
|
35
|
+
https: { from: /http:/, to: 'https:' },
|
|
36
|
+
ampersands: { from: '&', to: 'and' },
|
|
37
|
+
dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
|
|
38
|
+
quotes: {
|
|
39
|
+
from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
|
|
40
|
+
to: '"\1"'
|
|
41
|
+
}
|
|
42
|
+
}.freeze
|
|
43
|
+
|
|
44
|
+
# Legally equivalent words that schould be ignored for comparison
|
|
45
|
+
# See https://spdx.org/spdx-license-list/matching-guidelines
|
|
46
|
+
VARIETAL_WORDS = {
|
|
47
|
+
'acknowledgment' => 'acknowledgement',
|
|
48
|
+
'analogue' => 'analog',
|
|
49
|
+
'analyse' => 'analyze',
|
|
50
|
+
'artefact' => 'artifact',
|
|
51
|
+
'authorisation' => 'authorization',
|
|
52
|
+
'authorised' => 'authorized',
|
|
53
|
+
'calibre' => 'caliber',
|
|
54
|
+
'cancelled' => 'canceled',
|
|
55
|
+
'capitalisations' => 'capitalizations',
|
|
56
|
+
'catalogue' => 'catalog',
|
|
57
|
+
'categorise' => 'categorize',
|
|
58
|
+
'centre' => 'center',
|
|
59
|
+
'emphasised' => 'emphasized',
|
|
60
|
+
'favour' => 'favor',
|
|
61
|
+
'favourite' => 'favorite',
|
|
62
|
+
'fulfil' => 'fulfill',
|
|
63
|
+
'fulfilment' => 'fulfillment',
|
|
64
|
+
'initialise' => 'initialize',
|
|
65
|
+
'judgment' => 'judgement',
|
|
66
|
+
'labelling' => 'labeling',
|
|
67
|
+
'labour' => 'labor',
|
|
68
|
+
'licence' => 'license',
|
|
69
|
+
'maximise' => 'maximize',
|
|
70
|
+
'modelled' => 'modeled',
|
|
71
|
+
'modelling' => 'modeling',
|
|
72
|
+
'offence' => 'offense',
|
|
73
|
+
'optimise' => 'optimize',
|
|
74
|
+
'organisation' => 'organization',
|
|
75
|
+
'organise' => 'organize',
|
|
76
|
+
'practise' => 'practice',
|
|
77
|
+
'programme' => 'program',
|
|
78
|
+
'realise' => 'realize',
|
|
79
|
+
'recognise' => 'recognize',
|
|
80
|
+
'signalling' => 'signaling',
|
|
81
|
+
'sub-license' => 'sublicense',
|
|
82
|
+
'sub license' => 'sublicense',
|
|
83
|
+
'utilisation' => 'utilization',
|
|
84
|
+
'whilst' => 'while',
|
|
85
|
+
'wilful' => 'wilfull',
|
|
86
|
+
'non-commercial' => 'noncommercial',
|
|
87
|
+
'cent' => 'percent',
|
|
88
|
+
'owner' => 'holder'
|
|
89
|
+
}.freeze
|
|
90
|
+
STRIP_METHODS = %i[
|
|
91
|
+
cc0_optional
|
|
92
|
+
unlicense_optional
|
|
93
|
+
hrs
|
|
94
|
+
markdown_headings
|
|
95
|
+
borders
|
|
96
|
+
title
|
|
97
|
+
version
|
|
98
|
+
url
|
|
99
|
+
copyright
|
|
100
|
+
title
|
|
101
|
+
block_markup
|
|
102
|
+
span_markup
|
|
103
|
+
link_markup
|
|
104
|
+
developed_by
|
|
105
|
+
end_of_terms
|
|
106
|
+
whitespace
|
|
107
|
+
mit_optional
|
|
108
|
+
].freeze
|
|
18
109
|
|
|
19
110
|
# A set of each word in the license, without duplicates
|
|
20
111
|
def wordset
|
|
21
|
-
@wordset ||=
|
|
22
|
-
content_normalized.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
|
|
23
|
-
end
|
|
112
|
+
@wordset ||= content_normalized&.scan(%r{(?:[\w\/](?:'s|(?<=s)')?)+})&.to_set
|
|
24
113
|
end
|
|
25
114
|
|
|
26
115
|
# Number of characteres in the normalized content
|
|
27
116
|
def length
|
|
28
117
|
return 0 unless content_normalized
|
|
118
|
+
|
|
29
119
|
content_normalized.length
|
|
30
120
|
end
|
|
31
121
|
|
|
32
122
|
# Number of characters that could be added/removed to still be
|
|
33
123
|
# considered a potential match
|
|
34
124
|
def max_delta
|
|
35
|
-
@max_delta ||=
|
|
125
|
+
@max_delta ||= fields_normalized.size * 10 +
|
|
126
|
+
(length * Licensee.inverse_confidence_threshold).to_i
|
|
36
127
|
end
|
|
37
128
|
|
|
38
129
|
# Given another license or project file, calculates the difference in length
|
|
@@ -43,8 +134,9 @@ module Licensee
|
|
|
43
134
|
# Given another license or project file, calculates the similarity
|
|
44
135
|
# as a percentage of words in common
|
|
45
136
|
def similarity(other)
|
|
46
|
-
overlap = (
|
|
47
|
-
total =
|
|
137
|
+
overlap = (wordset_fieldless & other.wordset).size
|
|
138
|
+
total = wordset_fieldless.size + other.wordset.size -
|
|
139
|
+
fields_normalized_set.size
|
|
48
140
|
100.0 * (overlap * 2.0 / total)
|
|
49
141
|
end
|
|
50
142
|
|
|
@@ -59,34 +151,21 @@ module Licensee
|
|
|
59
151
|
# content with attribution first to detect attribuion in LicenseFile
|
|
60
152
|
def content_without_title_and_version
|
|
61
153
|
@content_without_title_and_version ||= begin
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
strip_version(string).strip
|
|
154
|
+
@_content = nil
|
|
155
|
+
ops = %i[html hrs comments markdown_headings title version]
|
|
156
|
+
ops.each { |op| strip(op) }
|
|
157
|
+
_content
|
|
67
158
|
end
|
|
68
159
|
end
|
|
69
160
|
|
|
70
|
-
# Content without title, version, copyright, whitespace, or insturctions
|
|
71
|
-
#
|
|
72
|
-
# wrap - Optional width to wrap the content
|
|
73
|
-
#
|
|
74
|
-
# Returns a string
|
|
75
161
|
def content_normalized(wrap: nil)
|
|
76
|
-
return unless content
|
|
77
162
|
@content_normalized ||= begin
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
|
|
85
|
-
string = normalize_lists(string)
|
|
86
|
-
string = normalize_quotes(string)
|
|
87
|
-
string = normalize_https(string)
|
|
88
|
-
string = strip_markup(string)
|
|
89
|
-
strip_whitespace(string)
|
|
163
|
+
@_content = content_without_title_and_version.downcase
|
|
164
|
+
|
|
165
|
+
(NORMALIZATIONS.keys + %i[spelling bullets]).each { |op| normalize(op) }
|
|
166
|
+
STRIP_METHODS.each { |op| strip(op) }
|
|
167
|
+
|
|
168
|
+
_content
|
|
90
169
|
end
|
|
91
170
|
|
|
92
171
|
if wrap.nil?
|
|
@@ -96,14 +175,24 @@ module Licensee
|
|
|
96
175
|
end
|
|
97
176
|
end
|
|
98
177
|
|
|
178
|
+
# Backwards compatibalize constants to avoid a breaking change
|
|
179
|
+
def self.const_missing(const)
|
|
180
|
+
key = const.to_s.downcase.gsub('_regex', '').to_sym
|
|
181
|
+
REGEXES[key] || super
|
|
182
|
+
end
|
|
183
|
+
|
|
99
184
|
# Wrap text to the given line length
|
|
100
185
|
def self.wrap(text, line_width = 80)
|
|
101
186
|
return if text.nil?
|
|
187
|
+
|
|
102
188
|
text = text.clone
|
|
189
|
+
text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
|
|
103
190
|
text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
|
|
104
191
|
|
|
105
192
|
text = text.split("\n").collect do |line|
|
|
106
|
-
if line
|
|
193
|
+
if line =~ REGEXES[:hrs]
|
|
194
|
+
line
|
|
195
|
+
elsif line.length > line_width
|
|
107
196
|
line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
|
|
108
197
|
else
|
|
109
198
|
line
|
|
@@ -114,82 +203,142 @@ module Licensee
|
|
|
114
203
|
end
|
|
115
204
|
|
|
116
205
|
def self.format_percent(float)
|
|
117
|
-
"#{format('
|
|
206
|
+
"#{format('%<float>.2f', float: float)}%"
|
|
118
207
|
end
|
|
119
208
|
|
|
120
209
|
def self.title_regex
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
210
|
+
@title_regex ||= begin
|
|
211
|
+
licenses = Licensee::License.all(hidden: true, psuedo: false)
|
|
212
|
+
titles = licenses.map(&:title_regex)
|
|
213
|
+
|
|
214
|
+
# Title regex must include the version to support matching within
|
|
215
|
+
# families, but for sake of normalization, we can be less strict
|
|
216
|
+
without_versions = licenses.map do |license|
|
|
217
|
+
next if license.title == license.name_without_version
|
|
218
|
+
|
|
219
|
+
Regexp.new Regexp.escape(license.name_without_version), 'i'
|
|
220
|
+
end
|
|
221
|
+
titles.concat(without_versions.compact)
|
|
131
222
|
|
|
132
|
-
|
|
223
|
+
/#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
|
|
224
|
+
end
|
|
133
225
|
end
|
|
134
226
|
|
|
135
227
|
private
|
|
136
228
|
|
|
137
|
-
def
|
|
138
|
-
|
|
229
|
+
def _content
|
|
230
|
+
@_content ||= content.to_s.dup.strip
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
def strip(regex_or_sym)
|
|
234
|
+
return unless _content
|
|
235
|
+
|
|
236
|
+
if regex_or_sym.is_a?(Symbol)
|
|
237
|
+
meth = "strip_#{regex_or_sym}"
|
|
238
|
+
return send(meth) if respond_to?(meth, true)
|
|
239
|
+
|
|
240
|
+
unless REGEXES[regex_or_sym]
|
|
241
|
+
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
regex_or_sym = REGEXES[regex_or_sym]
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
@_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
def strip_title
|
|
251
|
+
while _content =~ ContentHelper.title_regex
|
|
252
|
+
strip(ContentHelper.title_regex)
|
|
253
|
+
end
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
def strip_borders
|
|
257
|
+
normalize(REGEXES[:border_markup], '\1')
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def strip_comments
|
|
261
|
+
lines = _content.split("\n")
|
|
262
|
+
return if lines.count == 1
|
|
263
|
+
return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
|
|
264
|
+
|
|
265
|
+
strip(:comment_markup)
|
|
139
266
|
end
|
|
140
267
|
|
|
141
|
-
def
|
|
142
|
-
|
|
268
|
+
def strip_copyright
|
|
269
|
+
regex = Regexp.union(Matchers::Copyright::REGEX, REGEXES[:all_rights_reserved])
|
|
270
|
+
strip(regex) while _content =~ regex
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
def strip_cc0_optional
|
|
274
|
+
return unless _content.include? 'associating cc0'
|
|
275
|
+
|
|
276
|
+
strip(REGEXES[:cc_legal_code])
|
|
277
|
+
strip(REGEXES[:cc0_info])
|
|
278
|
+
strip(REGEXES[:cc0_disclaimer])
|
|
143
279
|
end
|
|
144
280
|
|
|
145
|
-
def
|
|
146
|
-
|
|
281
|
+
def strip_unlicense_optional
|
|
282
|
+
return unless _content.include? 'unlicense'
|
|
283
|
+
|
|
284
|
+
strip(REGEXES[:unlicense_info])
|
|
147
285
|
end
|
|
148
286
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
287
|
+
def strip_end_of_terms
|
|
288
|
+
body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
|
|
289
|
+
@_content = body
|
|
152
290
|
end
|
|
153
291
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
strip(string, MARKDOWN_HEADING_REGEX)
|
|
292
|
+
def strip_span_markup
|
|
293
|
+
normalize(REGEXES[:span_markup], '\1')
|
|
157
294
|
end
|
|
158
295
|
|
|
159
|
-
def
|
|
160
|
-
|
|
296
|
+
def strip_link_markup
|
|
297
|
+
normalize(REGEXES[:link_markup], '\1')
|
|
161
298
|
end
|
|
162
299
|
|
|
163
|
-
def
|
|
164
|
-
|
|
300
|
+
def strip_html
|
|
301
|
+
return unless respond_to?(:filename) && filename
|
|
302
|
+
return unless File.extname(filename) =~ /\.html?/i
|
|
303
|
+
|
|
304
|
+
require 'reverse_markdown'
|
|
305
|
+
@_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
|
|
165
306
|
end
|
|
166
307
|
|
|
167
|
-
def
|
|
168
|
-
|
|
308
|
+
def normalize(from_or_key, to = nil)
|
|
309
|
+
operation = { from: from_or_key, to: to } if to
|
|
310
|
+
operation ||= NORMALIZATIONS[from_or_key]
|
|
311
|
+
|
|
312
|
+
if operation
|
|
313
|
+
@_content = _content.gsub operation[:from], operation[:to]
|
|
314
|
+
elsif respond_to?("normalize_#{from_or_key}", true)
|
|
315
|
+
send("normalize_#{from_or_key}")
|
|
316
|
+
else
|
|
317
|
+
raise ArgumentError, "#{from_or_key} is an invalid normalization"
|
|
318
|
+
end
|
|
169
319
|
end
|
|
170
320
|
|
|
171
|
-
def
|
|
172
|
-
|
|
321
|
+
def normalize_spelling
|
|
322
|
+
normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
|
|
173
323
|
end
|
|
174
324
|
|
|
175
|
-
def
|
|
176
|
-
|
|
325
|
+
def normalize_bullets
|
|
326
|
+
normalize(REGEXES[:bullet], "\n\n* ")
|
|
327
|
+
normalize(/\)\s+\(/, ')(')
|
|
177
328
|
end
|
|
178
329
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
# strip double quotes if we still want to allow possessives
|
|
182
|
-
def normalize_quotes(string)
|
|
183
|
-
string.gsub(/#{QUOTE_BEGIN_REGEX}+([\w -]*?\w)#{QUOTE_END_REGEX}+/,
|
|
184
|
-
'"\1"')
|
|
330
|
+
def wordset_fieldless
|
|
331
|
+
@wordset_fieldless ||= wordset - fields_normalized_set
|
|
185
332
|
end
|
|
186
333
|
|
|
187
|
-
|
|
188
|
-
|
|
334
|
+
# Returns an array of strings of substitutable fields in normalized content
|
|
335
|
+
def fields_normalized
|
|
336
|
+
@fields_normalized ||=
|
|
337
|
+
content_normalized.scan(LicenseField::FIELD_REGEX).flatten
|
|
189
338
|
end
|
|
190
339
|
|
|
191
|
-
def
|
|
192
|
-
|
|
340
|
+
def fields_normalized_set
|
|
341
|
+
@fields_normalized_set ||= fields_normalized.to_set
|
|
193
342
|
end
|
|
194
343
|
end
|
|
195
344
|
end
|