licensee 9.10.0 → 9.13.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/licensee +5 -4
- data/lib/licensee.rb +4 -2
- data/lib/licensee/commands/detect.rb +10 -5
- data/lib/licensee/commands/diff.rb +7 -8
- data/lib/licensee/commands/license_path.rb +2 -0
- data/lib/licensee/commands/version.rb +2 -0
- data/lib/licensee/content_helper.rb +234 -85
- data/lib/licensee/hash_helper.rb +7 -5
- data/lib/licensee/license.rb +32 -25
- data/lib/licensee/license_field.rb +4 -1
- data/lib/licensee/license_meta.rb +3 -0
- data/lib/licensee/license_rules.rb +2 -0
- data/lib/licensee/matchers.rb +2 -0
- data/lib/licensee/matchers/cabal.rb +16 -2
- data/lib/licensee/matchers/cargo.rb +3 -1
- data/lib/licensee/matchers/copyright.rb +6 -4
- data/lib/licensee/matchers/cran.rb +7 -3
- data/lib/licensee/matchers/dice.rb +6 -4
- data/lib/licensee/matchers/dist_zilla.rb +3 -1
- data/lib/licensee/matchers/exact.rb +3 -0
- data/lib/licensee/matchers/gemspec.rb +8 -5
- data/lib/licensee/matchers/matcher.rb +3 -1
- data/lib/licensee/matchers/npm_bower.rb +3 -1
- data/lib/licensee/matchers/package.rb +3 -0
- data/lib/licensee/matchers/reference.rb +3 -1
- data/lib/licensee/matchers/spdx.rb +3 -1
- data/lib/licensee/project_files.rb +2 -0
- data/lib/licensee/project_files/license_file.rb +13 -10
- data/lib/licensee/project_files/package_manager_file.rb +3 -0
- data/lib/licensee/project_files/project_file.rb +12 -4
- data/lib/licensee/project_files/readme_file.rb +5 -3
- data/lib/licensee/projects.rb +2 -0
- data/lib/licensee/projects/fs_project.rb +3 -0
- data/lib/licensee/projects/git_project.rb +19 -11
- data/lib/licensee/projects/github_project.rb +6 -1
- data/lib/licensee/projects/project.rb +16 -5
- data/lib/licensee/rule.rb +2 -0
- data/lib/licensee/version.rb +3 -1
- data/licensee.gemspec +47 -0
- data/spec/bin_spec.rb +3 -1
- data/spec/fixture_spec.rb +46 -0
- data/spec/fixtures/bsd-3-noendorseslash/LICENSE +30 -0
- data/spec/fixtures/cc0-cal2013/LICENSE +116 -0
- data/spec/fixtures/cc0-cc/LICENSE +121 -0
- data/spec/fixtures/detect.json +9 -7
- data/spec/fixtures/fixtures.yml +130 -0
- data/spec/fixtures/html/license.html +262 -0
- data/spec/fixtures/license-hashes.json +41 -0
- data/spec/fixtures/mit-optional/LICENSE.txt +21 -0
- data/spec/fixtures/multiple-arrs/LICENSE +30 -0
- data/spec/fixtures/unlicense-noinfo/LICENSE +22 -0
- data/spec/integration_spec.rb +68 -2
- data/spec/licensee/commands/detect_spec.rb +10 -6
- data/spec/licensee/commands/license_path_spec.rb +3 -1
- data/spec/licensee/commands/version_spec.rb +3 -1
- data/spec/licensee/content_helper_spec.rb +184 -67
- data/spec/licensee/hash_helper_spec.rb +3 -1
- data/spec/licensee/license_field_spec.rb +5 -3
- data/spec/licensee/license_meta_spec.rb +16 -12
- data/spec/licensee/license_rules_spec.rb +6 -2
- data/spec/licensee/license_spec.rb +62 -37
- data/spec/licensee/matchers/cabal_matcher_spec.rb +97 -2
- data/spec/licensee/matchers/cargo_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/copyright_matcher_spec.rb +7 -5
- data/spec/licensee/matchers/cran_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/dice_matcher_spec.rb +15 -12
- data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/exact_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/gemspec_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/matcher_spec.rb +6 -2
- data/spec/licensee/matchers/npm_bower_matcher_spec.rb +5 -3
- data/spec/licensee/matchers/package_matcher_spec.rb +6 -2
- data/spec/licensee/matchers/reference_matcher_spec.rb +4 -2
- data/spec/licensee/matchers/spdx_matcher_spec.rb +5 -2
- data/spec/licensee/project_files/license_file_spec.rb +20 -18
- data/spec/licensee/project_files/package_info_spec.rb +5 -1
- data/spec/licensee/project_files/project_file_spec.rb +8 -2
- data/spec/licensee/project_files/readme_file_spec.rb +4 -1
- data/spec/licensee/project_spec.rb +24 -17
- data/spec/licensee/projects/git_project_spec.rb +23 -0
- data/spec/licensee/projects/github_project_spec.rb +8 -5
- data/spec/licensee/rule_spec.rb +6 -3
- data/spec/licensee_spec.rb +12 -9
- data/spec/spec_helper.rb +28 -9
- data/spec/vendored_license_spec.rb +29 -10
- data/vendor/choosealicense.com/_data/meta.yml +0 -4
- data/vendor/choosealicense.com/_data/rules.yml +3 -0
- data/vendor/choosealicense.com/_licenses/0bsd.txt +39 -0
- data/vendor/choosealicense.com/_licenses/afl-3.0.txt +7 -6
- data/vendor/choosealicense.com/_licenses/agpl-3.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/apache-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/artistic-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/bsd-2-clause.txt +8 -6
- data/vendor/choosealicense.com/_licenses/bsd-3-clause-clear.txt +2 -2
- data/vendor/choosealicense.com/_licenses/bsd-3-clause.txt +12 -10
- data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +61 -0
- data/vendor/choosealicense.com/_licenses/bsl-1.0.txt +5 -2
- data/vendor/choosealicense.com/_licenses/cc-by-4.0.txt +3 -1
- data/vendor/choosealicense.com/_licenses/cc-by-sa-4.0.txt +3 -1
- data/vendor/choosealicense.com/_licenses/cc0-1.0.txt +113 -105
- data/vendor/choosealicense.com/_licenses/cecill-2.1.txt +579 -0
- data/vendor/choosealicense.com/_licenses/ecl-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/epl-1.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/epl-2.0.txt +3 -4
- data/vendor/choosealicense.com/_licenses/eupl-1.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/eupl-1.2.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/isc.txt +0 -1
- data/vendor/choosealicense.com/_licenses/lgpl-2.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/lgpl-3.0.txt +1 -3
- data/vendor/choosealicense.com/_licenses/lppl-1.3c.txt +1 -2
- data/vendor/choosealicense.com/_licenses/mit.txt +1 -2
- data/vendor/choosealicense.com/_licenses/mpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-pl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-rl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ncsa.txt +21 -22
- data/vendor/choosealicense.com/_licenses/odbl-1.0.txt +573 -0
- data/vendor/choosealicense.com/_licenses/ofl-1.1.txt +4 -2
- data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/postgresql.txt +4 -5
- data/vendor/choosealicense.com/_licenses/unlicense.txt +1 -2
- data/vendor/choosealicense.com/_licenses/upl-1.0.txt +4 -5
- data/vendor/choosealicense.com/_licenses/vim.txt +111 -0
- data/vendor/choosealicense.com/_licenses/wtfpl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/zlib.txt +4 -2
- metadata +77 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bd74e74f07c0dc4111b3cbbebf62ae3ab140a788ed511491b5995d45f2371de0
|
4
|
+
data.tar.gz: 90561b8b85ded55614a88e38b0469e80d110bd113e3a6ddfee9bdd840dec237a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bc64456f4f05411ab8152ce3ade984aa52851f39399d0bffe1e6c51fa4f9de6b00fcd84008bed0d0b71b0569a9799ba0b539ce6323fee55b4436c583ae4ce92c
|
7
|
+
data.tar.gz: d797f174a17e2f91eb8f4e7c9eff9b3d4631f58531d4d57cc9da98e75e062345a52354a9ca3a8e1f991cb70a92b91e7b600814dddca345c3528742011e92c8ed
|
data/bin/licensee
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
require 'dotenv/load'
|
4
5
|
require 'thor'
|
@@ -15,9 +16,9 @@ class LicenseeCLI < Thor
|
|
15
16
|
|
16
17
|
def path
|
17
18
|
@path ||= if !options[:remote] || args.first =~ %r{^https://}
|
18
|
-
|
19
|
-
|
20
|
-
|
19
|
+
args.first || Dir.pwd
|
20
|
+
else
|
21
|
+
"https://github.com/#{args.first}"
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
@@ -32,6 +33,6 @@ class LicenseeCLI < Thor
|
|
32
33
|
end
|
33
34
|
|
34
35
|
commands_dir = File.expand_path '../lib/licensee/commands/', __dir__
|
35
|
-
Dir["#{commands_dir}/*.rb"].each { |c| require(c) }
|
36
|
+
Dir["#{commands_dir}/*.rb"].sort.each { |c| require(c) }
|
36
37
|
|
37
38
|
LicenseeCLI.start(ARGV)
|
data/lib/licensee.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'licensee/version'
|
2
4
|
require 'forwardable'
|
3
5
|
require 'pathname'
|
@@ -19,7 +21,7 @@ module Licensee
|
|
19
21
|
CONFIDENCE_THRESHOLD = 98
|
20
22
|
|
21
23
|
# Base domain from which to build license URLs
|
22
|
-
DOMAIN = 'http://choosealicense.com'
|
24
|
+
DOMAIN = 'http://choosealicense.com'
|
23
25
|
|
24
26
|
class << self
|
25
27
|
attr_writer :confidence_threshold
|
@@ -49,7 +51,7 @@ module Licensee
|
|
49
51
|
end
|
50
52
|
|
51
53
|
# Inverse of the confidence threshold, represented as a float
|
52
|
-
# By default this will be 0.
|
54
|
+
# By default this will be 0.02
|
53
55
|
def inverse_confidence_threshold
|
54
56
|
@inverse_confidence_threshold ||=
|
55
57
|
(1 - Licensee.confidence_threshold / 100.0).round(2)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class LicenseeCLI < Thor
|
2
4
|
# Methods to call when displaying information about ProjectFiles
|
3
5
|
MATCHED_FILE_METHODS = %i[
|
@@ -21,11 +23,11 @@ class LicenseeCLI < Thor
|
|
21
23
|
|
22
24
|
rows = []
|
23
25
|
rows << if project.license
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
['License:', project.license.spdx_id]
|
27
|
+
elsif !project.licenses.empty?
|
28
|
+
['Licenses:', project.licenses.map(&:spdx_id)]
|
29
|
+
else
|
30
|
+
['License:', set_color('None', :red)]
|
29
31
|
end
|
30
32
|
|
31
33
|
unless project.matched_files.empty?
|
@@ -40,8 +42,10 @@ class LicenseeCLI < Thor
|
|
40
42
|
|
41
43
|
MATCHED_FILE_METHODS.each do |method|
|
42
44
|
next unless matched_file.respond_to? method
|
45
|
+
|
43
46
|
value = matched_file.public_send method
|
44
47
|
next if value.nil?
|
48
|
+
|
45
49
|
rows << [humanize(method, :method), humanize(value, method)]
|
46
50
|
end
|
47
51
|
print_table rows, indent: 2
|
@@ -51,6 +55,7 @@ class LicenseeCLI < Thor
|
|
51
55
|
|
52
56
|
licenses = licenses_by_similarity(matched_file)
|
53
57
|
next if licenses.empty?
|
58
|
+
|
54
59
|
say ' Closest non-matching licenses:'
|
55
60
|
rows = licenses[0...3].map do |license, similarity|
|
56
61
|
spdx_id = license.meta['spdx-id']
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'tmpdir'
|
2
4
|
|
3
5
|
class LicenseeCLI < Thor
|
@@ -39,26 +41,23 @@ class LicenseeCLI < Thor
|
|
39
41
|
|
40
42
|
def license_to_diff
|
41
43
|
return options[:license_to_diff] if options[:license_to_diff]
|
42
|
-
return project.license_file if remote?
|
44
|
+
return project.license_file if remote? || STDIN.tty? && project.license_file
|
43
45
|
|
44
46
|
@license_to_diff ||= begin
|
45
|
-
if STDIN.tty?
|
46
|
-
error 'You must pipe license contents to the command via STDIN'
|
47
|
-
exit 1
|
48
|
-
end
|
49
|
-
|
50
47
|
Licensee::ProjectFiles::LicenseFile.new(STDIN.read, 'LICENSE')
|
51
48
|
end
|
52
49
|
end
|
53
50
|
|
54
51
|
def expected_license
|
55
|
-
|
52
|
+
if options[:license]
|
53
|
+
@expected_license ||= Licensee::License.find options[:license]
|
54
|
+
end
|
56
55
|
return @expected_license if @expected_license
|
57
56
|
|
58
57
|
if options[:license]
|
59
58
|
error "#{options[:license]} is not a valid license"
|
60
59
|
else
|
61
|
-
error '
|
60
|
+
error 'Usage: provide a license to diff against with --license (spdx name)'
|
62
61
|
end
|
63
62
|
|
64
63
|
error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
|
@@ -1,38 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require 'digest'
|
3
5
|
|
4
6
|
module Licensee
|
5
7
|
module ContentHelper
|
6
8
|
DIGEST = Digest::SHA1
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
9
|
+
START_REGEX = /\A\s*/.freeze
|
10
|
+
END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i.freeze
|
11
|
+
REGEXES = {
|
12
|
+
hrs: /^\s*[=\-\*]{3,}\s*$/,
|
13
|
+
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
|
14
|
+
whitespace: /\s+/,
|
15
|
+
markdown_headings: /#{START_REGEX}#+/,
|
16
|
+
version: /#{START_REGEX}version.*$/i,
|
17
|
+
span_markup: /[_*~]+(.*?)[_*~]+/,
|
18
|
+
link_markup: /\[(.+?)\]\(.+?\)/,
|
19
|
+
block_markup: /^\s*>/,
|
20
|
+
border_markup: /^[\*-](.*?)[\*-]$/,
|
21
|
+
comment_markup: %r{^\s*?[/\*]{1,2}},
|
22
|
+
url: %r{#{START_REGEX}https?://[^ ]+\n},
|
23
|
+
bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
|
24
|
+
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
25
|
+
quote_begin: /[`'"‘“]/,
|
26
|
+
quote_end: /[`'"’”]/,
|
27
|
+
cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
|
28
|
+
cc0_info: /For more information, please see\s*\S+zero\S+/im,
|
29
|
+
cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
|
30
|
+
unlicense_info: /For more information, please.*\S+unlicense\S+/im,
|
31
|
+
mit_optional: /\(including the next paragraph\)/i
|
32
|
+
}.freeze
|
33
|
+
NORMALIZATIONS = {
|
34
|
+
lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
|
35
|
+
https: { from: /http:/, to: 'https:' },
|
36
|
+
ampersands: { from: '&', to: 'and' },
|
37
|
+
dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
|
38
|
+
quotes: {
|
39
|
+
from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
|
40
|
+
to: '"\1"'
|
41
|
+
}
|
42
|
+
}.freeze
|
43
|
+
|
44
|
+
# Legally equivalent words that should be ignored for comparison
|
45
|
+
# See https://spdx.org/spdx-license-list/matching-guidelines
|
46
|
+
VARIETAL_WORDS = {
|
47
|
+
'acknowledgment' => 'acknowledgement',
|
48
|
+
'analogue' => 'analog',
|
49
|
+
'analyse' => 'analyze',
|
50
|
+
'artefact' => 'artifact',
|
51
|
+
'authorisation' => 'authorization',
|
52
|
+
'authorised' => 'authorized',
|
53
|
+
'calibre' => 'caliber',
|
54
|
+
'cancelled' => 'canceled',
|
55
|
+
'capitalisations' => 'capitalizations',
|
56
|
+
'catalogue' => 'catalog',
|
57
|
+
'categorise' => 'categorize',
|
58
|
+
'centre' => 'center',
|
59
|
+
'emphasised' => 'emphasized',
|
60
|
+
'favour' => 'favor',
|
61
|
+
'favourite' => 'favorite',
|
62
|
+
'fulfil' => 'fulfill',
|
63
|
+
'fulfilment' => 'fulfillment',
|
64
|
+
'initialise' => 'initialize',
|
65
|
+
'judgment' => 'judgement',
|
66
|
+
'labelling' => 'labeling',
|
67
|
+
'labour' => 'labor',
|
68
|
+
'licence' => 'license',
|
69
|
+
'maximise' => 'maximize',
|
70
|
+
'modelled' => 'modeled',
|
71
|
+
'modelling' => 'modeling',
|
72
|
+
'offence' => 'offense',
|
73
|
+
'optimise' => 'optimize',
|
74
|
+
'organisation' => 'organization',
|
75
|
+
'organise' => 'organize',
|
76
|
+
'practise' => 'practice',
|
77
|
+
'programme' => 'program',
|
78
|
+
'realise' => 'realize',
|
79
|
+
'recognise' => 'recognize',
|
80
|
+
'signalling' => 'signaling',
|
81
|
+
'sub-license' => 'sublicense',
|
82
|
+
'sub license' => 'sublicense',
|
83
|
+
'utilisation' => 'utilization',
|
84
|
+
'whilst' => 'while',
|
85
|
+
'wilful' => 'wilfull',
|
86
|
+
'non-commercial' => 'noncommercial',
|
87
|
+
'cent' => 'percent',
|
88
|
+
'owner' => 'holder'
|
89
|
+
}.freeze
|
90
|
+
STRIP_METHODS = %i[
|
91
|
+
cc0_optional
|
92
|
+
unlicense_optional
|
93
|
+
hrs
|
94
|
+
markdown_headings
|
95
|
+
borders
|
96
|
+
title
|
97
|
+
version
|
98
|
+
url
|
99
|
+
copyright
|
100
|
+
title
|
101
|
+
block_markup
|
102
|
+
span_markup
|
103
|
+
link_markup
|
104
|
+
developed_by
|
105
|
+
end_of_terms
|
106
|
+
whitespace
|
107
|
+
mit_optional
|
108
|
+
].freeze
|
18
109
|
|
19
110
|
# A set of each word in the license, without duplicates
|
20
111
|
def wordset
|
21
|
-
@wordset ||=
|
22
|
-
content_normalized.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
|
23
|
-
end
|
112
|
+
@wordset ||= content_normalized&.scan(%r{(?:[\w\/](?:'s|(?<=s)')?)+})&.to_set
|
24
113
|
end
|
25
114
|
|
26
115
|
# Number of characters in the normalized content
|
27
116
|
def length
|
28
117
|
return 0 unless content_normalized
|
118
|
+
|
29
119
|
content_normalized.length
|
30
120
|
end
|
31
121
|
|
32
122
|
# Number of characters that could be added/removed to still be
|
33
123
|
# considered a potential match
|
34
124
|
def max_delta
|
35
|
-
@max_delta ||=
|
125
|
+
@max_delta ||= fields_normalized.size * 10 +
|
126
|
+
(length * Licensee.inverse_confidence_threshold).to_i
|
36
127
|
end
|
37
128
|
|
38
129
|
# Given another license or project file, calculates the difference in length
|
@@ -43,8 +134,9 @@ module Licensee
|
|
43
134
|
# Given another license or project file, calculates the similarity
|
44
135
|
# as a percentage of words in common
|
45
136
|
def similarity(other)
|
46
|
-
overlap = (
|
47
|
-
total =
|
137
|
+
overlap = (wordset_fieldless & other.wordset).size
|
138
|
+
total = wordset_fieldless.size + other.wordset.size -
|
139
|
+
fields_normalized_set.size
|
48
140
|
100.0 * (overlap * 2.0 / total)
|
49
141
|
end
|
50
142
|
|
@@ -59,34 +151,21 @@ module Licensee
|
|
59
151
|
# content with attribution first to detect attribution in LicenseFile
|
60
152
|
def content_without_title_and_version
|
61
153
|
@content_without_title_and_version ||= begin
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
strip_version(string).strip
|
154
|
+
@_content = nil
|
155
|
+
ops = %i[html hrs comments markdown_headings title version]
|
156
|
+
ops.each { |op| strip(op) }
|
157
|
+
_content
|
67
158
|
end
|
68
159
|
end
|
69
160
|
|
70
|
-
# Content without title, version, copyright, whitespace, or insturctions
|
71
|
-
#
|
72
|
-
# wrap - Optional width to wrap the content
|
73
|
-
#
|
74
|
-
# Returns a string
|
75
161
|
def content_normalized(wrap: nil)
|
76
|
-
return unless content
|
77
162
|
@content_normalized ||= begin
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
|
85
|
-
string = normalize_lists(string)
|
86
|
-
string = normalize_quotes(string)
|
87
|
-
string = normalize_https(string)
|
88
|
-
string = strip_markup(string)
|
89
|
-
strip_whitespace(string)
|
163
|
+
@_content = content_without_title_and_version.downcase
|
164
|
+
|
165
|
+
(NORMALIZATIONS.keys + %i[spelling bullets]).each { |op| normalize(op) }
|
166
|
+
STRIP_METHODS.each { |op| strip(op) }
|
167
|
+
|
168
|
+
_content
|
90
169
|
end
|
91
170
|
|
92
171
|
if wrap.nil?
|
@@ -96,14 +175,24 @@ module Licensee
|
|
96
175
|
end
|
97
176
|
end
|
98
177
|
|
178
|
+
# Keep the old constants backwards compatible to avoid a breaking change
|
179
|
+
def self.const_missing(const)
|
180
|
+
key = const.to_s.downcase.gsub('_regex', '').to_sym
|
181
|
+
REGEXES[key] || super
|
182
|
+
end
|
183
|
+
|
99
184
|
# Wrap text to the given line length
|
100
185
|
def self.wrap(text, line_width = 80)
|
101
186
|
return if text.nil?
|
187
|
+
|
102
188
|
text = text.clone
|
189
|
+
text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
|
103
190
|
text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
|
104
191
|
|
105
192
|
text = text.split("\n").collect do |line|
|
106
|
-
if line
|
193
|
+
if line =~ REGEXES[:hrs]
|
194
|
+
line
|
195
|
+
elsif line.length > line_width
|
107
196
|
line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
|
108
197
|
else
|
109
198
|
line
|
@@ -114,82 +203,142 @@ module Licensee
|
|
114
203
|
end
|
115
204
|
|
116
205
|
def self.format_percent(float)
|
117
|
-
"#{format('
|
206
|
+
"#{format('%<float>.2f', float: float)}%"
|
118
207
|
end
|
119
208
|
|
120
209
|
def self.title_regex
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
210
|
+
@title_regex ||= begin
|
211
|
+
licenses = Licensee::License.all(hidden: true, psuedo: false)
|
212
|
+
titles = licenses.map(&:title_regex)
|
213
|
+
|
214
|
+
# Title regex must include the version to support matching within
|
215
|
+
# families, but for sake of normalization, we can be less strict
|
216
|
+
without_versions = licenses.map do |license|
|
217
|
+
next if license.title == license.name_without_version
|
218
|
+
|
219
|
+
Regexp.new Regexp.escape(license.name_without_version), 'i'
|
220
|
+
end
|
221
|
+
titles.concat(without_versions.compact)
|
131
222
|
|
132
|
-
|
223
|
+
/#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
|
224
|
+
end
|
133
225
|
end
|
134
226
|
|
135
227
|
private
|
136
228
|
|
137
|
-
def
|
138
|
-
|
229
|
+
def _content
|
230
|
+
@_content ||= content.to_s.dup.strip
|
231
|
+
end
|
232
|
+
|
233
|
+
def strip(regex_or_sym)
|
234
|
+
return unless _content
|
235
|
+
|
236
|
+
if regex_or_sym.is_a?(Symbol)
|
237
|
+
meth = "strip_#{regex_or_sym}"
|
238
|
+
return send(meth) if respond_to?(meth, true)
|
239
|
+
|
240
|
+
unless REGEXES[regex_or_sym]
|
241
|
+
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
|
242
|
+
end
|
243
|
+
|
244
|
+
regex_or_sym = REGEXES[regex_or_sym]
|
245
|
+
end
|
246
|
+
|
247
|
+
@_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
|
248
|
+
end
|
249
|
+
|
250
|
+
def strip_title
|
251
|
+
while _content =~ ContentHelper.title_regex
|
252
|
+
strip(ContentHelper.title_regex)
|
253
|
+
end
|
254
|
+
end
|
255
|
+
|
256
|
+
def strip_borders
|
257
|
+
normalize(REGEXES[:border_markup], '\1')
|
258
|
+
end
|
259
|
+
|
260
|
+
def strip_comments
|
261
|
+
lines = _content.split("\n")
|
262
|
+
return if lines.count == 1
|
263
|
+
return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
|
264
|
+
|
265
|
+
strip(:comment_markup)
|
139
266
|
end
|
140
267
|
|
141
|
-
def
|
142
|
-
|
268
|
+
def strip_copyright
|
269
|
+
regex = Regexp.union(Matchers::Copyright::REGEX, REGEXES[:all_rights_reserved])
|
270
|
+
strip(regex) while _content =~ regex
|
271
|
+
end
|
272
|
+
|
273
|
+
def strip_cc0_optional
|
274
|
+
return unless _content.include? 'associating cc0'
|
275
|
+
|
276
|
+
strip(REGEXES[:cc_legal_code])
|
277
|
+
strip(REGEXES[:cc0_info])
|
278
|
+
strip(REGEXES[:cc0_disclaimer])
|
143
279
|
end
|
144
280
|
|
145
|
-
def
|
146
|
-
|
281
|
+
def strip_unlicense_optional
|
282
|
+
return unless _content.include? 'unlicense'
|
283
|
+
|
284
|
+
strip(REGEXES[:unlicense_info])
|
147
285
|
end
|
148
286
|
|
149
|
-
|
150
|
-
|
151
|
-
|
287
|
+
def strip_end_of_terms
|
288
|
+
body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
|
289
|
+
@_content = body
|
152
290
|
end
|
153
291
|
|
154
|
-
|
155
|
-
|
156
|
-
strip(string, MARKDOWN_HEADING_REGEX)
|
292
|
+
def strip_span_markup
|
293
|
+
normalize(REGEXES[:span_markup], '\1')
|
157
294
|
end
|
158
295
|
|
159
|
-
def
|
160
|
-
|
296
|
+
def strip_link_markup
|
297
|
+
normalize(REGEXES[:link_markup], '\1')
|
161
298
|
end
|
162
299
|
|
163
|
-
def
|
164
|
-
|
300
|
+
def strip_html
|
301
|
+
return unless respond_to?(:filename) && filename
|
302
|
+
return unless File.extname(filename) =~ /\.html?/i
|
303
|
+
|
304
|
+
require 'reverse_markdown'
|
305
|
+
@_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
|
165
306
|
end
|
166
307
|
|
167
|
-
def
|
168
|
-
|
308
|
+
def normalize(from_or_key, to = nil)
|
309
|
+
operation = { from: from_or_key, to: to } if to
|
310
|
+
operation ||= NORMALIZATIONS[from_or_key]
|
311
|
+
|
312
|
+
if operation
|
313
|
+
@_content = _content.gsub operation[:from], operation[:to]
|
314
|
+
elsif respond_to?("normalize_#{from_or_key}", true)
|
315
|
+
send("normalize_#{from_or_key}")
|
316
|
+
else
|
317
|
+
raise ArgumentError, "#{from_or_key} is an invalid normalization"
|
318
|
+
end
|
169
319
|
end
|
170
320
|
|
171
|
-
def
|
172
|
-
|
321
|
+
def normalize_spelling
|
322
|
+
normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
|
173
323
|
end
|
174
324
|
|
175
|
-
def
|
176
|
-
|
325
|
+
def normalize_bullets
|
326
|
+
normalize(REGEXES[:bullet], "\n\n* ")
|
327
|
+
normalize(/\)\s+\(/, ')(')
|
177
328
|
end
|
178
329
|
|
179
|
-
|
180
|
-
|
181
|
-
# strip double quotes if we still want to allow possessives
|
182
|
-
def normalize_quotes(string)
|
183
|
-
string.gsub(/#{QUOTE_BEGIN_REGEX}+([\w -]*?\w)#{QUOTE_END_REGEX}+/,
|
184
|
-
'"\1"')
|
330
|
+
def wordset_fieldless
|
331
|
+
@wordset_fieldless ||= wordset - fields_normalized_set
|
185
332
|
end
|
186
333
|
|
187
|
-
|
188
|
-
|
334
|
+
# Returns an array of strings of substitutable fields in normalized content
|
335
|
+
def fields_normalized
|
336
|
+
@fields_normalized ||=
|
337
|
+
content_normalized.scan(LicenseField::FIELD_REGEX).flatten
|
189
338
|
end
|
190
339
|
|
191
|
-
def
|
192
|
-
|
340
|
+
def fields_normalized_set
|
341
|
+
@fields_normalized_set ||= fields_normalized.to_set
|
193
342
|
end
|
194
343
|
end
|
195
344
|
end
|