licensee 9.9.3 → 9.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/licensee +1 -0
- data/lib/licensee.rb +4 -2
- data/lib/licensee/commands/detect.rb +9 -4
- data/lib/licensee/commands/diff.rb +7 -8
- data/lib/licensee/commands/license_path.rb +2 -0
- data/lib/licensee/commands/version.rb +2 -0
- data/lib/licensee/content_helper.rb +188 -83
- data/lib/licensee/hash_helper.rb +2 -0
- data/lib/licensee/license.rb +18 -7
- data/lib/licensee/license_field.rb +8 -1
- data/lib/licensee/license_meta.rb +3 -0
- data/lib/licensee/license_rules.rb +2 -0
- data/lib/licensee/matchers.rb +2 -0
- data/lib/licensee/matchers/cabal.rb +16 -2
- data/lib/licensee/matchers/cargo.rb +3 -1
- data/lib/licensee/matchers/copyright.rb +4 -2
- data/lib/licensee/matchers/cran.rb +7 -3
- data/lib/licensee/matchers/dice.rb +10 -2
- data/lib/licensee/matchers/dist_zilla.rb +3 -1
- data/lib/licensee/matchers/exact.rb +3 -0
- data/lib/licensee/matchers/gemspec.rb +8 -5
- data/lib/licensee/matchers/matcher.rb +3 -1
- data/lib/licensee/matchers/npm_bower.rb +3 -1
- data/lib/licensee/matchers/package.rb +3 -0
- data/lib/licensee/matchers/reference.rb +3 -1
- data/lib/licensee/matchers/spdx.rb +3 -1
- data/lib/licensee/project_files.rb +2 -0
- data/lib/licensee/project_files/license_file.rb +13 -10
- data/lib/licensee/project_files/package_manager_file.rb +3 -0
- data/lib/licensee/project_files/project_file.rb +12 -4
- data/lib/licensee/project_files/readme_file.rb +7 -5
- data/lib/licensee/projects.rb +2 -0
- data/lib/licensee/projects/fs_project.rb +3 -0
- data/lib/licensee/projects/git_project.rb +16 -8
- data/lib/licensee/projects/github_project.rb +29 -9
- data/lib/licensee/projects/project.rb +13 -2
- data/lib/licensee/rule.rb +2 -0
- data/lib/licensee/version.rb +3 -1
- data/spec/bin_spec.rb +2 -0
- data/spec/fixture_spec.rb +46 -0
- data/spec/fixtures/detect.json +8 -6
- data/spec/fixtures/fixtures.yml +110 -0
- data/spec/fixtures/html/license.html +262 -0
- data/spec/fixtures/license-hashes.json +39 -0
- data/spec/fixtures/mit-optional/LICENSE.txt +21 -0
- data/spec/integration_spec.rb +20 -0
- data/spec/licensee/commands/detect_spec.rb +6 -2
- data/spec/licensee/commands/license_path_spec.rb +2 -0
- data/spec/licensee/commands/version_spec.rb +2 -0
- data/spec/licensee/content_helper_spec.rb +152 -36
- data/spec/licensee/hash_helper_spec.rb +2 -0
- data/spec/licensee/license_field_spec.rb +7 -0
- data/spec/licensee/license_meta_spec.rb +2 -0
- data/spec/licensee/license_rules_spec.rb +2 -0
- data/spec/licensee/license_spec.rb +36 -11
- data/spec/licensee/matchers/cabal_matcher_spec.rb +93 -0
- data/spec/licensee/matchers/cargo_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/copyright_matcher_spec.rb +4 -2
- data/spec/licensee/matchers/cran_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/dice_matcher_spec.rb +4 -2
- data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/exact_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/gemspec_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/matcher_spec.rb +2 -0
- data/spec/licensee/matchers/npm_bower_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/package_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/reference_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/spdx_matcher_spec.rb +2 -0
- data/spec/licensee/project_files/license_file_spec.rb +4 -2
- data/spec/licensee/project_files/package_info_spec.rb +2 -0
- data/spec/licensee/project_files/project_file_spec.rb +3 -0
- data/spec/licensee/project_files/readme_file_spec.rb +11 -0
- data/spec/licensee/project_spec.rb +23 -3
- data/spec/licensee/projects/git_project_spec.rb +23 -0
- data/spec/licensee/projects/github_project_spec.rb +2 -0
- data/spec/licensee/rule_spec.rb +2 -0
- data/spec/licensee_spec.rb +3 -1
- data/spec/spec_helper.rb +29 -9
- data/spec/vendored_license_spec.rb +27 -8
- data/vendor/choosealicense.com/_data/meta.yml +0 -4
- data/vendor/choosealicense.com/_licenses/0bsd.txt +39 -0
- data/vendor/choosealicense.com/_licenses/afl-3.0.txt +7 -6
- data/vendor/choosealicense.com/_licenses/agpl-3.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/apache-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/artistic-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/bsd-2-clause.txt +8 -6
- data/vendor/choosealicense.com/_licenses/bsd-3-clause-clear.txt +1 -2
- data/vendor/choosealicense.com/_licenses/bsd-3-clause.txt +12 -10
- data/vendor/choosealicense.com/_licenses/bsl-1.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/cc-by-4.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/cc-by-sa-4.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/cc0-1.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/cecill-2.1.txt +579 -0
- data/vendor/choosealicense.com/_licenses/ecl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/epl-1.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/epl-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/eupl-1.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/eupl-1.2.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/isc.txt +0 -1
- data/vendor/choosealicense.com/_licenses/lgpl-2.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/lgpl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/lppl-1.3c.txt +0 -1
- data/vendor/choosealicense.com/_licenses/mit.txt +0 -1
- data/vendor/choosealicense.com/_licenses/mpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-pl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-rl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ncsa.txt +0 -1
- data/vendor/choosealicense.com/_licenses/odbl-1.0.txt +573 -0
- data/vendor/choosealicense.com/_licenses/ofl-1.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/postgresql.txt +2 -3
- data/vendor/choosealicense.com/_licenses/unlicense.txt +1 -2
- data/vendor/choosealicense.com/_licenses/upl-1.0.txt +3 -4
- data/vendor/choosealicense.com/_licenses/wtfpl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/zlib.txt +0 -1
- metadata +41 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d7dc009b0467cfb305e8dac051ed4e78d2f35d0454f2e14cef0952338540f8ae
|
4
|
+
data.tar.gz: 3c27bb3dd3cea6d62fab826b81fab93d9152893851b541c91d69406cdf9fcbd8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07f19b33f70b0b73611d34e474f2aa4e4d7f62c7451cdf70f76774beceac2c75ab3d1cc5048061a848b979a54032aad6dd1ba278c79cd798029efd6873d54425
|
7
|
+
data.tar.gz: 96c5e66f65307e7feb2c00b3f06661b093c60995d049f7fd19cc27b76881965a1d33768a16b0a3a3b085e9392ef828dce9cf692ee04255dd9ea2c6d22da38da6
|
data/bin/licensee
CHANGED
data/lib/licensee.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'licensee/version'
|
2
4
|
require 'forwardable'
|
3
5
|
require 'pathname'
|
@@ -19,7 +21,7 @@ module Licensee
|
|
19
21
|
CONFIDENCE_THRESHOLD = 98
|
20
22
|
|
21
23
|
# Base domain from which to build license URLs
|
22
|
-
DOMAIN = 'http://choosealicense.com'
|
24
|
+
DOMAIN = 'http://choosealicense.com'
|
23
25
|
|
24
26
|
class << self
|
25
27
|
attr_writer :confidence_threshold
|
@@ -49,7 +51,7 @@ module Licensee
|
|
49
51
|
end
|
50
52
|
|
51
53
|
# Inverse of the confidence threshold, represented as a float
|
52
|
-
# By default this will be 0.
|
54
|
+
# By default this will be 0.02
|
53
55
|
def inverse_confidence_threshold
|
54
56
|
@inverse_confidence_threshold ||=
|
55
57
|
(1 - Licensee.confidence_threshold / 100.0).round(2)
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class LicenseeCLI < Thor
|
2
4
|
# Methods to call when displaying information about ProjectFiles
|
3
5
|
MATCHED_FILE_METHODS = %i[
|
@@ -40,8 +42,10 @@ class LicenseeCLI < Thor
|
|
40
42
|
|
41
43
|
MATCHED_FILE_METHODS.each do |method|
|
42
44
|
next unless matched_file.respond_to? method
|
45
|
+
|
43
46
|
value = matched_file.public_send method
|
44
47
|
next if value.nil?
|
48
|
+
|
45
49
|
rows << [humanize(method, :method), humanize(value, method)]
|
46
50
|
end
|
47
51
|
print_table rows, indent: 2
|
@@ -49,8 +53,9 @@ class LicenseeCLI < Thor
|
|
49
53
|
next unless matched_file.is_a? Licensee::ProjectFiles::LicenseFile
|
50
54
|
next if matched_file.confidence == 100
|
51
55
|
|
52
|
-
licenses =
|
56
|
+
licenses = licenses_by_similarity(matched_file)
|
53
57
|
next if licenses.empty?
|
58
|
+
|
54
59
|
say ' Closest non-matching licenses:'
|
55
60
|
rows = licenses[0...3].map do |license, similarity|
|
56
61
|
spdx_id = license.meta['spdx-id']
|
@@ -89,15 +94,15 @@ class LicenseeCLI < Thor
|
|
89
94
|
end
|
90
95
|
end
|
91
96
|
|
92
|
-
def
|
97
|
+
def licenses_by_similarity(matched_file)
|
93
98
|
matcher = Licensee::Matchers::Dice.new(matched_file)
|
94
99
|
potential_licenses = Licensee.licenses(hidden: true).select(&:wordset)
|
95
100
|
matcher.instance_variable_set('@potential_licenses', potential_licenses)
|
96
|
-
matcher.
|
101
|
+
matcher.licenses_by_similarity
|
97
102
|
end
|
98
103
|
|
99
104
|
def closest_license_key(matched_file)
|
100
|
-
licenses =
|
105
|
+
licenses = licenses_by_similarity(matched_file)
|
101
106
|
licenses.first.first.key unless licenses.empty?
|
102
107
|
end
|
103
108
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'tmpdir'
|
2
4
|
|
3
5
|
class LicenseeCLI < Thor
|
@@ -39,26 +41,23 @@ class LicenseeCLI < Thor
|
|
39
41
|
|
40
42
|
def license_to_diff
|
41
43
|
return options[:license_to_diff] if options[:license_to_diff]
|
42
|
-
return project.license_file if remote?
|
44
|
+
return project.license_file if remote? || STDIN.tty? && project.license_file
|
43
45
|
|
44
46
|
@license_to_diff ||= begin
|
45
|
-
if STDIN.tty?
|
46
|
-
error 'You must pipe license contents to the command via STDIN'
|
47
|
-
exit 1
|
48
|
-
end
|
49
|
-
|
50
47
|
Licensee::ProjectFiles::LicenseFile.new(STDIN.read, 'LICENSE')
|
51
48
|
end
|
52
49
|
end
|
53
50
|
|
54
51
|
def expected_license
|
55
|
-
|
52
|
+
if options[:license]
|
53
|
+
@expected_license ||= Licensee::License.find options[:license]
|
54
|
+
end
|
56
55
|
return @expected_license if @expected_license
|
57
56
|
|
58
57
|
if options[:license]
|
59
58
|
error "#{options[:license]} is not a valid license"
|
60
59
|
else
|
61
|
-
error '
|
60
|
+
error 'Usage: provide a license to diff against with --license (spdx name)'
|
62
61
|
end
|
63
62
|
|
64
63
|
error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
|
@@ -1,31 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require 'digest'
|
3
5
|
|
4
6
|
module Licensee
|
5
7
|
module ContentHelper
|
6
8
|
DIGEST = Digest::SHA1
|
7
|
-
|
8
|
-
|
9
|
+
START_REGEX = /\A\s*/.freeze
|
10
|
+
END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i.freeze
|
9
11
|
ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
REGEXES = {
|
13
|
+
hrs: /^\s*[=\-\*]{3,}\s*$/,
|
14
|
+
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
|
15
|
+
whitespace: /\s+/,
|
16
|
+
markdown_headings: /#{START_REGEX}#+/,
|
17
|
+
version: /#{START_REGEX}version.*$/i,
|
18
|
+
span_markup: /[_*~]+(.*?)[_*~]+/,
|
19
|
+
link_markup: /\[(.+?)\]\(.+?\)/,
|
20
|
+
block_markup: /^\s*>/,
|
21
|
+
border_markup: /^[\*-](.*?)[\*-]$/,
|
22
|
+
comment_markup: %r{^\s*?[/\*]{1,2}},
|
23
|
+
url: %r{#{START_REGEX}https?://[^ ]+\n},
|
24
|
+
bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
|
25
|
+
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
26
|
+
quote_begin: /[`'"‘“]/,
|
27
|
+
quote_end: /[`'"’”]/,
|
28
|
+
mit_optional: /\(including the next paragraph\)/i
|
29
|
+
}.freeze
|
30
|
+
NORMALIZATIONS = {
|
31
|
+
lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
|
32
|
+
https: { from: /http:/, to: 'https:' },
|
33
|
+
ampersands: { from: '&', to: 'and' },
|
34
|
+
dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
|
35
|
+
quotes: {
|
36
|
+
from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
|
37
|
+
to: '"\1"'
|
38
|
+
}
|
39
|
+
}.freeze
|
40
|
+
|
41
|
+
# Legally equivalent words that should be ignored for comparison
|
42
|
+
# See https://spdx.org/spdx-license-list/matching-guidelines
|
43
|
+
VARIETAL_WORDS = {
|
44
|
+
'acknowledgment' => 'acknowledgement',
|
45
|
+
'analogue' => 'analog',
|
46
|
+
'analyse' => 'analyze',
|
47
|
+
'artefact' => 'artifact',
|
48
|
+
'authorisation' => 'authorization',
|
49
|
+
'authorised' => 'authorized',
|
50
|
+
'calibre' => 'caliber',
|
51
|
+
'cancelled' => 'canceled',
|
52
|
+
'capitalisations' => 'capitalizations',
|
53
|
+
'catalogue' => 'catalog',
|
54
|
+
'categorise' => 'categorize',
|
55
|
+
'centre' => 'center',
|
56
|
+
'emphasised' => 'emphasized',
|
57
|
+
'favour' => 'favor',
|
58
|
+
'favourite' => 'favorite',
|
59
|
+
'fulfil' => 'fulfill',
|
60
|
+
'fulfilment' => 'fulfillment',
|
61
|
+
'initialise' => 'initialize',
|
62
|
+
'judgment' => 'judgement',
|
63
|
+
'labelling' => 'labeling',
|
64
|
+
'labour' => 'labor',
|
65
|
+
'licence' => 'license',
|
66
|
+
'maximise' => 'maximize',
|
67
|
+
'modelled' => 'modeled',
|
68
|
+
'modelling' => 'modeling',
|
69
|
+
'offence' => 'offense',
|
70
|
+
'optimise' => 'optimize',
|
71
|
+
'organisation' => 'organization',
|
72
|
+
'organise' => 'organize',
|
73
|
+
'practise' => 'practice',
|
74
|
+
'programme' => 'program',
|
75
|
+
'realise' => 'realize',
|
76
|
+
'recognise' => 'recognize',
|
77
|
+
'signalling' => 'signaling',
|
78
|
+
'sub-license' => 'sublicense',
|
79
|
+
'sub license' => 'sublicense',
|
80
|
+
'utilisation' => 'utilization',
|
81
|
+
'whilst' => 'while',
|
82
|
+
'wilful' => 'wilfull',
|
83
|
+
'non-commercial' => 'noncommercial',
|
84
|
+
'cent' => 'percent',
|
85
|
+
'owner' => 'holder'
|
86
|
+
}.freeze
|
87
|
+
STRIP_METHODS = %i[
|
88
|
+
hrs markdown_headings borders title version url copyright
|
89
|
+
block_markup span_markup link_markup
|
90
|
+
all_rights_reserved developed_by end_of_terms whitespace
|
91
|
+
mit_optional
|
92
|
+
].freeze
|
18
93
|
|
19
94
|
# A set of each word in the license, without duplicates
|
20
95
|
def wordset
|
21
|
-
@wordset ||=
|
22
|
-
content_normalized.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
|
23
|
-
end
|
96
|
+
@wordset ||= content_normalized&.scan(/(?:\w(?:'s|(?<=s)')?)+/)&.to_set
|
24
97
|
end
|
25
98
|
|
26
99
|
# Number of characters in the normalized content
|
27
100
|
def length
|
28
101
|
return 0 unless content_normalized
|
102
|
+
|
29
103
|
content_normalized.length
|
30
104
|
end
|
31
105
|
|
@@ -43,8 +117,10 @@ module Licensee
|
|
43
117
|
# Given another license or project file, calculates the similarity
|
44
118
|
# as a percentage of words in common
|
45
119
|
def similarity(other)
|
46
|
-
|
47
|
-
|
120
|
+
wordset_fieldless = wordset - LicenseField.keys
|
121
|
+
fields_removed = wordset.size - wordset_fieldless.size
|
122
|
+
overlap = (wordset_fieldless & other.wordset).size
|
123
|
+
total = wordset_fieldless.size + other.wordset.size - fields_removed
|
48
124
|
100.0 * (overlap * 2.0 / total)
|
49
125
|
end
|
50
126
|
|
@@ -59,34 +135,21 @@ module Licensee
|
|
59
135
|
# content with attribution first to detect attribution in LicenseFile
|
60
136
|
def content_without_title_and_version
|
61
137
|
@content_without_title_and_version ||= begin
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
strip_version(string).strip
|
138
|
+
@_content = nil
|
139
|
+
ops = %i[html hrs comments markdown_headings title version]
|
140
|
+
ops.each { |op| strip(op) }
|
141
|
+
_content
|
67
142
|
end
|
68
143
|
end
|
69
144
|
|
70
|
-
# Content without title, version, copyright, whitespace, or insturctions
|
71
|
-
#
|
72
|
-
# wrap - Optional width to wrap the content
|
73
|
-
#
|
74
|
-
# Returns a string
|
75
145
|
def content_normalized(wrap: nil)
|
76
|
-
return unless content
|
77
146
|
@content_normalized ||= begin
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
|
85
|
-
string = normalize_lists(string)
|
86
|
-
string = normalize_quotes(string)
|
87
|
-
string = normalize_https(string)
|
88
|
-
string = strip_markup(string)
|
89
|
-
strip_whitespace(string)
|
147
|
+
@_content = content_without_title_and_version.downcase
|
148
|
+
|
149
|
+
(NORMALIZATIONS.keys + %i[spelling bullets]).each { |op| normalize(op) }
|
150
|
+
STRIP_METHODS.each { |op| strip(op) }
|
151
|
+
|
152
|
+
_content
|
90
153
|
end
|
91
154
|
|
92
155
|
if wrap.nil?
|
@@ -96,14 +159,24 @@ module Licensee
|
|
96
159
|
end
|
97
160
|
end
|
98
161
|
|
162
|
+
# Resolve legacy *_REGEX constants via REGEXES to avoid a breaking change
|
163
|
+
def self.const_missing(const)
|
164
|
+
key = const.to_s.downcase.gsub('_regex', '').to_sym
|
165
|
+
REGEXES[key] || super
|
166
|
+
end
|
167
|
+
|
99
168
|
# Wrap text to the given line length
|
100
169
|
def self.wrap(text, line_width = 80)
|
101
170
|
return if text.nil?
|
171
|
+
|
102
172
|
text = text.clone
|
173
|
+
text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
|
103
174
|
text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
|
104
175
|
|
105
176
|
text = text.split("\n").collect do |line|
|
106
|
-
if line
|
177
|
+
if line =~ REGEXES[:hrs]
|
178
|
+
line
|
179
|
+
elsif line.length > line_width
|
107
180
|
line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
|
108
181
|
else
|
109
182
|
line
|
@@ -114,82 +187,114 @@ module Licensee
|
|
114
187
|
end
|
115
188
|
|
116
189
|
def self.format_percent(float)
|
117
|
-
"#{format('
|
190
|
+
"#{format('%<float>.2f', float: float)}%"
|
118
191
|
end
|
119
192
|
|
120
193
|
def self.title_regex
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
194
|
+
@title_regex ||= begin
|
195
|
+
licenses = Licensee::License.all(hidden: true, psuedo: false)
|
196
|
+
titles = licenses.map(&:title_regex)
|
197
|
+
|
198
|
+
# Title regex must include the version to support matching within
|
199
|
+
# families, but for sake of normalization, we can be less strict
|
200
|
+
without_versions = licenses.map do |license|
|
201
|
+
next if license.title == license.name_without_version
|
202
|
+
|
203
|
+
Regexp.new Regexp.escape(license.name_without_version), 'i'
|
204
|
+
end
|
205
|
+
titles.concat(without_versions.compact)
|
131
206
|
|
132
|
-
|
207
|
+
/#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
|
208
|
+
end
|
133
209
|
end
|
134
210
|
|
135
211
|
private
|
136
212
|
|
137
|
-
def
|
138
|
-
|
213
|
+
def _content
|
214
|
+
@_content ||= content.to_s.dup.strip
|
139
215
|
end
|
140
216
|
|
141
|
-
def
|
142
|
-
|
217
|
+
def strip(regex_or_sym)
|
218
|
+
return unless _content
|
219
|
+
|
220
|
+
if regex_or_sym.is_a?(Symbol)
|
221
|
+
meth = "strip_#{regex_or_sym}"
|
222
|
+
return send(meth) if respond_to?(meth, true)
|
223
|
+
|
224
|
+
unless REGEXES[regex_or_sym]
|
225
|
+
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
|
226
|
+
end
|
227
|
+
|
228
|
+
regex_or_sym = REGEXES[regex_or_sym]
|
229
|
+
end
|
230
|
+
|
231
|
+
@_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
|
143
232
|
end
|
144
233
|
|
145
|
-
def
|
146
|
-
|
234
|
+
def strip_title
|
235
|
+
while _content =~ ContentHelper.title_regex
|
236
|
+
strip(ContentHelper.title_regex)
|
237
|
+
end
|
147
238
|
end
|
148
239
|
|
149
|
-
|
150
|
-
|
151
|
-
strip(string, HR_REGEX)
|
240
|
+
def strip_borders
|
241
|
+
normalize(REGEXES[:border_markup], '\1')
|
152
242
|
end
|
153
243
|
|
154
|
-
|
155
|
-
|
156
|
-
|
244
|
+
def strip_comments
|
245
|
+
lines = _content.split("\n")
|
246
|
+
return if lines.count == 1
|
247
|
+
return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
|
248
|
+
|
249
|
+
strip(:comment_markup)
|
157
250
|
end
|
158
251
|
|
159
|
-
def
|
160
|
-
|
252
|
+
def strip_copyright
|
253
|
+
regex = Matchers::Copyright::REGEX
|
254
|
+
strip(regex) while _content =~ regex
|
161
255
|
end
|
162
256
|
|
163
|
-
def
|
164
|
-
|
257
|
+
def strip_end_of_terms
|
258
|
+
body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
|
259
|
+
@_content = body
|
165
260
|
end
|
166
261
|
|
167
|
-
def
|
168
|
-
|
262
|
+
def strip_span_markup
|
263
|
+
normalize(REGEXES[:span_markup], '\1')
|
169
264
|
end
|
170
265
|
|
171
|
-
def
|
172
|
-
|
266
|
+
def strip_link_markup
|
267
|
+
normalize(REGEXES[:link_markup], '\1')
|
173
268
|
end
|
174
269
|
|
175
|
-
def
|
176
|
-
|
270
|
+
def strip_html
|
271
|
+
return unless respond_to?(:filename) && filename
|
272
|
+
return unless File.extname(filename) =~ /\.html?/i
|
273
|
+
|
274
|
+
require 'reverse_markdown'
|
275
|
+
@_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
|
177
276
|
end
|
178
277
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
278
|
+
def normalize(from_or_key, to = nil)
|
279
|
+
operation = { from: from_or_key, to: to } if to
|
280
|
+
operation ||= NORMALIZATIONS[from_or_key]
|
281
|
+
|
282
|
+
if operation
|
283
|
+
@_content = _content.gsub operation[:from], operation[:to]
|
284
|
+
elsif respond_to?("normalize_#{from_or_key}", true)
|
285
|
+
send("normalize_#{from_or_key}")
|
286
|
+
else
|
287
|
+
raise ArgumentError, "#{from_or_key} is an invalid normalization"
|
288
|
+
end
|
185
289
|
end
|
186
290
|
|
187
|
-
def
|
188
|
-
|
291
|
+
def normalize_spelling
|
292
|
+
normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
|
189
293
|
end
|
190
294
|
|
191
|
-
def
|
192
|
-
|
295
|
+
def normalize_bullets
|
296
|
+
normalize(REGEXES[:bullet], "\n\n* ")
|
297
|
+
normalize(/\)\s+\(/, ')(')
|
193
298
|
end
|
194
299
|
end
|
195
300
|
end
|