licensee 9.9.3 → 9.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/licensee +1 -0
- data/lib/licensee.rb +4 -2
- data/lib/licensee/commands/detect.rb +9 -4
- data/lib/licensee/commands/diff.rb +7 -8
- data/lib/licensee/commands/license_path.rb +2 -0
- data/lib/licensee/commands/version.rb +2 -0
- data/lib/licensee/content_helper.rb +188 -83
- data/lib/licensee/hash_helper.rb +2 -0
- data/lib/licensee/license.rb +18 -7
- data/lib/licensee/license_field.rb +8 -1
- data/lib/licensee/license_meta.rb +3 -0
- data/lib/licensee/license_rules.rb +2 -0
- data/lib/licensee/matchers.rb +2 -0
- data/lib/licensee/matchers/cabal.rb +16 -2
- data/lib/licensee/matchers/cargo.rb +3 -1
- data/lib/licensee/matchers/copyright.rb +4 -2
- data/lib/licensee/matchers/cran.rb +7 -3
- data/lib/licensee/matchers/dice.rb +10 -2
- data/lib/licensee/matchers/dist_zilla.rb +3 -1
- data/lib/licensee/matchers/exact.rb +3 -0
- data/lib/licensee/matchers/gemspec.rb +8 -5
- data/lib/licensee/matchers/matcher.rb +3 -1
- data/lib/licensee/matchers/npm_bower.rb +3 -1
- data/lib/licensee/matchers/package.rb +3 -0
- data/lib/licensee/matchers/reference.rb +3 -1
- data/lib/licensee/matchers/spdx.rb +3 -1
- data/lib/licensee/project_files.rb +2 -0
- data/lib/licensee/project_files/license_file.rb +13 -10
- data/lib/licensee/project_files/package_manager_file.rb +3 -0
- data/lib/licensee/project_files/project_file.rb +12 -4
- data/lib/licensee/project_files/readme_file.rb +7 -5
- data/lib/licensee/projects.rb +2 -0
- data/lib/licensee/projects/fs_project.rb +3 -0
- data/lib/licensee/projects/git_project.rb +16 -8
- data/lib/licensee/projects/github_project.rb +29 -9
- data/lib/licensee/projects/project.rb +13 -2
- data/lib/licensee/rule.rb +2 -0
- data/lib/licensee/version.rb +3 -1
- data/spec/bin_spec.rb +2 -0
- data/spec/fixture_spec.rb +46 -0
- data/spec/fixtures/detect.json +8 -6
- data/spec/fixtures/fixtures.yml +110 -0
- data/spec/fixtures/html/license.html +262 -0
- data/spec/fixtures/license-hashes.json +39 -0
- data/spec/fixtures/mit-optional/LICENSE.txt +21 -0
- data/spec/integration_spec.rb +20 -0
- data/spec/licensee/commands/detect_spec.rb +6 -2
- data/spec/licensee/commands/license_path_spec.rb +2 -0
- data/spec/licensee/commands/version_spec.rb +2 -0
- data/spec/licensee/content_helper_spec.rb +152 -36
- data/spec/licensee/hash_helper_spec.rb +2 -0
- data/spec/licensee/license_field_spec.rb +7 -0
- data/spec/licensee/license_meta_spec.rb +2 -0
- data/spec/licensee/license_rules_spec.rb +2 -0
- data/spec/licensee/license_spec.rb +36 -11
- data/spec/licensee/matchers/cabal_matcher_spec.rb +93 -0
- data/spec/licensee/matchers/cargo_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/copyright_matcher_spec.rb +4 -2
- data/spec/licensee/matchers/cran_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/dice_matcher_spec.rb +4 -2
- data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/exact_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/gemspec_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/matcher_spec.rb +2 -0
- data/spec/licensee/matchers/npm_bower_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/package_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/reference_matcher_spec.rb +2 -0
- data/spec/licensee/matchers/spdx_matcher_spec.rb +2 -0
- data/spec/licensee/project_files/license_file_spec.rb +4 -2
- data/spec/licensee/project_files/package_info_spec.rb +2 -0
- data/spec/licensee/project_files/project_file_spec.rb +3 -0
- data/spec/licensee/project_files/readme_file_spec.rb +11 -0
- data/spec/licensee/project_spec.rb +23 -3
- data/spec/licensee/projects/git_project_spec.rb +23 -0
- data/spec/licensee/projects/github_project_spec.rb +2 -0
- data/spec/licensee/rule_spec.rb +2 -0
- data/spec/licensee_spec.rb +3 -1
- data/spec/spec_helper.rb +29 -9
- data/spec/vendored_license_spec.rb +27 -8
- data/vendor/choosealicense.com/_data/meta.yml +0 -4
- data/vendor/choosealicense.com/_licenses/0bsd.txt +39 -0
- data/vendor/choosealicense.com/_licenses/afl-3.0.txt +7 -6
- data/vendor/choosealicense.com/_licenses/agpl-3.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/apache-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/artistic-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/bsd-2-clause.txt +8 -6
- data/vendor/choosealicense.com/_licenses/bsd-3-clause-clear.txt +1 -2
- data/vendor/choosealicense.com/_licenses/bsd-3-clause.txt +12 -10
- data/vendor/choosealicense.com/_licenses/bsl-1.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/cc-by-4.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/cc-by-sa-4.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/cc0-1.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/cecill-2.1.txt +579 -0
- data/vendor/choosealicense.com/_licenses/ecl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/epl-1.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/epl-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/eupl-1.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/eupl-1.2.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/isc.txt +0 -1
- data/vendor/choosealicense.com/_licenses/lgpl-2.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/lgpl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/lppl-1.3c.txt +0 -1
- data/vendor/choosealicense.com/_licenses/mit.txt +0 -1
- data/vendor/choosealicense.com/_licenses/mpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-pl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-rl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ncsa.txt +0 -1
- data/vendor/choosealicense.com/_licenses/odbl-1.0.txt +573 -0
- data/vendor/choosealicense.com/_licenses/ofl-1.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/postgresql.txt +2 -3
- data/vendor/choosealicense.com/_licenses/unlicense.txt +1 -2
- data/vendor/choosealicense.com/_licenses/upl-1.0.txt +3 -4
- data/vendor/choosealicense.com/_licenses/wtfpl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/zlib.txt +0 -1
- metadata +41 -19
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d7dc009b0467cfb305e8dac051ed4e78d2f35d0454f2e14cef0952338540f8ae
|
|
4
|
+
data.tar.gz: 3c27bb3dd3cea6d62fab826b81fab93d9152893851b541c91d69406cdf9fcbd8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 07f19b33f70b0b73611d34e474f2aa4e4d7f62c7451cdf70f76774beceac2c75ab3d1cc5048061a848b979a54032aad6dd1ba278c79cd798029efd6873d54425
|
|
7
|
+
data.tar.gz: 96c5e66f65307e7feb2c00b3f06661b093c60995d049f7fd19cc27b76881965a1d33768a16b0a3a3b085e9392ef828dce9cf692ee04255dd9ea2c6d22da38da6
|
data/bin/licensee
CHANGED
data/lib/licensee.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require_relative 'licensee/version'
|
|
2
4
|
require 'forwardable'
|
|
3
5
|
require 'pathname'
|
|
@@ -19,7 +21,7 @@ module Licensee
|
|
|
19
21
|
CONFIDENCE_THRESHOLD = 98
|
|
20
22
|
|
|
21
23
|
# Base domain from which to build license URLs
|
|
22
|
-
DOMAIN = 'http://choosealicense.com'
|
|
24
|
+
DOMAIN = 'http://choosealicense.com'
|
|
23
25
|
|
|
24
26
|
class << self
|
|
25
27
|
attr_writer :confidence_threshold
|
|
@@ -49,7 +51,7 @@ module Licensee
|
|
|
49
51
|
end
|
|
50
52
|
|
|
51
53
|
# Inverse of the confidence threshold, represented as a float
|
|
52
|
-
# By default this will be 0.
|
|
54
|
+
# By default this will be 0.02
|
|
53
55
|
def inverse_confidence_threshold
|
|
54
56
|
@inverse_confidence_threshold ||=
|
|
55
57
|
(1 - Licensee.confidence_threshold / 100.0).round(2)
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
class LicenseeCLI < Thor
|
|
2
4
|
# Methods to call when displaying information about ProjectFiles
|
|
3
5
|
MATCHED_FILE_METHODS = %i[
|
|
@@ -40,8 +42,10 @@ class LicenseeCLI < Thor
|
|
|
40
42
|
|
|
41
43
|
MATCHED_FILE_METHODS.each do |method|
|
|
42
44
|
next unless matched_file.respond_to? method
|
|
45
|
+
|
|
43
46
|
value = matched_file.public_send method
|
|
44
47
|
next if value.nil?
|
|
48
|
+
|
|
45
49
|
rows << [humanize(method, :method), humanize(value, method)]
|
|
46
50
|
end
|
|
47
51
|
print_table rows, indent: 2
|
|
@@ -49,8 +53,9 @@ class LicenseeCLI < Thor
|
|
|
49
53
|
next unless matched_file.is_a? Licensee::ProjectFiles::LicenseFile
|
|
50
54
|
next if matched_file.confidence == 100
|
|
51
55
|
|
|
52
|
-
licenses =
|
|
56
|
+
licenses = licenses_by_similarity(matched_file)
|
|
53
57
|
next if licenses.empty?
|
|
58
|
+
|
|
54
59
|
say ' Closest non-matching licenses:'
|
|
55
60
|
rows = licenses[0...3].map do |license, similarity|
|
|
56
61
|
spdx_id = license.meta['spdx-id']
|
|
@@ -89,15 +94,15 @@ class LicenseeCLI < Thor
|
|
|
89
94
|
end
|
|
90
95
|
end
|
|
91
96
|
|
|
92
|
-
def
|
|
97
|
+
def licenses_by_similarity(matched_file)
|
|
93
98
|
matcher = Licensee::Matchers::Dice.new(matched_file)
|
|
94
99
|
potential_licenses = Licensee.licenses(hidden: true).select(&:wordset)
|
|
95
100
|
matcher.instance_variable_set('@potential_licenses', potential_licenses)
|
|
96
|
-
matcher.
|
|
101
|
+
matcher.licenses_by_similarity
|
|
97
102
|
end
|
|
98
103
|
|
|
99
104
|
def closest_license_key(matched_file)
|
|
100
|
-
licenses =
|
|
105
|
+
licenses = licenses_by_similarity(matched_file)
|
|
101
106
|
licenses.first.first.key unless licenses.empty?
|
|
102
107
|
end
|
|
103
108
|
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'tmpdir'
|
|
2
4
|
|
|
3
5
|
class LicenseeCLI < Thor
|
|
@@ -39,26 +41,23 @@ class LicenseeCLI < Thor
|
|
|
39
41
|
|
|
40
42
|
def license_to_diff
|
|
41
43
|
return options[:license_to_diff] if options[:license_to_diff]
|
|
42
|
-
return project.license_file if remote?
|
|
44
|
+
return project.license_file if remote? || STDIN.tty? && project.license_file
|
|
43
45
|
|
|
44
46
|
@license_to_diff ||= begin
|
|
45
|
-
if STDIN.tty?
|
|
46
|
-
error 'You must pipe license contents to the command via STDIN'
|
|
47
|
-
exit 1
|
|
48
|
-
end
|
|
49
|
-
|
|
50
47
|
Licensee::ProjectFiles::LicenseFile.new(STDIN.read, 'LICENSE')
|
|
51
48
|
end
|
|
52
49
|
end
|
|
53
50
|
|
|
54
51
|
def expected_license
|
|
55
|
-
|
|
52
|
+
if options[:license]
|
|
53
|
+
@expected_license ||= Licensee::License.find options[:license]
|
|
54
|
+
end
|
|
56
55
|
return @expected_license if @expected_license
|
|
57
56
|
|
|
58
57
|
if options[:license]
|
|
59
58
|
error "#{options[:license]} is not a valid license"
|
|
60
59
|
else
|
|
61
|
-
error '
|
|
60
|
+
error 'Usage: provide a license to diff against with --license (spdx name)'
|
|
62
61
|
end
|
|
63
62
|
|
|
64
63
|
error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
|
|
@@ -1,31 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'set'
|
|
2
4
|
require 'digest'
|
|
3
5
|
|
|
4
6
|
module Licensee
|
|
5
7
|
module ContentHelper
|
|
6
8
|
DIGEST = Digest::SHA1
|
|
7
|
-
|
|
8
|
-
|
|
9
|
+
START_REGEX = /\A\s*/.freeze
|
|
10
|
+
END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i.freeze
|
|
9
11
|
ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
12
|
+
REGEXES = {
|
|
13
|
+
hrs: /^\s*[=\-\*]{3,}\s*$/,
|
|
14
|
+
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
|
|
15
|
+
whitespace: /\s+/,
|
|
16
|
+
markdown_headings: /#{START_REGEX}#+/,
|
|
17
|
+
version: /#{START_REGEX}version.*$/i,
|
|
18
|
+
span_markup: /[_*~]+(.*?)[_*~]+/,
|
|
19
|
+
link_markup: /\[(.+?)\]\(.+?\)/,
|
|
20
|
+
block_markup: /^\s*>/,
|
|
21
|
+
border_markup: /^[\*-](.*?)[\*-]$/,
|
|
22
|
+
comment_markup: %r{^\s*?[/\*]{1,2}},
|
|
23
|
+
url: %r{#{START_REGEX}https?://[^ ]+\n},
|
|
24
|
+
bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
|
|
25
|
+
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
|
26
|
+
quote_begin: /[`'"‘“]/,
|
|
27
|
+
quote_end: /[`'"’”]/,
|
|
28
|
+
mit_optional: /\(including the next paragraph\)/i
|
|
29
|
+
}.freeze
|
|
30
|
+
NORMALIZATIONS = {
|
|
31
|
+
lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
|
|
32
|
+
https: { from: /http:/, to: 'https:' },
|
|
33
|
+
ampersands: { from: '&', to: 'and' },
|
|
34
|
+
dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
|
|
35
|
+
quotes: {
|
|
36
|
+
from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
|
|
37
|
+
to: '"\1"'
|
|
38
|
+
}
|
|
39
|
+
}.freeze
|
|
40
|
+
|
|
41
|
+
# Legally equivalent words that should be ignored for comparison
|
|
42
|
+
# See https://spdx.org/spdx-license-list/matching-guidelines
|
|
43
|
+
VARIETAL_WORDS = {
|
|
44
|
+
'acknowledgment' => 'acknowledgement',
|
|
45
|
+
'analogue' => 'analog',
|
|
46
|
+
'analyse' => 'analyze',
|
|
47
|
+
'artefact' => 'artifact',
|
|
48
|
+
'authorisation' => 'authorization',
|
|
49
|
+
'authorised' => 'authorized',
|
|
50
|
+
'calibre' => 'caliber',
|
|
51
|
+
'cancelled' => 'canceled',
|
|
52
|
+
'capitalisations' => 'capitalizations',
|
|
53
|
+
'catalogue' => 'catalog',
|
|
54
|
+
'categorise' => 'categorize',
|
|
55
|
+
'centre' => 'center',
|
|
56
|
+
'emphasised' => 'emphasized',
|
|
57
|
+
'favour' => 'favor',
|
|
58
|
+
'favourite' => 'favorite',
|
|
59
|
+
'fulfil' => 'fulfill',
|
|
60
|
+
'fulfilment' => 'fulfillment',
|
|
61
|
+
'initialise' => 'initialize',
|
|
62
|
+
'judgment' => 'judgement',
|
|
63
|
+
'labelling' => 'labeling',
|
|
64
|
+
'labour' => 'labor',
|
|
65
|
+
'licence' => 'license',
|
|
66
|
+
'maximise' => 'maximize',
|
|
67
|
+
'modelled' => 'modeled',
|
|
68
|
+
'modelling' => 'modeling',
|
|
69
|
+
'offence' => 'offense',
|
|
70
|
+
'optimise' => 'optimize',
|
|
71
|
+
'organisation' => 'organization',
|
|
72
|
+
'organise' => 'organize',
|
|
73
|
+
'practise' => 'practice',
|
|
74
|
+
'programme' => 'program',
|
|
75
|
+
'realise' => 'realize',
|
|
76
|
+
'recognise' => 'recognize',
|
|
77
|
+
'signalling' => 'signaling',
|
|
78
|
+
'sub-license' => 'sublicense',
|
|
79
|
+
'sub license' => 'sublicense',
|
|
80
|
+
'utilisation' => 'utilization',
|
|
81
|
+
'whilst' => 'while',
|
|
82
|
+
'wilful' => 'wilfull',
|
|
83
|
+
'non-commercial' => 'noncommercial',
|
|
84
|
+
'cent' => 'percent',
|
|
85
|
+
'owner' => 'holder'
|
|
86
|
+
}.freeze
|
|
87
|
+
STRIP_METHODS = %i[
|
|
88
|
+
hrs markdown_headings borders title version url copyright
|
|
89
|
+
block_markup span_markup link_markup
|
|
90
|
+
all_rights_reserved developed_by end_of_terms whitespace
|
|
91
|
+
mit_optional
|
|
92
|
+
].freeze
|
|
18
93
|
|
|
19
94
|
# A set of each word in the license, without duplicates
|
|
20
95
|
def wordset
|
|
21
|
-
@wordset ||=
|
|
22
|
-
content_normalized.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
|
|
23
|
-
end
|
|
96
|
+
@wordset ||= content_normalized&.scan(/(?:\w(?:'s|(?<=s)')?)+/)&.to_set
|
|
24
97
|
end
|
|
25
98
|
|
|
26
99
|
# Number of characters in the normalized content
|
|
27
100
|
def length
|
|
28
101
|
return 0 unless content_normalized
|
|
102
|
+
|
|
29
103
|
content_normalized.length
|
|
30
104
|
end
|
|
31
105
|
|
|
@@ -43,8 +117,10 @@ module Licensee
|
|
|
43
117
|
# Given another license or project file, calculates the similarity
|
|
44
118
|
# as a percentage of words in common
|
|
45
119
|
def similarity(other)
|
|
46
|
-
|
|
47
|
-
|
|
120
|
+
wordset_fieldless = wordset - LicenseField.keys
|
|
121
|
+
fields_removed = wordset.size - wordset_fieldless.size
|
|
122
|
+
overlap = (wordset_fieldless & other.wordset).size
|
|
123
|
+
total = wordset_fieldless.size + other.wordset.size - fields_removed
|
|
48
124
|
100.0 * (overlap * 2.0 / total)
|
|
49
125
|
end
|
|
50
126
|
|
|
@@ -59,34 +135,21 @@ module Licensee
|
|
|
59
135
|
# content with attribution first to detect attribution in LicenseFile
|
|
60
136
|
def content_without_title_and_version
|
|
61
137
|
@content_without_title_and_version ||= begin
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
strip_version(string).strip
|
|
138
|
+
@_content = nil
|
|
139
|
+
ops = %i[html hrs comments markdown_headings title version]
|
|
140
|
+
ops.each { |op| strip(op) }
|
|
141
|
+
_content
|
|
67
142
|
end
|
|
68
143
|
end
|
|
69
144
|
|
|
70
|
-
# Content without title, version, copyright, whitespace, or insturctions
|
|
71
|
-
#
|
|
72
|
-
# wrap - Optional width to wrap the content
|
|
73
|
-
#
|
|
74
|
-
# Returns a string
|
|
75
145
|
def content_normalized(wrap: nil)
|
|
76
|
-
return unless content
|
|
77
146
|
@content_normalized ||= begin
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
|
|
85
|
-
string = normalize_lists(string)
|
|
86
|
-
string = normalize_quotes(string)
|
|
87
|
-
string = normalize_https(string)
|
|
88
|
-
string = strip_markup(string)
|
|
89
|
-
strip_whitespace(string)
|
|
147
|
+
@_content = content_without_title_and_version.downcase
|
|
148
|
+
|
|
149
|
+
(NORMALIZATIONS.keys + %i[spelling bullets]).each { |op| normalize(op) }
|
|
150
|
+
STRIP_METHODS.each { |op| strip(op) }
|
|
151
|
+
|
|
152
|
+
_content
|
|
90
153
|
end
|
|
91
154
|
|
|
92
155
|
if wrap.nil?
|
|
@@ -96,14 +159,24 @@ module Licensee
|
|
|
96
159
|
end
|
|
97
160
|
end
|
|
98
161
|
|
|
162
|
+
# Keep the old constants backwards compatible to avoid a breaking change
|
|
163
|
+
def self.const_missing(const)
|
|
164
|
+
key = const.to_s.downcase.gsub('_regex', '').to_sym
|
|
165
|
+
REGEXES[key] || super
|
|
166
|
+
end
|
|
167
|
+
|
|
99
168
|
# Wrap text to the given line length
|
|
100
169
|
def self.wrap(text, line_width = 80)
|
|
101
170
|
return if text.nil?
|
|
171
|
+
|
|
102
172
|
text = text.clone
|
|
173
|
+
text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
|
|
103
174
|
text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
|
|
104
175
|
|
|
105
176
|
text = text.split("\n").collect do |line|
|
|
106
|
-
if line
|
|
177
|
+
if line =~ REGEXES[:hrs]
|
|
178
|
+
line
|
|
179
|
+
elsif line.length > line_width
|
|
107
180
|
line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
|
|
108
181
|
else
|
|
109
182
|
line
|
|
@@ -114,82 +187,114 @@ module Licensee
|
|
|
114
187
|
end
|
|
115
188
|
|
|
116
189
|
def self.format_percent(float)
|
|
117
|
-
"#{format('
|
|
190
|
+
"#{format('%<float>.2f', float: float)}%"
|
|
118
191
|
end
|
|
119
192
|
|
|
120
193
|
def self.title_regex
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
194
|
+
@title_regex ||= begin
|
|
195
|
+
licenses = Licensee::License.all(hidden: true, psuedo: false)
|
|
196
|
+
titles = licenses.map(&:title_regex)
|
|
197
|
+
|
|
198
|
+
# Title regex must include the version to support matching within
|
|
199
|
+
# families, but for sake of normalization, we can be less strict
|
|
200
|
+
without_versions = licenses.map do |license|
|
|
201
|
+
next if license.title == license.name_without_version
|
|
202
|
+
|
|
203
|
+
Regexp.new Regexp.escape(license.name_without_version), 'i'
|
|
204
|
+
end
|
|
205
|
+
titles.concat(without_versions.compact)
|
|
131
206
|
|
|
132
|
-
|
|
207
|
+
/#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
|
|
208
|
+
end
|
|
133
209
|
end
|
|
134
210
|
|
|
135
211
|
private
|
|
136
212
|
|
|
137
|
-
def
|
|
138
|
-
|
|
213
|
+
def _content
|
|
214
|
+
@_content ||= content.to_s.dup.strip
|
|
139
215
|
end
|
|
140
216
|
|
|
141
|
-
def
|
|
142
|
-
|
|
217
|
+
def strip(regex_or_sym)
|
|
218
|
+
return unless _content
|
|
219
|
+
|
|
220
|
+
if regex_or_sym.is_a?(Symbol)
|
|
221
|
+
meth = "strip_#{regex_or_sym}"
|
|
222
|
+
return send(meth) if respond_to?(meth, true)
|
|
223
|
+
|
|
224
|
+
unless REGEXES[regex_or_sym]
|
|
225
|
+
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
regex_or_sym = REGEXES[regex_or_sym]
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
@_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
|
|
143
232
|
end
|
|
144
233
|
|
|
145
|
-
def
|
|
146
|
-
|
|
234
|
+
def strip_title
|
|
235
|
+
while _content =~ ContentHelper.title_regex
|
|
236
|
+
strip(ContentHelper.title_regex)
|
|
237
|
+
end
|
|
147
238
|
end
|
|
148
239
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
strip(string, HR_REGEX)
|
|
240
|
+
def strip_borders
|
|
241
|
+
normalize(REGEXES[:border_markup], '\1')
|
|
152
242
|
end
|
|
153
243
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
244
|
+
def strip_comments
|
|
245
|
+
lines = _content.split("\n")
|
|
246
|
+
return if lines.count == 1
|
|
247
|
+
return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
|
|
248
|
+
|
|
249
|
+
strip(:comment_markup)
|
|
157
250
|
end
|
|
158
251
|
|
|
159
|
-
def
|
|
160
|
-
|
|
252
|
+
def strip_copyright
|
|
253
|
+
regex = Matchers::Copyright::REGEX
|
|
254
|
+
strip(regex) while _content =~ regex
|
|
161
255
|
end
|
|
162
256
|
|
|
163
|
-
def
|
|
164
|
-
|
|
257
|
+
def strip_end_of_terms
|
|
258
|
+
body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
|
|
259
|
+
@_content = body
|
|
165
260
|
end
|
|
166
261
|
|
|
167
|
-
def
|
|
168
|
-
|
|
262
|
+
def strip_span_markup
|
|
263
|
+
normalize(REGEXES[:span_markup], '\1')
|
|
169
264
|
end
|
|
170
265
|
|
|
171
|
-
def
|
|
172
|
-
|
|
266
|
+
def strip_link_markup
|
|
267
|
+
normalize(REGEXES[:link_markup], '\1')
|
|
173
268
|
end
|
|
174
269
|
|
|
175
|
-
def
|
|
176
|
-
|
|
270
|
+
def strip_html
|
|
271
|
+
return unless respond_to?(:filename) && filename
|
|
272
|
+
return unless File.extname(filename) =~ /\.html?/i
|
|
273
|
+
|
|
274
|
+
require 'reverse_markdown'
|
|
275
|
+
@_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
|
|
177
276
|
end
|
|
178
277
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
278
|
+
def normalize(from_or_key, to = nil)
|
|
279
|
+
operation = { from: from_or_key, to: to } if to
|
|
280
|
+
operation ||= NORMALIZATIONS[from_or_key]
|
|
281
|
+
|
|
282
|
+
if operation
|
|
283
|
+
@_content = _content.gsub operation[:from], operation[:to]
|
|
284
|
+
elsif respond_to?("normalize_#{from_or_key}", true)
|
|
285
|
+
send("normalize_#{from_or_key}")
|
|
286
|
+
else
|
|
287
|
+
raise ArgumentError, "#{from_or_key} is an invalid normalization"
|
|
288
|
+
end
|
|
185
289
|
end
|
|
186
290
|
|
|
187
|
-
def
|
|
188
|
-
|
|
291
|
+
def normalize_spelling
|
|
292
|
+
normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
|
|
189
293
|
end
|
|
190
294
|
|
|
191
|
-
def
|
|
192
|
-
|
|
295
|
+
def normalize_bullets
|
|
296
|
+
normalize(REGEXES[:bullet], "\n\n* ")
|
|
297
|
+
normalize(/\)\s+\(/, ')(')
|
|
193
298
|
end
|
|
194
299
|
end
|
|
195
300
|
end
|