licensee 9.18.0 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.md +1 -1
- data/bin/licensee +2 -0
- data/lib/licensee/commands/detect.rb +9 -89
- data/lib/licensee/commands/detect_helpers.rb +125 -0
- data/lib/licensee/commands/diff.rb +64 -35
- data/lib/licensee/commands/license_path.rb +1 -0
- data/lib/licensee/commands/version.rb +1 -0
- data/lib/licensee/content_helper/constants.rb +111 -0
- data/lib/licensee/content_helper/normalization_methods.rb +149 -0
- data/lib/licensee/content_helper/similarity_methods.rb +63 -0
- data/lib/licensee/content_helper.rb +42 -277
- data/lib/licensee/hash_helper.rb +9 -7
- data/lib/licensee/license/class_methods.rb +67 -0
- data/lib/licensee/license/content_methods.rb +52 -0
- data/lib/licensee/license/identity_methods.rb +117 -0
- data/lib/licensee/license.rb +31 -208
- data/lib/licensee/license_field.rb +9 -6
- data/lib/licensee/license_meta.rb +4 -1
- data/lib/licensee/license_rules.rb +5 -1
- data/lib/licensee/matchers/cabal.rb +6 -2
- data/lib/licensee/matchers/cargo.rb +1 -0
- data/lib/licensee/matchers/copyright.rb +3 -1
- data/lib/licensee/matchers/cran.rb +2 -1
- data/lib/licensee/matchers/dice.rb +13 -2
- data/lib/licensee/matchers/dist_zilla.rb +1 -0
- data/lib/licensee/matchers/exact.rb +2 -0
- data/lib/licensee/matchers/gemspec.rb +1 -8
- data/lib/licensee/matchers/matcher.rb +5 -3
- data/lib/licensee/matchers/npm_bower.rb +1 -0
- data/lib/licensee/matchers/nuget.rb +1 -0
- data/lib/licensee/matchers/package.rb +21 -5
- data/lib/licensee/matchers/spdx.rb +1 -0
- data/lib/licensee/matchers.rb +1 -0
- data/lib/licensee/project_files/license_file.rb +28 -3
- data/lib/licensee/project_files/package_manager_file.rb +1 -0
- data/lib/licensee/project_files/project_file.rb +8 -5
- data/lib/licensee/project_files/readme_file.rb +1 -0
- data/lib/licensee/project_files.rb +1 -0
- data/lib/licensee/projects/fs_project.rb +2 -0
- data/lib/licensee/projects/git_project.rb +30 -4
- data/lib/licensee/projects/github_project.rb +25 -5
- data/lib/licensee/projects/project.rb +31 -34
- data/lib/licensee/projects.rb +1 -0
- data/lib/licensee/rule.rb +2 -0
- data/lib/licensee/version.rb +1 -1
- data/lib/licensee.rb +23 -2
- data/spec/bin_spec.rb +8 -8
- data/spec/fixture_spec.rb +18 -19
- data/spec/fixtures/bsd-3-linebreak-owner/LICENSE +30 -0
- data/spec/fixtures/bsd-3-multilinecopyright/LICENSE +27 -0
- data/spec/fixtures/detect.json +3 -3
- data/spec/fixtures/fixtures.yml +35 -11
- data/spec/fixtures/license-hashes.json +4 -4
- data/spec/fixtures/licenses-dir/LICENSES/MIT.txt +21 -0
- data/spec/fixtures/licenses-dir-with-license-ref/LICENSES/LicenseRef-MIT.txt +21 -0
- data/spec/fixtures/licenses-dir-with-multiple-license-files/LICENSES/MIT.txt +21 -0
- data/spec/fixtures/licenses-dir-with-multiple-license-files/LICENSES/MPL-2.0.txt +362 -0
- data/spec/fixtures/licenses-dir-with-top-level-license/LICENSE.md +195 -0
- data/spec/fixtures/licenses-dir-with-top-level-license/LICENSES/MIT.txt +21 -0
- data/spec/integration_spec.rb +247 -274
- data/spec/licensee/commands/detect_spec.rb +94 -21
- data/spec/licensee/commands/license_path_spec.rb +13 -9
- data/spec/licensee/commands/version_spec.rb +12 -8
- data/spec/licensee/content_helper_spec.rb +159 -111
- data/spec/licensee/hash_helper_spec.rb +9 -10
- data/spec/licensee/license_field_spec.rb +17 -22
- data/spec/licensee/license_meta_spec.rb +29 -37
- data/spec/licensee/license_rules_spec.rb +19 -19
- data/spec/licensee/license_spec.rb +219 -264
- data/spec/licensee/licensee_filesystem_spec.rb +40 -0
- data/spec/licensee/matchers/cabal_matcher_spec.rb +67 -31
- data/spec/licensee/matchers/cargo_matcher_spec.rb +7 -7
- data/spec/licensee/matchers/copyright_matcher_spec.rb +21 -10
- data/spec/licensee/matchers/cran_matcher_spec.rb +6 -6
- data/spec/licensee/matchers/dice_matcher_spec.rb +47 -33
- data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +7 -7
- data/spec/licensee/matchers/exact_matcher_spec.rb +4 -4
- data/spec/licensee/matchers/gemspec_matcher_spec.rb +10 -10
- data/spec/licensee/matchers/matcher_spec.rb +14 -4
- data/spec/licensee/matchers/npm_bower_matcher_spec.rb +20 -12
- data/spec/licensee/matchers/nu_get_matcher_spec.rb +12 -12
- data/spec/licensee/matchers/package_matcher_spec.rb +40 -12
- data/spec/licensee/matchers/reference_matcher_spec.rb +17 -13
- data/spec/licensee/matchers/spdx_matcher_spec.rb +9 -9
- data/spec/licensee/project_files/license_file_spec.rb +136 -72
- data/spec/licensee/project_files/package_manager_file_spec.rb +3 -3
- data/spec/licensee/project_files/project_file_spec.rb +29 -23
- data/spec/licensee/project_files/readme_file_spec.rb +13 -13
- data/spec/licensee/project_spec.rb +168 -123
- data/spec/licensee/projects/git_hub_project_spec.rb +268 -26
- data/spec/licensee/projects/git_project_spec.rb +23 -1
- data/spec/licensee/projects/project_spec.rb +15 -0
- data/spec/licensee/rule_spec.rb +19 -22
- data/spec/licensee_spec.rb +23 -11
- data/spec/spec_helper.rb +3 -1
- data/spec/vendored_license_spec.rb +37 -60
- data/vendor/choosealicense.com/_licenses/blueoak-1.0.0.txt +1 -1
- data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +1 -1
- data/vendor/choosealicense.com/_licenses/cern-ohl-p-2.0.txt +1 -1
- data/vendor/choosealicense.com/_licenses/cern-ohl-s-2.0.txt +1 -1
- data/vendor/choosealicense.com/_licenses/cern-ohl-w-2.0.txt +2 -2
- data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +1 -1
- data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -1
- data/vendor/choosealicense.com/_licenses/mit-0.txt +1 -1
- data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -1
- data/vendor/choosealicense.com/_licenses/zlib.txt +1 -1
- data/vendor/license-list-XML/src/0BSD.xml +1 -1
- data/vendor/license-list-XML/src/AFL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/AGPL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/Apache-2.0.xml +1 -1
- data/vendor/license-list-XML/src/Artistic-2.0.xml +1 -1
- data/vendor/license-list-XML/src/BSD-2-Clause-Patent.xml +1 -1
- data/vendor/license-list-XML/src/BSD-2-Clause.xml +1 -1
- data/vendor/license-list-XML/src/BSD-3-Clause.xml +3 -3
- data/vendor/license-list-XML/src/BSD-4-Clause.xml +3 -2
- data/vendor/license-list-XML/src/BSL-1.0.xml +1 -1
- data/vendor/license-list-XML/src/ECL-2.0.xml +1 -1
- data/vendor/license-list-XML/src/EPL-1.0.xml +1 -1
- data/vendor/license-list-XML/src/EPL-2.0.xml +3 -1
- data/vendor/license-list-XML/src/EUPL-1.1.xml +1 -1
- data/vendor/license-list-XML/src/EUPL-1.2.xml +1 -1
- data/vendor/license-list-XML/src/GPL-2.0.xml +11 -6
- data/vendor/license-list-XML/src/GPL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/ISC.xml +1 -1
- data/vendor/license-list-XML/src/LGPL-2.1.xml +6 -3
- data/vendor/license-list-XML/src/LGPL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/LPPL-1.3c.xml +2 -2
- data/vendor/license-list-XML/src/MIT.xml +32 -14
- data/vendor/license-list-XML/src/MPL-2.0.xml +3 -3
- data/vendor/license-list-XML/src/MS-PL.xml +1 -1
- data/vendor/license-list-XML/src/MS-RL.xml +1 -1
- data/vendor/license-list-XML/src/NCSA.xml +1 -1
- data/vendor/license-list-XML/src/OFL-1.1.xml +1 -1
- data/vendor/license-list-XML/src/OSL-3.0.xml +1 -1
- data/vendor/license-list-XML/src/PostgreSQL.xml +1 -1
- data/vendor/license-list-XML/src/UPL-1.0.xml +1 -1
- data/vendor/license-list-XML/src/Zlib.xml +1 -1
- metadata +48 -30
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Licensee
|
|
4
|
+
module ContentHelper
|
|
5
|
+
# Mixin providing wordset- and bigram-based similarity scoring.
|
|
6
|
+
module SimilarityMethods
|
|
7
|
+
# Given another license or project file, calculates the similarity
|
|
8
|
+
# as a percentage of words in common, minus a tiny penalty that
|
|
9
|
+
# increases with size difference between licenses so that false
|
|
10
|
+
# positives for long licenses are ruled out by this score alone.
|
|
11
|
+
def similarity(other)
|
|
12
|
+
overlap = (wordset_fieldless & other.wordset).size
|
|
13
|
+
(overlap * 200.0) / similarity_denominator(other)
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# Given another license or project file, calculates the Dice coefficient
|
|
17
|
+
# over bigrams (consecutive word pairs). Unlike wordset similarity this
|
|
18
|
+
# is sensitive to word order, making it resistant to adversarial scrambling
|
|
19
|
+
# where all the correct words appear but in the wrong sequence.
|
|
20
|
+
def bigram_similarity(other)
|
|
21
|
+
my_bigrams = bigrams
|
|
22
|
+
other_bigrams = other.bigrams
|
|
23
|
+
total = my_bigrams.size + other_bigrams.size
|
|
24
|
+
return 0.0 if total.zero?
|
|
25
|
+
|
|
26
|
+
overlap = (my_bigrams & other_bigrams).size
|
|
27
|
+
(overlap * 200.0) / total
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
private
|
|
31
|
+
|
|
32
|
+
def wordset_fieldless
|
|
33
|
+
@wordset_fieldless ||= wordset - fields_normalized_set
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def similarity_denominator(other)
|
|
37
|
+
total = wordset_fieldless.size + other.wordset.size - fields_normalized_set.size
|
|
38
|
+
total + (variation_adjusted_length_delta(other) / 4)
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Returns an array of strings of substitutable fields in normalized content
|
|
42
|
+
def fields_normalized
|
|
43
|
+
@fields_normalized ||= content_normalized.scan(LicenseField::FIELD_REGEX).flatten
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def fields_normalized_set
|
|
47
|
+
@fields_normalized_set ||= fields_normalized.to_set
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def variation_adjusted_length_delta(other)
|
|
51
|
+
delta = length_delta(other)
|
|
52
|
+
|
|
53
|
+
# The content helper mixin is used in different objects
|
|
54
|
+
# Licenses have a more advanced SPDX alt. segment-based delta.
|
|
55
|
+
# Use that if it's present, otherwise, just return the simple delta.
|
|
56
|
+
return delta unless respond_to?(:spdx_alt_segments, true)
|
|
57
|
+
|
|
58
|
+
adjusted_delta = delta - ([fields_normalized.size, spdx_alt_segments].max * 5)
|
|
59
|
+
adjusted_delta.positive? ? adjusted_delta : 0
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
@@ -1,112 +1,31 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require 'set'
|
|
4
3
|
require 'digest'
|
|
4
|
+
require_relative 'content_helper/constants'
|
|
5
|
+
require_relative 'content_helper/normalization_methods'
|
|
6
|
+
require_relative 'content_helper/similarity_methods'
|
|
5
7
|
|
|
6
8
|
module Licensee
|
|
9
|
+
# Text normalization, hashing, wrapping, and similarity helpers for license content.
|
|
7
10
|
module ContentHelper
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
REGEXES = {
|
|
12
|
-
bom: /#{START_REGEX}\xEF\xBB\xBF/,
|
|
13
|
-
hrs: /^\s*[=\-*]{3,}\s*$/,
|
|
14
|
-
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
|
|
15
|
-
whitespace: /\s+/,
|
|
16
|
-
markdown_headings: /^\s*#+/,
|
|
17
|
-
version: /#{START_REGEX}version.*$/i,
|
|
18
|
-
span_markup: /[_*~]+(.*?)[_*~]+/,
|
|
19
|
-
link_markup: /\[(.+?)\]\(.+?\)/,
|
|
20
|
-
block_markup: /^\s*>/,
|
|
21
|
-
border_markup: /^[*-](.*?)[*-]$/,
|
|
22
|
-
comment_markup: %r{^\s*?[/*]{1,2}},
|
|
23
|
-
url: %r{#{START_REGEX}https?://[^ ]+\n},
|
|
24
|
-
bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[).])\s+/i,
|
|
25
|
-
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
|
26
|
-
cc_dedication: /The\s+text\s+of\s+the\s+Creative\s+Commons.*?Public\s+Domain\s+Dedication./im,
|
|
27
|
-
cc_wiki: /wiki.creativecommons.org/i,
|
|
28
|
-
cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
|
|
29
|
-
cc0_info: /For more information, please see\s*\S+zero\S+/im,
|
|
30
|
-
cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
|
|
31
|
-
unlicense_info: /For more information, please.*\S+unlicense\S+/im,
|
|
32
|
-
mit_optional: /\(including the next paragraph\)/i
|
|
33
|
-
}.freeze
|
|
34
|
-
NORMALIZATIONS = {
|
|
35
|
-
lists: { from: /^\s*(?:\d\.|[*-])(?: [*_]{0,2}\(?[\da-z]\)[*_]{0,2})?\s+([^\n])/, to: '- \1' },
|
|
36
|
-
https: { from: /http:/, to: 'https:' },
|
|
37
|
-
ampersands: { from: '&', to: 'and' },
|
|
38
|
-
dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
|
|
39
|
-
quote: { from: /[`'"‘“’”]/, to: "'" },
|
|
40
|
-
hyphenated: { from: /(\w+)-\s*\n\s*(\w+)/, to: '\1-\2' }
|
|
41
|
-
}.freeze
|
|
42
|
-
|
|
43
|
-
# Legally equivalent words that schould be ignored for comparison
|
|
44
|
-
# See https://spdx.org/spdx-license-list/matching-guidelines
|
|
45
|
-
VARIETAL_WORDS = {
|
|
46
|
-
'acknowledgment' => 'acknowledgement',
|
|
47
|
-
'analogue' => 'analog',
|
|
48
|
-
'analyse' => 'analyze',
|
|
49
|
-
'artefact' => 'artifact',
|
|
50
|
-
'authorisation' => 'authorization',
|
|
51
|
-
'authorised' => 'authorized',
|
|
52
|
-
'calibre' => 'caliber',
|
|
53
|
-
'cancelled' => 'canceled',
|
|
54
|
-
'capitalisations' => 'capitalizations',
|
|
55
|
-
'catalogue' => 'catalog',
|
|
56
|
-
'categorise' => 'categorize',
|
|
57
|
-
'centre' => 'center',
|
|
58
|
-
'emphasised' => 'emphasized',
|
|
59
|
-
'favour' => 'favor',
|
|
60
|
-
'favourite' => 'favorite',
|
|
61
|
-
'fulfil' => 'fulfill',
|
|
62
|
-
'fulfilment' => 'fulfillment',
|
|
63
|
-
'initialise' => 'initialize',
|
|
64
|
-
'judgment' => 'judgement',
|
|
65
|
-
'labelling' => 'labeling',
|
|
66
|
-
'labour' => 'labor',
|
|
67
|
-
'licence' => 'license',
|
|
68
|
-
'maximise' => 'maximize',
|
|
69
|
-
'modelled' => 'modeled',
|
|
70
|
-
'modelling' => 'modeling',
|
|
71
|
-
'offence' => 'offense',
|
|
72
|
-
'optimise' => 'optimize',
|
|
73
|
-
'organisation' => 'organization',
|
|
74
|
-
'organise' => 'organize',
|
|
75
|
-
'practise' => 'practice',
|
|
76
|
-
'programme' => 'program',
|
|
77
|
-
'realise' => 'realize',
|
|
78
|
-
'recognise' => 'recognize',
|
|
79
|
-
'signalling' => 'signaling',
|
|
80
|
-
'sub-license' => 'sublicense',
|
|
81
|
-
'sub license' => 'sublicense',
|
|
82
|
-
'utilisation' => 'utilization',
|
|
83
|
-
'whilst' => 'while',
|
|
84
|
-
'wilful' => 'wilfull',
|
|
85
|
-
'non-commercial' => 'noncommercial',
|
|
86
|
-
'per cent' => 'percent',
|
|
87
|
-
'copyright owner' => 'copyright holder'
|
|
88
|
-
}.freeze
|
|
89
|
-
STRIP_METHODS = %i[
|
|
90
|
-
bom
|
|
91
|
-
cc_optional
|
|
92
|
-
cc0_optional
|
|
93
|
-
unlicense_optional
|
|
94
|
-
borders
|
|
95
|
-
title
|
|
96
|
-
version
|
|
97
|
-
url
|
|
98
|
-
copyright
|
|
99
|
-
title
|
|
100
|
-
block_markup
|
|
101
|
-
developed_by
|
|
102
|
-
end_of_terms
|
|
103
|
-
whitespace
|
|
104
|
-
mit_optional
|
|
105
|
-
].freeze
|
|
11
|
+
include Constants
|
|
12
|
+
include NormalizationMethods
|
|
13
|
+
include SimilarityMethods
|
|
106
14
|
|
|
107
15
|
# A set of each word in the license, without duplicates
|
|
108
16
|
def wordset
|
|
109
|
-
@wordset ||=
|
|
17
|
+
@wordset ||= words&.to_set
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# A set of consecutive word pairs (bigrams) in the license, without duplicates.
|
|
21
|
+
# Unlike wordset, bigrams are order-sensitive, making similarity scores
|
|
22
|
+
# robust against adversarial word scrambling (see GitHub issue #602).
|
|
23
|
+
def bigrams
|
|
24
|
+
@bigrams ||= if words.nil? || words.length < 2
|
|
25
|
+
Set.new
|
|
26
|
+
else
|
|
27
|
+
words.each_cons(2).to_set { |a, b| "#{a} #{b}" }
|
|
28
|
+
end
|
|
110
29
|
end
|
|
111
30
|
|
|
112
31
|
# Number of characters in the normalized content
|
|
@@ -121,52 +40,11 @@ module Licensee
|
|
|
121
40
|
(length - other.length).abs
|
|
122
41
|
end
|
|
123
42
|
|
|
124
|
-
# Given another license or project file, calculates the similarity
|
|
125
|
-
# as a percentage of words in common, minus a tiny penalty that
|
|
126
|
-
# increases with size difference between licenses so that false
|
|
127
|
-
# positives for long licnses are ruled out by this score alone.
|
|
128
|
-
def similarity(other)
|
|
129
|
-
overlap = (wordset_fieldless & other.wordset).size
|
|
130
|
-
total = wordset_fieldless.size + other.wordset.size -
|
|
131
|
-
fields_normalized_set.size
|
|
132
|
-
(overlap * 200.0) / (total + (variation_adjusted_length_delta(other) / 4))
|
|
133
|
-
end
|
|
134
|
-
|
|
135
43
|
# SHA1 of the normalized content
|
|
136
44
|
def content_hash
|
|
137
45
|
@content_hash ||= DIGEST.hexdigest content_normalized
|
|
138
46
|
end
|
|
139
47
|
|
|
140
|
-
# Content with the title and version removed
|
|
141
|
-
# The first time should normally be the attribution line
|
|
142
|
-
# Used to dry up `content_normalized` but we need the case sensitive
|
|
143
|
-
# content with attribution first to detect attribuion in LicenseFile
|
|
144
|
-
def content_without_title_and_version
|
|
145
|
-
@content_without_title_and_version ||= begin
|
|
146
|
-
@_content = nil
|
|
147
|
-
ops = %i[html hrs comments markdown_headings link_markup title version]
|
|
148
|
-
ops.each { |op| strip(op) }
|
|
149
|
-
_content
|
|
150
|
-
end
|
|
151
|
-
end
|
|
152
|
-
|
|
153
|
-
def content_normalized(wrap: nil)
|
|
154
|
-
@content_normalized ||= begin
|
|
155
|
-
@_content = content_without_title_and_version.downcase
|
|
156
|
-
|
|
157
|
-
(NORMALIZATIONS.keys + %i[spelling span_markup bullets]).each { |op| normalize(op) }
|
|
158
|
-
STRIP_METHODS.each { |op| strip(op) }
|
|
159
|
-
|
|
160
|
-
_content
|
|
161
|
-
end
|
|
162
|
-
|
|
163
|
-
if wrap.nil?
|
|
164
|
-
@content_normalized
|
|
165
|
-
else
|
|
166
|
-
Licensee::ContentHelper.wrap(@content_normalized, wrap)
|
|
167
|
-
end
|
|
168
|
-
end
|
|
169
|
-
|
|
170
48
|
# Keep constants backwards compatible to avoid a breaking change
|
|
171
49
|
def self.const_missing(const)
|
|
172
50
|
key = const.to_s.downcase.gsub('_regex', '').to_sym
|
|
@@ -177,19 +55,26 @@ module Licensee
|
|
|
177
55
|
def self.wrap(text, line_width = 80)
|
|
178
56
|
return if text.nil?
|
|
179
57
|
|
|
58
|
+
text = normalize_for_wrapping(text)
|
|
59
|
+
wrapped = wrap_lines(text, line_width)
|
|
60
|
+
wrapped.strip
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def self.normalize_for_wrapping(text)
|
|
180
64
|
text = text.clone
|
|
181
65
|
text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
|
|
182
|
-
text.gsub!(/([^\n])\n([^\n])/, '
|
|
66
|
+
text.gsub!(/([^\n])\n([^\n])/, '\\1 \\2')
|
|
67
|
+
text
|
|
68
|
+
end
|
|
183
69
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
end * "\n"
|
|
70
|
+
def self.wrap_lines(text, line_width)
|
|
71
|
+
text.split("\n").map { |line| wrap_line(line, line_width) }.join("\n")
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def self.wrap_line(line, line_width)
|
|
75
|
+
return line if line =~ REGEXES[:hrs] || line.length <= line_width
|
|
191
76
|
|
|
192
|
-
|
|
77
|
+
line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
|
|
193
78
|
end
|
|
194
79
|
|
|
195
80
|
def self.format_percent(float)
|
|
@@ -198,7 +83,7 @@ module Licensee
|
|
|
198
83
|
|
|
199
84
|
def self.title_regex
|
|
200
85
|
@title_regex ||= begin
|
|
201
|
-
licenses = Licensee::License.all(hidden: true,
|
|
86
|
+
licenses = Licensee::License.all(hidden: true, pseudo: false)
|
|
202
87
|
titles = licenses.map(&:title_regex)
|
|
203
88
|
|
|
204
89
|
# Title regex must include the version to support matching within
|
|
@@ -216,134 +101,14 @@ module Licensee
|
|
|
216
101
|
|
|
217
102
|
private
|
|
218
103
|
|
|
219
|
-
|
|
220
|
-
|
|
104
|
+
# Ordered array of words extracted from the normalized content.
|
|
105
|
+
# Memoized so that both wordset and bigrams share the same scan result.
|
|
106
|
+
def words
|
|
107
|
+
@words ||= content_normalized&.scan(%r{(?:[\w/-](?:'s|(?<=s)')?)+})
|
|
221
108
|
end
|
|
222
109
|
|
|
223
|
-
def
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
if regex_or_sym.is_a?(Symbol)
|
|
227
|
-
meth = "strip_#{regex_or_sym}"
|
|
228
|
-
return send(meth) if respond_to?(meth, true)
|
|
229
|
-
|
|
230
|
-
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference" unless REGEXES[regex_or_sym]
|
|
231
|
-
|
|
232
|
-
regex_or_sym = REGEXES[regex_or_sym]
|
|
233
|
-
end
|
|
234
|
-
|
|
235
|
-
@_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
|
|
236
|
-
end
|
|
237
|
-
|
|
238
|
-
def strip_title
|
|
239
|
-
strip(ContentHelper.title_regex) while _content =~ ContentHelper.title_regex
|
|
240
|
-
end
|
|
241
|
-
|
|
242
|
-
def strip_borders
|
|
243
|
-
normalize(REGEXES[:border_markup], '\1')
|
|
244
|
-
end
|
|
245
|
-
|
|
246
|
-
def strip_comments
|
|
247
|
-
lines = _content.split("\n")
|
|
248
|
-
return if lines.count == 1
|
|
249
|
-
return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
|
|
250
|
-
|
|
251
|
-
strip(:comment_markup)
|
|
252
|
-
end
|
|
253
|
-
|
|
254
|
-
def strip_copyright
|
|
255
|
-
regex = Regexp.union(Matchers::Copyright::REGEX, REGEXES[:all_rights_reserved])
|
|
256
|
-
strip(regex) while _content =~ regex
|
|
257
|
-
end
|
|
258
|
-
|
|
259
|
-
def strip_cc0_optional
|
|
260
|
-
return unless _content.include? 'associating cc0'
|
|
261
|
-
|
|
262
|
-
strip(REGEXES[:cc_legal_code])
|
|
263
|
-
strip(REGEXES[:cc0_info])
|
|
264
|
-
strip(REGEXES[:cc0_disclaimer])
|
|
265
|
-
end
|
|
266
|
-
|
|
267
|
-
def strip_cc_optional
|
|
268
|
-
return unless _content.include? 'creative commons'
|
|
269
|
-
|
|
270
|
-
strip(REGEXES[:cc_dedication])
|
|
271
|
-
strip(REGEXES[:cc_wiki])
|
|
272
|
-
end
|
|
273
|
-
|
|
274
|
-
def strip_unlicense_optional
|
|
275
|
-
return unless _content.include? 'unlicense'
|
|
276
|
-
|
|
277
|
-
strip(REGEXES[:unlicense_info])
|
|
278
|
-
end
|
|
279
|
-
|
|
280
|
-
def strip_end_of_terms
|
|
281
|
-
body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
|
|
282
|
-
@_content = body
|
|
283
|
-
end
|
|
284
|
-
|
|
285
|
-
def normalize_span_markup
|
|
286
|
-
normalize(REGEXES[:span_markup], '\1')
|
|
287
|
-
end
|
|
288
|
-
|
|
289
|
-
def strip_link_markup
|
|
290
|
-
normalize(REGEXES[:link_markup], '\1')
|
|
291
|
-
end
|
|
292
|
-
|
|
293
|
-
def strip_html
|
|
294
|
-
return unless respond_to?(:filename) && filename
|
|
295
|
-
return unless /\.html?/i.match?(File.extname(filename))
|
|
296
|
-
|
|
297
|
-
require 'reverse_markdown'
|
|
298
|
-
@_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
|
|
299
|
-
end
|
|
300
|
-
|
|
301
|
-
def normalize(from_or_key, to = nil)
|
|
302
|
-
operation = { from: from_or_key, to: to } if to
|
|
303
|
-
operation ||= NORMALIZATIONS[from_or_key]
|
|
304
|
-
|
|
305
|
-
if operation
|
|
306
|
-
@_content = _content.gsub operation[:from], operation[:to]
|
|
307
|
-
elsif respond_to?(:"normalize_#{from_or_key}", true)
|
|
308
|
-
send(:"normalize_#{from_or_key}")
|
|
309
|
-
else
|
|
310
|
-
raise ArgumentError, "#{from_or_key} is an invalid normalization"
|
|
311
|
-
end
|
|
312
|
-
end
|
|
313
|
-
|
|
314
|
-
def normalize_spelling
|
|
315
|
-
normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
|
|
316
|
-
end
|
|
317
|
-
|
|
318
|
-
def normalize_bullets
|
|
319
|
-
normalize(REGEXES[:bullet], "\n\n- ")
|
|
320
|
-
normalize(/\)\s+\(/, ')(')
|
|
321
|
-
end
|
|
322
|
-
|
|
323
|
-
def wordset_fieldless
|
|
324
|
-
@wordset_fieldless ||= wordset - fields_normalized_set
|
|
325
|
-
end
|
|
326
|
-
|
|
327
|
-
# Returns an array of strings of substitutable fields in normalized content
|
|
328
|
-
def fields_normalized
|
|
329
|
-
@fields_normalized ||=
|
|
330
|
-
content_normalized.scan(LicenseField::FIELD_REGEX).flatten
|
|
331
|
-
end
|
|
332
|
-
|
|
333
|
-
def fields_normalized_set
|
|
334
|
-
@fields_normalized_set ||= fields_normalized.to_set
|
|
335
|
-
end
|
|
336
|
-
|
|
337
|
-
def variation_adjusted_length_delta(other)
|
|
338
|
-
delta = length_delta(other)
|
|
339
|
-
|
|
340
|
-
# The content helper mixin is used in different objects
|
|
341
|
-
# Licenses have a more advanced SPDX alt. segement-based delta.
|
|
342
|
-
# Use that if it's present, otherwise, just return the simple delta.
|
|
343
|
-
return delta unless respond_to?(:spdx_alt_segments, true)
|
|
344
|
-
|
|
345
|
-
adjusted_delta = delta - ([fields_normalized.size, spdx_alt_segments].max * 5)
|
|
346
|
-
adjusted_delta.positive? ? adjusted_delta : 0
|
|
110
|
+
def _content
|
|
111
|
+
@_content ||= content.to_s.dup.strip
|
|
347
112
|
end
|
|
348
113
|
end
|
|
349
114
|
end
|
data/lib/licensee/hash_helper.rb
CHANGED
|
@@ -1,22 +1,24 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Licensee
|
|
4
|
+
# Mixin that provides a `to_h` based on a class's `HASH_METHODS`.
|
|
4
5
|
module HashHelper
|
|
5
6
|
def to_h
|
|
6
7
|
hash = {}
|
|
7
8
|
self.class::HASH_METHODS.each do |method|
|
|
8
9
|
key = method.to_s.delete('?').to_sym
|
|
9
10
|
value = public_send(method)
|
|
10
|
-
hash[key] =
|
|
11
|
-
value.map { |v| v.respond_to?(:to_h) ? v.to_h : v }
|
|
12
|
-
elsif value.respond_to?(:to_h) && !value.nil?
|
|
13
|
-
value.to_h
|
|
14
|
-
else
|
|
15
|
-
value
|
|
16
|
-
end
|
|
11
|
+
hash[key] = serialize_hash_value(value)
|
|
17
12
|
end
|
|
18
13
|
|
|
19
14
|
hash
|
|
20
15
|
end
|
|
16
|
+
|
|
17
|
+
def serialize_hash_value(value)
|
|
18
|
+
return value.map { |v| v.respond_to?(:to_h) ? v.to_h : v } if value.is_a?(Array)
|
|
19
|
+
return value.to_h if value.respond_to?(:to_h) && !value.nil?
|
|
20
|
+
|
|
21
|
+
value
|
|
22
|
+
end
|
|
21
23
|
end
|
|
22
24
|
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Licensee
|
|
4
|
+
class License
|
|
5
|
+
# Class-level lookup and caching for licenses.
|
|
6
|
+
module ClassMethods
|
|
7
|
+
# All license objects defined via Licensee (via choosealicense.com)
|
|
8
|
+
#
|
|
9
|
+
# Options:
|
|
10
|
+
# - :hidden - boolean, return hidden licenses (default: false)
|
|
11
|
+
# - :featured - boolean, return only (non)featured licenses (default: all)
|
|
12
|
+
#
|
|
13
|
+
# Returns an Array of License objects.
|
|
14
|
+
def all(options = {})
|
|
15
|
+
@all[options] ||= begin
|
|
16
|
+
normalized_options = LicenseAllHelper.normalize_all_options(options, DEFAULT_OPTIONS)
|
|
17
|
+
output = licenses.dup
|
|
18
|
+
LicenseAllHelper.apply_all_filters!(output, normalized_options)
|
|
19
|
+
output.sort_by!(&:key)
|
|
20
|
+
LicenseAllHelper.filter_featured(output, normalized_options[:featured])
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def keys
|
|
25
|
+
@keys ||= license_files.map do |license_file|
|
|
26
|
+
::File.basename(license_file, '.txt').downcase
|
|
27
|
+
end + PSEUDO_LICENSES
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def find(key, options = {})
|
|
31
|
+
options = { hidden: true }.merge(options)
|
|
32
|
+
keys_licenses(options)[key.downcase]
|
|
33
|
+
end
|
|
34
|
+
alias [] find
|
|
35
|
+
alias find_by_key find
|
|
36
|
+
|
|
37
|
+
# Given a license title or nickname, fuzzy match the license
|
|
38
|
+
def find_by_title(title)
|
|
39
|
+
License.all(hidden: true, pseudo: false).find do |license|
|
|
40
|
+
title =~ /\A(the )?#{license.title_regex}( license)?\z/i
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def license_dir
|
|
45
|
+
::File.expand_path '../../../vendor/choosealicense.com/_licenses', __dir__
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def license_files
|
|
49
|
+
@license_files ||= Dir.glob("#{license_dir}/*.txt")
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def spdx_dir
|
|
53
|
+
::File.expand_path '../../../vendor/license-list-XML/src', __dir__
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
private
|
|
57
|
+
|
|
58
|
+
def licenses
|
|
59
|
+
@licenses ||= keys.map { |key| new(key) }
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def keys_licenses(options = {})
|
|
63
|
+
@keys_licenses[options] ||= all(options).to_h { |l| [l.key, l] }
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Licensee
|
|
4
|
+
class License
|
|
5
|
+
# Instance methods for loading and working with license content.
|
|
6
|
+
module ContentMethods
|
|
7
|
+
# Path to vendored license file on disk
|
|
8
|
+
def path
|
|
9
|
+
@path ||= File.expand_path "#{@key}.txt", Licensee::License.license_dir
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
# The license body (i.e., the file contents minus the YAML front matter)
|
|
13
|
+
def content
|
|
14
|
+
@content ||= parts[2] if parts && parts[2]
|
|
15
|
+
end
|
|
16
|
+
alias to_s content
|
|
17
|
+
alias text content
|
|
18
|
+
alias body content
|
|
19
|
+
|
|
20
|
+
# Returns an array of strings of substitutable fields in the license body
|
|
21
|
+
def fields
|
|
22
|
+
@fields ||= LicenseField.from_content(content)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Returns a string with `[fields]` replaced by `{{{fields}}}`
|
|
26
|
+
# Does not mangle non-supported fields in the form of `[field]`
|
|
27
|
+
def content_for_mustache
|
|
28
|
+
@content_for_mustache ||= content.gsub(LicenseField::FIELD_REGEX, '{{{\1}}}')
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
private
|
|
32
|
+
|
|
33
|
+
# Raw content of license file, including YAML front matter
|
|
34
|
+
def raw_content
|
|
35
|
+
return if pseudo_license?
|
|
36
|
+
raise Licensee::InvalidLicense, "'#{key}' is not a valid license key" unless File.exist?(path)
|
|
37
|
+
|
|
38
|
+
@raw_content ||= File.read(path, encoding: 'utf-8')
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def parts
|
|
42
|
+
return unless raw_content
|
|
43
|
+
|
|
44
|
+
@parts ||= raw_content.match(/\A(---\n.*\n---\n+)?(.*)/m).to_a
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def yaml
|
|
48
|
+
@yaml ||= parts[1] if parts
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|