licensee 9.18.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +1 -1
  3. data/bin/licensee +2 -0
  4. data/lib/licensee/commands/detect.rb +9 -89
  5. data/lib/licensee/commands/detect_helpers.rb +125 -0
  6. data/lib/licensee/commands/diff.rb +64 -35
  7. data/lib/licensee/commands/license_path.rb +1 -0
  8. data/lib/licensee/commands/version.rb +1 -0
  9. data/lib/licensee/content_helper/constants.rb +111 -0
  10. data/lib/licensee/content_helper/normalization_methods.rb +149 -0
  11. data/lib/licensee/content_helper/similarity_methods.rb +63 -0
  12. data/lib/licensee/content_helper.rb +42 -277
  13. data/lib/licensee/hash_helper.rb +9 -7
  14. data/lib/licensee/license/class_methods.rb +67 -0
  15. data/lib/licensee/license/content_methods.rb +52 -0
  16. data/lib/licensee/license/identity_methods.rb +117 -0
  17. data/lib/licensee/license.rb +31 -208
  18. data/lib/licensee/license_field.rb +9 -6
  19. data/lib/licensee/license_meta.rb +4 -1
  20. data/lib/licensee/license_rules.rb +5 -1
  21. data/lib/licensee/matchers/cabal.rb +6 -2
  22. data/lib/licensee/matchers/cargo.rb +1 -0
  23. data/lib/licensee/matchers/copyright.rb +3 -1
  24. data/lib/licensee/matchers/cran.rb +2 -1
  25. data/lib/licensee/matchers/dice.rb +13 -2
  26. data/lib/licensee/matchers/dist_zilla.rb +1 -0
  27. data/lib/licensee/matchers/exact.rb +2 -0
  28. data/lib/licensee/matchers/gemspec.rb +1 -8
  29. data/lib/licensee/matchers/matcher.rb +5 -3
  30. data/lib/licensee/matchers/npm_bower.rb +1 -0
  31. data/lib/licensee/matchers/nuget.rb +1 -0
  32. data/lib/licensee/matchers/package.rb +21 -5
  33. data/lib/licensee/matchers/spdx.rb +1 -0
  34. data/lib/licensee/matchers.rb +1 -0
  35. data/lib/licensee/project_files/license_file.rb +28 -3
  36. data/lib/licensee/project_files/package_manager_file.rb +1 -0
  37. data/lib/licensee/project_files/project_file.rb +8 -5
  38. data/lib/licensee/project_files/readme_file.rb +1 -0
  39. data/lib/licensee/project_files.rb +1 -0
  40. data/lib/licensee/projects/fs_project.rb +2 -0
  41. data/lib/licensee/projects/git_project.rb +30 -4
  42. data/lib/licensee/projects/github_project.rb +25 -5
  43. data/lib/licensee/projects/project.rb +31 -34
  44. data/lib/licensee/projects.rb +1 -0
  45. data/lib/licensee/rule.rb +2 -0
  46. data/lib/licensee/version.rb +1 -1
  47. data/lib/licensee.rb +23 -2
  48. data/spec/bin_spec.rb +8 -8
  49. data/spec/fixture_spec.rb +18 -19
  50. data/spec/fixtures/bsd-3-linebreak-owner/LICENSE +30 -0
  51. data/spec/fixtures/bsd-3-multilinecopyright/LICENSE +27 -0
  52. data/spec/fixtures/detect.json +3 -3
  53. data/spec/fixtures/fixtures.yml +35 -11
  54. data/spec/fixtures/license-hashes.json +4 -4
  55. data/spec/fixtures/licenses-dir/LICENSES/MIT.txt +21 -0
  56. data/spec/fixtures/licenses-dir-with-license-ref/LICENSES/LicenseRef-MIT.txt +21 -0
  57. data/spec/fixtures/licenses-dir-with-multiple-license-files/LICENSES/MIT.txt +21 -0
  58. data/spec/fixtures/licenses-dir-with-multiple-license-files/LICENSES/MPL-2.0.txt +362 -0
  59. data/spec/fixtures/licenses-dir-with-top-level-license/LICENSE.md +195 -0
  60. data/spec/fixtures/licenses-dir-with-top-level-license/LICENSES/MIT.txt +21 -0
  61. data/spec/integration_spec.rb +247 -274
  62. data/spec/licensee/commands/detect_spec.rb +94 -21
  63. data/spec/licensee/commands/license_path_spec.rb +13 -9
  64. data/spec/licensee/commands/version_spec.rb +12 -8
  65. data/spec/licensee/content_helper_spec.rb +159 -111
  66. data/spec/licensee/hash_helper_spec.rb +9 -10
  67. data/spec/licensee/license_field_spec.rb +17 -22
  68. data/spec/licensee/license_meta_spec.rb +29 -37
  69. data/spec/licensee/license_rules_spec.rb +19 -19
  70. data/spec/licensee/license_spec.rb +219 -264
  71. data/spec/licensee/licensee_filesystem_spec.rb +40 -0
  72. data/spec/licensee/matchers/cabal_matcher_spec.rb +67 -31
  73. data/spec/licensee/matchers/cargo_matcher_spec.rb +7 -7
  74. data/spec/licensee/matchers/copyright_matcher_spec.rb +21 -10
  75. data/spec/licensee/matchers/cran_matcher_spec.rb +6 -6
  76. data/spec/licensee/matchers/dice_matcher_spec.rb +47 -33
  77. data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +7 -7
  78. data/spec/licensee/matchers/exact_matcher_spec.rb +4 -4
  79. data/spec/licensee/matchers/gemspec_matcher_spec.rb +10 -10
  80. data/spec/licensee/matchers/matcher_spec.rb +14 -4
  81. data/spec/licensee/matchers/npm_bower_matcher_spec.rb +20 -12
  82. data/spec/licensee/matchers/nu_get_matcher_spec.rb +12 -12
  83. data/spec/licensee/matchers/package_matcher_spec.rb +40 -12
  84. data/spec/licensee/matchers/reference_matcher_spec.rb +17 -13
  85. data/spec/licensee/matchers/spdx_matcher_spec.rb +9 -9
  86. data/spec/licensee/project_files/license_file_spec.rb +136 -72
  87. data/spec/licensee/project_files/package_manager_file_spec.rb +3 -3
  88. data/spec/licensee/project_files/project_file_spec.rb +29 -23
  89. data/spec/licensee/project_files/readme_file_spec.rb +13 -13
  90. data/spec/licensee/project_spec.rb +168 -123
  91. data/spec/licensee/projects/git_hub_project_spec.rb +268 -26
  92. data/spec/licensee/projects/git_project_spec.rb +23 -1
  93. data/spec/licensee/projects/project_spec.rb +15 -0
  94. data/spec/licensee/rule_spec.rb +19 -22
  95. data/spec/licensee_spec.rb +23 -11
  96. data/spec/spec_helper.rb +3 -1
  97. data/spec/vendored_license_spec.rb +37 -60
  98. data/vendor/choosealicense.com/_licenses/blueoak-1.0.0.txt +1 -1
  99. data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +1 -1
  100. data/vendor/choosealicense.com/_licenses/cern-ohl-p-2.0.txt +1 -1
  101. data/vendor/choosealicense.com/_licenses/cern-ohl-s-2.0.txt +1 -1
  102. data/vendor/choosealicense.com/_licenses/cern-ohl-w-2.0.txt +2 -2
  103. data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +1 -1
  104. data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -1
  105. data/vendor/choosealicense.com/_licenses/mit-0.txt +1 -1
  106. data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -1
  107. data/vendor/choosealicense.com/_licenses/zlib.txt +1 -1
  108. data/vendor/license-list-XML/src/0BSD.xml +1 -1
  109. data/vendor/license-list-XML/src/AFL-3.0.xml +1 -1
  110. data/vendor/license-list-XML/src/AGPL-3.0.xml +1 -1
  111. data/vendor/license-list-XML/src/Apache-2.0.xml +1 -1
  112. data/vendor/license-list-XML/src/Artistic-2.0.xml +1 -1
  113. data/vendor/license-list-XML/src/BSD-2-Clause-Patent.xml +1 -1
  114. data/vendor/license-list-XML/src/BSD-2-Clause.xml +1 -1
  115. data/vendor/license-list-XML/src/BSD-3-Clause.xml +3 -3
  116. data/vendor/license-list-XML/src/BSD-4-Clause.xml +3 -2
  117. data/vendor/license-list-XML/src/BSL-1.0.xml +1 -1
  118. data/vendor/license-list-XML/src/ECL-2.0.xml +1 -1
  119. data/vendor/license-list-XML/src/EPL-1.0.xml +1 -1
  120. data/vendor/license-list-XML/src/EPL-2.0.xml +3 -1
  121. data/vendor/license-list-XML/src/EUPL-1.1.xml +1 -1
  122. data/vendor/license-list-XML/src/EUPL-1.2.xml +1 -1
  123. data/vendor/license-list-XML/src/GPL-2.0.xml +11 -6
  124. data/vendor/license-list-XML/src/GPL-3.0.xml +1 -1
  125. data/vendor/license-list-XML/src/ISC.xml +1 -1
  126. data/vendor/license-list-XML/src/LGPL-2.1.xml +6 -3
  127. data/vendor/license-list-XML/src/LGPL-3.0.xml +1 -1
  128. data/vendor/license-list-XML/src/LPPL-1.3c.xml +2 -2
  129. data/vendor/license-list-XML/src/MIT.xml +32 -14
  130. data/vendor/license-list-XML/src/MPL-2.0.xml +3 -3
  131. data/vendor/license-list-XML/src/MS-PL.xml +1 -1
  132. data/vendor/license-list-XML/src/MS-RL.xml +1 -1
  133. data/vendor/license-list-XML/src/NCSA.xml +1 -1
  134. data/vendor/license-list-XML/src/OFL-1.1.xml +1 -1
  135. data/vendor/license-list-XML/src/OSL-3.0.xml +1 -1
  136. data/vendor/license-list-XML/src/PostgreSQL.xml +1 -1
  137. data/vendor/license-list-XML/src/UPL-1.0.xml +1 -1
  138. data/vendor/license-list-XML/src/Zlib.xml +1 -1
  139. metadata +48 -30
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Licensee
4
+ module ContentHelper
5
+ # Mixin providing wordset-based similarity scoring.
6
+ module SimilarityMethods
7
+ # Given another license or project file, calculates the similarity
8
+ # as a percentage of words in common, minus a tiny penalty that
9
+ # increases with size difference between licenses so that false
10
+ # positives for long licenses are ruled out by this score alone.
11
+ def similarity(other)
12
+ overlap = (wordset_fieldless & other.wordset).size
13
+ (overlap * 200.0) / similarity_denominator(other)
14
+ end
15
+
16
+ # Given another license or project file, calculates the Dice coefficient
17
+ # over bigrams (consecutive word pairs). Unlike wordset similarity this
18
+ # is sensitive to word order, making it resistant to adversarial scrambling
19
+ # where all the correct words appear but in the wrong sequence.
20
+ def bigram_similarity(other)
21
+ my_bigrams = bigrams
22
+ other_bigrams = other.bigrams
23
+ total = my_bigrams.size + other_bigrams.size
24
+ return 0.0 if total.zero?
25
+
26
+ overlap = (my_bigrams & other_bigrams).size
27
+ (overlap * 200.0) / total
28
+ end
29
+
30
+ private
31
+
32
+ def wordset_fieldless
33
+ @wordset_fieldless ||= wordset - fields_normalized_set
34
+ end
35
+
36
+ def similarity_denominator(other)
37
+ total = wordset_fieldless.size + other.wordset.size - fields_normalized_set.size
38
+ total + (variation_adjusted_length_delta(other) / 4)
39
+ end
40
+
41
+ # Returns an array of strings of substitutable fields in normalized content
42
+ def fields_normalized
43
+ @fields_normalized ||= content_normalized.scan(LicenseField::FIELD_REGEX).flatten
44
+ end
45
+
46
+ def fields_normalized_set
47
+ @fields_normalized_set ||= fields_normalized.to_set
48
+ end
49
+
50
+ def variation_adjusted_length_delta(other)
51
+ delta = length_delta(other)
52
+
53
+ # The content helper mixin is used in different objects
54
+ # Licenses have a more advanced SPDX alt. segement-based delta.
55
+ # Use that if it's present, otherwise, just return the simple delta.
56
+ return delta unless respond_to?(:spdx_alt_segments, true)
57
+
58
+ adjusted_delta = delta - ([fields_normalized.size, spdx_alt_segments].max * 5)
59
+ adjusted_delta.positive? ? adjusted_delta : 0
60
+ end
61
+ end
62
+ end
63
+ end
@@ -1,112 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'set'
4
3
  require 'digest'
4
+ require_relative 'content_helper/constants'
5
+ require_relative 'content_helper/normalization_methods'
6
+ require_relative 'content_helper/similarity_methods'
5
7
 
6
8
  module Licensee
9
+ # Text normalization, hashing, wrapping, and similarity helpers for license content.
7
10
  module ContentHelper
8
- DIGEST = Digest::SHA1
9
- START_REGEX = /\A\s*/
10
- END_OF_TERMS_REGEX = /^[\s#*_]*end of (the )?terms and conditions[\s#*_]*$/i
11
- REGEXES = {
12
- bom: /#{START_REGEX}\xEF\xBB\xBF/,
13
- hrs: /^\s*[=\-*]{3,}\s*$/,
14
- all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
15
- whitespace: /\s+/,
16
- markdown_headings: /^\s*#+/,
17
- version: /#{START_REGEX}version.*$/i,
18
- span_markup: /[_*~]+(.*?)[_*~]+/,
19
- link_markup: /\[(.+?)\]\(.+?\)/,
20
- block_markup: /^\s*>/,
21
- border_markup: /^[*-](.*?)[*-]$/,
22
- comment_markup: %r{^\s*?[/*]{1,2}},
23
- url: %r{#{START_REGEX}https?://[^ ]+\n},
24
- bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[).])\s+/i,
25
- developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
26
- cc_dedication: /The\s+text\s+of\s+the\s+Creative\s+Commons.*?Public\s+Domain\s+Dedication./im,
27
- cc_wiki: /wiki.creativecommons.org/i,
28
- cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
29
- cc0_info: /For more information, please see\s*\S+zero\S+/im,
30
- cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
31
- unlicense_info: /For more information, please.*\S+unlicense\S+/im,
32
- mit_optional: /\(including the next paragraph\)/i
33
- }.freeze
34
- NORMALIZATIONS = {
35
- lists: { from: /^\s*(?:\d\.|[*-])(?: [*_]{0,2}\(?[\da-z]\)[*_]{0,2})?\s+([^\n])/, to: '- \1' },
36
- https: { from: /http:/, to: 'https:' },
37
- ampersands: { from: '&', to: 'and' },
38
- dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
39
- quote: { from: /[`'"‘“’”]/, to: "'" },
40
- hyphenated: { from: /(\w+)-\s*\n\s*(\w+)/, to: '\1-\2' }
41
- }.freeze
42
-
43
- # Legally equivalent words that schould be ignored for comparison
44
- # See https://spdx.org/spdx-license-list/matching-guidelines
45
- VARIETAL_WORDS = {
46
- 'acknowledgment' => 'acknowledgement',
47
- 'analogue' => 'analog',
48
- 'analyse' => 'analyze',
49
- 'artefact' => 'artifact',
50
- 'authorisation' => 'authorization',
51
- 'authorised' => 'authorized',
52
- 'calibre' => 'caliber',
53
- 'cancelled' => 'canceled',
54
- 'capitalisations' => 'capitalizations',
55
- 'catalogue' => 'catalog',
56
- 'categorise' => 'categorize',
57
- 'centre' => 'center',
58
- 'emphasised' => 'emphasized',
59
- 'favour' => 'favor',
60
- 'favourite' => 'favorite',
61
- 'fulfil' => 'fulfill',
62
- 'fulfilment' => 'fulfillment',
63
- 'initialise' => 'initialize',
64
- 'judgment' => 'judgement',
65
- 'labelling' => 'labeling',
66
- 'labour' => 'labor',
67
- 'licence' => 'license',
68
- 'maximise' => 'maximize',
69
- 'modelled' => 'modeled',
70
- 'modelling' => 'modeling',
71
- 'offence' => 'offense',
72
- 'optimise' => 'optimize',
73
- 'organisation' => 'organization',
74
- 'organise' => 'organize',
75
- 'practise' => 'practice',
76
- 'programme' => 'program',
77
- 'realise' => 'realize',
78
- 'recognise' => 'recognize',
79
- 'signalling' => 'signaling',
80
- 'sub-license' => 'sublicense',
81
- 'sub license' => 'sublicense',
82
- 'utilisation' => 'utilization',
83
- 'whilst' => 'while',
84
- 'wilful' => 'wilfull',
85
- 'non-commercial' => 'noncommercial',
86
- 'per cent' => 'percent',
87
- 'copyright owner' => 'copyright holder'
88
- }.freeze
89
- STRIP_METHODS = %i[
90
- bom
91
- cc_optional
92
- cc0_optional
93
- unlicense_optional
94
- borders
95
- title
96
- version
97
- url
98
- copyright
99
- title
100
- block_markup
101
- developed_by
102
- end_of_terms
103
- whitespace
104
- mit_optional
105
- ].freeze
11
+ include Constants
12
+ include NormalizationMethods
13
+ include SimilarityMethods
106
14
 
107
15
  # A set of each word in the license, without duplicates
108
16
  def wordset
109
- @wordset ||= content_normalized&.scan(%r{(?:[\w/-](?:'s|(?<=s)')?)+})&.to_set
17
+ @wordset ||= words&.to_set
18
+ end
19
+
20
+ # A set of consecutive word pairs (bigrams) in the license, without duplicates.
21
+ # Unlike wordset, bigrams are order-sensitive, making similarity scores
22
+ # robust against adversarial word scrambling (see GitHub issue #602).
23
+ def bigrams
24
+ @bigrams ||= if words.nil? || words.length < 2
25
+ Set.new
26
+ else
27
+ words.each_cons(2).to_set { |a, b| "#{a} #{b}" }
28
+ end
110
29
  end
111
30
 
112
31
  # Number of characters in the normalized content
@@ -121,52 +40,11 @@ module Licensee
121
40
  (length - other.length).abs
122
41
  end
123
42
 
124
- # Given another license or project file, calculates the similarity
125
- # as a percentage of words in common, minus a tiny penalty that
126
- # increases with size difference between licenses so that false
127
- # positives for long licnses are ruled out by this score alone.
128
- def similarity(other)
129
- overlap = (wordset_fieldless & other.wordset).size
130
- total = wordset_fieldless.size + other.wordset.size -
131
- fields_normalized_set.size
132
- (overlap * 200.0) / (total + (variation_adjusted_length_delta(other) / 4))
133
- end
134
-
135
43
  # SHA1 of the normalized content
136
44
  def content_hash
137
45
  @content_hash ||= DIGEST.hexdigest content_normalized
138
46
  end
139
47
 
140
- # Content with the title and version removed
141
- # The first time should normally be the attribution line
142
- # Used to dry up `content_normalized` but we need the case sensitive
143
- # content with attribution first to detect attribuion in LicenseFile
144
- def content_without_title_and_version
145
- @content_without_title_and_version ||= begin
146
- @_content = nil
147
- ops = %i[html hrs comments markdown_headings link_markup title version]
148
- ops.each { |op| strip(op) }
149
- _content
150
- end
151
- end
152
-
153
- def content_normalized(wrap: nil)
154
- @content_normalized ||= begin
155
- @_content = content_without_title_and_version.downcase
156
-
157
- (NORMALIZATIONS.keys + %i[spelling span_markup bullets]).each { |op| normalize(op) }
158
- STRIP_METHODS.each { |op| strip(op) }
159
-
160
- _content
161
- end
162
-
163
- if wrap.nil?
164
- @content_normalized
165
- else
166
- Licensee::ContentHelper.wrap(@content_normalized, wrap)
167
- end
168
- end
169
-
170
48
  # Backwards compatibalize constants to avoid a breaking change
171
49
  def self.const_missing(const)
172
50
  key = const.to_s.downcase.gsub('_regex', '').to_sym
@@ -177,19 +55,26 @@ module Licensee
177
55
  def self.wrap(text, line_width = 80)
178
56
  return if text.nil?
179
57
 
58
+ text = normalize_for_wrapping(text)
59
+ wrapped = wrap_lines(text, line_width)
60
+ wrapped.strip
61
+ end
62
+
63
+ def self.normalize_for_wrapping(text)
180
64
  text = text.clone
181
65
  text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
182
- text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
66
+ text.gsub!(/([^\n])\n([^\n])/, '\\1 \\2')
67
+ text
68
+ end
183
69
 
184
- text = text.split("\n").collect do |line|
185
- if line =~ REGEXES[:hrs] || line.length <= line_width
186
- line
187
- else
188
- line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
189
- end
190
- end * "\n"
70
+ def self.wrap_lines(text, line_width)
71
+ text.split("\n").map { |line| wrap_line(line, line_width) }.join("\n")
72
+ end
73
+
74
+ def self.wrap_line(line, line_width)
75
+ return line if line =~ REGEXES[:hrs] || line.length <= line_width
191
76
 
192
- text.strip
77
+ line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
193
78
  end
194
79
 
195
80
  def self.format_percent(float)
@@ -198,7 +83,7 @@ module Licensee
198
83
 
199
84
  def self.title_regex
200
85
  @title_regex ||= begin
201
- licenses = Licensee::License.all(hidden: true, psuedo: false)
86
+ licenses = Licensee::License.all(hidden: true, pseudo: false)
202
87
  titles = licenses.map(&:title_regex)
203
88
 
204
89
  # Title regex must include the version to support matching within
@@ -216,134 +101,14 @@ module Licensee
216
101
 
217
102
  private
218
103
 
219
- def _content
220
- @_content ||= content.to_s.dup.strip
104
+ # Ordered array of words extracted from the normalized content.
105
+ # Memoized so that both wordset and bigrams share the same scan result.
106
+ def words
107
+ @words ||= content_normalized&.scan(%r{(?:[\w/-](?:'s|(?<=s)')?)+})
221
108
  end
222
109
 
223
- def strip(regex_or_sym)
224
- return unless _content
225
-
226
- if regex_or_sym.is_a?(Symbol)
227
- meth = "strip_#{regex_or_sym}"
228
- return send(meth) if respond_to?(meth, true)
229
-
230
- raise ArgumentError, "#{regex_or_sym} is an invalid regex reference" unless REGEXES[regex_or_sym]
231
-
232
- regex_or_sym = REGEXES[regex_or_sym]
233
- end
234
-
235
- @_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
236
- end
237
-
238
- def strip_title
239
- strip(ContentHelper.title_regex) while _content =~ ContentHelper.title_regex
240
- end
241
-
242
- def strip_borders
243
- normalize(REGEXES[:border_markup], '\1')
244
- end
245
-
246
- def strip_comments
247
- lines = _content.split("\n")
248
- return if lines.count == 1
249
- return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
250
-
251
- strip(:comment_markup)
252
- end
253
-
254
- def strip_copyright
255
- regex = Regexp.union(Matchers::Copyright::REGEX, REGEXES[:all_rights_reserved])
256
- strip(regex) while _content =~ regex
257
- end
258
-
259
- def strip_cc0_optional
260
- return unless _content.include? 'associating cc0'
261
-
262
- strip(REGEXES[:cc_legal_code])
263
- strip(REGEXES[:cc0_info])
264
- strip(REGEXES[:cc0_disclaimer])
265
- end
266
-
267
- def strip_cc_optional
268
- return unless _content.include? 'creative commons'
269
-
270
- strip(REGEXES[:cc_dedication])
271
- strip(REGEXES[:cc_wiki])
272
- end
273
-
274
- def strip_unlicense_optional
275
- return unless _content.include? 'unlicense'
276
-
277
- strip(REGEXES[:unlicense_info])
278
- end
279
-
280
- def strip_end_of_terms
281
- body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
282
- @_content = body
283
- end
284
-
285
- def normalize_span_markup
286
- normalize(REGEXES[:span_markup], '\1')
287
- end
288
-
289
- def strip_link_markup
290
- normalize(REGEXES[:link_markup], '\1')
291
- end
292
-
293
- def strip_html
294
- return unless respond_to?(:filename) && filename
295
- return unless /\.html?/i.match?(File.extname(filename))
296
-
297
- require 'reverse_markdown'
298
- @_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
299
- end
300
-
301
- def normalize(from_or_key, to = nil)
302
- operation = { from: from_or_key, to: to } if to
303
- operation ||= NORMALIZATIONS[from_or_key]
304
-
305
- if operation
306
- @_content = _content.gsub operation[:from], operation[:to]
307
- elsif respond_to?(:"normalize_#{from_or_key}", true)
308
- send(:"normalize_#{from_or_key}")
309
- else
310
- raise ArgumentError, "#{from_or_key} is an invalid normalization"
311
- end
312
- end
313
-
314
- def normalize_spelling
315
- normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
316
- end
317
-
318
- def normalize_bullets
319
- normalize(REGEXES[:bullet], "\n\n- ")
320
- normalize(/\)\s+\(/, ')(')
321
- end
322
-
323
- def wordset_fieldless
324
- @wordset_fieldless ||= wordset - fields_normalized_set
325
- end
326
-
327
- # Returns an array of strings of substitutable fields in normalized content
328
- def fields_normalized
329
- @fields_normalized ||=
330
- content_normalized.scan(LicenseField::FIELD_REGEX).flatten
331
- end
332
-
333
- def fields_normalized_set
334
- @fields_normalized_set ||= fields_normalized.to_set
335
- end
336
-
337
- def variation_adjusted_length_delta(other)
338
- delta = length_delta(other)
339
-
340
- # The content helper mixin is used in different objects
341
- # Licenses have a more advanced SPDX alt. segement-based delta.
342
- # Use that if it's present, otherwise, just return the simple delta.
343
- return delta unless respond_to?(:spdx_alt_segments, true)
344
-
345
- adjusted_delta = delta - ([fields_normalized.size, spdx_alt_segments].max * 5)
346
- adjusted_delta.positive? ? adjusted_delta : 0
110
+ def _content
111
+ @_content ||= content.to_s.dup.strip
347
112
  end
348
113
  end
349
114
  end
@@ -1,22 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Licensee
4
+ # Mixin that provides a `to_h` based on a class's `HASH_METHODS`.
4
5
  module HashHelper
5
6
  def to_h
6
7
  hash = {}
7
8
  self.class::HASH_METHODS.each do |method|
8
9
  key = method.to_s.delete('?').to_sym
9
10
  value = public_send(method)
10
- hash[key] = if value.is_a?(Array)
11
- value.map { |v| v.respond_to?(:to_h) ? v.to_h : v }
12
- elsif value.respond_to?(:to_h) && !value.nil?
13
- value.to_h
14
- else
15
- value
16
- end
11
+ hash[key] = serialize_hash_value(value)
17
12
  end
18
13
 
19
14
  hash
20
15
  end
16
+
17
+ def serialize_hash_value(value)
18
+ return value.map { |v| v.respond_to?(:to_h) ? v.to_h : v } if value.is_a?(Array)
19
+ return value.to_h if value.respond_to?(:to_h) && !value.nil?
20
+
21
+ value
22
+ end
21
23
  end
22
24
  end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Licensee
4
+ class License
5
+ # Class-level lookup and caching for licenses.
6
+ module ClassMethods
7
+ # All license objects defined via Licensee (via choosealicense.com)
8
+ #
9
+ # Options:
10
+ # - :hidden - boolean, return hidden licenses (default: false)
11
+ # - :featured - boolean, return only (non)featured licenses (default: all)
12
+ #
13
+ # Returns an Array of License objects.
14
+ def all(options = {})
15
+ @all[options] ||= begin
16
+ normalized_options = LicenseAllHelper.normalize_all_options(options, DEFAULT_OPTIONS)
17
+ output = licenses.dup
18
+ LicenseAllHelper.apply_all_filters!(output, normalized_options)
19
+ output.sort_by!(&:key)
20
+ LicenseAllHelper.filter_featured(output, normalized_options[:featured])
21
+ end
22
+ end
23
+
24
+ def keys
25
+ @keys ||= license_files.map do |license_file|
26
+ ::File.basename(license_file, '.txt').downcase
27
+ end + PSEUDO_LICENSES
28
+ end
29
+
30
+ def find(key, options = {})
31
+ options = { hidden: true }.merge(options)
32
+ keys_licenses(options)[key.downcase]
33
+ end
34
+ alias [] find
35
+ alias find_by_key find
36
+
37
+ # Given a license title or nickname, fuzzy match the license
38
+ def find_by_title(title)
39
+ License.all(hidden: true, pseudo: false).find do |license|
40
+ title =~ /\A(the )?#{license.title_regex}( license)?\z/i
41
+ end
42
+ end
43
+
44
+ def license_dir
45
+ ::File.expand_path '../../../vendor/choosealicense.com/_licenses', __dir__
46
+ end
47
+
48
+ def license_files
49
+ @license_files ||= Dir.glob("#{license_dir}/*.txt")
50
+ end
51
+
52
+ def spdx_dir
53
+ ::File.expand_path '../../../vendor/license-list-XML/src', __dir__
54
+ end
55
+
56
+ private
57
+
58
+ def licenses
59
+ @licenses ||= keys.map { |key| new(key) }
60
+ end
61
+
62
+ def keys_licenses(options = {})
63
+ @keys_licenses[options] ||= all(options).to_h { |l| [l.key, l] }
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Licensee
4
+ class License
5
+ # Instance methods for loading and working with license content.
6
+ module ContentMethods
7
+ # Path to vendored license file on disk
8
+ def path
9
+ @path ||= File.expand_path "#{@key}.txt", Licensee::License.license_dir
10
+ end
11
+
12
+ # The license body (e.g., contents - frontmatter)
13
+ def content
14
+ @content ||= parts[2] if parts && parts[2]
15
+ end
16
+ alias to_s content
17
+ alias text content
18
+ alias body content
19
+
20
+ # Returns an array of strings of substitutable fields in the license body
21
+ def fields
22
+ @fields ||= LicenseField.from_content(content)
23
+ end
24
+
25
+ # Returns a string with `[fields]` replaced by `{{{fields}}}`
26
+ # Does not mangle non-supported fields in the form of `[field]`
27
+ def content_for_mustache
28
+ @content_for_mustache ||= content.gsub(LicenseField::FIELD_REGEX, '{{{\1}}}')
29
+ end
30
+
31
+ private
32
+
33
+ # Raw content of license file, including YAML front matter
34
+ def raw_content
35
+ return if pseudo_license?
36
+ raise Licensee::InvalidLicense, "'#{key}' is not a valid license key" unless File.exist?(path)
37
+
38
+ @raw_content ||= File.read(path, encoding: 'utf-8')
39
+ end
40
+
41
+ def parts
42
+ return unless raw_content
43
+
44
+ @parts ||= raw_content.match(/\A(---\n.*\n---\n+)?(.*)/m).to_a
45
+ end
46
+
47
+ def yaml
48
+ @yaml ||= parts[1] if parts
49
+ end
50
+ end
51
+ end
52
+ end