licensee 9.9.3 → 9.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. checksums.yaml +4 -4
  2. data/bin/licensee +1 -0
  3. data/lib/licensee.rb +4 -2
  4. data/lib/licensee/commands/detect.rb +9 -4
  5. data/lib/licensee/commands/diff.rb +7 -8
  6. data/lib/licensee/commands/license_path.rb +2 -0
  7. data/lib/licensee/commands/version.rb +2 -0
  8. data/lib/licensee/content_helper.rb +188 -83
  9. data/lib/licensee/hash_helper.rb +2 -0
  10. data/lib/licensee/license.rb +18 -7
  11. data/lib/licensee/license_field.rb +8 -1
  12. data/lib/licensee/license_meta.rb +3 -0
  13. data/lib/licensee/license_rules.rb +2 -0
  14. data/lib/licensee/matchers.rb +2 -0
  15. data/lib/licensee/matchers/cabal.rb +16 -2
  16. data/lib/licensee/matchers/cargo.rb +3 -1
  17. data/lib/licensee/matchers/copyright.rb +4 -2
  18. data/lib/licensee/matchers/cran.rb +7 -3
  19. data/lib/licensee/matchers/dice.rb +10 -2
  20. data/lib/licensee/matchers/dist_zilla.rb +3 -1
  21. data/lib/licensee/matchers/exact.rb +3 -0
  22. data/lib/licensee/matchers/gemspec.rb +8 -5
  23. data/lib/licensee/matchers/matcher.rb +3 -1
  24. data/lib/licensee/matchers/npm_bower.rb +3 -1
  25. data/lib/licensee/matchers/package.rb +3 -0
  26. data/lib/licensee/matchers/reference.rb +3 -1
  27. data/lib/licensee/matchers/spdx.rb +3 -1
  28. data/lib/licensee/project_files.rb +2 -0
  29. data/lib/licensee/project_files/license_file.rb +13 -10
  30. data/lib/licensee/project_files/package_manager_file.rb +3 -0
  31. data/lib/licensee/project_files/project_file.rb +12 -4
  32. data/lib/licensee/project_files/readme_file.rb +7 -5
  33. data/lib/licensee/projects.rb +2 -0
  34. data/lib/licensee/projects/fs_project.rb +3 -0
  35. data/lib/licensee/projects/git_project.rb +16 -8
  36. data/lib/licensee/projects/github_project.rb +29 -9
  37. data/lib/licensee/projects/project.rb +13 -2
  38. data/lib/licensee/rule.rb +2 -0
  39. data/lib/licensee/version.rb +3 -1
  40. data/spec/bin_spec.rb +2 -0
  41. data/spec/fixture_spec.rb +46 -0
  42. data/spec/fixtures/detect.json +8 -6
  43. data/spec/fixtures/fixtures.yml +110 -0
  44. data/spec/fixtures/html/license.html +262 -0
  45. data/spec/fixtures/license-hashes.json +39 -0
  46. data/spec/fixtures/mit-optional/LICENSE.txt +21 -0
  47. data/spec/integration_spec.rb +20 -0
  48. data/spec/licensee/commands/detect_spec.rb +6 -2
  49. data/spec/licensee/commands/license_path_spec.rb +2 -0
  50. data/spec/licensee/commands/version_spec.rb +2 -0
  51. data/spec/licensee/content_helper_spec.rb +152 -36
  52. data/spec/licensee/hash_helper_spec.rb +2 -0
  53. data/spec/licensee/license_field_spec.rb +7 -0
  54. data/spec/licensee/license_meta_spec.rb +2 -0
  55. data/spec/licensee/license_rules_spec.rb +2 -0
  56. data/spec/licensee/license_spec.rb +36 -11
  57. data/spec/licensee/matchers/cabal_matcher_spec.rb +93 -0
  58. data/spec/licensee/matchers/cargo_matcher_spec.rb +2 -0
  59. data/spec/licensee/matchers/copyright_matcher_spec.rb +4 -2
  60. data/spec/licensee/matchers/cran_matcher_spec.rb +2 -0
  61. data/spec/licensee/matchers/dice_matcher_spec.rb +4 -2
  62. data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +2 -0
  63. data/spec/licensee/matchers/exact_matcher_spec.rb +2 -0
  64. data/spec/licensee/matchers/gemspec_matcher_spec.rb +2 -0
  65. data/spec/licensee/matchers/matcher_spec.rb +2 -0
  66. data/spec/licensee/matchers/npm_bower_matcher_spec.rb +2 -0
  67. data/spec/licensee/matchers/package_matcher_spec.rb +2 -0
  68. data/spec/licensee/matchers/reference_matcher_spec.rb +2 -0
  69. data/spec/licensee/matchers/spdx_matcher_spec.rb +2 -0
  70. data/spec/licensee/project_files/license_file_spec.rb +4 -2
  71. data/spec/licensee/project_files/package_info_spec.rb +2 -0
  72. data/spec/licensee/project_files/project_file_spec.rb +3 -0
  73. data/spec/licensee/project_files/readme_file_spec.rb +11 -0
  74. data/spec/licensee/project_spec.rb +23 -3
  75. data/spec/licensee/projects/git_project_spec.rb +23 -0
  76. data/spec/licensee/projects/github_project_spec.rb +2 -0
  77. data/spec/licensee/rule_spec.rb +2 -0
  78. data/spec/licensee_spec.rb +3 -1
  79. data/spec/spec_helper.rb +29 -9
  80. data/spec/vendored_license_spec.rb +27 -8
  81. data/vendor/choosealicense.com/_data/meta.yml +0 -4
  82. data/vendor/choosealicense.com/_licenses/0bsd.txt +39 -0
  83. data/vendor/choosealicense.com/_licenses/afl-3.0.txt +7 -6
  84. data/vendor/choosealicense.com/_licenses/agpl-3.0.txt +0 -1
  85. data/vendor/choosealicense.com/_licenses/apache-2.0.txt +0 -1
  86. data/vendor/choosealicense.com/_licenses/artistic-2.0.txt +0 -1
  87. data/vendor/choosealicense.com/_licenses/bsd-2-clause.txt +8 -6
  88. data/vendor/choosealicense.com/_licenses/bsd-3-clause-clear.txt +1 -2
  89. data/vendor/choosealicense.com/_licenses/bsd-3-clause.txt +12 -10
  90. data/vendor/choosealicense.com/_licenses/bsl-1.0.txt +0 -1
  91. data/vendor/choosealicense.com/_licenses/cc-by-4.0.txt +0 -1
  92. data/vendor/choosealicense.com/_licenses/cc-by-sa-4.0.txt +0 -1
  93. data/vendor/choosealicense.com/_licenses/cc0-1.0.txt +0 -1
  94. data/vendor/choosealicense.com/_licenses/cecill-2.1.txt +579 -0
  95. data/vendor/choosealicense.com/_licenses/ecl-2.0.txt +0 -1
  96. data/vendor/choosealicense.com/_licenses/epl-1.0.txt +1 -2
  97. data/vendor/choosealicense.com/_licenses/epl-2.0.txt +1 -2
  98. data/vendor/choosealicense.com/_licenses/eupl-1.1.txt +0 -1
  99. data/vendor/choosealicense.com/_licenses/eupl-1.2.txt +0 -1
  100. data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +0 -1
  101. data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -2
  102. data/vendor/choosealicense.com/_licenses/isc.txt +0 -1
  103. data/vendor/choosealicense.com/_licenses/lgpl-2.1.txt +0 -1
  104. data/vendor/choosealicense.com/_licenses/lgpl-3.0.txt +1 -2
  105. data/vendor/choosealicense.com/_licenses/lppl-1.3c.txt +0 -1
  106. data/vendor/choosealicense.com/_licenses/mit.txt +0 -1
  107. data/vendor/choosealicense.com/_licenses/mpl-2.0.txt +0 -1
  108. data/vendor/choosealicense.com/_licenses/ms-pl.txt +0 -1
  109. data/vendor/choosealicense.com/_licenses/ms-rl.txt +0 -1
  110. data/vendor/choosealicense.com/_licenses/ncsa.txt +0 -1
  111. data/vendor/choosealicense.com/_licenses/odbl-1.0.txt +573 -0
  112. data/vendor/choosealicense.com/_licenses/ofl-1.1.txt +0 -1
  113. data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -2
  114. data/vendor/choosealicense.com/_licenses/postgresql.txt +2 -3
  115. data/vendor/choosealicense.com/_licenses/unlicense.txt +1 -2
  116. data/vendor/choosealicense.com/_licenses/upl-1.0.txt +3 -4
  117. data/vendor/choosealicense.com/_licenses/wtfpl.txt +0 -1
  118. data/vendor/choosealicense.com/_licenses/zlib.txt +0 -1
  119. metadata +41 -19
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9d245d544a683bfaff9448e8654a409cea3b6ddc808ba7f7b966c84e031255ea
4
- data.tar.gz: 801c8048260b692571828fce99aa754afeb99063849e3e330e7fbb0b1b7117db
3
+ metadata.gz: d7dc009b0467cfb305e8dac051ed4e78d2f35d0454f2e14cef0952338540f8ae
4
+ data.tar.gz: 3c27bb3dd3cea6d62fab826b81fab93d9152893851b541c91d69406cdf9fcbd8
5
5
  SHA512:
6
- metadata.gz: 918b6abcf12b00722fb85c64112f2e11334a4b43d6f3b7d3335156a6bb21b0077432030f6c132aa0950af0b4860102a9a1c2d8548a5de5e1529852ae13e29ed1
7
- data.tar.gz: 919b0c1a4390a2dd964e7f4d245194ef47e108e1336fc4f04b9795a97f17475ebc64dce12ff1bbd97bb805f5d1fc95774ece1878af17a89f4d57cf974582944b
6
+ metadata.gz: 07f19b33f70b0b73611d34e474f2aa4e4d7f62c7451cdf70f76774beceac2c75ab3d1cc5048061a848b979a54032aad6dd1ba278c79cd798029efd6873d54425
7
+ data.tar.gz: 96c5e66f65307e7feb2c00b3f06661b093c60995d049f7fd19cc27b76881965a1d33768a16b0a3a3b085e9392ef828dce9cf692ee04255dd9ea2c6d22da38da6
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'dotenv/load'
4
5
  require 'thor'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'licensee/version'
2
4
  require 'forwardable'
3
5
  require 'pathname'
@@ -19,7 +21,7 @@ module Licensee
19
21
  CONFIDENCE_THRESHOLD = 98
20
22
 
21
23
  # Base domain from which to build license URLs
22
- DOMAIN = 'http://choosealicense.com'.freeze
24
+ DOMAIN = 'http://choosealicense.com'
23
25
 
24
26
  class << self
25
27
  attr_writer :confidence_threshold
@@ -49,7 +51,7 @@ module Licensee
49
51
  end
50
52
 
51
53
  # Inverse of the confidence threshold, represented as a float
52
- # By default this will be 0.05
54
+ # By default this will be 0.02
53
55
  def inverse_confidence_threshold
54
56
  @inverse_confidence_threshold ||=
55
57
  (1 - Licensee.confidence_threshold / 100.0).round(2)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  # Methods to call when displaying information about ProjectFiles
3
5
  MATCHED_FILE_METHODS = %i[
@@ -40,8 +42,10 @@ class LicenseeCLI < Thor
40
42
 
41
43
  MATCHED_FILE_METHODS.each do |method|
42
44
  next unless matched_file.respond_to? method
45
+
43
46
  value = matched_file.public_send method
44
47
  next if value.nil?
48
+
45
49
  rows << [humanize(method, :method), humanize(value, method)]
46
50
  end
47
51
  print_table rows, indent: 2
@@ -49,8 +53,9 @@ class LicenseeCLI < Thor
49
53
  next unless matched_file.is_a? Licensee::ProjectFiles::LicenseFile
50
54
  next if matched_file.confidence == 100
51
55
 
52
- licenses = licenses_by_similiarity(matched_file)
56
+ licenses = licenses_by_similarity(matched_file)
53
57
  next if licenses.empty?
58
+
54
59
  say ' Closest non-matching licenses:'
55
60
  rows = licenses[0...3].map do |license, similarity|
56
61
  spdx_id = license.meta['spdx-id']
@@ -89,15 +94,15 @@ class LicenseeCLI < Thor
89
94
  end
90
95
  end
91
96
 
92
- def licenses_by_similiarity(matched_file)
97
+ def licenses_by_similarity(matched_file)
93
98
  matcher = Licensee::Matchers::Dice.new(matched_file)
94
99
  potential_licenses = Licensee.licenses(hidden: true).select(&:wordset)
95
100
  matcher.instance_variable_set('@potential_licenses', potential_licenses)
96
- matcher.licenses_by_similiarity
101
+ matcher.licenses_by_similarity
97
102
  end
98
103
 
99
104
  def closest_license_key(matched_file)
100
- licenses = licenses_by_similiarity(matched_file)
105
+ licenses = licenses_by_similarity(matched_file)
101
106
  licenses.first.first.key unless licenses.empty?
102
107
  end
103
108
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'tmpdir'
2
4
 
3
5
  class LicenseeCLI < Thor
@@ -39,26 +41,23 @@ class LicenseeCLI < Thor
39
41
 
40
42
  def license_to_diff
41
43
  return options[:license_to_diff] if options[:license_to_diff]
42
- return project.license_file if remote?
44
+ return project.license_file if remote? || STDIN.tty? && project.license_file
43
45
 
44
46
  @license_to_diff ||= begin
45
- if STDIN.tty?
46
- error 'You must pipe license contents to the command via STDIN'
47
- exit 1
48
- end
49
-
50
47
  Licensee::ProjectFiles::LicenseFile.new(STDIN.read, 'LICENSE')
51
48
  end
52
49
  end
53
50
 
54
51
  def expected_license
55
- @expected_license ||= Licensee::License.find options[:license] if options[:license]
52
+ if options[:license]
53
+ @expected_license ||= Licensee::License.find options[:license]
54
+ end
56
55
  return @expected_license if @expected_license
57
56
 
58
57
  if options[:license]
59
58
  error "#{options[:license]} is not a valid license"
60
59
  else
61
- error 'You must provide an expected license'
60
+ error 'Usage: provide a license to diff against with --license (spdx name)'
62
61
  end
63
62
 
64
63
  error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  desc 'license-path [PATH]', "Returns the path to the given project's license file"
3
5
  def license_path(_path)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  desc 'version', 'Return the Licensee version'
3
5
  def version
@@ -1,31 +1,105 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
  require 'digest'
3
5
 
4
6
  module Licensee
5
7
  module ContentHelper
6
8
  DIGEST = Digest::SHA1
7
- END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i
8
- HR_REGEX = /[=\-\*][=\-\*\s]{3,}/
9
+ START_REGEX = /\A\s*/.freeze
10
+ END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i.freeze
9
11
  ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
10
- ALL_RIGHTS_RESERVED_REGEX = /\Aall rights reserved\.?$/i
11
- WHITESPACE_REGEX = /\s+/
12
- MARKDOWN_HEADING_REGEX = /\A\s*#+/
13
- VERSION_REGEX = /\Aversion.*$/i
14
- MARKUP_REGEX = /[#_*=~\[\]()`|>]+/
15
- DEVELOPED_BY_REGEX = /\Adeveloped by:.*?\n\n/im
16
- QUOTE_BEGIN_REGEX = /[`'"‘“]/
17
- QUOTE_END_REGEX = /['"’”]/
12
+ REGEXES = {
13
+ hrs: /^\s*[=\-\*]{3,}\s*$/,
14
+ all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
15
+ whitespace: /\s+/,
16
+ markdown_headings: /#{START_REGEX}#+/,
17
+ version: /#{START_REGEX}version.*$/i,
18
+ span_markup: /[_*~]+(.*?)[_*~]+/,
19
+ link_markup: /\[(.+?)\]\(.+?\)/,
20
+ block_markup: /^\s*>/,
21
+ border_markup: /^[\*-](.*?)[\*-]$/,
22
+ comment_markup: %r{^\s*?[/\*]{1,2}},
23
+ url: %r{#{START_REGEX}https?://[^ ]+\n},
24
+ bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
25
+ developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
26
+ quote_begin: /[`'"‘“]/,
27
+ quote_end: /[`'"’”]/,
28
+ mit_optional: /\(including the next paragraph\)/i
29
+ }.freeze
30
+ NORMALIZATIONS = {
31
+ lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
32
+ https: { from: /http:/, to: 'https:' },
33
+ ampersands: { from: '&', to: 'and' },
34
+ dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
35
+ quotes: {
36
+ from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
37
+ to: '"\1"'
38
+ }
39
+ }.freeze
40
+
41
+ # Legally equivalent words that should be ignored for comparison
42
+ # See https://spdx.org/spdx-license-list/matching-guidelines
43
+ VARIETAL_WORDS = {
44
+ 'acknowledgment' => 'acknowledgement',
45
+ 'analogue' => 'analog',
46
+ 'analyse' => 'analyze',
47
+ 'artefact' => 'artifact',
48
+ 'authorisation' => 'authorization',
49
+ 'authorised' => 'authorized',
50
+ 'calibre' => 'caliber',
51
+ 'cancelled' => 'canceled',
52
+ 'capitalisations' => 'capitalizations',
53
+ 'catalogue' => 'catalog',
54
+ 'categorise' => 'categorize',
55
+ 'centre' => 'center',
56
+ 'emphasised' => 'emphasized',
57
+ 'favour' => 'favor',
58
+ 'favourite' => 'favorite',
59
+ 'fulfil' => 'fulfill',
60
+ 'fulfilment' => 'fulfillment',
61
+ 'initialise' => 'initialize',
62
+ 'judgment' => 'judgement',
63
+ 'labelling' => 'labeling',
64
+ 'labour' => 'labor',
65
+ 'licence' => 'license',
66
+ 'maximise' => 'maximize',
67
+ 'modelled' => 'modeled',
68
+ 'modelling' => 'modeling',
69
+ 'offence' => 'offense',
70
+ 'optimise' => 'optimize',
71
+ 'organisation' => 'organization',
72
+ 'organise' => 'organize',
73
+ 'practise' => 'practice',
74
+ 'programme' => 'program',
75
+ 'realise' => 'realize',
76
+ 'recognise' => 'recognize',
77
+ 'signalling' => 'signaling',
78
+ 'sub-license' => 'sublicense',
79
+ 'sub license' => 'sublicense',
80
+ 'utilisation' => 'utilization',
81
+ 'whilst' => 'while',
82
+ 'wilful' => 'wilfull',
83
+ 'non-commercial' => 'noncommercial',
84
+ 'cent' => 'percent',
85
+ 'owner' => 'holder'
86
+ }.freeze
87
+ STRIP_METHODS = %i[
88
+ hrs markdown_headings borders title version url copyright
89
+ block_markup span_markup link_markup
90
+ all_rights_reserved developed_by end_of_terms whitespace
91
+ mit_optional
92
+ ].freeze
18
93
 
19
94
  # A set of each word in the license, without duplicates
20
95
  def wordset
21
- @wordset ||= if content_normalized
22
- content_normalized.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
23
- end
96
+ @wordset ||= content_normalized&.scan(/(?:\w(?:'s|(?<=s)')?)+/)&.to_set
24
97
  end
25
98
 
26
99
  # Number of characters in the normalized content
27
100
  def length
28
101
  return 0 unless content_normalized
102
+
29
103
  content_normalized.length
30
104
  end
31
105
 
@@ -43,8 +117,10 @@ module Licensee
43
117
  # Given another license or project file, calculates the similarity
44
118
  # as a percentage of words in common
45
119
  def similarity(other)
46
- overlap = (wordset & other.wordset).size
47
- total = wordset.size + other.wordset.size
120
+ wordset_fieldless = wordset - LicenseField.keys
121
+ fields_removed = wordset.size - wordset_fieldless.size
122
+ overlap = (wordset_fieldless & other.wordset).size
123
+ total = wordset_fieldless.size + other.wordset.size - fields_removed
48
124
  100.0 * (overlap * 2.0 / total)
49
125
  end
50
126
 
@@ -59,34 +135,21 @@ module Licensee
59
135
  # content with attribution first to detect attribution in LicenseFile
60
136
  def content_without_title_and_version
61
137
  @content_without_title_and_version ||= begin
62
- string = content.strip
63
- string = strip_markdown_headings(string)
64
- string = strip_hrs(string)
65
- string = strip_title(string) while string =~ ContentHelper.title_regex
66
- strip_version(string).strip
138
+ @_content = nil
139
+ ops = %i[html hrs comments markdown_headings title version]
140
+ ops.each { |op| strip(op) }
141
+ _content
67
142
  end
68
143
  end
69
144
 
70
- # Content without title, version, copyright, whitespace, or insturctions
71
- #
72
- # wrap - Optional width to wrap the content
73
- #
74
- # Returns a string
75
145
  def content_normalized(wrap: nil)
76
- return unless content
77
146
  @content_normalized ||= begin
78
- string = content_without_title_and_version.downcase
79
- while string =~ Matchers::Copyright::REGEX
80
- string = strip_copyright(string)
81
- end
82
- string = strip_all_rights_reserved(string)
83
- string = strip_developed_by(string)
84
- string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
85
- string = normalize_lists(string)
86
- string = normalize_quotes(string)
87
- string = normalize_https(string)
88
- string = strip_markup(string)
89
- strip_whitespace(string)
147
+ @_content = content_without_title_and_version.downcase
148
+
149
+ (NORMALIZATIONS.keys + %i[spelling bullets]).each { |op| normalize(op) }
150
+ STRIP_METHODS.each { |op| strip(op) }
151
+
152
+ _content
90
153
  end
91
154
 
92
155
  if wrap.nil?
@@ -96,14 +159,24 @@ module Licensee
96
159
  end
97
160
  end
98
161
 
162
+ # Keep the old constants backwards compatible to avoid a breaking change
163
+ def self.const_missing(const)
164
+ key = const.to_s.downcase.gsub('_regex', '').to_sym
165
+ REGEXES[key] || super
166
+ end
167
+
99
168
  # Wrap text to the given line length
100
169
  def self.wrap(text, line_width = 80)
101
170
  return if text.nil?
171
+
102
172
  text = text.clone
173
+ text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
103
174
  text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
104
175
 
105
176
  text = text.split("\n").collect do |line|
106
- if line.length > line_width
177
+ if line =~ REGEXES[:hrs]
178
+ line
179
+ elsif line.length > line_width
107
180
  line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
108
181
  else
109
182
  line
@@ -114,82 +187,114 @@ module Licensee
114
187
  end
115
188
 
116
189
  def self.format_percent(float)
117
- "#{format('%.2f', float)}%"
190
+ "#{format('%<float>.2f', float: float)}%"
118
191
  end
119
192
 
120
193
  def self.title_regex
121
- licenses = Licensee::License.all(hidden: true, psuedo: false)
122
- titles = licenses.map(&:title_regex)
123
-
124
- # Title regex must include the version to support matching within
125
- # families, but for sake of normalization, we can be less strict
126
- without_versions = licenses.map do |license|
127
- next if license.title == license.name_without_version
128
- Regexp.new Regexp.escape(license.name_without_version), 'i'
129
- end
130
- titles.concat(without_versions.compact)
194
+ @title_regex ||= begin
195
+ licenses = Licensee::License.all(hidden: true, psuedo: false)
196
+ titles = licenses.map(&:title_regex)
197
+
198
+ # Title regex must include the version to support matching within
199
+ # families, but for sake of normalization, we can be less strict
200
+ without_versions = licenses.map do |license|
201
+ next if license.title == license.name_without_version
202
+
203
+ Regexp.new Regexp.escape(license.name_without_version), 'i'
204
+ end
205
+ titles.concat(without_versions.compact)
131
206
 
132
- /\A\s*\(?(the )?#{Regexp.union titles}.*$/i
207
+ /#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
208
+ end
133
209
  end
134
210
 
135
211
  private
136
212
 
137
- def strip_title(string)
138
- strip(string, ContentHelper.title_regex)
213
+ def _content
214
+ @_content ||= content.to_s.dup.strip
139
215
  end
140
216
 
141
- def strip_version(string)
142
- strip(string, VERSION_REGEX)
217
+ def strip(regex_or_sym)
218
+ return unless _content
219
+
220
+ if regex_or_sym.is_a?(Symbol)
221
+ meth = "strip_#{regex_or_sym}"
222
+ return send(meth) if respond_to?(meth, true)
223
+
224
+ unless REGEXES[regex_or_sym]
225
+ raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
226
+ end
227
+
228
+ regex_or_sym = REGEXES[regex_or_sym]
229
+ end
230
+
231
+ @_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
143
232
  end
144
233
 
145
- def strip_copyright(string)
146
- strip(string, Matchers::Copyright::REGEX)
234
+ def strip_title
235
+ while _content =~ ContentHelper.title_regex
236
+ strip(ContentHelper.title_regex)
237
+ end
147
238
  end
148
239
 
149
- # Strip HRs from MPL
150
- def strip_hrs(string)
151
- strip(string, HR_REGEX)
240
+ def strip_borders
241
+ normalize(REGEXES[:border_markup], '\1')
152
242
  end
153
243
 
154
- # Strip leading #s from the document
155
- def strip_markdown_headings(string)
156
- strip(string, MARKDOWN_HEADING_REGEX)
244
+ def strip_comments
245
+ lines = _content.split("\n")
246
+ return if lines.count == 1
247
+ return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
248
+
249
+ strip(:comment_markup)
157
250
  end
158
251
 
159
- def strip_whitespace(string)
160
- strip(string, WHITESPACE_REGEX)
252
+ def strip_copyright
253
+ regex = Matchers::Copyright::REGEX
254
+ strip(regex) while _content =~ regex
161
255
  end
162
256
 
163
- def strip_all_rights_reserved(string)
164
- strip(string, ALL_RIGHTS_RESERVED_REGEX)
257
+ def strip_end_of_terms
258
+ body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
259
+ @_content = body
165
260
  end
166
261
 
167
- def strip_markup(string)
168
- strip(string, MARKUP_REGEX)
262
+ def strip_span_markup
263
+ normalize(REGEXES[:span_markup], '\1')
169
264
  end
170
265
 
171
- def strip_developed_by(string)
172
- strip(string, DEVELOPED_BY_REGEX)
266
+ def strip_link_markup
267
+ normalize(REGEXES[:link_markup], '\1')
173
268
  end
174
269
 
175
- def strip(string, regex)
176
- string.gsub(regex, ' ').squeeze(' ').strip
270
+ def strip_html
271
+ return unless respond_to?(:filename) && filename
272
+ return unless File.extname(filename) =~ /\.html?/i
273
+
274
+ require 'reverse_markdown'
275
+ @_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
177
276
  end
178
277
 
179
- # Replace all enclosing quotes with double quotes
180
- # Single versus double quotes don't alter the meaning, and it's easier to
181
- # strip double quotes if we still want to allow possessives
182
- def normalize_quotes(string)
183
- string.gsub(/#{QUOTE_BEGIN_REGEX}+([\w -]*?\w)#{QUOTE_END_REGEX}+/,
184
- '"\1"')
278
+ def normalize(from_or_key, to = nil)
279
+ operation = { from: from_or_key, to: to } if to
280
+ operation ||= NORMALIZATIONS[from_or_key]
281
+
282
+ if operation
283
+ @_content = _content.gsub operation[:from], operation[:to]
284
+ elsif respond_to?("normalize_#{from_or_key}", true)
285
+ send("normalize_#{from_or_key}")
286
+ else
287
+ raise ArgumentError, "#{from_or_key} is an invalid normalization"
288
+ end
185
289
  end
186
290
 
187
- def normalize_https(string)
188
- string.gsub(/http:/, 'https:')
291
+ def normalize_spelling
292
+ normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
189
293
  end
190
294
 
191
- def normalize_lists(string)
192
- string.gsub(/^\s*(\d\.|\*)/, '-')
295
+ def normalize_bullets
296
+ normalize(REGEXES[:bullet], "\n\n* ")
297
+ normalize(/\)\s+\(/, ')(')
193
298
  end
194
299
  end
195
300
  end