licensee 9.10.0 → 9.13.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (128) hide show
  1. checksums.yaml +4 -4
  2. data/bin/licensee +5 -4
  3. data/lib/licensee.rb +4 -2
  4. data/lib/licensee/commands/detect.rb +10 -5
  5. data/lib/licensee/commands/diff.rb +7 -8
  6. data/lib/licensee/commands/license_path.rb +2 -0
  7. data/lib/licensee/commands/version.rb +2 -0
  8. data/lib/licensee/content_helper.rb +234 -85
  9. data/lib/licensee/hash_helper.rb +7 -5
  10. data/lib/licensee/license.rb +32 -25
  11. data/lib/licensee/license_field.rb +4 -1
  12. data/lib/licensee/license_meta.rb +3 -0
  13. data/lib/licensee/license_rules.rb +2 -0
  14. data/lib/licensee/matchers.rb +2 -0
  15. data/lib/licensee/matchers/cabal.rb +16 -2
  16. data/lib/licensee/matchers/cargo.rb +3 -1
  17. data/lib/licensee/matchers/copyright.rb +6 -4
  18. data/lib/licensee/matchers/cran.rb +7 -3
  19. data/lib/licensee/matchers/dice.rb +6 -4
  20. data/lib/licensee/matchers/dist_zilla.rb +3 -1
  21. data/lib/licensee/matchers/exact.rb +3 -0
  22. data/lib/licensee/matchers/gemspec.rb +8 -5
  23. data/lib/licensee/matchers/matcher.rb +3 -1
  24. data/lib/licensee/matchers/npm_bower.rb +3 -1
  25. data/lib/licensee/matchers/package.rb +3 -0
  26. data/lib/licensee/matchers/reference.rb +3 -1
  27. data/lib/licensee/matchers/spdx.rb +3 -1
  28. data/lib/licensee/project_files.rb +2 -0
  29. data/lib/licensee/project_files/license_file.rb +13 -10
  30. data/lib/licensee/project_files/package_manager_file.rb +3 -0
  31. data/lib/licensee/project_files/project_file.rb +12 -4
  32. data/lib/licensee/project_files/readme_file.rb +5 -3
  33. data/lib/licensee/projects.rb +2 -0
  34. data/lib/licensee/projects/fs_project.rb +3 -0
  35. data/lib/licensee/projects/git_project.rb +19 -11
  36. data/lib/licensee/projects/github_project.rb +6 -1
  37. data/lib/licensee/projects/project.rb +16 -5
  38. data/lib/licensee/rule.rb +2 -0
  39. data/lib/licensee/version.rb +3 -1
  40. data/licensee.gemspec +47 -0
  41. data/spec/bin_spec.rb +3 -1
  42. data/spec/fixture_spec.rb +46 -0
  43. data/spec/fixtures/bsd-3-noendorseslash/LICENSE +30 -0
  44. data/spec/fixtures/cc0-cal2013/LICENSE +116 -0
  45. data/spec/fixtures/cc0-cc/LICENSE +121 -0
  46. data/spec/fixtures/detect.json +9 -7
  47. data/spec/fixtures/fixtures.yml +130 -0
  48. data/spec/fixtures/html/license.html +262 -0
  49. data/spec/fixtures/license-hashes.json +41 -0
  50. data/spec/fixtures/mit-optional/LICENSE.txt +21 -0
  51. data/spec/fixtures/multiple-arrs/LICENSE +30 -0
  52. data/spec/fixtures/unlicense-noinfo/LICENSE +22 -0
  53. data/spec/integration_spec.rb +68 -2
  54. data/spec/licensee/commands/detect_spec.rb +10 -6
  55. data/spec/licensee/commands/license_path_spec.rb +3 -1
  56. data/spec/licensee/commands/version_spec.rb +3 -1
  57. data/spec/licensee/content_helper_spec.rb +184 -67
  58. data/spec/licensee/hash_helper_spec.rb +3 -1
  59. data/spec/licensee/license_field_spec.rb +5 -3
  60. data/spec/licensee/license_meta_spec.rb +16 -12
  61. data/spec/licensee/license_rules_spec.rb +6 -2
  62. data/spec/licensee/license_spec.rb +62 -37
  63. data/spec/licensee/matchers/cabal_matcher_spec.rb +97 -2
  64. data/spec/licensee/matchers/cargo_matcher_spec.rb +5 -2
  65. data/spec/licensee/matchers/copyright_matcher_spec.rb +7 -5
  66. data/spec/licensee/matchers/cran_matcher_spec.rb +5 -2
  67. data/spec/licensee/matchers/dice_matcher_spec.rb +15 -12
  68. data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +5 -2
  69. data/spec/licensee/matchers/exact_matcher_spec.rb +5 -2
  70. data/spec/licensee/matchers/gemspec_matcher_spec.rb +5 -2
  71. data/spec/licensee/matchers/matcher_spec.rb +6 -2
  72. data/spec/licensee/matchers/npm_bower_matcher_spec.rb +5 -3
  73. data/spec/licensee/matchers/package_matcher_spec.rb +6 -2
  74. data/spec/licensee/matchers/reference_matcher_spec.rb +4 -2
  75. data/spec/licensee/matchers/spdx_matcher_spec.rb +5 -2
  76. data/spec/licensee/project_files/license_file_spec.rb +20 -18
  77. data/spec/licensee/project_files/package_info_spec.rb +5 -1
  78. data/spec/licensee/project_files/project_file_spec.rb +8 -2
  79. data/spec/licensee/project_files/readme_file_spec.rb +4 -1
  80. data/spec/licensee/project_spec.rb +24 -17
  81. data/spec/licensee/projects/git_project_spec.rb +23 -0
  82. data/spec/licensee/projects/github_project_spec.rb +8 -5
  83. data/spec/licensee/rule_spec.rb +6 -3
  84. data/spec/licensee_spec.rb +12 -9
  85. data/spec/spec_helper.rb +28 -9
  86. data/spec/vendored_license_spec.rb +29 -10
  87. data/vendor/choosealicense.com/_data/meta.yml +0 -4
  88. data/vendor/choosealicense.com/_data/rules.yml +3 -0
  89. data/vendor/choosealicense.com/_licenses/0bsd.txt +39 -0
  90. data/vendor/choosealicense.com/_licenses/afl-3.0.txt +7 -6
  91. data/vendor/choosealicense.com/_licenses/agpl-3.0.txt +0 -1
  92. data/vendor/choosealicense.com/_licenses/apache-2.0.txt +1 -2
  93. data/vendor/choosealicense.com/_licenses/artistic-2.0.txt +1 -2
  94. data/vendor/choosealicense.com/_licenses/bsd-2-clause.txt +8 -6
  95. data/vendor/choosealicense.com/_licenses/bsd-3-clause-clear.txt +2 -2
  96. data/vendor/choosealicense.com/_licenses/bsd-3-clause.txt +12 -10
  97. data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +61 -0
  98. data/vendor/choosealicense.com/_licenses/bsl-1.0.txt +5 -2
  99. data/vendor/choosealicense.com/_licenses/cc-by-4.0.txt +3 -1
  100. data/vendor/choosealicense.com/_licenses/cc-by-sa-4.0.txt +3 -1
  101. data/vendor/choosealicense.com/_licenses/cc0-1.0.txt +113 -105
  102. data/vendor/choosealicense.com/_licenses/cecill-2.1.txt +579 -0
  103. data/vendor/choosealicense.com/_licenses/ecl-2.0.txt +1 -2
  104. data/vendor/choosealicense.com/_licenses/epl-1.0.txt +1 -2
  105. data/vendor/choosealicense.com/_licenses/epl-2.0.txt +3 -4
  106. data/vendor/choosealicense.com/_licenses/eupl-1.1.txt +0 -1
  107. data/vendor/choosealicense.com/_licenses/eupl-1.2.txt +0 -1
  108. data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +0 -1
  109. data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -2
  110. data/vendor/choosealicense.com/_licenses/isc.txt +0 -1
  111. data/vendor/choosealicense.com/_licenses/lgpl-2.1.txt +0 -1
  112. data/vendor/choosealicense.com/_licenses/lgpl-3.0.txt +1 -3
  113. data/vendor/choosealicense.com/_licenses/lppl-1.3c.txt +1 -2
  114. data/vendor/choosealicense.com/_licenses/mit.txt +1 -2
  115. data/vendor/choosealicense.com/_licenses/mpl-2.0.txt +0 -1
  116. data/vendor/choosealicense.com/_licenses/ms-pl.txt +0 -1
  117. data/vendor/choosealicense.com/_licenses/ms-rl.txt +0 -1
  118. data/vendor/choosealicense.com/_licenses/ncsa.txt +21 -22
  119. data/vendor/choosealicense.com/_licenses/odbl-1.0.txt +573 -0
  120. data/vendor/choosealicense.com/_licenses/ofl-1.1.txt +4 -2
  121. data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -2
  122. data/vendor/choosealicense.com/_licenses/postgresql.txt +4 -5
  123. data/vendor/choosealicense.com/_licenses/unlicense.txt +1 -2
  124. data/vendor/choosealicense.com/_licenses/upl-1.0.txt +4 -5
  125. data/vendor/choosealicense.com/_licenses/vim.txt +111 -0
  126. data/vendor/choosealicense.com/_licenses/wtfpl.txt +0 -1
  127. data/vendor/choosealicense.com/_licenses/zlib.txt +4 -2
  128. metadata +77 -19
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa1cf2d146414880f019c339c2803443db7d63eb2568f7807079aac27a7aec98
4
- data.tar.gz: a6243b9cc4b9068090023c70c89a5ea0253580575b534da0c00deaa799441bb5
3
+ metadata.gz: bd74e74f07c0dc4111b3cbbebf62ae3ab140a788ed511491b5995d45f2371de0
4
+ data.tar.gz: 90561b8b85ded55614a88e38b0469e80d110bd113e3a6ddfee9bdd840dec237a
5
5
  SHA512:
6
- metadata.gz: cff2872a3d87b3c708ac2585dc41b64dd665008064303ec66a3501ec5160dbc87b5e78a4128921d5a95e3e7eb160ecc625d3d12f0fad36b05b59fa577088581b
7
- data.tar.gz: 96466e4da40eb17a01625110f236c40cc90cb99b2e39c7057f2867721c77c565ba16cf08d4c4b02c799d1675a11c765e0dd313c761e273354b19fa8cb0fa1528
6
+ metadata.gz: bc64456f4f05411ab8152ce3ade984aa52851f39399d0bffe1e6c51fa4f9de6b00fcd84008bed0d0b71b0569a9799ba0b539ce6323fee55b4436c583ae4ce92c
7
+ data.tar.gz: d797f174a17e2f91eb8f4e7c9eff9b3d4631f58531d4d57cc9da98e75e062345a52354a9ca3a8e1f991cb70a92b91e7b600814dddca345c3528742011e92c8ed
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'dotenv/load'
4
5
  require 'thor'
@@ -15,9 +16,9 @@ class LicenseeCLI < Thor
15
16
 
16
17
  def path
17
18
  @path ||= if !options[:remote] || args.first =~ %r{^https://}
18
- args.first || Dir.pwd
19
- else
20
- "https://github.com/#{args.first}"
19
+ args.first || Dir.pwd
20
+ else
21
+ "https://github.com/#{args.first}"
21
22
  end
22
23
  end
23
24
 
@@ -32,6 +33,6 @@ class LicenseeCLI < Thor
32
33
  end
33
34
 
34
35
  commands_dir = File.expand_path '../lib/licensee/commands/', __dir__
35
- Dir["#{commands_dir}/*.rb"].each { |c| require(c) }
36
+ Dir["#{commands_dir}/*.rb"].sort.each { |c| require(c) }
36
37
 
37
38
  LicenseeCLI.start(ARGV)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'licensee/version'
2
4
  require 'forwardable'
3
5
  require 'pathname'
@@ -19,7 +21,7 @@ module Licensee
19
21
  CONFIDENCE_THRESHOLD = 98
20
22
 
21
23
  # Base domain from which to build license URLs
22
- DOMAIN = 'http://choosealicense.com'.freeze
24
+ DOMAIN = 'http://choosealicense.com'
23
25
 
24
26
  class << self
25
27
  attr_writer :confidence_threshold
@@ -49,7 +51,7 @@ module Licensee
49
51
  end
50
52
 
51
53
  # Inverse of the confidence threshold, represented as a float
52
- # By default this will be 0.05
54
+ # By default this will be 0.02
53
55
  def inverse_confidence_threshold
54
56
  @inverse_confidence_threshold ||=
55
57
  (1 - Licensee.confidence_threshold / 100.0).round(2)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  # Methods to call when displaying information about ProjectFiles
3
5
  MATCHED_FILE_METHODS = %i[
@@ -21,11 +23,11 @@ class LicenseeCLI < Thor
21
23
 
22
24
  rows = []
23
25
  rows << if project.license
24
- ['License:', project.license.spdx_id]
25
- elsif !project.licenses.empty?
26
- ['Licenses:', project.licenses.map(&:spdx_id)]
27
- else
28
- ['License:', set_color('None', :red)]
26
+ ['License:', project.license.spdx_id]
27
+ elsif !project.licenses.empty?
28
+ ['Licenses:', project.licenses.map(&:spdx_id)]
29
+ else
30
+ ['License:', set_color('None', :red)]
29
31
  end
30
32
 
31
33
  unless project.matched_files.empty?
@@ -40,8 +42,10 @@ class LicenseeCLI < Thor
40
42
 
41
43
  MATCHED_FILE_METHODS.each do |method|
42
44
  next unless matched_file.respond_to? method
45
+
43
46
  value = matched_file.public_send method
44
47
  next if value.nil?
48
+
45
49
  rows << [humanize(method, :method), humanize(value, method)]
46
50
  end
47
51
  print_table rows, indent: 2
@@ -51,6 +55,7 @@ class LicenseeCLI < Thor
51
55
 
52
56
  licenses = licenses_by_similarity(matched_file)
53
57
  next if licenses.empty?
58
+
54
59
  say ' Closest non-matching licenses:'
55
60
  rows = licenses[0...3].map do |license, similarity|
56
61
  spdx_id = license.meta['spdx-id']
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'tmpdir'
2
4
 
3
5
  class LicenseeCLI < Thor
@@ -39,26 +41,23 @@ class LicenseeCLI < Thor
39
41
 
40
42
  def license_to_diff
41
43
  return options[:license_to_diff] if options[:license_to_diff]
42
- return project.license_file if remote?
44
+ return project.license_file if remote? || STDIN.tty? && project.license_file
43
45
 
44
46
  @license_to_diff ||= begin
45
- if STDIN.tty?
46
- error 'You must pipe license contents to the command via STDIN'
47
- exit 1
48
- end
49
-
50
47
  Licensee::ProjectFiles::LicenseFile.new(STDIN.read, 'LICENSE')
51
48
  end
52
49
  end
53
50
 
54
51
  def expected_license
55
- @expected_license ||= Licensee::License.find options[:license] if options[:license]
52
+ if options[:license]
53
+ @expected_license ||= Licensee::License.find options[:license]
54
+ end
56
55
  return @expected_license if @expected_license
57
56
 
58
57
  if options[:license]
59
58
  error "#{options[:license]} is not a valid license"
60
59
  else
61
- error 'You must provide an expected license'
60
+ error 'Usage: provide a license to diff against with --license (spdx name)'
62
61
  end
63
62
 
64
63
  error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  desc 'license-path [PATH]', "Returns the path to the given project's license file"
3
5
  def license_path(_path)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  desc 'version', 'Return the Licensee version'
3
5
  def version
@@ -1,38 +1,129 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
  require 'digest'
3
5
 
4
6
  module Licensee
5
7
  module ContentHelper
6
8
  DIGEST = Digest::SHA1
7
- END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i
8
- HR_REGEX = /[=\-\*][=\-\*\s]{3,}/
9
- ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
10
- ALL_RIGHTS_RESERVED_REGEX = /\Aall rights reserved\.?$/i
11
- WHITESPACE_REGEX = /\s+/
12
- MARKDOWN_HEADING_REGEX = /\A\s*#+/
13
- VERSION_REGEX = /\Aversion.*$/i
14
- MARKUP_REGEX = /[#_*=~\[\]()`|>]+/
15
- DEVELOPED_BY_REGEX = /\Adeveloped by:.*?\n\n/im
16
- QUOTE_BEGIN_REGEX = /[`'"‘“]/
17
- QUOTE_END_REGEX = /['"’”]/
9
+ START_REGEX = /\A\s*/.freeze
10
+ END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i.freeze
11
+ REGEXES = {
12
+ hrs: /^\s*[=\-\*]{3,}\s*$/,
13
+ all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
14
+ whitespace: /\s+/,
15
+ markdown_headings: /#{START_REGEX}#+/,
16
+ version: /#{START_REGEX}version.*$/i,
17
+ span_markup: /[_*~]+(.*?)[_*~]+/,
18
+ link_markup: /\[(.+?)\]\(.+?\)/,
19
+ block_markup: /^\s*>/,
20
+ border_markup: /^[\*-](.*?)[\*-]$/,
21
+ comment_markup: %r{^\s*?[/\*]{1,2}},
22
+ url: %r{#{START_REGEX}https?://[^ ]+\n},
23
+ bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
24
+ developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
25
+ quote_begin: /[`'"‘“]/,
26
+ quote_end: /[`'"’”]/,
27
+ cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
28
+ cc0_info: /For more information, please see\s*\S+zero\S+/im,
29
+ cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
30
+ unlicense_info: /For more information, please.*\S+unlicense\S+/im,
31
+ mit_optional: /\(including the next paragraph\)/i
32
+ }.freeze
33
+ NORMALIZATIONS = {
34
+ lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
35
+ https: { from: /http:/, to: 'https:' },
36
+ ampersands: { from: '&', to: 'and' },
37
+ dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
38
+ quotes: {
39
+ from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
40
+ to: '"\1"'
41
+ }
42
+ }.freeze
43
+
44
+ # Legally equivalent words that should be ignored for comparison
45
+ # See https://spdx.org/spdx-license-list/matching-guidelines
46
+ VARIETAL_WORDS = {
47
+ 'acknowledgment' => 'acknowledgement',
48
+ 'analogue' => 'analog',
49
+ 'analyse' => 'analyze',
50
+ 'artefact' => 'artifact',
51
+ 'authorisation' => 'authorization',
52
+ 'authorised' => 'authorized',
53
+ 'calibre' => 'caliber',
54
+ 'cancelled' => 'canceled',
55
+ 'capitalisations' => 'capitalizations',
56
+ 'catalogue' => 'catalog',
57
+ 'categorise' => 'categorize',
58
+ 'centre' => 'center',
59
+ 'emphasised' => 'emphasized',
60
+ 'favour' => 'favor',
61
+ 'favourite' => 'favorite',
62
+ 'fulfil' => 'fulfill',
63
+ 'fulfilment' => 'fulfillment',
64
+ 'initialise' => 'initialize',
65
+ 'judgment' => 'judgement',
66
+ 'labelling' => 'labeling',
67
+ 'labour' => 'labor',
68
+ 'licence' => 'license',
69
+ 'maximise' => 'maximize',
70
+ 'modelled' => 'modeled',
71
+ 'modelling' => 'modeling',
72
+ 'offence' => 'offense',
73
+ 'optimise' => 'optimize',
74
+ 'organisation' => 'organization',
75
+ 'organise' => 'organize',
76
+ 'practise' => 'practice',
77
+ 'programme' => 'program',
78
+ 'realise' => 'realize',
79
+ 'recognise' => 'recognize',
80
+ 'signalling' => 'signaling',
81
+ 'sub-license' => 'sublicense',
82
+ 'sub license' => 'sublicense',
83
+ 'utilisation' => 'utilization',
84
+ 'whilst' => 'while',
85
+ 'wilful' => 'wilfull',
86
+ 'non-commercial' => 'noncommercial',
87
+ 'cent' => 'percent',
88
+ 'owner' => 'holder'
89
+ }.freeze
90
+ STRIP_METHODS = %i[
91
+ cc0_optional
92
+ unlicense_optional
93
+ hrs
94
+ markdown_headings
95
+ borders
96
+ title
97
+ version
98
+ url
99
+ copyright
100
+ title
101
+ block_markup
102
+ span_markup
103
+ link_markup
104
+ developed_by
105
+ end_of_terms
106
+ whitespace
107
+ mit_optional
108
+ ].freeze
18
109
 
19
110
  # A set of each word in the license, without duplicates
20
111
  def wordset
21
- @wordset ||= if content_normalized
22
- content_normalized.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
23
- end
112
+ @wordset ||= content_normalized&.scan(%r{(?:[\w\/](?:'s|(?<=s)')?)+})&.to_set
24
113
  end
25
114
 
26
115
  # Number of characters in the normalized content
27
116
  def length
28
117
  return 0 unless content_normalized
118
+
29
119
  content_normalized.length
30
120
  end
31
121
 
32
122
  # Number of characters that could be added/removed to still be
33
123
  # considered a potential match
34
124
  def max_delta
35
- @max_delta ||= (length * Licensee.inverse_confidence_threshold).to_i
125
+ @max_delta ||= fields_normalized.size * 10 +
126
+ (length * Licensee.inverse_confidence_threshold).to_i
36
127
  end
37
128
 
38
129
  # Given another license or project file, calculates the difference in length
@@ -43,8 +134,9 @@ module Licensee
43
134
  # Given another license or project file, calculates the similarity
44
135
  # as a percentage of words in common
45
136
  def similarity(other)
46
- overlap = (wordset & other.wordset).size
47
- total = wordset.size + other.wordset.size
137
+ overlap = (wordset_fieldless & other.wordset).size
138
+ total = wordset_fieldless.size + other.wordset.size -
139
+ fields_normalized_set.size
48
140
  100.0 * (overlap * 2.0 / total)
49
141
  end
50
142
 
@@ -59,34 +151,21 @@ module Licensee
59
151
  # content with attribution first to detect attribution in LicenseFile
60
152
  def content_without_title_and_version
61
153
  @content_without_title_and_version ||= begin
62
- string = content.strip
63
- string = strip_markdown_headings(string)
64
- string = strip_hrs(string)
65
- string = strip_title(string) while string =~ ContentHelper.title_regex
66
- strip_version(string).strip
154
+ @_content = nil
155
+ ops = %i[html hrs comments markdown_headings title version]
156
+ ops.each { |op| strip(op) }
157
+ _content
67
158
  end
68
159
  end
69
160
 
70
- # Content without title, version, copyright, whitespace, or insturctions
71
- #
72
- # wrap - Optional width to wrap the content
73
- #
74
- # Returns a string
75
161
  def content_normalized(wrap: nil)
76
- return unless content
77
162
  @content_normalized ||= begin
78
- string = content_without_title_and_version.downcase
79
- while string =~ Matchers::Copyright::REGEX
80
- string = strip_copyright(string)
81
- end
82
- string = strip_all_rights_reserved(string)
83
- string = strip_developed_by(string)
84
- string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
85
- string = normalize_lists(string)
86
- string = normalize_quotes(string)
87
- string = normalize_https(string)
88
- string = strip_markup(string)
89
- strip_whitespace(string)
163
+ @_content = content_without_title_and_version.downcase
164
+
165
+ (NORMALIZATIONS.keys + %i[spelling bullets]).each { |op| normalize(op) }
166
+ STRIP_METHODS.each { |op| strip(op) }
167
+
168
+ _content
90
169
  end
91
170
 
92
171
  if wrap.nil?
@@ -96,14 +175,24 @@ module Licensee
96
175
  end
97
176
  end
98
177
 
178
+ # Keep the old *_REGEX constants backwards compatible to avoid a breaking change
179
+ def self.const_missing(const)
180
+ key = const.to_s.downcase.gsub('_regex', '').to_sym
181
+ REGEXES[key] || super
182
+ end
183
+
99
184
  # Wrap text to the given line length
100
185
  def self.wrap(text, line_width = 80)
101
186
  return if text.nil?
187
+
102
188
  text = text.clone
189
+ text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
103
190
  text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
104
191
 
105
192
  text = text.split("\n").collect do |line|
106
- if line.length > line_width
193
+ if line =~ REGEXES[:hrs]
194
+ line
195
+ elsif line.length > line_width
107
196
  line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
108
197
  else
109
198
  line
@@ -114,82 +203,142 @@ module Licensee
114
203
  end
115
204
 
116
205
  def self.format_percent(float)
117
- "#{format('%.2f', float)}%"
206
+ "#{format('%<float>.2f', float: float)}%"
118
207
  end
119
208
 
120
209
  def self.title_regex
121
- licenses = Licensee::License.all(hidden: true, psuedo: false)
122
- titles = licenses.map(&:title_regex)
123
-
124
- # Title regex must include the version to support matching within
125
- # families, but for sake of normalization, we can be less strict
126
- without_versions = licenses.map do |license|
127
- next if license.title == license.name_without_version
128
- Regexp.new Regexp.escape(license.name_without_version), 'i'
129
- end
130
- titles.concat(without_versions.compact)
210
+ @title_regex ||= begin
211
+ licenses = Licensee::License.all(hidden: true, psuedo: false)
212
+ titles = licenses.map(&:title_regex)
213
+
214
+ # Title regex must include the version to support matching within
215
+ # families, but for sake of normalization, we can be less strict
216
+ without_versions = licenses.map do |license|
217
+ next if license.title == license.name_without_version
218
+
219
+ Regexp.new Regexp.escape(license.name_without_version), 'i'
220
+ end
221
+ titles.concat(without_versions.compact)
131
222
 
132
- /\A\s*\(?(the )?#{Regexp.union titles}.*$/i
223
+ /#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
224
+ end
133
225
  end
134
226
 
135
227
  private
136
228
 
137
- def strip_title(string)
138
- strip(string, ContentHelper.title_regex)
229
+ def _content
230
+ @_content ||= content.to_s.dup.strip
231
+ end
232
+
233
+ def strip(regex_or_sym)
234
+ return unless _content
235
+
236
+ if regex_or_sym.is_a?(Symbol)
237
+ meth = "strip_#{regex_or_sym}"
238
+ return send(meth) if respond_to?(meth, true)
239
+
240
+ unless REGEXES[regex_or_sym]
241
+ raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
242
+ end
243
+
244
+ regex_or_sym = REGEXES[regex_or_sym]
245
+ end
246
+
247
+ @_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
248
+ end
249
+
250
+ def strip_title
251
+ while _content =~ ContentHelper.title_regex
252
+ strip(ContentHelper.title_regex)
253
+ end
254
+ end
255
+
256
+ def strip_borders
257
+ normalize(REGEXES[:border_markup], '\1')
258
+ end
259
+
260
+ def strip_comments
261
+ lines = _content.split("\n")
262
+ return if lines.count == 1
263
+ return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
264
+
265
+ strip(:comment_markup)
139
266
  end
140
267
 
141
- def strip_version(string)
142
- strip(string, VERSION_REGEX)
268
+ def strip_copyright
269
+ regex = Regexp.union(Matchers::Copyright::REGEX, REGEXES[:all_rights_reserved])
270
+ strip(regex) while _content =~ regex
271
+ end
272
+
273
+ def strip_cc0_optional
274
+ return unless _content.include? 'associating cc0'
275
+
276
+ strip(REGEXES[:cc_legal_code])
277
+ strip(REGEXES[:cc0_info])
278
+ strip(REGEXES[:cc0_disclaimer])
143
279
  end
144
280
 
145
- def strip_copyright(string)
146
- strip(string, Matchers::Copyright::REGEX)
281
+ def strip_unlicense_optional
282
+ return unless _content.include? 'unlicense'
283
+
284
+ strip(REGEXES[:unlicense_info])
147
285
  end
148
286
 
149
- # Strip HRs from MPL
150
- def strip_hrs(string)
151
- strip(string, HR_REGEX)
287
+ def strip_end_of_terms
288
+ body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
289
+ @_content = body
152
290
  end
153
291
 
154
- # Strip leading #s from the document
155
- def strip_markdown_headings(string)
156
- strip(string, MARKDOWN_HEADING_REGEX)
292
+ def strip_span_markup
293
+ normalize(REGEXES[:span_markup], '\1')
157
294
  end
158
295
 
159
- def strip_whitespace(string)
160
- strip(string, WHITESPACE_REGEX)
296
+ def strip_link_markup
297
+ normalize(REGEXES[:link_markup], '\1')
161
298
  end
162
299
 
163
- def strip_all_rights_reserved(string)
164
- strip(string, ALL_RIGHTS_RESERVED_REGEX)
300
+ def strip_html
301
+ return unless respond_to?(:filename) && filename
302
+ return unless File.extname(filename) =~ /\.html?/i
303
+
304
+ require 'reverse_markdown'
305
+ @_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
165
306
  end
166
307
 
167
- def strip_markup(string)
168
- strip(string, MARKUP_REGEX)
308
+ def normalize(from_or_key, to = nil)
309
+ operation = { from: from_or_key, to: to } if to
310
+ operation ||= NORMALIZATIONS[from_or_key]
311
+
312
+ if operation
313
+ @_content = _content.gsub operation[:from], operation[:to]
314
+ elsif respond_to?("normalize_#{from_or_key}", true)
315
+ send("normalize_#{from_or_key}")
316
+ else
317
+ raise ArgumentError, "#{from_or_key} is an invalid normalization"
318
+ end
169
319
  end
170
320
 
171
- def strip_developed_by(string)
172
- strip(string, DEVELOPED_BY_REGEX)
321
+ def normalize_spelling
322
+ normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
173
323
  end
174
324
 
175
- def strip(string, regex)
176
- string.gsub(regex, ' ').squeeze(' ').strip
325
+ def normalize_bullets
326
+ normalize(REGEXES[:bullet], "\n\n* ")
327
+ normalize(/\)\s+\(/, ')(')
177
328
  end
178
329
 
179
- # Replace all enclosing quotes with double quotes
180
- # Single versus double quotes don't alter the meaning, and it's easier to
181
- # strip double quotes if we still want to allow possessives
182
- def normalize_quotes(string)
183
- string.gsub(/#{QUOTE_BEGIN_REGEX}+([\w -]*?\w)#{QUOTE_END_REGEX}+/,
184
- '"\1"')
330
+ def wordset_fieldless
331
+ @wordset_fieldless ||= wordset - fields_normalized_set
185
332
  end
186
333
 
187
- def normalize_https(string)
188
- string.gsub(/http:/, 'https:')
334
+ # Returns an array of strings of substitutable fields in normalized content
335
+ def fields_normalized
336
+ @fields_normalized ||=
337
+ content_normalized.scan(LicenseField::FIELD_REGEX).flatten
189
338
  end
190
339
 
191
- def normalize_lists(string)
192
- string.gsub(/^\s*(\d\.|\*)/, '-')
340
+ def fields_normalized_set
341
+ @fields_normalized_set ||= fields_normalized.to_set
193
342
  end
194
343
  end
195
344
  end