licensee 9.10.1 → 9.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +1 -1
  3. data/bin/licensee +5 -4
  4. data/lib/licensee.rb +7 -5
  5. data/lib/licensee/commands/detect.rb +7 -5
  6. data/lib/licensee/commands/diff.rb +7 -8
  7. data/lib/licensee/commands/license_path.rb +2 -0
  8. data/lib/licensee/commands/version.rb +2 -0
  9. data/lib/licensee/content_helper.rb +230 -85
  10. data/lib/licensee/hash_helper.rb +7 -5
  11. data/lib/licensee/license.rb +21 -22
  12. data/lib/licensee/license_field.rb +3 -1
  13. data/lib/licensee/license_meta.rb +2 -0
  14. data/lib/licensee/license_rules.rb +2 -0
  15. data/lib/licensee/matchers.rb +2 -0
  16. data/lib/licensee/matchers/cabal.rb +16 -2
  17. data/lib/licensee/matchers/cargo.rb +3 -1
  18. data/lib/licensee/matchers/copyright.rb +6 -4
  19. data/lib/licensee/matchers/cran.rb +5 -3
  20. data/lib/licensee/matchers/dice.rb +6 -4
  21. data/lib/licensee/matchers/dist_zilla.rb +3 -1
  22. data/lib/licensee/matchers/exact.rb +2 -0
  23. data/lib/licensee/matchers/gemspec.rb +7 -5
  24. data/lib/licensee/matchers/matcher.rb +3 -1
  25. data/lib/licensee/matchers/npm_bower.rb +3 -1
  26. data/lib/licensee/matchers/package.rb +2 -0
  27. data/lib/licensee/matchers/reference.rb +3 -1
  28. data/lib/licensee/matchers/spdx.rb +3 -1
  29. data/lib/licensee/project_files.rb +2 -0
  30. data/lib/licensee/project_files/license_file.rb +12 -10
  31. data/lib/licensee/project_files/package_manager_file.rb +2 -0
  32. data/lib/licensee/project_files/project_file.rb +12 -5
  33. data/lib/licensee/project_files/readme_file.rb +5 -3
  34. data/lib/licensee/projects.rb +2 -0
  35. data/lib/licensee/projects/fs_project.rb +9 -2
  36. data/lib/licensee/projects/git_project.rb +19 -11
  37. data/lib/licensee/projects/github_project.rb +3 -1
  38. data/lib/licensee/projects/project.rb +7 -5
  39. data/lib/licensee/rule.rb +2 -0
  40. data/lib/licensee/version.rb +3 -1
  41. data/licensee.gemspec +45 -0
  42. data/spec/bin_spec.rb +3 -1
  43. data/spec/fixture_spec.rb +46 -0
  44. data/spec/fixtures/bsd-3-noendorseslash/LICENSE +30 -0
  45. data/spec/fixtures/cc0-cal2013/LICENSE +116 -0
  46. data/spec/fixtures/cc0-cc/LICENSE +121 -0
  47. data/spec/fixtures/detect.json +10 -8
  48. data/spec/fixtures/fixtures.yml +134 -0
  49. data/spec/fixtures/html/license.html +262 -0
  50. data/spec/fixtures/license-hashes.json +41 -0
  51. data/spec/fixtures/mit-optional/LICENSE.txt +21 -0
  52. data/spec/fixtures/multiple-arrs/LICENSE +30 -0
  53. data/spec/fixtures/readme-invalid-encoding/README.md +24 -0
  54. data/spec/fixtures/unlicense-noinfo/LICENSE +22 -0
  55. data/spec/integration_spec.rb +68 -2
  56. data/spec/licensee/commands/detect_spec.rb +11 -7
  57. data/spec/licensee/commands/license_path_spec.rb +3 -1
  58. data/spec/licensee/commands/version_spec.rb +3 -1
  59. data/spec/licensee/content_helper_spec.rb +185 -67
  60. data/spec/licensee/hash_helper_spec.rb +3 -1
  61. data/spec/licensee/license_field_spec.rb +5 -3
  62. data/spec/licensee/license_meta_spec.rb +16 -12
  63. data/spec/licensee/license_rules_spec.rb +6 -2
  64. data/spec/licensee/license_spec.rb +37 -35
  65. data/spec/licensee/matchers/cabal_matcher_spec.rb +97 -2
  66. data/spec/licensee/matchers/cargo_matcher_spec.rb +5 -2
  67. data/spec/licensee/matchers/copyright_matcher_spec.rb +7 -5
  68. data/spec/licensee/matchers/cran_matcher_spec.rb +5 -2
  69. data/spec/licensee/matchers/dice_matcher_spec.rb +15 -12
  70. data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +5 -2
  71. data/spec/licensee/matchers/exact_matcher_spec.rb +5 -2
  72. data/spec/licensee/matchers/gemspec_matcher_spec.rb +5 -2
  73. data/spec/licensee/matchers/matcher_spec.rb +6 -2
  74. data/spec/licensee/matchers/npm_bower_matcher_spec.rb +5 -3
  75. data/spec/licensee/matchers/package_matcher_spec.rb +6 -2
  76. data/spec/licensee/matchers/reference_matcher_spec.rb +4 -2
  77. data/spec/licensee/matchers/spdx_matcher_spec.rb +5 -2
  78. data/spec/licensee/project_files/license_file_spec.rb +20 -18
  79. data/spec/licensee/project_files/package_info_spec.rb +5 -1
  80. data/spec/licensee/project_files/project_file_spec.rb +8 -2
  81. data/spec/licensee/project_files/readme_file_spec.rb +4 -1
  82. data/spec/licensee/project_spec.rb +24 -17
  83. data/spec/licensee/projects/git_project_spec.rb +23 -0
  84. data/spec/licensee/projects/github_project_spec.rb +8 -5
  85. data/spec/licensee/rule_spec.rb +6 -3
  86. data/spec/licensee_spec.rb +12 -9
  87. data/spec/spec_helper.rb +27 -9
  88. data/spec/vendored_license_spec.rb +29 -10
  89. data/vendor/choosealicense.com/_data/meta.yml +0 -4
  90. data/vendor/choosealicense.com/_data/rules.yml +3 -0
  91. data/vendor/choosealicense.com/_licenses/0bsd.txt +39 -0
  92. data/vendor/choosealicense.com/_licenses/afl-3.0.txt +7 -6
  93. data/vendor/choosealicense.com/_licenses/agpl-3.0.txt +0 -1
  94. data/vendor/choosealicense.com/_licenses/apache-2.0.txt +1 -2
  95. data/vendor/choosealicense.com/_licenses/artistic-2.0.txt +1 -2
  96. data/vendor/choosealicense.com/_licenses/bsd-2-clause.txt +8 -6
  97. data/vendor/choosealicense.com/_licenses/bsd-3-clause-clear.txt +2 -2
  98. data/vendor/choosealicense.com/_licenses/bsd-3-clause.txt +12 -10
  99. data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +61 -0
  100. data/vendor/choosealicense.com/_licenses/bsl-1.0.txt +5 -2
  101. data/vendor/choosealicense.com/_licenses/cc-by-4.0.txt +16 -14
  102. data/vendor/choosealicense.com/_licenses/cc-by-sa-4.0.txt +16 -14
  103. data/vendor/choosealicense.com/_licenses/cc0-1.0.txt +113 -105
  104. data/vendor/choosealicense.com/_licenses/cecill-2.1.txt +579 -0
  105. data/vendor/choosealicense.com/_licenses/ecl-2.0.txt +1 -2
  106. data/vendor/choosealicense.com/_licenses/epl-1.0.txt +1 -2
  107. data/vendor/choosealicense.com/_licenses/epl-2.0.txt +3 -4
  108. data/vendor/choosealicense.com/_licenses/eupl-1.1.txt +0 -1
  109. data/vendor/choosealicense.com/_licenses/eupl-1.2.txt +0 -1
  110. data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +0 -1
  111. data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -2
  112. data/vendor/choosealicense.com/_licenses/isc.txt +2 -3
  113. data/vendor/choosealicense.com/_licenses/lgpl-2.1.txt +0 -1
  114. data/vendor/choosealicense.com/_licenses/lgpl-3.0.txt +1 -3
  115. data/vendor/choosealicense.com/_licenses/lppl-1.3c.txt +1 -2
  116. data/vendor/choosealicense.com/_licenses/mit.txt +1 -2
  117. data/vendor/choosealicense.com/_licenses/mpl-2.0.txt +0 -1
  118. data/vendor/choosealicense.com/_licenses/ms-pl.txt +0 -1
  119. data/vendor/choosealicense.com/_licenses/ms-rl.txt +0 -1
  120. data/vendor/choosealicense.com/_licenses/ncsa.txt +21 -22
  121. data/vendor/choosealicense.com/_licenses/odbl-1.0.txt +573 -0
  122. data/vendor/choosealicense.com/_licenses/ofl-1.1.txt +4 -2
  123. data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -2
  124. data/vendor/choosealicense.com/_licenses/postgresql.txt +4 -5
  125. data/vendor/choosealicense.com/_licenses/unlicense.txt +1 -2
  126. data/vendor/choosealicense.com/_licenses/upl-1.0.txt +4 -5
  127. data/vendor/choosealicense.com/_licenses/vim.txt +111 -0
  128. data/vendor/choosealicense.com/_licenses/wtfpl.txt +0 -1
  129. data/vendor/choosealicense.com/_licenses/zlib.txt +4 -2
  130. metadata +79 -28
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 354a3c318aa962a9c184c71850ba04574618b17f28582f92660a69e5405218ee
4
- data.tar.gz: 1a96874eaaff65c949ca73261aced972cf20ab53d67da7b42d887ac360810eaf
3
+ metadata.gz: d931f50190ecf7abb790530607bd57eb31c7190926a394bca7aa9ec0550cfba8
4
+ data.tar.gz: f119b575b2ff9538133a587ef3a23638756fac47e30c40b442ddad679bb62036
5
5
  SHA512:
6
- metadata.gz: 227b37abb076fd1d76d9fb296056bdf16b5a24b00ef2e95a0c8a883c7be6dee011bbea1df834b20d6ffe331815cc22f4726c94c1894400d04ae32944f01622dd
7
- data.tar.gz: 41582f813fd9e57fa3c497c7edfd11fc447a131e7414ba7a1c3c25266239ec2efb6b867b84adffac704a5ba96e992b72d3f87215222e0aca32f40305e285cc2d
6
+ metadata.gz: 4b423e68fb6496eefc0f4259fac2539f34430a13e1eb6d3758a6876c604fc40e5a763a04836025070410c082c2a516b28988ceeb46ed2b2a06276b318b9d0fb6
7
+ data.tar.gz: f0e150efc09980729793f86bbfcff323617349f0cae92ff2d4ebace2e29dac96e980deacc224b6ab08a21e3224f77b95541bdfaab9fa016ba81a6f94c3fdcce7
data/LICENSE.md CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2014-2017 Ben Balter
3
+ Copyright (c) 2014-2020 Ben Balter and Licensee contributors
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'dotenv/load'
4
5
  require 'thor'
@@ -15,9 +16,9 @@ class LicenseeCLI < Thor
15
16
 
16
17
  def path
17
18
  @path ||= if !options[:remote] || args.first =~ %r{^https://}
18
- args.first || Dir.pwd
19
- else
20
- "https://github.com/#{args.first}"
19
+ args.first || Dir.pwd
20
+ else
21
+ "https://github.com/#{args.first}"
21
22
  end
22
23
  end
23
24
 
@@ -32,6 +33,6 @@ class LicenseeCLI < Thor
32
33
  end
33
34
 
34
35
  commands_dir = File.expand_path '../lib/licensee/commands/', __dir__
35
- Dir["#{commands_dir}/*.rb"].each { |c| require(c) }
36
+ Dir["#{commands_dir}/*.rb"].sort.each { |c| require(c) }
36
37
 
37
38
  LicenseeCLI.start(ARGV)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'licensee/version'
2
4
  require 'forwardable'
3
5
  require 'pathname'
@@ -19,7 +21,7 @@ module Licensee
19
21
  CONFIDENCE_THRESHOLD = 98
20
22
 
21
23
  # Base domain from which to build license URLs
22
- DOMAIN = 'http://choosealicense.com'.freeze
24
+ DOMAIN = 'http://choosealicense.com'
23
25
 
24
26
  class << self
25
27
  attr_writer :confidence_threshold
@@ -36,12 +38,12 @@ module Licensee
36
38
 
37
39
  def project(path, **args)
38
40
  if path =~ %r{\Ahttps://github.com}
39
- Licensee::Projects::GitHubProject.new(path, args)
41
+ Licensee::Projects::GitHubProject.new(path, **args)
40
42
  else
41
- Licensee::Projects::GitProject.new(path, args)
43
+ Licensee::Projects::GitProject.new(path, **args)
42
44
  end
43
45
  rescue Licensee::Projects::GitProject::InvalidRepository
44
- Licensee::Projects::FSProject.new(path, args)
46
+ Licensee::Projects::FSProject.new(path, **args)
45
47
  end
46
48
 
47
49
  def confidence_threshold
@@ -49,7 +51,7 @@ module Licensee
49
51
  end
50
52
 
51
53
  # Inverse of the confidence threshold, represented as a float
52
- # By default this will be 0.05
54
+ # By default this will be 0.02
53
55
  def inverse_confidence_threshold
54
56
  @inverse_confidence_threshold ||=
55
57
  (1 - Licensee.confidence_threshold / 100.0).round(2)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  # Methods to call when displaying information about ProjectFiles
3
5
  MATCHED_FILE_METHODS = %i[
@@ -21,11 +23,11 @@ class LicenseeCLI < Thor
21
23
 
22
24
  rows = []
23
25
  rows << if project.license
24
- ['License:', project.license.spdx_id]
25
- elsif !project.licenses.empty?
26
- ['Licenses:', project.licenses.map(&:spdx_id)]
27
- else
28
- ['License:', set_color('None', :red)]
26
+ ['License:', project.license.spdx_id]
27
+ elsif !project.licenses.empty?
28
+ ['Licenses:', project.licenses.map(&:spdx_id)]
29
+ else
30
+ ['License:', set_color('None', :red)]
29
31
  end
30
32
 
31
33
  unless project.matched_files.empty?
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'tmpdir'
2
4
 
3
5
  class LicenseeCLI < Thor
@@ -39,26 +41,23 @@ class LicenseeCLI < Thor
39
41
 
40
42
  def license_to_diff
41
43
  return options[:license_to_diff] if options[:license_to_diff]
42
- return project.license_file if remote?
44
+ return project.license_file if remote? || STDIN.tty? && project.license_file
43
45
 
44
46
  @license_to_diff ||= begin
45
- if STDIN.tty?
46
- error 'You must pipe license contents to the command via STDIN'
47
- exit 1
48
- end
49
-
50
47
  Licensee::ProjectFiles::LicenseFile.new(STDIN.read, 'LICENSE')
51
48
  end
52
49
  end
53
50
 
54
51
  def expected_license
55
- @expected_license ||= Licensee::License.find options[:license] if options[:license]
52
+ if options[:license]
53
+ @expected_license ||= Licensee::License.find options[:license]
54
+ end
56
55
  return @expected_license if @expected_license
57
56
 
58
57
  if options[:license]
59
58
  error "#{options[:license]} is not a valid license"
60
59
  else
61
- error 'You must provide an expected license'
60
+ error 'Usage: provide a license to diff against with --license (spdx name)'
62
61
  end
63
62
 
64
63
  error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  desc 'license-path [PATH]', "Returns the path to the given project's license file"
3
5
  def license_path(_path)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  desc 'version', 'Return the Licensee version'
3
5
  def version
@@ -1,26 +1,115 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
  require 'digest'
3
5
 
4
6
  module Licensee
5
7
  module ContentHelper
6
8
  DIGEST = Digest::SHA1
7
- END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i
8
- HR_REGEX = /[=\-\*][=\-\*\s]{3,}/
9
- ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
10
- ALL_RIGHTS_RESERVED_REGEX = /\Aall rights reserved\.?$/i
11
- WHITESPACE_REGEX = /\s+/
12
- MARKDOWN_HEADING_REGEX = /\A\s*#+/
13
- VERSION_REGEX = /\Aversion.*$/i
14
- MARKUP_REGEX = /[#_*=~\[\]()`|>]+/
15
- DEVELOPED_BY_REGEX = /\Adeveloped by:.*?\n\n/im
16
- QUOTE_BEGIN_REGEX = /[`'"‘“]/
17
- QUOTE_END_REGEX = /['"’”]/
9
+ START_REGEX = /\A\s*/.freeze
10
+ END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions[\s#*_]*$/i.freeze
11
+ REGEXES = {
12
+ hrs: /^\s*[=\-\*]{3,}\s*$/,
13
+ all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
14
+ whitespace: /\s+/,
15
+ markdown_headings: /#{START_REGEX}#+/,
16
+ version: /#{START_REGEX}version.*$/i,
17
+ span_markup: /[_*~]+(.*?)[_*~]+/,
18
+ link_markup: /\[(.+?)\]\(.+?\)/,
19
+ block_markup: /^\s*>/,
20
+ border_markup: /^[\*-](.*?)[\*-]$/,
21
+ comment_markup: %r{^\s*?[/\*]{1,2}},
22
+ url: %r{#{START_REGEX}https?://[^ ]+\n},
23
+ bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
24
+ developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
25
+ quote_begin: /[`'"‘“]/,
26
+ quote_end: /[`'"’”]/,
27
+ cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
28
+ cc0_info: /For more information, please see\s*\S+zero\S+/im,
29
+ cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
30
+ unlicense_info: /For more information, please.*\S+unlicense\S+/im,
31
+ mit_optional: /\(including the next paragraph\)/i
32
+ }.freeze
33
+ NORMALIZATIONS = {
34
+ lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
35
+ https: { from: /http:/, to: 'https:' },
36
+ ampersands: { from: '&', to: 'and' },
37
+ dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
38
+ quotes: {
39
+ from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
40
+ to: '"\1"'
41
+ }
42
+ }.freeze
43
+
44
+ # Legally equivalent words that schould be ignored for comparison
45
+ # See https://spdx.org/spdx-license-list/matching-guidelines
46
+ VARIETAL_WORDS = {
47
+ 'acknowledgment' => 'acknowledgement',
48
+ 'analogue' => 'analog',
49
+ 'analyse' => 'analyze',
50
+ 'artefact' => 'artifact',
51
+ 'authorisation' => 'authorization',
52
+ 'authorised' => 'authorized',
53
+ 'calibre' => 'caliber',
54
+ 'cancelled' => 'canceled',
55
+ 'capitalisations' => 'capitalizations',
56
+ 'catalogue' => 'catalog',
57
+ 'categorise' => 'categorize',
58
+ 'centre' => 'center',
59
+ 'emphasised' => 'emphasized',
60
+ 'favour' => 'favor',
61
+ 'favourite' => 'favorite',
62
+ 'fulfil' => 'fulfill',
63
+ 'fulfilment' => 'fulfillment',
64
+ 'initialise' => 'initialize',
65
+ 'judgment' => 'judgement',
66
+ 'labelling' => 'labeling',
67
+ 'labour' => 'labor',
68
+ 'licence' => 'license',
69
+ 'maximise' => 'maximize',
70
+ 'modelled' => 'modeled',
71
+ 'modelling' => 'modeling',
72
+ 'offence' => 'offense',
73
+ 'optimise' => 'optimize',
74
+ 'organisation' => 'organization',
75
+ 'organise' => 'organize',
76
+ 'practise' => 'practice',
77
+ 'programme' => 'program',
78
+ 'realise' => 'realize',
79
+ 'recognise' => 'recognize',
80
+ 'signalling' => 'signaling',
81
+ 'sub-license' => 'sublicense',
82
+ 'sub license' => 'sublicense',
83
+ 'utilisation' => 'utilization',
84
+ 'whilst' => 'while',
85
+ 'wilful' => 'wilfull',
86
+ 'non-commercial' => 'noncommercial',
87
+ 'cent' => 'percent',
88
+ 'owner' => 'holder'
89
+ }.freeze
90
+ STRIP_METHODS = %i[
91
+ cc0_optional
92
+ unlicense_optional
93
+ hrs
94
+ markdown_headings
95
+ borders
96
+ title
97
+ version
98
+ url
99
+ copyright
100
+ title
101
+ block_markup
102
+ span_markup
103
+ link_markup
104
+ developed_by
105
+ end_of_terms
106
+ whitespace
107
+ mit_optional
108
+ ].freeze
18
109
 
19
110
  # A set of each word in the license, without duplicates
20
111
  def wordset
21
- @wordset ||= if content_normalized
22
- content_normalized.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
23
- end
112
+ @wordset ||= content_normalized&.scan(%r{(?:[\w\/](?:'s|(?<=s)')?)+})&.to_set
24
113
  end
25
114
 
26
115
  # Number of characteres in the normalized content
@@ -33,7 +122,8 @@ module Licensee
33
122
  # Number of characters that could be added/removed to still be
34
123
  # considered a potential match
35
124
  def max_delta
36
- @max_delta ||= (length * Licensee.inverse_confidence_threshold).to_i
125
+ @max_delta ||= fields_normalized.size * 10 +
126
+ (length * Licensee.inverse_confidence_threshold).to_i
37
127
  end
38
128
 
39
129
  # Given another license or project file, calculates the difference in length
@@ -44,8 +134,9 @@ module Licensee
44
134
  # Given another license or project file, calculates the similarity
45
135
  # as a percentage of words in common
46
136
  def similarity(other)
47
- overlap = (wordset & other.wordset).size
48
- total = wordset.size + other.wordset.size
137
+ overlap = (wordset_fieldless & other.wordset).size
138
+ total = wordset_fieldless.size + other.wordset.size -
139
+ fields_normalized_set.size
49
140
  100.0 * (overlap * 2.0 / total)
50
141
  end
51
142
 
@@ -60,35 +151,21 @@ module Licensee
60
151
  # content with attribution first to detect attribuion in LicenseFile
61
152
  def content_without_title_and_version
62
153
  @content_without_title_and_version ||= begin
63
- string = content.strip
64
- string = strip_markdown_headings(string)
65
- string = strip_hrs(string)
66
- string = strip_title(string) while string =~ ContentHelper.title_regex
67
- strip_version(string).strip
154
+ @_content = nil
155
+ ops = %i[html hrs comments markdown_headings title version]
156
+ ops.each { |op| strip(op) }
157
+ _content
68
158
  end
69
159
  end
70
160
 
71
- # Content without title, version, copyright, whitespace, or insturctions
72
- #
73
- # wrap - Optional width to wrap the content
74
- #
75
- # Returns a string
76
161
  def content_normalized(wrap: nil)
77
- return unless content
78
-
79
162
  @content_normalized ||= begin
80
- string = content_without_title_and_version.downcase
81
- while string =~ Matchers::Copyright::REGEX
82
- string = strip_copyright(string)
83
- end
84
- string = strip_all_rights_reserved(string)
85
- string = strip_developed_by(string)
86
- string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
87
- string = normalize_lists(string)
88
- string = normalize_quotes(string)
89
- string = normalize_https(string)
90
- string = strip_markup(string)
91
- strip_whitespace(string)
163
+ @_content = content_without_title_and_version.downcase
164
+
165
+ (NORMALIZATIONS.keys + %i[spelling bullets]).each { |op| normalize(op) }
166
+ STRIP_METHODS.each { |op| strip(op) }
167
+
168
+ _content
92
169
  end
93
170
 
94
171
  if wrap.nil?
@@ -98,15 +175,24 @@ module Licensee
98
175
  end
99
176
  end
100
177
 
178
+ # Backwards compatibalize constants to avoid a breaking change
179
+ def self.const_missing(const)
180
+ key = const.to_s.downcase.gsub('_regex', '').to_sym
181
+ REGEXES[key] || super
182
+ end
183
+
101
184
  # Wrap text to the given line length
102
185
  def self.wrap(text, line_width = 80)
103
186
  return if text.nil?
104
187
 
105
188
  text = text.clone
189
+ text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
106
190
  text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
107
191
 
108
192
  text = text.split("\n").collect do |line|
109
- if line.length > line_width
193
+ if line =~ REGEXES[:hrs]
194
+ line
195
+ elsif line.length > line_width
110
196
  line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
111
197
  else
112
198
  line
@@ -117,83 +203,142 @@ module Licensee
117
203
  end
118
204
 
119
205
  def self.format_percent(float)
120
- "#{format('%.2f', float)}%"
206
+ "#{format('%<float>.2f', float: float)}%"
121
207
  end
122
208
 
123
209
  def self.title_regex
124
- licenses = Licensee::License.all(hidden: true, psuedo: false)
125
- titles = licenses.map(&:title_regex)
210
+ @title_regex ||= begin
211
+ licenses = Licensee::License.all(hidden: true, psuedo: false)
212
+ titles = licenses.map(&:title_regex)
126
213
 
127
- # Title regex must include the version to support matching within
128
- # families, but for sake of normalization, we can be less strict
129
- without_versions = licenses.map do |license|
130
- next if license.title == license.name_without_version
214
+ # Title regex must include the version to support matching within
215
+ # families, but for sake of normalization, we can be less strict
216
+ without_versions = licenses.map do |license|
217
+ next if license.title == license.name_without_version
131
218
 
132
- Regexp.new Regexp.escape(license.name_without_version), 'i'
133
- end
134
- titles.concat(without_versions.compact)
219
+ Regexp.new Regexp.escape(license.name_without_version), 'i'
220
+ end
221
+ titles.concat(without_versions.compact)
135
222
 
136
- /\A\s*\(?(the )?#{Regexp.union titles}.*$/i
223
+ /#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
224
+ end
137
225
  end
138
226
 
139
227
  private
140
228
 
141
- def strip_title(string)
142
- strip(string, ContentHelper.title_regex)
229
+ def _content
230
+ @_content ||= content.to_s.dup.strip
231
+ end
232
+
233
+ def strip(regex_or_sym)
234
+ return unless _content
235
+
236
+ if regex_or_sym.is_a?(Symbol)
237
+ meth = "strip_#{regex_or_sym}"
238
+ return send(meth) if respond_to?(meth, true)
239
+
240
+ unless REGEXES[regex_or_sym]
241
+ raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
242
+ end
243
+
244
+ regex_or_sym = REGEXES[regex_or_sym]
245
+ end
246
+
247
+ @_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
248
+ end
249
+
250
+ def strip_title
251
+ while _content =~ ContentHelper.title_regex
252
+ strip(ContentHelper.title_regex)
253
+ end
254
+ end
255
+
256
+ def strip_borders
257
+ normalize(REGEXES[:border_markup], '\1')
258
+ end
259
+
260
+ def strip_comments
261
+ lines = _content.split("\n")
262
+ return if lines.count == 1
263
+ return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
264
+
265
+ strip(:comment_markup)
143
266
  end
144
267
 
145
- def strip_version(string)
146
- strip(string, VERSION_REGEX)
268
+ def strip_copyright
269
+ regex = Regexp.union(Matchers::Copyright::REGEX, REGEXES[:all_rights_reserved])
270
+ strip(regex) while _content =~ regex
147
271
  end
148
272
 
149
- def strip_copyright(string)
150
- strip(string, Matchers::Copyright::REGEX)
273
+ def strip_cc0_optional
274
+ return unless _content.include? 'associating cc0'
275
+
276
+ strip(REGEXES[:cc_legal_code])
277
+ strip(REGEXES[:cc0_info])
278
+ strip(REGEXES[:cc0_disclaimer])
151
279
  end
152
280
 
153
- # Strip HRs from MPL
154
- def strip_hrs(string)
155
- strip(string, HR_REGEX)
281
+ def strip_unlicense_optional
282
+ return unless _content.include? 'unlicense'
283
+
284
+ strip(REGEXES[:unlicense_info])
156
285
  end
157
286
 
158
- # Strip leading #s from the document
159
- def strip_markdown_headings(string)
160
- strip(string, MARKDOWN_HEADING_REGEX)
287
+ def strip_end_of_terms
288
+ body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
289
+ @_content = body
161
290
  end
162
291
 
163
- def strip_whitespace(string)
164
- strip(string, WHITESPACE_REGEX)
292
+ def strip_span_markup
293
+ normalize(REGEXES[:span_markup], '\1')
165
294
  end
166
295
 
167
- def strip_all_rights_reserved(string)
168
- strip(string, ALL_RIGHTS_RESERVED_REGEX)
296
+ def strip_link_markup
297
+ normalize(REGEXES[:link_markup], '\1')
169
298
  end
170
299
 
171
- def strip_markup(string)
172
- strip(string, MARKUP_REGEX)
300
+ def strip_html
301
+ return unless respond_to?(:filename) && filename
302
+ return unless File.extname(filename) =~ /\.html?/i
303
+
304
+ require 'reverse_markdown'
305
+ @_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
306
+ end
307
+
308
+ def normalize(from_or_key, to = nil)
309
+ operation = { from: from_or_key, to: to } if to
310
+ operation ||= NORMALIZATIONS[from_or_key]
311
+
312
+ if operation
313
+ @_content = _content.gsub operation[:from], operation[:to]
314
+ elsif respond_to?("normalize_#{from_or_key}", true)
315
+ send("normalize_#{from_or_key}")
316
+ else
317
+ raise ArgumentError, "#{from_or_key} is an invalid normalization"
318
+ end
173
319
  end
174
320
 
175
- def strip_developed_by(string)
176
- strip(string, DEVELOPED_BY_REGEX)
321
+ def normalize_spelling
322
+ normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
177
323
  end
178
324
 
179
- def strip(string, regex)
180
- string.gsub(regex, ' ').squeeze(' ').strip
325
+ def normalize_bullets
326
+ normalize(REGEXES[:bullet], "\n\n* ")
327
+ normalize(/\)\s+\(/, ')(')
181
328
  end
182
329
 
183
- # Replace all enclosing quotes with double quotes
184
- # Single versus double quotes don't alter the meaning, and it's easier to
185
- # strip double quotes if we still want to allow possessives
186
- def normalize_quotes(string)
187
- string.gsub(/#{QUOTE_BEGIN_REGEX}+([\w -]*?\w)#{QUOTE_END_REGEX}+/,
188
- '"\1"')
330
+ def wordset_fieldless
331
+ @wordset_fieldless ||= wordset - fields_normalized_set
189
332
  end
190
333
 
191
- def normalize_https(string)
192
- string.gsub(/http:/, 'https:')
334
+ # Returns an array of strings of substitutable fields in normalized content
335
+ def fields_normalized
336
+ @fields_normalized ||=
337
+ content_normalized.scan(LicenseField::FIELD_REGEX).flatten
193
338
  end
194
339
 
195
- def normalize_lists(string)
196
- string.gsub(/^\s*(\d\.|\*)/, '-')
340
+ def fields_normalized_set
341
+ @fields_normalized_set ||= fields_normalized.to_set
197
342
  end
198
343
  end
199
344
  end