licensee 9.10.1 → 9.13.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (130) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +1 -1
  3. data/bin/licensee +5 -4
  4. data/lib/licensee.rb +7 -5
  5. data/lib/licensee/commands/detect.rb +7 -5
  6. data/lib/licensee/commands/diff.rb +7 -8
  7. data/lib/licensee/commands/license_path.rb +2 -0
  8. data/lib/licensee/commands/version.rb +2 -0
  9. data/lib/licensee/content_helper.rb +230 -85
  10. data/lib/licensee/hash_helper.rb +7 -5
  11. data/lib/licensee/license.rb +21 -22
  12. data/lib/licensee/license_field.rb +3 -1
  13. data/lib/licensee/license_meta.rb +2 -0
  14. data/lib/licensee/license_rules.rb +2 -0
  15. data/lib/licensee/matchers.rb +2 -0
  16. data/lib/licensee/matchers/cabal.rb +16 -2
  17. data/lib/licensee/matchers/cargo.rb +3 -1
  18. data/lib/licensee/matchers/copyright.rb +6 -4
  19. data/lib/licensee/matchers/cran.rb +5 -3
  20. data/lib/licensee/matchers/dice.rb +6 -4
  21. data/lib/licensee/matchers/dist_zilla.rb +3 -1
  22. data/lib/licensee/matchers/exact.rb +2 -0
  23. data/lib/licensee/matchers/gemspec.rb +7 -5
  24. data/lib/licensee/matchers/matcher.rb +3 -1
  25. data/lib/licensee/matchers/npm_bower.rb +3 -1
  26. data/lib/licensee/matchers/package.rb +2 -0
  27. data/lib/licensee/matchers/reference.rb +3 -1
  28. data/lib/licensee/matchers/spdx.rb +3 -1
  29. data/lib/licensee/project_files.rb +2 -0
  30. data/lib/licensee/project_files/license_file.rb +12 -10
  31. data/lib/licensee/project_files/package_manager_file.rb +2 -0
  32. data/lib/licensee/project_files/project_file.rb +12 -5
  33. data/lib/licensee/project_files/readme_file.rb +5 -3
  34. data/lib/licensee/projects.rb +2 -0
  35. data/lib/licensee/projects/fs_project.rb +9 -2
  36. data/lib/licensee/projects/git_project.rb +19 -11
  37. data/lib/licensee/projects/github_project.rb +3 -1
  38. data/lib/licensee/projects/project.rb +7 -5
  39. data/lib/licensee/rule.rb +2 -0
  40. data/lib/licensee/version.rb +3 -1
  41. data/licensee.gemspec +45 -0
  42. data/spec/bin_spec.rb +3 -1
  43. data/spec/fixture_spec.rb +46 -0
  44. data/spec/fixtures/bsd-3-noendorseslash/LICENSE +30 -0
  45. data/spec/fixtures/cc0-cal2013/LICENSE +116 -0
  46. data/spec/fixtures/cc0-cc/LICENSE +121 -0
  47. data/spec/fixtures/detect.json +10 -8
  48. data/spec/fixtures/fixtures.yml +134 -0
  49. data/spec/fixtures/html/license.html +262 -0
  50. data/spec/fixtures/license-hashes.json +41 -0
  51. data/spec/fixtures/mit-optional/LICENSE.txt +21 -0
  52. data/spec/fixtures/multiple-arrs/LICENSE +30 -0
  53. data/spec/fixtures/readme-invalid-encoding/README.md +24 -0
  54. data/spec/fixtures/unlicense-noinfo/LICENSE +22 -0
  55. data/spec/integration_spec.rb +68 -2
  56. data/spec/licensee/commands/detect_spec.rb +11 -7
  57. data/spec/licensee/commands/license_path_spec.rb +3 -1
  58. data/spec/licensee/commands/version_spec.rb +3 -1
  59. data/spec/licensee/content_helper_spec.rb +185 -67
  60. data/spec/licensee/hash_helper_spec.rb +3 -1
  61. data/spec/licensee/license_field_spec.rb +5 -3
  62. data/spec/licensee/license_meta_spec.rb +16 -12
  63. data/spec/licensee/license_rules_spec.rb +6 -2
  64. data/spec/licensee/license_spec.rb +37 -35
  65. data/spec/licensee/matchers/cabal_matcher_spec.rb +97 -2
  66. data/spec/licensee/matchers/cargo_matcher_spec.rb +5 -2
  67. data/spec/licensee/matchers/copyright_matcher_spec.rb +7 -5
  68. data/spec/licensee/matchers/cran_matcher_spec.rb +5 -2
  69. data/spec/licensee/matchers/dice_matcher_spec.rb +15 -12
  70. data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +5 -2
  71. data/spec/licensee/matchers/exact_matcher_spec.rb +5 -2
  72. data/spec/licensee/matchers/gemspec_matcher_spec.rb +5 -2
  73. data/spec/licensee/matchers/matcher_spec.rb +6 -2
  74. data/spec/licensee/matchers/npm_bower_matcher_spec.rb +5 -3
  75. data/spec/licensee/matchers/package_matcher_spec.rb +6 -2
  76. data/spec/licensee/matchers/reference_matcher_spec.rb +4 -2
  77. data/spec/licensee/matchers/spdx_matcher_spec.rb +5 -2
  78. data/spec/licensee/project_files/license_file_spec.rb +20 -18
  79. data/spec/licensee/project_files/package_info_spec.rb +5 -1
  80. data/spec/licensee/project_files/project_file_spec.rb +8 -2
  81. data/spec/licensee/project_files/readme_file_spec.rb +4 -1
  82. data/spec/licensee/project_spec.rb +24 -17
  83. data/spec/licensee/projects/git_project_spec.rb +23 -0
  84. data/spec/licensee/projects/github_project_spec.rb +8 -5
  85. data/spec/licensee/rule_spec.rb +6 -3
  86. data/spec/licensee_spec.rb +12 -9
  87. data/spec/spec_helper.rb +27 -9
  88. data/spec/vendored_license_spec.rb +29 -10
  89. data/vendor/choosealicense.com/_data/meta.yml +0 -4
  90. data/vendor/choosealicense.com/_data/rules.yml +3 -0
  91. data/vendor/choosealicense.com/_licenses/0bsd.txt +39 -0
  92. data/vendor/choosealicense.com/_licenses/afl-3.0.txt +7 -6
  93. data/vendor/choosealicense.com/_licenses/agpl-3.0.txt +0 -1
  94. data/vendor/choosealicense.com/_licenses/apache-2.0.txt +1 -2
  95. data/vendor/choosealicense.com/_licenses/artistic-2.0.txt +1 -2
  96. data/vendor/choosealicense.com/_licenses/bsd-2-clause.txt +8 -6
  97. data/vendor/choosealicense.com/_licenses/bsd-3-clause-clear.txt +2 -2
  98. data/vendor/choosealicense.com/_licenses/bsd-3-clause.txt +12 -10
  99. data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +61 -0
  100. data/vendor/choosealicense.com/_licenses/bsl-1.0.txt +5 -2
  101. data/vendor/choosealicense.com/_licenses/cc-by-4.0.txt +16 -14
  102. data/vendor/choosealicense.com/_licenses/cc-by-sa-4.0.txt +16 -14
  103. data/vendor/choosealicense.com/_licenses/cc0-1.0.txt +113 -105
  104. data/vendor/choosealicense.com/_licenses/cecill-2.1.txt +579 -0
  105. data/vendor/choosealicense.com/_licenses/ecl-2.0.txt +1 -2
  106. data/vendor/choosealicense.com/_licenses/epl-1.0.txt +1 -2
  107. data/vendor/choosealicense.com/_licenses/epl-2.0.txt +3 -4
  108. data/vendor/choosealicense.com/_licenses/eupl-1.1.txt +0 -1
  109. data/vendor/choosealicense.com/_licenses/eupl-1.2.txt +0 -1
  110. data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +0 -1
  111. data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -2
  112. data/vendor/choosealicense.com/_licenses/isc.txt +2 -3
  113. data/vendor/choosealicense.com/_licenses/lgpl-2.1.txt +0 -1
  114. data/vendor/choosealicense.com/_licenses/lgpl-3.0.txt +1 -3
  115. data/vendor/choosealicense.com/_licenses/lppl-1.3c.txt +1 -2
  116. data/vendor/choosealicense.com/_licenses/mit.txt +1 -2
  117. data/vendor/choosealicense.com/_licenses/mpl-2.0.txt +0 -1
  118. data/vendor/choosealicense.com/_licenses/ms-pl.txt +0 -1
  119. data/vendor/choosealicense.com/_licenses/ms-rl.txt +0 -1
  120. data/vendor/choosealicense.com/_licenses/ncsa.txt +21 -22
  121. data/vendor/choosealicense.com/_licenses/odbl-1.0.txt +573 -0
  122. data/vendor/choosealicense.com/_licenses/ofl-1.1.txt +4 -2
  123. data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -2
  124. data/vendor/choosealicense.com/_licenses/postgresql.txt +4 -5
  125. data/vendor/choosealicense.com/_licenses/unlicense.txt +1 -2
  126. data/vendor/choosealicense.com/_licenses/upl-1.0.txt +4 -5
  127. data/vendor/choosealicense.com/_licenses/vim.txt +111 -0
  128. data/vendor/choosealicense.com/_licenses/wtfpl.txt +0 -1
  129. data/vendor/choosealicense.com/_licenses/zlib.txt +4 -2
  130. metadata +79 -28
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 354a3c318aa962a9c184c71850ba04574618b17f28582f92660a69e5405218ee
4
- data.tar.gz: 1a96874eaaff65c949ca73261aced972cf20ab53d67da7b42d887ac360810eaf
3
+ metadata.gz: d931f50190ecf7abb790530607bd57eb31c7190926a394bca7aa9ec0550cfba8
4
+ data.tar.gz: f119b575b2ff9538133a587ef3a23638756fac47e30c40b442ddad679bb62036
5
5
  SHA512:
6
- metadata.gz: 227b37abb076fd1d76d9fb296056bdf16b5a24b00ef2e95a0c8a883c7be6dee011bbea1df834b20d6ffe331815cc22f4726c94c1894400d04ae32944f01622dd
7
- data.tar.gz: 41582f813fd9e57fa3c497c7edfd11fc447a131e7414ba7a1c3c25266239ec2efb6b867b84adffac704a5ba96e992b72d3f87215222e0aca32f40305e285cc2d
6
+ metadata.gz: 4b423e68fb6496eefc0f4259fac2539f34430a13e1eb6d3758a6876c604fc40e5a763a04836025070410c082c2a516b28988ceeb46ed2b2a06276b318b9d0fb6
7
+ data.tar.gz: f0e150efc09980729793f86bbfcff323617349f0cae92ff2d4ebace2e29dac96e980deacc224b6ab08a21e3224f77b95541bdfaab9fa016ba81a6f94c3fdcce7
data/LICENSE.md CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2014-2017 Ben Balter
3
+ Copyright (c) 2014-2020 Ben Balter and Licensee contributors
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'dotenv/load'
4
5
  require 'thor'
@@ -15,9 +16,9 @@ class LicenseeCLI < Thor
15
16
 
16
17
  def path
17
18
  @path ||= if !options[:remote] || args.first =~ %r{^https://}
18
- args.first || Dir.pwd
19
- else
20
- "https://github.com/#{args.first}"
19
+ args.first || Dir.pwd
20
+ else
21
+ "https://github.com/#{args.first}"
21
22
  end
22
23
  end
23
24
 
@@ -32,6 +33,6 @@ class LicenseeCLI < Thor
32
33
  end
33
34
 
34
35
  commands_dir = File.expand_path '../lib/licensee/commands/', __dir__
35
- Dir["#{commands_dir}/*.rb"].each { |c| require(c) }
36
+ Dir["#{commands_dir}/*.rb"].sort.each { |c| require(c) }
36
37
 
37
38
  LicenseeCLI.start(ARGV)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'licensee/version'
2
4
  require 'forwardable'
3
5
  require 'pathname'
@@ -19,7 +21,7 @@ module Licensee
19
21
  CONFIDENCE_THRESHOLD = 98
20
22
 
21
23
  # Base domain from which to build license URLs
22
- DOMAIN = 'http://choosealicense.com'.freeze
24
+ DOMAIN = 'http://choosealicense.com'
23
25
 
24
26
  class << self
25
27
  attr_writer :confidence_threshold
@@ -36,12 +38,12 @@ module Licensee
36
38
 
37
39
  def project(path, **args)
38
40
  if path =~ %r{\Ahttps://github.com}
39
- Licensee::Projects::GitHubProject.new(path, args)
41
+ Licensee::Projects::GitHubProject.new(path, **args)
40
42
  else
41
- Licensee::Projects::GitProject.new(path, args)
43
+ Licensee::Projects::GitProject.new(path, **args)
42
44
  end
43
45
  rescue Licensee::Projects::GitProject::InvalidRepository
44
- Licensee::Projects::FSProject.new(path, args)
46
+ Licensee::Projects::FSProject.new(path, **args)
45
47
  end
46
48
 
47
49
  def confidence_threshold
@@ -49,7 +51,7 @@ module Licensee
49
51
  end
50
52
 
51
53
  # Inverse of the confidence threshold, represented as a float
52
- # By default this will be 0.05
54
+ # By default this will be 0.02
53
55
  def inverse_confidence_threshold
54
56
  @inverse_confidence_threshold ||=
55
57
  (1 - Licensee.confidence_threshold / 100.0).round(2)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  # Methods to call when displaying information about ProjectFiles
3
5
  MATCHED_FILE_METHODS = %i[
@@ -21,11 +23,11 @@ class LicenseeCLI < Thor
21
23
 
22
24
  rows = []
23
25
  rows << if project.license
24
- ['License:', project.license.spdx_id]
25
- elsif !project.licenses.empty?
26
- ['Licenses:', project.licenses.map(&:spdx_id)]
27
- else
28
- ['License:', set_color('None', :red)]
26
+ ['License:', project.license.spdx_id]
27
+ elsif !project.licenses.empty?
28
+ ['Licenses:', project.licenses.map(&:spdx_id)]
29
+ else
30
+ ['License:', set_color('None', :red)]
29
31
  end
30
32
 
31
33
  unless project.matched_files.empty?
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'tmpdir'
2
4
 
3
5
  class LicenseeCLI < Thor
@@ -39,26 +41,23 @@ class LicenseeCLI < Thor
39
41
 
40
42
  def license_to_diff
41
43
  return options[:license_to_diff] if options[:license_to_diff]
42
- return project.license_file if remote?
44
+ return project.license_file if remote? || STDIN.tty? && project.license_file
43
45
 
44
46
  @license_to_diff ||= begin
45
- if STDIN.tty?
46
- error 'You must pipe license contents to the command via STDIN'
47
- exit 1
48
- end
49
-
50
47
  Licensee::ProjectFiles::LicenseFile.new(STDIN.read, 'LICENSE')
51
48
  end
52
49
  end
53
50
 
54
51
  def expected_license
55
- @expected_license ||= Licensee::License.find options[:license] if options[:license]
52
+ if options[:license]
53
+ @expected_license ||= Licensee::License.find options[:license]
54
+ end
56
55
  return @expected_license if @expected_license
57
56
 
58
57
  if options[:license]
59
58
  error "#{options[:license]} is not a valid license"
60
59
  else
61
- error 'You must provide an expected license'
60
+ error 'Usage: provide a license to diff against with --license (spdx name)'
62
61
  end
63
62
 
64
63
  error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  desc 'license-path [PATH]', "Returns the path to the given project's license file"
3
5
  def license_path(_path)
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class LicenseeCLI < Thor
2
4
  desc 'version', 'Return the Licensee version'
3
5
  def version
@@ -1,26 +1,115 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
  require 'digest'
3
5
 
4
6
  module Licensee
5
7
  module ContentHelper
6
8
  DIGEST = Digest::SHA1
7
- END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions\s*$/i
8
- HR_REGEX = /[=\-\*][=\-\*\s]{3,}/
9
- ALT_TITLE_REGEX = License::ALT_TITLE_REGEX
10
- ALL_RIGHTS_RESERVED_REGEX = /\Aall rights reserved\.?$/i
11
- WHITESPACE_REGEX = /\s+/
12
- MARKDOWN_HEADING_REGEX = /\A\s*#+/
13
- VERSION_REGEX = /\Aversion.*$/i
14
- MARKUP_REGEX = /[#_*=~\[\]()`|>]+/
15
- DEVELOPED_BY_REGEX = /\Adeveloped by:.*?\n\n/im
16
- QUOTE_BEGIN_REGEX = /[`'"‘“]/
17
- QUOTE_END_REGEX = /['"’”]/
9
+ START_REGEX = /\A\s*/.freeze
10
+ END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions[\s#*_]*$/i.freeze
11
+ REGEXES = {
12
+ hrs: /^\s*[=\-\*]{3,}\s*$/,
13
+ all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
14
+ whitespace: /\s+/,
15
+ markdown_headings: /#{START_REGEX}#+/,
16
+ version: /#{START_REGEX}version.*$/i,
17
+ span_markup: /[_*~]+(.*?)[_*~]+/,
18
+ link_markup: /\[(.+?)\]\(.+?\)/,
19
+ block_markup: /^\s*>/,
20
+ border_markup: /^[\*-](.*?)[\*-]$/,
21
+ comment_markup: %r{^\s*?[/\*]{1,2}},
22
+ url: %r{#{START_REGEX}https?://[^ ]+\n},
23
+ bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
24
+ developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
25
+ quote_begin: /[`'"‘“]/,
26
+ quote_end: /[`'"’”]/,
27
+ cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
28
+ cc0_info: /For more information, please see\s*\S+zero\S+/im,
29
+ cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
30
+ unlicense_info: /For more information, please.*\S+unlicense\S+/im,
31
+ mit_optional: /\(including the next paragraph\)/i
32
+ }.freeze
33
+ NORMALIZATIONS = {
34
+ lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
35
+ https: { from: /http:/, to: 'https:' },
36
+ ampersands: { from: '&', to: 'and' },
37
+ dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
38
+ quotes: {
39
+ from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
40
+ to: '"\1"'
41
+ }
42
+ }.freeze
43
+
44
+ # Legally equivalent words that should be ignored for comparison
45
+ # See https://spdx.org/spdx-license-list/matching-guidelines
46
+ VARIETAL_WORDS = {
47
+ 'acknowledgment' => 'acknowledgement',
48
+ 'analogue' => 'analog',
49
+ 'analyse' => 'analyze',
50
+ 'artefact' => 'artifact',
51
+ 'authorisation' => 'authorization',
52
+ 'authorised' => 'authorized',
53
+ 'calibre' => 'caliber',
54
+ 'cancelled' => 'canceled',
55
+ 'capitalisations' => 'capitalizations',
56
+ 'catalogue' => 'catalog',
57
+ 'categorise' => 'categorize',
58
+ 'centre' => 'center',
59
+ 'emphasised' => 'emphasized',
60
+ 'favour' => 'favor',
61
+ 'favourite' => 'favorite',
62
+ 'fulfil' => 'fulfill',
63
+ 'fulfilment' => 'fulfillment',
64
+ 'initialise' => 'initialize',
65
+ 'judgment' => 'judgement',
66
+ 'labelling' => 'labeling',
67
+ 'labour' => 'labor',
68
+ 'licence' => 'license',
69
+ 'maximise' => 'maximize',
70
+ 'modelled' => 'modeled',
71
+ 'modelling' => 'modeling',
72
+ 'offence' => 'offense',
73
+ 'optimise' => 'optimize',
74
+ 'organisation' => 'organization',
75
+ 'organise' => 'organize',
76
+ 'practise' => 'practice',
77
+ 'programme' => 'program',
78
+ 'realise' => 'realize',
79
+ 'recognise' => 'recognize',
80
+ 'signalling' => 'signaling',
81
+ 'sub-license' => 'sublicense',
82
+ 'sub license' => 'sublicense',
83
+ 'utilisation' => 'utilization',
84
+ 'whilst' => 'while',
85
+ 'wilful' => 'wilfull',
86
+ 'non-commercial' => 'noncommercial',
87
+ 'cent' => 'percent',
88
+ 'owner' => 'holder'
89
+ }.freeze
90
+ STRIP_METHODS = %i[
91
+ cc0_optional
92
+ unlicense_optional
93
+ hrs
94
+ markdown_headings
95
+ borders
96
+ title
97
+ version
98
+ url
99
+ copyright
100
+ title
101
+ block_markup
102
+ span_markup
103
+ link_markup
104
+ developed_by
105
+ end_of_terms
106
+ whitespace
107
+ mit_optional
108
+ ].freeze
18
109
 
19
110
  # A set of each word in the license, without duplicates
20
111
  def wordset
21
- @wordset ||= if content_normalized
22
- content_normalized.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
23
- end
112
+ @wordset ||= content_normalized&.scan(%r{(?:[\w\/](?:'s|(?<=s)')?)+})&.to_set
24
113
  end
25
114
 
26
115
  # Number of characters in the normalized content
@@ -33,7 +122,8 @@ module Licensee
33
122
  # Number of characters that could be added/removed to still be
34
123
  # considered a potential match
35
124
  def max_delta
36
- @max_delta ||= (length * Licensee.inverse_confidence_threshold).to_i
125
+ @max_delta ||= fields_normalized.size * 10 +
126
+ (length * Licensee.inverse_confidence_threshold).to_i
37
127
  end
38
128
 
39
129
  # Given another license or project file, calculates the difference in length
@@ -44,8 +134,9 @@ module Licensee
44
134
  # Given another license or project file, calculates the similarity
45
135
  # as a percentage of words in common
46
136
  def similarity(other)
47
- overlap = (wordset & other.wordset).size
48
- total = wordset.size + other.wordset.size
137
+ overlap = (wordset_fieldless & other.wordset).size
138
+ total = wordset_fieldless.size + other.wordset.size -
139
+ fields_normalized_set.size
49
140
  100.0 * (overlap * 2.0 / total)
50
141
  end
51
142
 
@@ -60,35 +151,21 @@ module Licensee
60
151
  # content with attribution first to detect attribution in LicenseFile
61
152
  def content_without_title_and_version
62
153
  @content_without_title_and_version ||= begin
63
- string = content.strip
64
- string = strip_markdown_headings(string)
65
- string = strip_hrs(string)
66
- string = strip_title(string) while string =~ ContentHelper.title_regex
67
- strip_version(string).strip
154
+ @_content = nil
155
+ ops = %i[html hrs comments markdown_headings title version]
156
+ ops.each { |op| strip(op) }
157
+ _content
68
158
  end
69
159
  end
70
160
 
71
- # Content without title, version, copyright, whitespace, or insturctions
72
- #
73
- # wrap - Optional width to wrap the content
74
- #
75
- # Returns a string
76
161
  def content_normalized(wrap: nil)
77
- return unless content
78
-
79
162
  @content_normalized ||= begin
80
- string = content_without_title_and_version.downcase
81
- while string =~ Matchers::Copyright::REGEX
82
- string = strip_copyright(string)
83
- end
84
- string = strip_all_rights_reserved(string)
85
- string = strip_developed_by(string)
86
- string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
87
- string = normalize_lists(string)
88
- string = normalize_quotes(string)
89
- string = normalize_https(string)
90
- string = strip_markup(string)
91
- strip_whitespace(string)
163
+ @_content = content_without_title_and_version.downcase
164
+
165
+ (NORMALIZATIONS.keys + %i[spelling bullets]).each { |op| normalize(op) }
166
+ STRIP_METHODS.each { |op| strip(op) }
167
+
168
+ _content
92
169
  end
93
170
 
94
171
  if wrap.nil?
@@ -98,15 +175,24 @@ module Licensee
98
175
  end
99
176
  end
100
177
 
178
+ # Keep the old constants backwards compatible to avoid a breaking change
179
+ def self.const_missing(const)
180
+ key = const.to_s.downcase.gsub('_regex', '').to_sym
181
+ REGEXES[key] || super
182
+ end
183
+
101
184
  # Wrap text to the given line length
102
185
  def self.wrap(text, line_width = 80)
103
186
  return if text.nil?
104
187
 
105
188
  text = text.clone
189
+ text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
106
190
  text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
107
191
 
108
192
  text = text.split("\n").collect do |line|
109
- if line.length > line_width
193
+ if line =~ REGEXES[:hrs]
194
+ line
195
+ elsif line.length > line_width
110
196
  line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
111
197
  else
112
198
  line
@@ -117,83 +203,142 @@ module Licensee
117
203
  end
118
204
 
119
205
  def self.format_percent(float)
120
- "#{format('%.2f', float)}%"
206
+ "#{format('%<float>.2f', float: float)}%"
121
207
  end
122
208
 
123
209
  def self.title_regex
124
- licenses = Licensee::License.all(hidden: true, psuedo: false)
125
- titles = licenses.map(&:title_regex)
210
+ @title_regex ||= begin
211
+ licenses = Licensee::License.all(hidden: true, psuedo: false)
212
+ titles = licenses.map(&:title_regex)
126
213
 
127
- # Title regex must include the version to support matching within
128
- # families, but for sake of normalization, we can be less strict
129
- without_versions = licenses.map do |license|
130
- next if license.title == license.name_without_version
214
+ # Title regex must include the version to support matching within
215
+ # families, but for sake of normalization, we can be less strict
216
+ without_versions = licenses.map do |license|
217
+ next if license.title == license.name_without_version
131
218
 
132
- Regexp.new Regexp.escape(license.name_without_version), 'i'
133
- end
134
- titles.concat(without_versions.compact)
219
+ Regexp.new Regexp.escape(license.name_without_version), 'i'
220
+ end
221
+ titles.concat(without_versions.compact)
135
222
 
136
- /\A\s*\(?(the )?#{Regexp.union titles}.*$/i
223
+ /#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
224
+ end
137
225
  end
138
226
 
139
227
  private
140
228
 
141
- def strip_title(string)
142
- strip(string, ContentHelper.title_regex)
229
+ def _content
230
+ @_content ||= content.to_s.dup.strip
231
+ end
232
+
233
+ def strip(regex_or_sym)
234
+ return unless _content
235
+
236
+ if regex_or_sym.is_a?(Symbol)
237
+ meth = "strip_#{regex_or_sym}"
238
+ return send(meth) if respond_to?(meth, true)
239
+
240
+ unless REGEXES[regex_or_sym]
241
+ raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
242
+ end
243
+
244
+ regex_or_sym = REGEXES[regex_or_sym]
245
+ end
246
+
247
+ @_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
248
+ end
249
+
250
+ def strip_title
251
+ while _content =~ ContentHelper.title_regex
252
+ strip(ContentHelper.title_regex)
253
+ end
254
+ end
255
+
256
+ def strip_borders
257
+ normalize(REGEXES[:border_markup], '\1')
258
+ end
259
+
260
+ def strip_comments
261
+ lines = _content.split("\n")
262
+ return if lines.count == 1
263
+ return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
264
+
265
+ strip(:comment_markup)
143
266
  end
144
267
 
145
- def strip_version(string)
146
- strip(string, VERSION_REGEX)
268
+ def strip_copyright
269
+ regex = Regexp.union(Matchers::Copyright::REGEX, REGEXES[:all_rights_reserved])
270
+ strip(regex) while _content =~ regex
147
271
  end
148
272
 
149
- def strip_copyright(string)
150
- strip(string, Matchers::Copyright::REGEX)
273
+ def strip_cc0_optional
274
+ return unless _content.include? 'associating cc0'
275
+
276
+ strip(REGEXES[:cc_legal_code])
277
+ strip(REGEXES[:cc0_info])
278
+ strip(REGEXES[:cc0_disclaimer])
151
279
  end
152
280
 
153
- # Strip HRs from MPL
154
- def strip_hrs(string)
155
- strip(string, HR_REGEX)
281
+ def strip_unlicense_optional
282
+ return unless _content.include? 'unlicense'
283
+
284
+ strip(REGEXES[:unlicense_info])
156
285
  end
157
286
 
158
- # Strip leading #s from the document
159
- def strip_markdown_headings(string)
160
- strip(string, MARKDOWN_HEADING_REGEX)
287
+ def strip_end_of_terms
288
+ body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
289
+ @_content = body
161
290
  end
162
291
 
163
- def strip_whitespace(string)
164
- strip(string, WHITESPACE_REGEX)
292
+ def strip_span_markup
293
+ normalize(REGEXES[:span_markup], '\1')
165
294
  end
166
295
 
167
- def strip_all_rights_reserved(string)
168
- strip(string, ALL_RIGHTS_RESERVED_REGEX)
296
+ def strip_link_markup
297
+ normalize(REGEXES[:link_markup], '\1')
169
298
  end
170
299
 
171
- def strip_markup(string)
172
- strip(string, MARKUP_REGEX)
300
+ def strip_html
301
+ return unless respond_to?(:filename) && filename
302
+ return unless File.extname(filename) =~ /\.html?/i
303
+
304
+ require 'reverse_markdown'
305
+ @_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
306
+ end
307
+
308
+ def normalize(from_or_key, to = nil)
309
+ operation = { from: from_or_key, to: to } if to
310
+ operation ||= NORMALIZATIONS[from_or_key]
311
+
312
+ if operation
313
+ @_content = _content.gsub operation[:from], operation[:to]
314
+ elsif respond_to?("normalize_#{from_or_key}", true)
315
+ send("normalize_#{from_or_key}")
316
+ else
317
+ raise ArgumentError, "#{from_or_key} is an invalid normalization"
318
+ end
173
319
  end
174
320
 
175
- def strip_developed_by(string)
176
- strip(string, DEVELOPED_BY_REGEX)
321
+ def normalize_spelling
322
+ normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
177
323
  end
178
324
 
179
- def strip(string, regex)
180
- string.gsub(regex, ' ').squeeze(' ').strip
325
+ def normalize_bullets
326
+ normalize(REGEXES[:bullet], "\n\n* ")
327
+ normalize(/\)\s+\(/, ')(')
181
328
  end
182
329
 
183
- # Replace all enclosing quotes with double quotes
184
- # Single versus double quotes don't alter the meaning, and it's easier to
185
- # strip double quotes if we still want to allow possessives
186
- def normalize_quotes(string)
187
- string.gsub(/#{QUOTE_BEGIN_REGEX}+([\w -]*?\w)#{QUOTE_END_REGEX}+/,
188
- '"\1"')
330
+ def wordset_fieldless
331
+ @wordset_fieldless ||= wordset - fields_normalized_set
189
332
  end
190
333
 
191
- def normalize_https(string)
192
- string.gsub(/http:/, 'https:')
334
+ # Returns an array of strings of substitutable fields in normalized content
335
+ def fields_normalized
336
+ @fields_normalized ||=
337
+ content_normalized.scan(LicenseField::FIELD_REGEX).flatten
193
338
  end
194
339
 
195
- def normalize_lists(string)
196
- string.gsub(/^\s*(\d\.|\*)/, '-')
340
+ def fields_normalized_set
341
+ @fields_normalized_set ||= fields_normalized.to_set
197
342
  end
198
343
  end
199
344
  end