licensee 9.18.0 → 9.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.md +1 -1
  3. data/bin/licensee +2 -0
  4. data/lib/licensee/commands/detect.rb +9 -89
  5. data/lib/licensee/commands/detect_helpers.rb +125 -0
  6. data/lib/licensee/commands/diff.rb +64 -35
  7. data/lib/licensee/commands/license_path.rb +1 -0
  8. data/lib/licensee/commands/version.rb +1 -0
  9. data/lib/licensee/content_helper/constants.rb +109 -0
  10. data/lib/licensee/content_helper/normalization_methods.rb +137 -0
  11. data/lib/licensee/content_helper/similarity_methods.rb +49 -0
  12. data/lib/licensee/content_helper.rb +23 -275
  13. data/lib/licensee/hash_helper.rb +9 -7
  14. data/lib/licensee/license/class_methods.rb +67 -0
  15. data/lib/licensee/license/content_methods.rb +52 -0
  16. data/lib/licensee/license/identity_methods.rb +117 -0
  17. data/lib/licensee/license.rb +31 -208
  18. data/lib/licensee/license_field.rb +9 -6
  19. data/lib/licensee/license_meta.rb +4 -1
  20. data/lib/licensee/license_rules.rb +5 -1
  21. data/lib/licensee/matchers/cabal.rb +1 -0
  22. data/lib/licensee/matchers/cargo.rb +1 -0
  23. data/lib/licensee/matchers/copyright.rb +3 -1
  24. data/lib/licensee/matchers/cran.rb +2 -1
  25. data/lib/licensee/matchers/dice.rb +1 -0
  26. data/lib/licensee/matchers/dist_zilla.rb +1 -0
  27. data/lib/licensee/matchers/exact.rb +2 -0
  28. data/lib/licensee/matchers/gemspec.rb +1 -0
  29. data/lib/licensee/matchers/matcher.rb +2 -0
  30. data/lib/licensee/matchers/npm_bower.rb +1 -0
  31. data/lib/licensee/matchers/nuget.rb +1 -0
  32. data/lib/licensee/matchers/package.rb +20 -4
  33. data/lib/licensee/matchers/spdx.rb +1 -0
  34. data/lib/licensee/matchers.rb +1 -0
  35. data/lib/licensee/project_files/license_file.rb +28 -3
  36. data/lib/licensee/project_files/package_manager_file.rb +1 -0
  37. data/lib/licensee/project_files/project_file.rb +7 -4
  38. data/lib/licensee/project_files/readme_file.rb +1 -0
  39. data/lib/licensee/project_files.rb +1 -0
  40. data/lib/licensee/projects/fs_project.rb +2 -0
  41. data/lib/licensee/projects/git_project.rb +15 -3
  42. data/lib/licensee/projects/github_project.rb +25 -5
  43. data/lib/licensee/projects/project.rb +29 -32
  44. data/lib/licensee/projects.rb +1 -0
  45. data/lib/licensee/rule.rb +2 -0
  46. data/lib/licensee/version.rb +1 -1
  47. data/lib/licensee.rb +23 -2
  48. data/spec/bin_spec.rb +8 -8
  49. data/spec/fixture_spec.rb +16 -17
  50. data/spec/fixtures/detect.json +3 -3
  51. data/spec/fixtures/fixtures.yml +16 -0
  52. data/spec/fixtures/licenses-dir/LICENSES/MIT.txt +21 -0
  53. data/spec/fixtures/licenses-dir-with-license-ref/LICENSES/LicenseRef-MIT.txt +21 -0
  54. data/spec/fixtures/licenses-dir-with-multiple-license-files/LICENSES/MIT.txt +21 -0
  55. data/spec/fixtures/licenses-dir-with-multiple-license-files/LICENSES/MPL-2.0.txt +362 -0
  56. data/spec/fixtures/licenses-dir-with-top-level-license/LICENSE.md +195 -0
  57. data/spec/fixtures/licenses-dir-with-top-level-license/LICENSES/MIT.txt +21 -0
  58. data/spec/integration_spec.rb +247 -274
  59. data/spec/licensee/commands/detect_spec.rb +94 -21
  60. data/spec/licensee/commands/license_path_spec.rb +13 -9
  61. data/spec/licensee/commands/version_spec.rb +12 -8
  62. data/spec/licensee/content_helper_spec.rb +91 -107
  63. data/spec/licensee/hash_helper_spec.rb +3 -6
  64. data/spec/licensee/license_field_spec.rb +17 -22
  65. data/spec/licensee/license_meta_spec.rb +29 -37
  66. data/spec/licensee/license_rules_spec.rb +19 -19
  67. data/spec/licensee/license_spec.rb +215 -264
  68. data/spec/licensee/licensee_filesystem_spec.rb +40 -0
  69. data/spec/licensee/matchers/cabal_matcher_spec.rb +31 -31
  70. data/spec/licensee/matchers/cargo_matcher_spec.rb +7 -7
  71. data/spec/licensee/matchers/copyright_matcher_spec.rb +10 -10
  72. data/spec/licensee/matchers/cran_matcher_spec.rb +6 -6
  73. data/spec/licensee/matchers/dice_matcher_spec.rb +34 -33
  74. data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +7 -7
  75. data/spec/licensee/matchers/exact_matcher_spec.rb +4 -4
  76. data/spec/licensee/matchers/gemspec_matcher_spec.rb +10 -10
  77. data/spec/licensee/matchers/matcher_spec.rb +4 -4
  78. data/spec/licensee/matchers/npm_bower_matcher_spec.rb +20 -12
  79. data/spec/licensee/matchers/nu_get_matcher_spec.rb +12 -12
  80. data/spec/licensee/matchers/package_matcher_spec.rb +32 -12
  81. data/spec/licensee/matchers/reference_matcher_spec.rb +13 -13
  82. data/spec/licensee/matchers/spdx_matcher_spec.rb +9 -9
  83. data/spec/licensee/project_files/license_file_spec.rb +136 -72
  84. data/spec/licensee/project_files/package_manager_file_spec.rb +3 -3
  85. data/spec/licensee/project_files/project_file_spec.rb +13 -23
  86. data/spec/licensee/project_files/readme_file_spec.rb +13 -13
  87. data/spec/licensee/project_spec.rb +168 -123
  88. data/spec/licensee/projects/git_hub_project_spec.rb +268 -26
  89. data/spec/licensee/projects/git_project_spec.rb +1 -1
  90. data/spec/licensee/rule_spec.rb +15 -22
  91. data/spec/licensee_spec.rb +15 -11
  92. data/spec/spec_helper.rb +3 -1
  93. data/spec/vendored_license_spec.rb +37 -60
  94. data/vendor/choosealicense.com/_licenses/blueoak-1.0.0.txt +1 -1
  95. data/vendor/choosealicense.com/_licenses/cern-ohl-p-2.0.txt +1 -1
  96. data/vendor/choosealicense.com/_licenses/cern-ohl-s-2.0.txt +1 -1
  97. data/vendor/choosealicense.com/_licenses/cern-ohl-w-2.0.txt +2 -2
  98. data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +1 -1
  99. data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -1
  100. data/vendor/choosealicense.com/_licenses/mit-0.txt +1 -1
  101. data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -1
  102. data/vendor/license-list-XML/src/Apache-2.0.xml +1 -0
  103. data/vendor/license-list-XML/src/BSD-3-Clause.xml +2 -2
  104. data/vendor/license-list-XML/src/BSD-4-Clause.xml +2 -1
  105. data/vendor/license-list-XML/src/EPL-2.0.xml +2 -0
  106. data/vendor/license-list-XML/src/GPL-2.0.xml +5 -3
  107. data/vendor/license-list-XML/src/MIT.xml +28 -13
  108. data/vendor/license-list-XML/src/MPL-2.0.xml +2 -2
  109. metadata +20 -9
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 224c5413c75e78cad466f9d41b8a18a8b0a7ddf348740521ed37dbea4e25e674
4
- data.tar.gz: f73bde77074c0e4feff4190551effbaebd685abf539b944ed0509231090fe8be
3
+ metadata.gz: 4878becb1edfcb446503f89645e4daeeacc60715ca978a8d4391418b0493f255
4
+ data.tar.gz: 651e41f44d60ab16669584edeb693967e4aa1a6a6c911c7a944fc6b35bd05503
5
5
  SHA512:
6
- metadata.gz: 75680afe6a50966f190c3748130cfb72b4b4d68a846aec482405e14756a9d1211754b1a94328e79f38619dc3ee3c1999490b9cab157fc7b76b8cc03661316a7b
7
- data.tar.gz: 3622c24de4f843dd0f59f37a3db30c8a9e6feba68644f7dce4168adb7a304f97175a41595c70c789acdd4e2166bfa3eb6547b485ce7e0a476719457530652d54
6
+ metadata.gz: c7e96fea4cd873710794c9e68f93edc8f016a620c55464f0f5c8d8ba82c90de8195376f858d8bc6125bb1ac170e980a98557fd135d68ab38ed98e2ba1d5e030e
7
+ data.tar.gz: caf4cc661f13ca7ce0ea847e6cd95ec45dd21472eb03cb6862d0cb39c52823818a70373b1133f55570492297543e7db6d4c7adeeefdbef6dd8c5795f67b19919
data/LICENSE.md CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2014-2021 Ben Balter and Licensee contributors
3
+ Copyright (c) Ben Balter and Licensee contributors
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/bin/licensee CHANGED
@@ -7,6 +7,7 @@ require 'json'
7
7
 
8
8
  require_relative '../lib/licensee'
9
9
 
10
+ # Thor CLI entrypoint for the `licensee` command.
10
11
  class LicenseeCLI < Thor
11
12
  package_name 'Licensee'
12
13
  class_option :remote, type: :boolean, desc: 'Assume PATH is a GitHub owner/repo path'
@@ -30,6 +31,7 @@ class LicenseeCLI < Thor
30
31
  @project ||= Licensee.project(path,
31
32
  detect_packages: options[:packages],
32
33
  detect_readme: options[:readme],
34
+ filesystem: options[:filesystem],
33
35
  ref: options[:ref])
34
36
  end
35
37
 
@@ -1,10 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'detect_helpers'
4
+
5
+ # Implementation of the `licensee detect` command.
3
6
  class LicenseeCLI < Thor
4
- # Methods to call when displaying information about ProjectFiles
5
- MATCHED_FILE_METHODS = %i[
6
- content_hash attribution confidence matcher license
7
- ].freeze
7
+ include Licensee::Commands::DetectCLIHelpers
8
8
 
9
9
  desc 'detect [PATH]', 'Detect the license of the given project'
10
10
  option :json, type: :boolean, desc: 'Return output as JSON'
@@ -14,94 +14,14 @@ class LicenseeCLI < Thor
14
14
  option :license, type: :string, desc: 'The SPDX ID or key of the license to compare (implies --diff)'
15
15
  option :diff, type: :boolean, desc: 'Compare the license to the closest match'
16
16
  option :ref, type: :string, desc: 'The name of the commit/branch/tag to search (github.com only)'
17
+ option :filesystem, type: :boolean, desc: 'Force looking at the filesystem (ignore git data)'
17
18
  def detect(_path = nil)
18
19
  Licensee.confidence_threshold = options[:confidence]
19
20
 
20
- if options[:json]
21
- say project.to_h.to_json
22
- exit !project.licenses.empty?
23
- end
24
-
25
- rows = []
26
- rows << if project.license
27
- ['License:', project.license.spdx_id]
28
- elsif !project.licenses.empty?
29
- ['Licenses:', project.licenses.map(&:spdx_id)]
30
- else
31
- ['License:', set_color('None', :red)]
32
- end
33
-
34
- rows << ['Matched files:', project.matched_files.map(&:filename).join(', ')] unless project.matched_files.empty?
35
-
36
- print_table rows
37
-
38
- project.matched_files.each do |matched_file|
39
- rows = []
40
- say "#{matched_file.filename}:"
41
-
42
- MATCHED_FILE_METHODS.each do |method|
43
- next unless matched_file.respond_to? method
44
-
45
- value = matched_file.public_send method
46
- next if value.nil?
47
-
48
- rows << [humanize(method, :method), humanize(value, method)]
49
- end
50
- print_table rows, indent: 2
51
-
52
- next unless matched_file.is_a? Licensee::ProjectFiles::LicenseFile
53
- next if matched_file.confidence == 100
54
-
55
- licenses = licenses_by_similarity(matched_file)
56
- next if licenses.empty?
57
-
58
- say ' Closest non-matching licenses:'
59
- rows = licenses[0...3].map do |license, similarity|
60
- spdx_id = license.meta['spdx-id']
61
- percent = Licensee::ContentHelper.format_percent(similarity)
62
- ["#{spdx_id} similarity:", percent]
63
- end
64
- print_table rows, indent: 4
65
- end
66
-
67
- if project.license_file && (options[:license] || options[:diff])
68
- license = options[:license] || closest_license_key(project.license_file)
69
- if license
70
- invoke(:diff, nil,
71
- license: license, license_to_diff: project.license_file)
72
- end
73
- end
74
-
75
- exit !project.licenses.empty?
76
- end
77
-
78
- private
79
-
80
- # Given a string or object, prepares it for output and human consumption
81
- def humanize(value, type = nil)
82
- case type
83
- when :license
84
- value.spdx_id
85
- when :matcher
86
- value.class
87
- when :confidence
88
- Licensee::ContentHelper.format_percent(value)
89
- when :method
90
- "#{value.to_s.tr('_', ' ').capitalize}:"
91
- else
92
- value
93
- end
94
- end
95
-
96
- def licenses_by_similarity(matched_file)
97
- matcher = Licensee::Matchers::Dice.new(matched_file)
98
- potential_licenses = Licensee.licenses(hidden: true).select(&:wordset)
99
- matcher.instance_variable_set(:@potential_licenses, potential_licenses)
100
- matcher.licenses_by_similarity
101
- end
21
+ handle_json_output if options[:json]
102
22
 
103
- def closest_license_key(matched_file)
104
- licenses = licenses_by_similarity(matched_file)
105
- licenses.first.first.key unless licenses.empty?
23
+ print_project_summary
24
+ print_matched_files
25
+ maybe_diff_license_file
106
26
  end
107
27
  end
@@ -0,0 +1,125 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Licensee
4
+ module Commands
5
+ # Helper methods for formatting `licensee detect` output.
6
+ module DetectCLIHelpers
7
+ # Methods to call when displaying information about ProjectFiles
8
+ MATCHED_FILE_METHODS = %i[
9
+ content_hash attribution confidence matcher license
10
+ ].freeze
11
+
12
+ private
13
+
14
+ # Given a string or object, prepares it for output and human consumption
15
+ def humanize(value, type = nil)
16
+ return value.spdx_id if type == :license
17
+ return value.class if type == :matcher
18
+ return Licensee::ContentHelper.format_percent(value) if type == :confidence
19
+ return "#{value.to_s.tr('_', ' ').capitalize}:" if type == :method
20
+
21
+ value
22
+ end
23
+
24
+ def licenses_by_similarity(matched_file)
25
+ matcher = Licensee::Matchers::Dice.new(matched_file)
26
+ potential_licenses = Licensee.licenses(hidden: true).select(&:wordset)
27
+ matcher.instance_variable_set(:@potential_licenses, potential_licenses)
28
+ matcher.licenses_by_similarity
29
+ end
30
+
31
+ def closest_license_key(matched_file)
32
+ licenses = licenses_by_similarity(matched_file)
33
+ licenses.first.first.key unless licenses.empty?
34
+ end
35
+
36
+ def handle_json_output
37
+ say project.to_h.to_json
38
+ exit !project.licenses.empty?
39
+ end
40
+
41
+ def print_project_summary
42
+ rows = [license_summary_row]
43
+ matched_files_row = matched_files_summary_row
44
+ rows << matched_files_row if matched_files_row
45
+ print_table rows
46
+ end
47
+
48
+ def license_summary_row
49
+ if project.license
50
+ ['License:', project.license.spdx_id]
51
+ elsif !project.licenses.empty?
52
+ ['Licenses:', project.licenses.map(&:spdx_id)]
53
+ else
54
+ ['License:', set_color('None', :red)]
55
+ end
56
+ end
57
+
58
+ def matched_files_summary_row
59
+ return if project.matched_files.empty?
60
+
61
+ ['Matched files:', project.matched_files.map(&:path).join(', ')]
62
+ end
63
+
64
+ def print_matched_files
65
+ project.matched_files.each do |matched_file|
66
+ print_matched_file_summary(matched_file)
67
+ print_closest_non_matching_licenses(matched_file)
68
+ end
69
+ end
70
+
71
+ def print_matched_file_summary(matched_file)
72
+ say "#{matched_file.path}:"
73
+ print_table matched_file_rows(matched_file), indent: 2
74
+ end
75
+
76
+ def matched_file_rows(matched_file)
77
+ MATCHED_FILE_METHODS.filter_map do |method|
78
+ matched_file_row(matched_file, method)
79
+ end
80
+ end
81
+
82
+ def matched_file_row(matched_file, method)
83
+ return unless matched_file.respond_to?(method)
84
+
85
+ value = matched_file.public_send(method)
86
+ return if value.nil?
87
+
88
+ [humanize(method, :method), humanize(value, method)]
89
+ end
90
+
91
+ def print_closest_non_matching_licenses(matched_file)
92
+ licenses = closest_non_matching_licenses(matched_file)
93
+ return unless licenses
94
+
95
+ say ' Closest non-matching licenses:'
96
+ print_table closest_non_matching_rows(licenses), indent: 4
97
+ end
98
+
99
+ def closest_non_matching_licenses(matched_file)
100
+ return unless matched_file.is_a?(Licensee::ProjectFiles::LicenseFile)
101
+ return if matched_file.confidence == 100
102
+
103
+ licenses = licenses_by_similarity(matched_file)
104
+ licenses.empty? ? nil : licenses
105
+ end
106
+
107
+ def closest_non_matching_rows(licenses)
108
+ licenses[0...3].map do |license, similarity|
109
+ spdx_id = license.meta['spdx-id']
110
+ ["#{spdx_id} similarity:", Licensee::ContentHelper.format_percent(similarity)]
111
+ end
112
+ end
113
+
114
+ def maybe_diff_license_file
115
+ return unless project.license_file
116
+ return unless options[:license] || options[:diff]
117
+
118
+ license = options[:license] || closest_license_key(project.license_file)
119
+ return unless license
120
+
121
+ invoke(:diff, nil, license: license, license_to_diff: project.license_file)
122
+ end
123
+ end
124
+ end
125
+ end
@@ -2,42 +2,64 @@
2
2
 
3
3
  require 'tmpdir'
4
4
 
5
+ # Implementation of the `licensee diff` command.
5
6
  class LicenseeCLI < Thor
6
7
  desc 'diff [PATH]', 'Compare the given license text to a known license'
7
8
  option :license, type: :string, desc: 'The SPDX ID or key of the license to compare'
8
9
  def diff(_path = nil)
9
10
  say "Comparing to #{expected_license.name}:"
10
- rows = []
11
+ print_table diff_summary_rows
12
+ exit_on_exact_match
13
+ say word_diff
14
+ end
15
+
16
+ private
11
17
 
12
- left = expected_license.content_normalized(wrap: 80)
13
- right = license_to_diff.content_normalized(wrap: 80)
18
+ def diff_summary_rows
19
+ [
20
+ ['Input Length:', license_to_diff.length],
21
+ ['License length:', expected_license.length],
22
+ ['Similarity:', formatted_similarity]
23
+ ]
24
+ end
25
+
26
+ def formatted_similarity
14
27
  similarity = expected_license.similarity(license_to_diff)
15
- similarity = Licensee::ContentHelper.format_percent(similarity)
16
-
17
- rows << ['Input Length:', license_to_diff.length]
18
- rows << ['License length:', expected_license.length]
19
- rows << ['Similarity:', similarity]
20
- print_table rows
21
-
22
- if left == right
23
- say 'Exact match!', :green
24
- exit
25
- end
26
-
27
- Dir.mktmpdir do |dir|
28
- path = File.expand_path 'LICENSE', dir
29
- Dir.chdir(dir) do
30
- `git init`
31
- File.write(path, left)
32
- `git add LICENSE`
33
- `git commit -m 'left'`
34
- File.write(path, right)
35
- say `git diff --word-diff`
36
- end
37
- end
28
+ Licensee::ContentHelper.format_percent(similarity)
38
29
  end
39
30
 
40
- private
31
+ def exit_on_exact_match
32
+ return unless expected_text == input_text
33
+
34
+ say 'Exact match!', :green
35
+ exit
36
+ end
37
+
38
+ def expected_text
39
+ @expected_text ||= expected_license.content_normalized(wrap: 80)
40
+ end
41
+
42
+ def input_text
43
+ @input_text ||= license_to_diff.content_normalized(wrap: 80)
44
+ end
45
+
46
+ def word_diff
47
+ Dir.mktmpdir { |dir| word_diff_in_dir(dir) }
48
+ end
49
+
50
+ def word_diff_in_dir(dir)
51
+ path = File.expand_path 'LICENSE', dir
52
+ Dir.chdir(dir) { git_word_diff(path) }
53
+ end
54
+
55
+ def git_word_diff(path)
56
+ `git init`
57
+ File.write(path, expected_text)
58
+ `git add LICENSE`
59
+ `git commit -m 'left'`
60
+ File.write(path, input_text)
61
+ `git diff --word-diff`
62
+ end
41
63
 
42
64
  def license_to_diff
43
65
  return options[:license_to_diff] if options[:license_to_diff]
@@ -47,16 +69,23 @@ class LicenseeCLI < Thor
47
69
  end
48
70
 
49
71
  def expected_license
50
- @expected_license ||= Licensee::License.find options[:license] if options[:license]
51
- return @expected_license if @expected_license
72
+ return @expected_license if defined?(@expected_license)
52
73
 
53
- if options[:license]
54
- error "#{options[:license]} is not a valid license"
55
- else
56
- error 'Usage: provide a license to diff against with --license (spdx name)'
57
- end
74
+ @expected_license = Licensee::License.find(options[:license]) if options[:license]
75
+ return @expected_license if @expected_license
58
76
 
59
- error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
77
+ error expected_license_error_message
60
78
  exit 1
61
79
  end
80
+
81
+ def expected_license_error_message
82
+ message = if options[:license]
83
+ "#{options[:license]} is not a valid license"
84
+ else
85
+ 'Usage: provide a license to diff against with --license (spdx name)'
86
+ end
87
+
88
+ valid_licenses = Licensee::License.all(hidden: true).map(&:key).join(', ')
89
+ "#{message}\nValid licenses: #{valid_licenses}"
90
+ end
62
91
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # `licensee license-path` command implementation.
3
4
  class LicenseeCLI < Thor
4
5
  desc 'license-path [PATH]', "Returns the path to the given project's license file"
5
6
  def license_path(path)
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # `licensee version` command implementation.
3
4
  class LicenseeCLI < Thor
4
5
  desc 'version', 'Return the Licensee version'
5
6
  def version
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Licensee
4
+ module ContentHelper
5
+ module Constants
6
+ DIGEST = Digest::SHA1
7
+ START_REGEX = /\A\s*/
8
+ END_OF_TERMS_REGEX = /^[\s#*_]*end of (the )?terms and conditions[\s#*_]*$/i
9
+ REGEXES = {
10
+ bom: /#{START_REGEX}\xEF\xBB\xBF/,
11
+ hrs: /^\s*[=\-*]{3,}\s*$/,
12
+ all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
13
+ whitespace: /\s+/,
14
+ markdown_headings: /^\s*#+/,
15
+ version: /#{START_REGEX}version.*$/i,
16
+ span_markup: /[_*~]+(.*?)[_*~]+/,
17
+ link_markup: /\[(.+?)\]\(.+?\)/,
18
+ block_markup: /^\s*>/,
19
+ border_markup: /^[*-](.*?)[*-]$/,
20
+ comment_markup: %r{^\s*?[/*]{1,2}},
21
+ html_comment: /<!--.*?-->/m,
22
+ url: %r{#{START_REGEX}https?://[^ ]+\n},
23
+ bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[).])\s+/i,
24
+ developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
25
+ cc_dedication: /The\s+text\s+of\s+the\s+Creative\s+Commons.*?Public\s+Domain\s+Dedication\./im,
26
+ cc_wiki: /wiki\.creativecommons\.org/i,
27
+ cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
28
+ cc0_info: /For more information, please see\s*\S+zero\S+/im,
29
+ cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
30
+ unlicense_info: /For more information, please.*\S+unlicense\S+/im,
31
+ mit_optional: /\(including the next paragraph\)/i
32
+ }.freeze
33
+
34
+ NORMALIZATIONS = {
35
+ lists: { from: /^\s*(?:\d\.|[*-])(?: [*_]{0,2}\(?[\da-z]\)[*_]{0,2})?\s+([^\n])/, to: '- \1' },
36
+ https: { from: /http:/, to: 'https:' },
37
+ ampersands: { from: '&', to: 'and' },
38
+ dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
39
+ quote: { from: /[`'"‘“’”]/, to: "'" },
40
+ hyphenated: { from: /(\w+)-\s*\n\s*(\w+)/, to: '\\1-\\2' }
41
+ }.freeze
42
+
43
+ # Legally equivalent words that should be ignored for comparison
44
+ # See https://spdx.org/spdx-license-list/matching-guidelines
45
+ VARIETAL_WORDS = {
46
+ 'acknowledgment' => 'acknowledgement',
47
+ 'analogue' => 'analog',
48
+ 'analyse' => 'analyze',
49
+ 'artefact' => 'artifact',
50
+ 'authorisation' => 'authorization',
51
+ 'authorised' => 'authorized',
52
+ 'calibre' => 'caliber',
53
+ 'cancelled' => 'canceled',
54
+ 'capitalisations' => 'capitalizations',
55
+ 'catalogue' => 'catalog',
56
+ 'categorise' => 'categorize',
57
+ 'centre' => 'center',
58
+ 'emphasised' => 'emphasized',
59
+ 'favour' => 'favor',
60
+ 'favourite' => 'favorite',
61
+ 'fulfil' => 'fulfill',
62
+ 'fulfilment' => 'fulfillment',
63
+ 'initialise' => 'initialize',
64
+ 'judgment' => 'judgement',
65
+ 'labelling' => 'labeling',
66
+ 'labour' => 'labor',
67
+ 'licence' => 'license',
68
+ 'maximise' => 'maximize',
69
+ 'modelled' => 'modeled',
70
+ 'modelling' => 'modeling',
71
+ 'offence' => 'offense',
72
+ 'optimise' => 'optimize',
73
+ 'organisation' => 'organization',
74
+ 'organise' => 'organize',
75
+ 'practise' => 'practice',
76
+ 'programme' => 'program',
77
+ 'realise' => 'realize',
78
+ 'recognise' => 'recognize',
79
+ 'signalling' => 'signaling',
80
+ 'sub-license' => 'sublicense',
81
+ 'sub license' => 'sublicense',
82
+ 'utilisation' => 'utilization',
83
+ 'whilst' => 'while',
84
+ 'wilful' => 'wilfull',
85
+ 'non-commercial' => 'noncommercial',
86
+ 'per cent' => 'percent',
87
+ 'copyright owner' => 'copyright holder'
88
+ }.freeze
89
+
90
+ STRIP_METHODS = %i[
91
+ bom
92
+ cc_optional
93
+ cc0_optional
94
+ unlicense_optional
95
+ borders
96
+ title
97
+ version
98
+ url
99
+ copyright
100
+ title
101
+ block_markup
102
+ developed_by
103
+ end_of_terms
104
+ whitespace
105
+ mit_optional
106
+ ].freeze
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Licensee
4
+ module ContentHelper
5
+ # Mixin providing content normalization and stripping routines.
6
+ module NormalizationMethods
7
+ # Content with the title and version removed
8
+ # The first line should normally be the attribution line
9
+ # Used to dry up `content_normalized` but we need the case sensitive
10
+ # content with attribution first to detect attribution in LicenseFile
11
+ def content_without_title_and_version
12
+ @content_without_title_and_version ||= begin
13
+ @_content = nil
14
+ ops = %i[html html_comment hrs comments markdown_headings link_markup title version]
15
+ ops.each { |op| strip(op) }
16
+ _content
17
+ end
18
+ end
19
+
20
+ def content_normalized(wrap: nil)
21
+ @content_normalized ||= normalize_content
22
+ wrap ? Licensee::ContentHelper.wrap(@content_normalized, wrap) : @content_normalized
23
+ end
24
+
25
+ def normalize_content
26
+ @_content = content_without_title_and_version.downcase
27
+ (ContentHelper::NORMALIZATIONS.keys + %i[spelling span_markup bullets]).each { |op| normalize(op) }
28
+ ContentHelper::STRIP_METHODS.each { |op| strip(op) }
29
+ _content
30
+ end
31
+
32
+ private
33
+
34
+ def strip(regex_or_sym)
35
+ return unless _content
36
+
37
+ if regex_or_sym.is_a?(Symbol)
38
+ meth = "strip_#{regex_or_sym}"
39
+ return send(meth) if respond_to?(meth, true)
40
+
41
+ unless ContentHelper::REGEXES[regex_or_sym]
42
+ raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
43
+ end
44
+
45
+ regex_or_sym = ContentHelper::REGEXES[regex_or_sym]
46
+ end
47
+
48
+ @_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
49
+ end
50
+
51
+ def strip_title
52
+ strip(ContentHelper.title_regex) while _content =~ ContentHelper.title_regex
53
+ end
54
+
55
+ def strip_borders
56
+ normalize(ContentHelper::REGEXES[:border_markup], '\\1')
57
+ end
58
+
59
+ def strip_comments
60
+ lines = _content.split("\n")
61
+ return if lines.one?
62
+ return unless lines.all? { |line| line =~ ContentHelper::REGEXES[:comment_markup] }
63
+
64
+ strip(:comment_markup)
65
+ end
66
+
67
+ def strip_copyright
68
+ regex = Regexp.union(Matchers::Copyright::REGEX, ContentHelper::REGEXES[:all_rights_reserved])
69
+ strip(regex) while _content =~ regex
70
+ end
71
+
72
+ def strip_cc0_optional
73
+ return unless _content.include? 'associating cc0'
74
+
75
+ strip(ContentHelper::REGEXES[:cc_legal_code])
76
+ strip(ContentHelper::REGEXES[:cc0_info])
77
+ strip(ContentHelper::REGEXES[:cc0_disclaimer])
78
+ end
79
+
80
+ def strip_cc_optional
81
+ return unless _content.include? 'creative commons'
82
+
83
+ strip(ContentHelper::REGEXES[:cc_dedication])
84
+ strip(ContentHelper::REGEXES[:cc_wiki])
85
+ end
86
+
87
+ def strip_unlicense_optional
88
+ return unless _content.include? 'unlicense'
89
+
90
+ strip(ContentHelper::REGEXES[:unlicense_info])
91
+ end
92
+
93
+ def strip_end_of_terms
94
+ body, _partition, _instructions = _content.partition(ContentHelper::END_OF_TERMS_REGEX)
95
+ @_content = body
96
+ end
97
+
98
+ def normalize_span_markup
99
+ normalize(ContentHelper::REGEXES[:span_markup], '\\1')
100
+ end
101
+
102
+ def strip_link_markup
103
+ normalize(ContentHelper::REGEXES[:link_markup], '\\1')
104
+ end
105
+
106
+ def strip_html
107
+ return unless respond_to?(:filename) && filename
108
+ return unless /\.html?/i.match?(File.extname(filename))
109
+
110
+ require 'reverse_markdown'
111
+ @_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
112
+ end
113
+
114
+ def normalize(from_or_key, to = nil)
115
+ operation = { from: from_or_key, to: to } if to
116
+ operation ||= ContentHelper::NORMALIZATIONS[from_or_key]
117
+
118
+ if operation
119
+ @_content = _content.gsub operation[:from], operation[:to]
120
+ elsif respond_to?(:"normalize_#{from_or_key}", true)
121
+ send(:"normalize_#{from_or_key}")
122
+ else
123
+ raise ArgumentError, "#{from_or_key} is an invalid normalization"
124
+ end
125
+ end
126
+
127
+ def normalize_spelling
128
+ normalize(/\b#{Regexp.union(ContentHelper::VARIETAL_WORDS.keys)}\b/, ContentHelper::VARIETAL_WORDS)
129
+ end
130
+
131
+ def normalize_bullets
132
+ normalize(ContentHelper::REGEXES[:bullet], "\n\n- ")
133
+ normalize(/\)\s+\(/, ')(')
134
+ end
135
+ end
136
+ end
137
+ end