licensee 9.10.1 → 9.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.md +1 -1
- data/bin/licensee +5 -4
- data/lib/licensee.rb +7 -5
- data/lib/licensee/commands/detect.rb +7 -5
- data/lib/licensee/commands/diff.rb +7 -8
- data/lib/licensee/commands/license_path.rb +2 -0
- data/lib/licensee/commands/version.rb +2 -0
- data/lib/licensee/content_helper.rb +230 -85
- data/lib/licensee/hash_helper.rb +7 -5
- data/lib/licensee/license.rb +21 -22
- data/lib/licensee/license_field.rb +3 -1
- data/lib/licensee/license_meta.rb +2 -0
- data/lib/licensee/license_rules.rb +2 -0
- data/lib/licensee/matchers.rb +2 -0
- data/lib/licensee/matchers/cabal.rb +16 -2
- data/lib/licensee/matchers/cargo.rb +3 -1
- data/lib/licensee/matchers/copyright.rb +6 -4
- data/lib/licensee/matchers/cran.rb +5 -3
- data/lib/licensee/matchers/dice.rb +6 -4
- data/lib/licensee/matchers/dist_zilla.rb +3 -1
- data/lib/licensee/matchers/exact.rb +2 -0
- data/lib/licensee/matchers/gemspec.rb +7 -5
- data/lib/licensee/matchers/matcher.rb +3 -1
- data/lib/licensee/matchers/npm_bower.rb +3 -1
- data/lib/licensee/matchers/package.rb +2 -0
- data/lib/licensee/matchers/reference.rb +3 -1
- data/lib/licensee/matchers/spdx.rb +3 -1
- data/lib/licensee/project_files.rb +2 -0
- data/lib/licensee/project_files/license_file.rb +12 -10
- data/lib/licensee/project_files/package_manager_file.rb +2 -0
- data/lib/licensee/project_files/project_file.rb +12 -5
- data/lib/licensee/project_files/readme_file.rb +5 -3
- data/lib/licensee/projects.rb +2 -0
- data/lib/licensee/projects/fs_project.rb +9 -2
- data/lib/licensee/projects/git_project.rb +19 -11
- data/lib/licensee/projects/github_project.rb +3 -1
- data/lib/licensee/projects/project.rb +7 -5
- data/lib/licensee/rule.rb +2 -0
- data/lib/licensee/version.rb +3 -1
- data/licensee.gemspec +45 -0
- data/spec/bin_spec.rb +3 -1
- data/spec/fixture_spec.rb +46 -0
- data/spec/fixtures/bsd-3-noendorseslash/LICENSE +30 -0
- data/spec/fixtures/cc0-cal2013/LICENSE +116 -0
- data/spec/fixtures/cc0-cc/LICENSE +121 -0
- data/spec/fixtures/detect.json +10 -8
- data/spec/fixtures/fixtures.yml +134 -0
- data/spec/fixtures/html/license.html +262 -0
- data/spec/fixtures/license-hashes.json +41 -0
- data/spec/fixtures/mit-optional/LICENSE.txt +21 -0
- data/spec/fixtures/multiple-arrs/LICENSE +30 -0
- data/spec/fixtures/readme-invalid-encoding/README.md +24 -0
- data/spec/fixtures/unlicense-noinfo/LICENSE +22 -0
- data/spec/integration_spec.rb +68 -2
- data/spec/licensee/commands/detect_spec.rb +11 -7
- data/spec/licensee/commands/license_path_spec.rb +3 -1
- data/spec/licensee/commands/version_spec.rb +3 -1
- data/spec/licensee/content_helper_spec.rb +185 -67
- data/spec/licensee/hash_helper_spec.rb +3 -1
- data/spec/licensee/license_field_spec.rb +5 -3
- data/spec/licensee/license_meta_spec.rb +16 -12
- data/spec/licensee/license_rules_spec.rb +6 -2
- data/spec/licensee/license_spec.rb +37 -35
- data/spec/licensee/matchers/cabal_matcher_spec.rb +97 -2
- data/spec/licensee/matchers/cargo_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/copyright_matcher_spec.rb +7 -5
- data/spec/licensee/matchers/cran_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/dice_matcher_spec.rb +15 -12
- data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/exact_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/gemspec_matcher_spec.rb +5 -2
- data/spec/licensee/matchers/matcher_spec.rb +6 -2
- data/spec/licensee/matchers/npm_bower_matcher_spec.rb +5 -3
- data/spec/licensee/matchers/package_matcher_spec.rb +6 -2
- data/spec/licensee/matchers/reference_matcher_spec.rb +4 -2
- data/spec/licensee/matchers/spdx_matcher_spec.rb +5 -2
- data/spec/licensee/project_files/license_file_spec.rb +20 -18
- data/spec/licensee/project_files/package_info_spec.rb +5 -1
- data/spec/licensee/project_files/project_file_spec.rb +8 -2
- data/spec/licensee/project_files/readme_file_spec.rb +4 -1
- data/spec/licensee/project_spec.rb +24 -17
- data/spec/licensee/projects/git_project_spec.rb +23 -0
- data/spec/licensee/projects/github_project_spec.rb +8 -5
- data/spec/licensee/rule_spec.rb +6 -3
- data/spec/licensee_spec.rb +12 -9
- data/spec/spec_helper.rb +27 -9
- data/spec/vendored_license_spec.rb +29 -10
- data/vendor/choosealicense.com/_data/meta.yml +0 -4
- data/vendor/choosealicense.com/_data/rules.yml +3 -0
- data/vendor/choosealicense.com/_licenses/0bsd.txt +39 -0
- data/vendor/choosealicense.com/_licenses/afl-3.0.txt +7 -6
- data/vendor/choosealicense.com/_licenses/agpl-3.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/apache-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/artistic-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/bsd-2-clause.txt +8 -6
- data/vendor/choosealicense.com/_licenses/bsd-3-clause-clear.txt +2 -2
- data/vendor/choosealicense.com/_licenses/bsd-3-clause.txt +12 -10
- data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +61 -0
- data/vendor/choosealicense.com/_licenses/bsl-1.0.txt +5 -2
- data/vendor/choosealicense.com/_licenses/cc-by-4.0.txt +16 -14
- data/vendor/choosealicense.com/_licenses/cc-by-sa-4.0.txt +16 -14
- data/vendor/choosealicense.com/_licenses/cc0-1.0.txt +113 -105
- data/vendor/choosealicense.com/_licenses/cecill-2.1.txt +579 -0
- data/vendor/choosealicense.com/_licenses/ecl-2.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/epl-1.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/epl-2.0.txt +3 -4
- data/vendor/choosealicense.com/_licenses/eupl-1.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/eupl-1.2.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/isc.txt +2 -3
- data/vendor/choosealicense.com/_licenses/lgpl-2.1.txt +0 -1
- data/vendor/choosealicense.com/_licenses/lgpl-3.0.txt +1 -3
- data/vendor/choosealicense.com/_licenses/lppl-1.3c.txt +1 -2
- data/vendor/choosealicense.com/_licenses/mit.txt +1 -2
- data/vendor/choosealicense.com/_licenses/mpl-2.0.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-pl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ms-rl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/ncsa.txt +21 -22
- data/vendor/choosealicense.com/_licenses/odbl-1.0.txt +573 -0
- data/vendor/choosealicense.com/_licenses/ofl-1.1.txt +4 -2
- data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -2
- data/vendor/choosealicense.com/_licenses/postgresql.txt +4 -5
- data/vendor/choosealicense.com/_licenses/unlicense.txt +1 -2
- data/vendor/choosealicense.com/_licenses/upl-1.0.txt +4 -5
- data/vendor/choosealicense.com/_licenses/vim.txt +111 -0
- data/vendor/choosealicense.com/_licenses/wtfpl.txt +0 -1
- data/vendor/choosealicense.com/_licenses/zlib.txt +4 -2
- metadata +79 -28
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d931f50190ecf7abb790530607bd57eb31c7190926a394bca7aa9ec0550cfba8
|
|
4
|
+
data.tar.gz: f119b575b2ff9538133a587ef3a23638756fac47e30c40b442ddad679bb62036
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4b423e68fb6496eefc0f4259fac2539f34430a13e1eb6d3758a6876c604fc40e5a763a04836025070410c082c2a516b28988ceeb46ed2b2a06276b318b9d0fb6
|
|
7
|
+
data.tar.gz: f0e150efc09980729793f86bbfcff323617349f0cae92ff2d4ebace2e29dac96e980deacc224b6ab08a21e3224f77b95541bdfaab9fa016ba81a6f94c3fdcce7
|
data/LICENSE.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c) 2014-
|
|
3
|
+
Copyright (c) 2014-2020 Ben Balter and Licensee contributors
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/bin/licensee
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
2
3
|
|
|
3
4
|
require 'dotenv/load'
|
|
4
5
|
require 'thor'
|
|
@@ -15,9 +16,9 @@ class LicenseeCLI < Thor
|
|
|
15
16
|
|
|
16
17
|
def path
|
|
17
18
|
@path ||= if !options[:remote] || args.first =~ %r{^https://}
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
args.first || Dir.pwd
|
|
20
|
+
else
|
|
21
|
+
"https://github.com/#{args.first}"
|
|
21
22
|
end
|
|
22
23
|
end
|
|
23
24
|
|
|
@@ -32,6 +33,6 @@ class LicenseeCLI < Thor
|
|
|
32
33
|
end
|
|
33
34
|
|
|
34
35
|
commands_dir = File.expand_path '../lib/licensee/commands/', __dir__
|
|
35
|
-
Dir["#{commands_dir}/*.rb"].each { |c| require(c) }
|
|
36
|
+
Dir["#{commands_dir}/*.rb"].sort.each { |c| require(c) }
|
|
36
37
|
|
|
37
38
|
LicenseeCLI.start(ARGV)
|
data/lib/licensee.rb
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require_relative 'licensee/version'
|
|
2
4
|
require 'forwardable'
|
|
3
5
|
require 'pathname'
|
|
@@ -19,7 +21,7 @@ module Licensee
|
|
|
19
21
|
CONFIDENCE_THRESHOLD = 98
|
|
20
22
|
|
|
21
23
|
# Base domain from which to build license URLs
|
|
22
|
-
DOMAIN = 'http://choosealicense.com'
|
|
24
|
+
DOMAIN = 'http://choosealicense.com'
|
|
23
25
|
|
|
24
26
|
class << self
|
|
25
27
|
attr_writer :confidence_threshold
|
|
@@ -36,12 +38,12 @@ module Licensee
|
|
|
36
38
|
|
|
37
39
|
def project(path, **args)
|
|
38
40
|
if path =~ %r{\Ahttps://github.com}
|
|
39
|
-
Licensee::Projects::GitHubProject.new(path, args)
|
|
41
|
+
Licensee::Projects::GitHubProject.new(path, **args)
|
|
40
42
|
else
|
|
41
|
-
Licensee::Projects::GitProject.new(path, args)
|
|
43
|
+
Licensee::Projects::GitProject.new(path, **args)
|
|
42
44
|
end
|
|
43
45
|
rescue Licensee::Projects::GitProject::InvalidRepository
|
|
44
|
-
Licensee::Projects::FSProject.new(path, args)
|
|
46
|
+
Licensee::Projects::FSProject.new(path, **args)
|
|
45
47
|
end
|
|
46
48
|
|
|
47
49
|
def confidence_threshold
|
|
@@ -49,7 +51,7 @@ module Licensee
|
|
|
49
51
|
end
|
|
50
52
|
|
|
51
53
|
# Inverse of the confidence threshold, represented as a float
|
|
52
|
-
# By default this will be 0.
|
|
54
|
+
# By default this will be 0.02
|
|
53
55
|
def inverse_confidence_threshold
|
|
54
56
|
@inverse_confidence_threshold ||=
|
|
55
57
|
(1 - Licensee.confidence_threshold / 100.0).round(2)
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
class LicenseeCLI < Thor
|
|
2
4
|
# Methods to call when displaying information about ProjectFiles
|
|
3
5
|
MATCHED_FILE_METHODS = %i[
|
|
@@ -21,11 +23,11 @@ class LicenseeCLI < Thor
|
|
|
21
23
|
|
|
22
24
|
rows = []
|
|
23
25
|
rows << if project.license
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
['License:', project.license.spdx_id]
|
|
27
|
+
elsif !project.licenses.empty?
|
|
28
|
+
['Licenses:', project.licenses.map(&:spdx_id)]
|
|
29
|
+
else
|
|
30
|
+
['License:', set_color('None', :red)]
|
|
29
31
|
end
|
|
30
32
|
|
|
31
33
|
unless project.matched_files.empty?
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'tmpdir'
|
|
2
4
|
|
|
3
5
|
class LicenseeCLI < Thor
|
|
@@ -39,26 +41,23 @@ class LicenseeCLI < Thor
|
|
|
39
41
|
|
|
40
42
|
def license_to_diff
|
|
41
43
|
return options[:license_to_diff] if options[:license_to_diff]
|
|
42
|
-
return project.license_file if remote?
|
|
44
|
+
return project.license_file if remote? || STDIN.tty? && project.license_file
|
|
43
45
|
|
|
44
46
|
@license_to_diff ||= begin
|
|
45
|
-
if STDIN.tty?
|
|
46
|
-
error 'You must pipe license contents to the command via STDIN'
|
|
47
|
-
exit 1
|
|
48
|
-
end
|
|
49
|
-
|
|
50
47
|
Licensee::ProjectFiles::LicenseFile.new(STDIN.read, 'LICENSE')
|
|
51
48
|
end
|
|
52
49
|
end
|
|
53
50
|
|
|
54
51
|
def expected_license
|
|
55
|
-
|
|
52
|
+
if options[:license]
|
|
53
|
+
@expected_license ||= Licensee::License.find options[:license]
|
|
54
|
+
end
|
|
56
55
|
return @expected_license if @expected_license
|
|
57
56
|
|
|
58
57
|
if options[:license]
|
|
59
58
|
error "#{options[:license]} is not a valid license"
|
|
60
59
|
else
|
|
61
|
-
error '
|
|
60
|
+
error 'Usage: provide a license to diff against with --license (spdx name)'
|
|
62
61
|
end
|
|
63
62
|
|
|
64
63
|
error "Valid licenses: #{Licensee::License.all(hidden: true).map(&:key).join(', ')}"
|
|
@@ -1,26 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'set'
|
|
2
4
|
require 'digest'
|
|
3
5
|
|
|
4
6
|
module Licensee
|
|
5
7
|
module ContentHelper
|
|
6
8
|
DIGEST = Digest::SHA1
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
9
|
+
START_REGEX = /\A\s*/.freeze
|
|
10
|
+
END_OF_TERMS_REGEX = /^[\s#*_]*end of terms and conditions[\s#*_]*$/i.freeze
|
|
11
|
+
REGEXES = {
|
|
12
|
+
hrs: /^\s*[=\-\*]{3,}\s*$/,
|
|
13
|
+
all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
|
|
14
|
+
whitespace: /\s+/,
|
|
15
|
+
markdown_headings: /#{START_REGEX}#+/,
|
|
16
|
+
version: /#{START_REGEX}version.*$/i,
|
|
17
|
+
span_markup: /[_*~]+(.*?)[_*~]+/,
|
|
18
|
+
link_markup: /\[(.+?)\]\(.+?\)/,
|
|
19
|
+
block_markup: /^\s*>/,
|
|
20
|
+
border_markup: /^[\*-](.*?)[\*-]$/,
|
|
21
|
+
comment_markup: %r{^\s*?[/\*]{1,2}},
|
|
22
|
+
url: %r{#{START_REGEX}https?://[^ ]+\n},
|
|
23
|
+
bullet: /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[)\.])\s+/i,
|
|
24
|
+
developed_by: /#{START_REGEX}developed by:.*?\n\n/im,
|
|
25
|
+
quote_begin: /[`'"‘“]/,
|
|
26
|
+
quote_end: /[`'"’”]/,
|
|
27
|
+
cc_legal_code: /^\s*Creative Commons Legal Code\s*$/i,
|
|
28
|
+
cc0_info: /For more information, please see\s*\S+zero\S+/im,
|
|
29
|
+
cc0_disclaimer: /CREATIVE COMMONS CORPORATION.*?\n\n/im,
|
|
30
|
+
unlicense_info: /For more information, please.*\S+unlicense\S+/im,
|
|
31
|
+
mit_optional: /\(including the next paragraph\)/i
|
|
32
|
+
}.freeze
|
|
33
|
+
NORMALIZATIONS = {
|
|
34
|
+
lists: { from: /^\s*(?:\d\.|\*)\s+([^\n])/, to: '- \1' },
|
|
35
|
+
https: { from: /http:/, to: 'https:' },
|
|
36
|
+
ampersands: { from: '&', to: 'and' },
|
|
37
|
+
dashes: { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
|
|
38
|
+
quotes: {
|
|
39
|
+
from: /#{REGEXES[:quote_begin]}+([\w -]*?\w)#{REGEXES[:quote_end]}+/,
|
|
40
|
+
to: '"\1"'
|
|
41
|
+
}
|
|
42
|
+
}.freeze
|
|
43
|
+
|
|
44
|
+
# Legally equivalent words that should be ignored for comparison
|
|
45
|
+
# See https://spdx.org/spdx-license-list/matching-guidelines
|
|
46
|
+
VARIETAL_WORDS = {
|
|
47
|
+
'acknowledgment' => 'acknowledgement',
|
|
48
|
+
'analogue' => 'analog',
|
|
49
|
+
'analyse' => 'analyze',
|
|
50
|
+
'artefact' => 'artifact',
|
|
51
|
+
'authorisation' => 'authorization',
|
|
52
|
+
'authorised' => 'authorized',
|
|
53
|
+
'calibre' => 'caliber',
|
|
54
|
+
'cancelled' => 'canceled',
|
|
55
|
+
'capitalisations' => 'capitalizations',
|
|
56
|
+
'catalogue' => 'catalog',
|
|
57
|
+
'categorise' => 'categorize',
|
|
58
|
+
'centre' => 'center',
|
|
59
|
+
'emphasised' => 'emphasized',
|
|
60
|
+
'favour' => 'favor',
|
|
61
|
+
'favourite' => 'favorite',
|
|
62
|
+
'fulfil' => 'fulfill',
|
|
63
|
+
'fulfilment' => 'fulfillment',
|
|
64
|
+
'initialise' => 'initialize',
|
|
65
|
+
'judgment' => 'judgement',
|
|
66
|
+
'labelling' => 'labeling',
|
|
67
|
+
'labour' => 'labor',
|
|
68
|
+
'licence' => 'license',
|
|
69
|
+
'maximise' => 'maximize',
|
|
70
|
+
'modelled' => 'modeled',
|
|
71
|
+
'modelling' => 'modeling',
|
|
72
|
+
'offence' => 'offense',
|
|
73
|
+
'optimise' => 'optimize',
|
|
74
|
+
'organisation' => 'organization',
|
|
75
|
+
'organise' => 'organize',
|
|
76
|
+
'practise' => 'practice',
|
|
77
|
+
'programme' => 'program',
|
|
78
|
+
'realise' => 'realize',
|
|
79
|
+
'recognise' => 'recognize',
|
|
80
|
+
'signalling' => 'signaling',
|
|
81
|
+
'sub-license' => 'sublicense',
|
|
82
|
+
'sub license' => 'sublicense',
|
|
83
|
+
'utilisation' => 'utilization',
|
|
84
|
+
'whilst' => 'while',
|
|
85
|
+
'wilful' => 'wilfull',
|
|
86
|
+
'non-commercial' => 'noncommercial',
|
|
87
|
+
'cent' => 'percent',
|
|
88
|
+
'owner' => 'holder'
|
|
89
|
+
}.freeze
|
|
90
|
+
STRIP_METHODS = %i[
|
|
91
|
+
cc0_optional
|
|
92
|
+
unlicense_optional
|
|
93
|
+
hrs
|
|
94
|
+
markdown_headings
|
|
95
|
+
borders
|
|
96
|
+
title
|
|
97
|
+
version
|
|
98
|
+
url
|
|
99
|
+
copyright
|
|
100
|
+
title
|
|
101
|
+
block_markup
|
|
102
|
+
span_markup
|
|
103
|
+
link_markup
|
|
104
|
+
developed_by
|
|
105
|
+
end_of_terms
|
|
106
|
+
whitespace
|
|
107
|
+
mit_optional
|
|
108
|
+
].freeze
|
|
18
109
|
|
|
19
110
|
# A set of each word in the license, without duplicates
|
|
20
111
|
def wordset
|
|
21
|
-
@wordset ||=
|
|
22
|
-
content_normalized.scan(/(?:\w(?:'s|(?<=s)')?)+/).to_set
|
|
23
|
-
end
|
|
112
|
+
@wordset ||= content_normalized&.scan(%r{(?:[\w\/](?:'s|(?<=s)')?)+})&.to_set
|
|
24
113
|
end
|
|
25
114
|
|
|
26
115
|
# Number of characters in the normalized content
|
|
@@ -33,7 +122,8 @@ module Licensee
|
|
|
33
122
|
# Number of characters that could be added/removed to still be
|
|
34
123
|
# considered a potential match
|
|
35
124
|
def max_delta
|
|
36
|
-
@max_delta ||=
|
|
125
|
+
@max_delta ||= fields_normalized.size * 10 +
|
|
126
|
+
(length * Licensee.inverse_confidence_threshold).to_i
|
|
37
127
|
end
|
|
38
128
|
|
|
39
129
|
# Given another license or project file, calculates the difference in length
|
|
@@ -44,8 +134,9 @@ module Licensee
|
|
|
44
134
|
# Given another license or project file, calculates the similarity
|
|
45
135
|
# as a percentage of words in common
|
|
46
136
|
def similarity(other)
|
|
47
|
-
overlap = (
|
|
48
|
-
total =
|
|
137
|
+
overlap = (wordset_fieldless & other.wordset).size
|
|
138
|
+
total = wordset_fieldless.size + other.wordset.size -
|
|
139
|
+
fields_normalized_set.size
|
|
49
140
|
100.0 * (overlap * 2.0 / total)
|
|
50
141
|
end
|
|
51
142
|
|
|
@@ -60,35 +151,21 @@ module Licensee
|
|
|
60
151
|
# content with attribution first to detect attribution in LicenseFile
|
|
61
152
|
def content_without_title_and_version
|
|
62
153
|
@content_without_title_and_version ||= begin
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
strip_version(string).strip
|
|
154
|
+
@_content = nil
|
|
155
|
+
ops = %i[html hrs comments markdown_headings title version]
|
|
156
|
+
ops.each { |op| strip(op) }
|
|
157
|
+
_content
|
|
68
158
|
end
|
|
69
159
|
end
|
|
70
160
|
|
|
71
|
-
# Content without title, version, copyright, whitespace, or instructions
|
|
72
|
-
#
|
|
73
|
-
# wrap - Optional width to wrap the content
|
|
74
|
-
#
|
|
75
|
-
# Returns a string
|
|
76
161
|
def content_normalized(wrap: nil)
|
|
77
|
-
return unless content
|
|
78
|
-
|
|
79
162
|
@content_normalized ||= begin
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
string, _partition, _instructions = string.partition(END_OF_TERMS_REGEX)
|
|
87
|
-
string = normalize_lists(string)
|
|
88
|
-
string = normalize_quotes(string)
|
|
89
|
-
string = normalize_https(string)
|
|
90
|
-
string = strip_markup(string)
|
|
91
|
-
strip_whitespace(string)
|
|
163
|
+
@_content = content_without_title_and_version.downcase
|
|
164
|
+
|
|
165
|
+
(NORMALIZATIONS.keys + %i[spelling bullets]).each { |op| normalize(op) }
|
|
166
|
+
STRIP_METHODS.each { |op| strip(op) }
|
|
167
|
+
|
|
168
|
+
_content
|
|
92
169
|
end
|
|
93
170
|
|
|
94
171
|
if wrap.nil?
|
|
@@ -98,15 +175,24 @@ module Licensee
|
|
|
98
175
|
end
|
|
99
176
|
end
|
|
100
177
|
|
|
178
|
+
# Keep the old constants backwards compatible to avoid a breaking change
|
|
179
|
+
def self.const_missing(const)
|
|
180
|
+
key = const.to_s.downcase.gsub('_regex', '').to_sym
|
|
181
|
+
REGEXES[key] || super
|
|
182
|
+
end
|
|
183
|
+
|
|
101
184
|
# Wrap text to the given line length
|
|
102
185
|
def self.wrap(text, line_width = 80)
|
|
103
186
|
return if text.nil?
|
|
104
187
|
|
|
105
188
|
text = text.clone
|
|
189
|
+
text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
|
|
106
190
|
text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
|
|
107
191
|
|
|
108
192
|
text = text.split("\n").collect do |line|
|
|
109
|
-
if line
|
|
193
|
+
if line =~ REGEXES[:hrs]
|
|
194
|
+
line
|
|
195
|
+
elsif line.length > line_width
|
|
110
196
|
line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
|
|
111
197
|
else
|
|
112
198
|
line
|
|
@@ -117,83 +203,142 @@ module Licensee
|
|
|
117
203
|
end
|
|
118
204
|
|
|
119
205
|
def self.format_percent(float)
|
|
120
|
-
"#{format('
|
|
206
|
+
"#{format('%<float>.2f', float: float)}%"
|
|
121
207
|
end
|
|
122
208
|
|
|
123
209
|
def self.title_regex
|
|
124
|
-
|
|
125
|
-
|
|
210
|
+
@title_regex ||= begin
|
|
211
|
+
licenses = Licensee::License.all(hidden: true, psuedo: false)
|
|
212
|
+
titles = licenses.map(&:title_regex)
|
|
126
213
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
214
|
+
# Title regex must include the version to support matching within
|
|
215
|
+
# families, but for sake of normalization, we can be less strict
|
|
216
|
+
without_versions = licenses.map do |license|
|
|
217
|
+
next if license.title == license.name_without_version
|
|
131
218
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
219
|
+
Regexp.new Regexp.escape(license.name_without_version), 'i'
|
|
220
|
+
end
|
|
221
|
+
titles.concat(without_versions.compact)
|
|
135
222
|
|
|
136
|
-
|
|
223
|
+
/#{START_REGEX}\(?(?:the )?#{Regexp.union titles}.*?$/i
|
|
224
|
+
end
|
|
137
225
|
end
|
|
138
226
|
|
|
139
227
|
private
|
|
140
228
|
|
|
141
|
-
def
|
|
142
|
-
|
|
229
|
+
def _content
|
|
230
|
+
@_content ||= content.to_s.dup.strip
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
def strip(regex_or_sym)
|
|
234
|
+
return unless _content
|
|
235
|
+
|
|
236
|
+
if regex_or_sym.is_a?(Symbol)
|
|
237
|
+
meth = "strip_#{regex_or_sym}"
|
|
238
|
+
return send(meth) if respond_to?(meth, true)
|
|
239
|
+
|
|
240
|
+
unless REGEXES[regex_or_sym]
|
|
241
|
+
raise ArgumentError, "#{regex_or_sym} is an invalid regex reference"
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
regex_or_sym = REGEXES[regex_or_sym]
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
@_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
def strip_title
|
|
251
|
+
while _content =~ ContentHelper.title_regex
|
|
252
|
+
strip(ContentHelper.title_regex)
|
|
253
|
+
end
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
def strip_borders
|
|
257
|
+
normalize(REGEXES[:border_markup], '\1')
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
def strip_comments
|
|
261
|
+
lines = _content.split("\n")
|
|
262
|
+
return if lines.count == 1
|
|
263
|
+
return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
|
|
264
|
+
|
|
265
|
+
strip(:comment_markup)
|
|
143
266
|
end
|
|
144
267
|
|
|
145
|
-
def
|
|
146
|
-
|
|
268
|
+
def strip_copyright
|
|
269
|
+
regex = Regexp.union(Matchers::Copyright::REGEX, REGEXES[:all_rights_reserved])
|
|
270
|
+
strip(regex) while _content =~ regex
|
|
147
271
|
end
|
|
148
272
|
|
|
149
|
-
def
|
|
150
|
-
|
|
273
|
+
def strip_cc0_optional
|
|
274
|
+
return unless _content.include? 'associating cc0'
|
|
275
|
+
|
|
276
|
+
strip(REGEXES[:cc_legal_code])
|
|
277
|
+
strip(REGEXES[:cc0_info])
|
|
278
|
+
strip(REGEXES[:cc0_disclaimer])
|
|
151
279
|
end
|
|
152
280
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
281
|
+
def strip_unlicense_optional
|
|
282
|
+
return unless _content.include? 'unlicense'
|
|
283
|
+
|
|
284
|
+
strip(REGEXES[:unlicense_info])
|
|
156
285
|
end
|
|
157
286
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
287
|
+
def strip_end_of_terms
|
|
288
|
+
body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
|
|
289
|
+
@_content = body
|
|
161
290
|
end
|
|
162
291
|
|
|
163
|
-
def
|
|
164
|
-
|
|
292
|
+
def strip_span_markup
|
|
293
|
+
normalize(REGEXES[:span_markup], '\1')
|
|
165
294
|
end
|
|
166
295
|
|
|
167
|
-
def
|
|
168
|
-
|
|
296
|
+
def strip_link_markup
|
|
297
|
+
normalize(REGEXES[:link_markup], '\1')
|
|
169
298
|
end
|
|
170
299
|
|
|
171
|
-
def
|
|
172
|
-
|
|
300
|
+
def strip_html
|
|
301
|
+
return unless respond_to?(:filename) && filename
|
|
302
|
+
return unless File.extname(filename) =~ /\.html?/i
|
|
303
|
+
|
|
304
|
+
require 'reverse_markdown'
|
|
305
|
+
@_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
def normalize(from_or_key, to = nil)
|
|
309
|
+
operation = { from: from_or_key, to: to } if to
|
|
310
|
+
operation ||= NORMALIZATIONS[from_or_key]
|
|
311
|
+
|
|
312
|
+
if operation
|
|
313
|
+
@_content = _content.gsub operation[:from], operation[:to]
|
|
314
|
+
elsif respond_to?("normalize_#{from_or_key}", true)
|
|
315
|
+
send("normalize_#{from_or_key}")
|
|
316
|
+
else
|
|
317
|
+
raise ArgumentError, "#{from_or_key} is an invalid normalization"
|
|
318
|
+
end
|
|
173
319
|
end
|
|
174
320
|
|
|
175
|
-
def
|
|
176
|
-
|
|
321
|
+
def normalize_spelling
|
|
322
|
+
normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
|
|
177
323
|
end
|
|
178
324
|
|
|
179
|
-
def
|
|
180
|
-
|
|
325
|
+
def normalize_bullets
|
|
326
|
+
normalize(REGEXES[:bullet], "\n\n* ")
|
|
327
|
+
normalize(/\)\s+\(/, ')(')
|
|
181
328
|
end
|
|
182
329
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
# strip double quotes if we still want to allow possessives
|
|
186
|
-
def normalize_quotes(string)
|
|
187
|
-
string.gsub(/#{QUOTE_BEGIN_REGEX}+([\w -]*?\w)#{QUOTE_END_REGEX}+/,
|
|
188
|
-
'"\1"')
|
|
330
|
+
def wordset_fieldless
|
|
331
|
+
@wordset_fieldless ||= wordset - fields_normalized_set
|
|
189
332
|
end
|
|
190
333
|
|
|
191
|
-
|
|
192
|
-
|
|
334
|
+
# Returns an array of strings of substitutable fields in normalized content
|
|
335
|
+
def fields_normalized
|
|
336
|
+
@fields_normalized ||=
|
|
337
|
+
content_normalized.scan(LicenseField::FIELD_REGEX).flatten
|
|
193
338
|
end
|
|
194
339
|
|
|
195
|
-
def
|
|
196
|
-
|
|
340
|
+
def fields_normalized_set
|
|
341
|
+
@fields_normalized_set ||= fields_normalized.to_set
|
|
197
342
|
end
|
|
198
343
|
end
|
|
199
344
|
end
|