license_matcher 0.1.0.pre.alpha → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +13 -1
- data/README.md +76 -5
- data/lib/license_matcher/native.bundle +0 -0
- data/lib/license_matcher/preprocess.rb +75 -0
- data/lib/license_matcher/rule_matcher.rb +601 -0
- data/lib/license_matcher/tf_ruby_matcher.rb +100 -0
- data/lib/license_matcher/url_matcher.rb +102 -0
- data/lib/license_matcher.rb +16 -2
- metadata +64 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b710a76802bc5254b2d0d9dd3c36376c613b7e5
|
4
|
+
data.tar.gz: fef8fe16e4028ed1c49710956dceb4ac13acee9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee54a1ae1b3258f9bc474a5a05c7221281be666976f6e1d547bb1673c4fa57507d0006a21b236d9ce02a55fa6e9e1ac9fe646a6c96d53c2d7255ab59d5e2c821
|
7
|
+
data.tar.gz: adb94097dea0e79e19ac4b2cf9285d42e190902f2d8f798e5eb7239d094e20ca12193efb7342ad9ee4065b2e5f2bc48ccf3cacc318419699d7a7f2b528fb6fab
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
license_matcher (0.1.0)
|
4
|
+
license_matcher (0.1.0.pre.alpha)
|
5
5
|
helix_runtime (~> 0.6.0)
|
6
|
+
narray (~> 0.6.1.2)
|
7
|
+
nokogiri (~> 1.8.0)
|
8
|
+
tf-idf-similarity (~> 0.1.6)
|
6
9
|
|
7
10
|
GEM
|
8
11
|
remote: https://rubygems.org/
|
@@ -13,6 +16,11 @@ GEM
|
|
13
16
|
rake (>= 10.0)
|
14
17
|
thor (~> 0.19.4)
|
15
18
|
toml (~> 0.1.2)
|
19
|
+
mini_portile2 (2.2.0)
|
20
|
+
msgpack (1.1.0)
|
21
|
+
narray (0.6.1.2)
|
22
|
+
nokogiri (1.8.0)
|
23
|
+
mini_portile2 (~> 2.2.0)
|
16
24
|
parslet (1.5.0)
|
17
25
|
blankslate (~> 2.0)
|
18
26
|
rake (10.5.0)
|
@@ -29,9 +37,12 @@ GEM
|
|
29
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
30
38
|
rspec-support (~> 3.6.0)
|
31
39
|
rspec-support (3.6.0)
|
40
|
+
tf-idf-similarity (0.1.6)
|
41
|
+
unicode_utils (~> 1.4)
|
32
42
|
thor (0.19.4)
|
33
43
|
toml (0.1.2)
|
34
44
|
parslet (~> 1.5.0)
|
45
|
+
unicode_utils (1.4.0)
|
35
46
|
|
36
47
|
PLATFORMS
|
37
48
|
ruby
|
@@ -39,6 +50,7 @@ PLATFORMS
|
|
39
50
|
DEPENDENCIES
|
40
51
|
bundler (~> 1.15)
|
41
52
|
license_matcher!
|
53
|
+
msgpack (~> 1.1.0)
|
42
54
|
rake (~> 10.0)
|
43
55
|
rspec (~> 3.4)
|
44
56
|
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# LicenseMatcher
|
2
2
|
|
3
|
-
LicenseMatcher is a rubygem that
|
3
|
+
LicenseMatcher is a Ruby gem that matches the full text of an open-source license with its SPDX id, so you don't have to guess whether it is a **BSD** or an **MIT** license; let `LicenseMatcher` do the heavy lifting for you.
|
4
4
|
|
5
5
|
|
6
6
|
It uses [Fosslim](https://github.com/Fosslim/fosslim/) library underneath, which gives remarkable performance with lower memory cost than pure Ruby implementation;
|
@@ -34,16 +34,87 @@ run `bundle exec irb` on your commandline to fire up Ruby REPL;
|
|
34
34
|
```
|
35
35
|
require 'license_matcher'
|
36
36
|
|
37
|
-
# build index
|
38
|
-
|
37
|
+
# download the pre-built index
|
38
|
+
curl -O https://github.com/Fosslim/license_matcher/blob/master/data/index.msgpack
|
39
|
+
|
40
|
+
# or build index from the SPDX data
|
41
|
+
LicenseMatcher::TFRustMatcher.build_index( "data/licenses", "data/index.msgpack")
|
39
42
|
|
40
43
|
# match license text
|
41
44
|
txt = File.read("fixtures/files/mit.txt");
|
42
|
-
|
43
|
-
lm.
|
45
|
+
|
46
|
+
lm = LicenseMatcher::TFRubyMatcher.new("data/index.msgpack")
|
47
|
+
lm.match_text(txt, 0.9)
|
48
|
+
|
49
|
+
|
50
|
+
```
|
51
|
+
|
52
|
+
|
53
|
+
## Matchers
|
54
|
+
|
55
|
+
It currently supports 4 different models:
|
56
|
+
|
57
|
+
* **UrlMatcher.match_url** - finds matching SPDX license by comparing URL with urls in the `licenses.json`
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
lm = LicenseMatcher::UrlMatcher.new
|
61
|
+
lm.match_url "https://opensource.org/licenses/AAL"
|
62
|
+
|
63
|
+
=> "AAL"
|
64
|
+
```
|
65
|
+
|
66
|
+
* **RuleMatcher.match_rules** - scans a text and returns the SPDX ids whose rules match it, ordered by the length of the matched substring (longest first)
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
lm = LicenseMatcher::RuleMatcher.new
|
70
|
+
lm.match_rules "It is license under Apache 2.0 License."
|
71
|
+
|
72
|
+
=> "Apache-2.0"
|
73
|
+
```
|
74
|
+
|
75
|
+
* **TFRubyMatcher** - original Ruby implementation, uses TF/IDF and Cosine similarity;
|
44
76
|
|
45
77
|
```
|
78
|
+
lm = LicenseMatcher::TFRubyMatcher.new
|
46
79
|
|
80
|
+
txt = File.read "fixtures/files/mit.html"
|
81
|
+
clean_txt = lm.preprocess_html txt # NB! it may help to increase accuracy
|
82
|
+
lm.match_text clean_txt
|
83
|
+
```
|
84
|
+
|
85
|
+
* **TFRustMatcher** - uses simple Jaccard similarity;
|
86
|
+
|
87
|
+
```
|
88
|
+
lm2 = LicenseMatcher::TFRustMatcher.new
|
89
|
+
|
90
|
+
txt = File.read "fixtures/files/mit.txt"
|
91
|
+
lm2.match_text txt
|
92
|
+
```
|
93
|
+
|
94
|
+
## Benchmarks
|
95
|
+
|
96
|
+
* initialization: Ruby version 1 time, Rust version 1000 times
|
97
|
+
|
98
|
+
```
|
99
|
+
user system total real
|
100
|
+
TFRubyMatcher: 12.850000 0.180000 13.030000 ( 13.210955)
|
101
|
+
TFRustMatcher: 26.260000 9.400000 35.660000 ( 38.264632)
|
102
|
+
```
|
103
|
+
* matching preprocessed short [MIT](https://raw.githubusercontent.com/Fosslim/license_matcher/master/data/spdx_licenses/plain/MIT) text 1000 times
|
104
|
+
|
105
|
+
```
|
106
|
+
user system total real
|
107
|
+
TFRubyMatcher:102.410000 12.180000 114.590000 (116.308119)
|
108
|
+
TFRustMatcher: 7.170000 0.040000 7.210000 ( 7.266000)
|
109
|
+
```
|
110
|
+
|
111
|
+
* matching preprocessed long [AGPL-3.0](https://raw.githubusercontent.com/Fosslim/license_matcher/master/data/spdx_licenses/plain/AGPL-3.0) text 1000 times
|
112
|
+
|
113
|
+
```
|
114
|
+
user system total real
|
115
|
+
TFRubyMatcher:242.450000 21.960000 264.410000 (276.417704)
|
116
|
+
TFRustMatcher: 9.340000 0.070000 9.410000 ( 9.478597)
|
117
|
+
```
|
47
118
|
|
48
119
|
## Development
|
49
120
|
|
Binary file
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# Text clean-up helpers mixed into the license matchers.
#
# The methods turn raw license text or an HTML page into a plain string
# that the matching code can work with.
module Preprocess
  # Normalises plain text: re-encodes it, removes markdown links, the word
  # "the", XML comments and CDATA sections, then collapses all whitespace
  # runs into single spaces.
  #
  # @param text [#to_s] raw license text
  # @return [String] cleaned, single-line text (may be empty)
  def preprocess_text(text)
    text = safe_encode(text)

    # remove markdown url tags
    text = text.gsub(/\[.+?\]\(.+?\)/, ' ')

    # remove spam words
    text.gsub!(/\bTHE\b/i, '')

    # remove some XML garbage
    text = text.gsub(/\<\!\[CDATA.*?\]\]\>/, ' ').to_s
    text = text.gsub(/\<\!--.+?--\>/, ' ').to_s
    text = text.gsub(/<\!\[CDATA.+?\]>/, ' ').to_s

    text.to_s.strip.gsub(/\s+/, ' ')
  end

  # Extracts the visible text of an HTML document.
  #
  # @param html_text [#to_s] HTML source
  # @return [String] extracted text, or "" when the document cannot be parsed
  def preprocess_html(html_text)
    html_doc = parse_html(html_text)
    return clean_html(html_doc) if html_doc

    p "match_html: failed to parse html document\n#{html_text}"
    ""
  end

  # Collects the text of content-bearing elements of a parsed document and
  # falls back to the whole <body> when none of them yields any text.
  #
  # @param html_doc [#xpath] parsed document (the object returned by parse_html)
  # @return [String] extracted text
  def clean_html(html_doc)
    body_elements = html_doc.xpath(
      '//p | //h1 | //h2 | //h3 | //h4 | //h5 | //h6 | //em | //strong | //b | //td | //pre
      | //li[not(@id) and not(@class) and not(a)] | //section//section[@class="project-info"]
      | //blockquote | //textarea'
    ).to_a

    # extract text from every element, separated by a space
    body_text = body_elements.map { |el| " #{el.text}" }.join.strip

    # remove XML CDATA like opensource.org pages have
    body_text.gsub!(/\<\!\[CDATA.+?\]\]\>/i, ' ')

    if body_text.empty?
      p "match_html: document didnt pass noise filter, will use whole body content"
      body_text = html_doc.xpath('//body').text.to_s.strip
    end

    body_text
  end

  # Parses HTML with Nokogiri.
  #
  # Only StandardError is rescued so that signals and exit requests still
  # propagate; the failure is reported with `p`, like the other helpers in
  # this module (the previous code called an undefined `log` here).
  #
  # @param html_text [#to_s] HTML source
  # @return [Object, nil] the parsed document, or nil when parsing raised
  def parse_html(html_text)
    Nokogiri.HTML(safe_encode(html_text))
  rescue StandardError => e
    p "failed to parse html doc: #{e.message}\n #{html_text}"
    nil
  end

  # Re-encodes the text from binary to UTF-8, dropping every byte that has
  # no mapping (in practice: everything outside ASCII).
  #
  # @param txt [#to_s] any text
  # @return [String] UTF-8 string, or "" when the conversion raised
  def safe_encode(txt)
    txt.to_s.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
  rescue StandardError
    p "Failed to encode text:\n #{txt}"
    ""
  end
end
|
@@ -0,0 +1,601 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module LicenseMatcher
|
4
|
+
|
5
|
+
class RuleMatcher
|
6
|
+
include Preprocess
|
7
|
+
|
8
|
+
attr_reader :licenses, :rules, :id_spdx_idx
|
9
|
+
|
10
|
+
DEFAULT_LICENSE_JSON = 'data/spdx_licenses/licenses.json'
|
11
|
+
|
12
|
+
|
13
|
+
# Loads the SPDX license list and prepares the rule table and the id index.
#
# @param license_json_file [String] path to the SPDX licenses JSON file
# @raise [RuntimeError] when the file could not be read
def initialize(license_json_file = DEFAULT_LICENSE_JSON)
  spdx_doc = read_json_file(license_json_file)
  raise("Failed to read licenses.json") if spdx_doc.nil?

  @rules = init_rules(spdx_doc)
  # reverse index from downcased license id to the case-sensitive SPDX id
  @id_spdx_idx = init_id_idx(spdx_doc)
  true
end
|
23
|
+
|
24
|
+
# Builds a lookup from the downcased license id to the id as spelled in
# the license list.
#
# @param licenses_json_doc [#to_a] license items, each answering to [:id]
# @return [Hash{String => Object}] downcased id => original id
def init_id_idx(licenses_json_doc)
  licenses_json_doc.to_a.each_with_object({}) do |spdx_item, index|
    original_id = spdx_item[:id]
    index[original_id.to_s.downcase] = original_id
  end
end
|
33
|
+
|
34
|
+
# Runs every rule set against the text and returns the hits, longest
# matched substring first.
# Not very efficient, but good enough to handle special cases.
#
# @param text [String] a license name or a short text mentioning a license
# @param early_exit [Boolean] stop after the first rule set that matches
# @return [Array<Array>] [[spdx_id, score, matching_rule, matching_length], ...];
#   when the cleaned text itself is a known rule key the result is just
#   [[key, 1.0]]
def match_rules(text, early_exit = false)
  cleaned = preprocess_text(text)

  # the text is already a known id: no need to scan
  lookup_key = cleaned.downcase
  return [[lookup_key, 1.0]] if @rules.has_key?(lookup_key)

  # trailing space lets word-border rules work on one-word texts
  haystack = cleaned + ' '

  found = []
  @rules.each do |spdx_id, rules|
    hit = matches_any_rule?(rules, haystack)
    next if hit.nil?

    found << ([spdx_id, 1.0] + hit)
    break if early_exit == true
  end

  found.sort do |left, right|
    same_shape = (left.size == right.size && left.size == 4)
    same_shape ? -1 * (left[3] <=> right[3]) : 0
  end
end
|
66
|
+
|
67
|
+
# Tells whether the text matches one of the ignore rules.
#
# @param lic_text [#to_s] license text or name to test
# @return [Boolean] true when any ignore rule matches
def ignore?(lic_text)
  !matches_any_rule?(get_ignore_rules, lic_text.to_s).nil?
end
|
73
|
+
|
74
|
+
# Finds the first rule that matches the given text.
#
# @param rules [Enumerable<Regexp>] rules, tried in order
# @param license_name [#to_s] text to test
# @return [Array, nil] [rule, length_of_matched_substring] for the first
#   matching rule, or nil when no rule matches
def matches_any_rule?(rules, license_name)
  subject = license_name.to_s

  rules.each do |rule|
    hit = rule.match(subject)
    return [rule, hit[0].size] if hit
  end

  nil
end
|
86
|
+
|
87
|
+
|
88
|
+
#-- helpers
|
89
|
+
|
90
|
+
# Reads and parses a JSON file, symbolizing the keys.
#
# Failures are reported on stderr and turned into nil so that the caller
# can decide how to react. (The previous rescue branch called an undefined
# `log` and therefore raised NameError instead of returning nil.)
#
# @param file_path [String] path of the JSON file
# @return [Object, nil] parsed document, or nil when the file is missing,
#   unreadable or not valid JSON
def read_json_file(file_path)
  JSON.parse(File.read(file_path), {symbolize_names: true})
rescue StandardError => e
  warn "Failed to read json file `#{file_path}`: #{e.message}"
  nil
end
|
96
|
+
|
97
|
+
# Merges the rules generated from the SPDX list with the handwritten ones.
# Custom ids are stripped and downcased; their rules are appended to an
# existing entry or stored as a new entry.
#
# @param license_json_doc [Enumerable] parsed SPDX license items
# @return [Hash{String => Array<Regexp>}] downcased id => rules
def init_rules(license_json_doc)
  combined = build_rules_from_spdx_json(license_json_doc)

  get_custom_rules.each do |custom_id, extra_rules|
    key = custom_id.to_s.strip.downcase

    if combined.has_key?(key)
      combined[key].concat(extra_rules)
    else
      combined[key] = extra_rules
    end
  end

  combined
end
|
114
|
+
|
115
|
+
|
116
|
+
# Builds the regex rules for every item of the SPDX license list.
# Items are processed in the order of their :id; the rules themselves come
# from build_spdx_item_rules (urls, ids, names and alternative names).
#
# @param spdx_json [Enumerable] license items answering to [:id]
# @return [Hash{String => Array<Regexp>}] downcased, stripped id => rules
def build_rules_from_spdx_json(spdx_json)
  ordered_items = spdx_json.sort_by { |item| item[:id] }

  ordered_items.each_with_object({}) do |item, rules_by_id|
    rules_by_id[item[:id].to_s.downcase.strip] = build_spdx_item_rules(item)
  end
end
|
129
|
+
|
130
|
+
# Builds the case-insensitive rules for a single SPDX license item.
#
# Rules are emitted in this order: :links urls, :text urls, the license
# name, the license id, :identifiers and finally :other_names.
#
# @param spdx_item [Hash] license item with :id and :name and optionally
#   :links, :text, :identifiers and :other_names
# @return [Array<Regexp>] rules for the item
def build_spdx_item_rules(spdx_item)
  # turns one {url: ...} entry into a rule matching that url
  url_rule = lambda do |link|
    lic_url = link[:url].to_s.strip.gsub(/https?:\/\//i, '').gsub(/www\./, '').gsub(/\./, '\\.') #normalizes urls in the file
    Regexp.new("\\b[\\(]?https?:\/\/(www\.)?#{lic_url}[\/]?[\\)]?\\b".gsub(/\s+/, ''), Regexp::IGNORECASE)
  end

  rules = []

  # links come first: a link to the license is the most reliable hint
  spdx_item[:links].to_a.each { |link| rules << url_rule.call(link) }

  # links to the license texts
  spdx_item[:text].to_a.each { |link| rules << url_rule.call(link) }

  name_pattern = preprocess_text(spdx_item[:name])
                 .gsub(/\(.+?\)/, '')         # remove SPDX ids in the license names
                 .gsub(/\./, '\\.')           # mark version dots as not regex selector
                 .gsub(/[\*|\?|\+]/, '.')     # replace regex selector with whatever mark ~> WTFPL name
                 .gsub(/\,/, '[\\,]?')        # make comma optional
                 .strip
                 .gsub(/\s+/, '\\s\\+')       # replace spaces with space selector
  rules << Regexp.new("\\b#{name_pattern}\\b", Regexp::IGNORECASE)

  # ids with a digit, a dash, a pipe or a "ware" ending may match anywhere in
  # the text; other ids (MIT, Fair, Glide, ...) only at its very beginning
  spdx_id = spdx_item[:id].to_s.strip.downcase
  id_pattern =
    if spdx_id.match(/[\d|-]|ware\z/)
      "\\b[\\(]?#{spdx_id}[\\)]?\\b".gsub(/\s+/, '\\s').gsub(/\./, '\\.')
    else
      "\\A[\\(]?#{spdx_id}[\\)]?\\b"
    end
  rules << Regexp.new(id_pattern, Regexp::IGNORECASE)

  spdx_item[:identifiers].to_a.each do |id|
    rules << Regexp.new("\\b#{id[:identifier]}\\s".gsub(/\s+/, '\\s').gsub(/\./, '\\.'), Regexp::IGNORECASE)
  end

  spdx_item[:other_names].to_a.each do |alt|
    rules << Regexp.new("\\b#{alt[:name]}\\b".gsub(/\s+/, '\\s'), Regexp::IGNORECASE)
  end

  rules
end
|
175
|
+
|
176
|
+
|
177
|
+
# Returns the rules describing license "names" that carry no license
# information (file names, bare copyright lines, placeholders, ...).
#
# @return [Array<Regexp>] ignore rules, in evaluation order
def get_ignore_rules
  ignore_patterns = [
    /\bProprietary\b/i,
    /\bOther\/Proprietary\b/i,
    /\ALICEN[C|S]E\.\w{2,8}\b/i,
    /^LICEN[C|S]ING\.\w{2,8}\b/i,
    /^COPYING\.\w{2,8}/i,
    /\ADFSG\s+APPROVED\b/i,
    /\ASee\slicense\sin\spackage\b/i,
    /\ASee LICENSE\b/i,
    /\AFree\s+for\s+non[-]?commercial\b/i,
    /\AFree\s+To\s+Use\b/i,
    /\AFree\sFor\sHome\sUse\b/i,
    /\AFree\s+For\s+Educational\b/i,
    /^Freely\s+Distributable\s*$/i,
    /^COPYRIGHT\s+\d{2,4}/i,
    /^Copyright\s+\(c\)\s+\d{2,4}\b/i,
    /^COPYRIGHT\s*$/i,
    /^COPYRIGHT\.\w{2,8}\b/i,
    /^\(c\)\s+\d{2,4}\d/,
    /^LICENSE\s*$/i,
    /^FREE\s*$/i,
    /\ASee\sLicense\s*\b/i,
    /^TODO\s*$/i,
    /^FREEWARE\s*$/i,
    /^All\srights\sreserved\s*$/i,
    /^COPYING\s*$/i,
    /^OTHER\s*$/i,
    /^NONE\s*$/i,
    /^DUAL\s*$/i,
    /^KEEP\s+IT\s+REAL\s*\b/i,
    /\ABE\s+REAL\s*\z/i,
    /\APrivate\s*\z/i,
    /\ACommercial\s*\z/i,
    /\ASee\s+LICENSE\s+file\b/i,
    /\ASee\sthe\sLICENSE\b/i,
    /\ALICEN[C|S]E\s*\z/i,
    /^PUBLIC\s*$/i,
    /^see file LICENSE\s*$/i,
    /^__license__\s*$/i,
    /\bLIEULA\b/i,
    /\AEULA\s*\z/i,
    /^qQuickLicen[c|s]e\b/i,
    /^For\sfun\b/i,
    /\AVarious\s*\z/i,
    /^GNU\s*$/i,
    /^GNU[-|\s]?v3\s*$/i,
    /^OSI\s+Approved\s*$/i,
    /^OSI\s*$/i,
    /\AOPEN\sSOURCE\sLICENSE\s?\z/i,
    /\AOPEN\s*\z/i,
    /\Aunknown\s*\z/i,
    /^https?:\/\/github.com/i,
    /^https?:\/\/gitlab\.com/i
  ]

  ignore_patterns
end
|
199
|
+
|
200
|
+
|
201
|
+
# Returns the handwritten matching rules.
#
# The result maps a license id (String, spelled as in the keys below) to
# an Array of Regexp. init_rules strips and downcases the keys and merges
# the arrays with the rules generated from the SPDX license list.
# A new Hash is built on every call.
#
# @return [Hash{String => Array<Regexp>}] license id => custom rules
def get_custom_rules
  {
    "AAL"  => [/\bAAL\b/i, /\bAAL\s+License\b/i,
               /\bAttribution\s+Assurance\s+License\b/i],
    "AFL-1.1" => [/\bAFL[-|v]?1[^\.]\b/i, /\bafl[-|v]?1\.1\b/i],
    "AFL-1.2" => [/\bAFL[-|v]?1\.2\b/i],
    "AFL-2.0" => [/\bAFL[-|v]?2[^\.]\b/i, /\bAFL[-|v]?2\.0\b/i],
    "AFL-2.1" => [/\bAFL[-|v]?2\.1\b/i],
    "AFL-3.0" => [
      /\bAFL[-|\s|\_]?v?3\.0\b/i, /\bAFL[-|\s|\_]?v?3/i,
      /\AAcademic\s+Free\s+License\s*\z/i, /^AFL\s?\z/i,
      /\bhttps?:\/\/opensource\.org\/licenses\/academic\.php\b/i,
      /\AAcademic[-|\s]Free[-|\s]License[-]?\s*\z/i,
      /\bAcademic.Free.License.\(AFL\)/i
    ],
    "AGPL-1.0" => [
      /\bAGPL[-|v|_|\s]?1\.0\b/i,
      /\bAGPL[-|v|_|\s]1\b/i , /\bAGPL[_|-|v]?2\b/i,
      /\bAffero\s+General\s+Public\s+License\s+[v]?1\b/i,
      /\bAGPL\s(?!(v|\d))/i #Matches only AGPL, but not AGPL v1, AGPL 1.0 etc
    ],
    "AGPL-3.0" => [
      /\bAGPL[-|_|\s]?v?3\.0\b/i, /\bAGPL[-|\s|_]?v?3[\+]?\b/i,
      /\bAPGLv?3[\+]?\b/i, #some packages has typos
      /\bGNU\s+Affero\s+General\s+Public\s+License\s+[v]?3/i,
      /\bAFFERO\sGNU\sPUBLIC\sLICENSE\sv3\b/i,
      /\bGnu\sAffero\sPublic\sLicense\sv3+?\b/i,
      /\bAffero\sGeneral\sPublic\sLicen[s|c]e[\,]?\sversion\s+3[\.0]?\b/i,
      /\bAffero\sGeneral\sPublic\sLicense\sv?3\b/i,
      /\bAGPL\sversion\s3[\.]?\b/i,
      /\bGNU\sAGPL\sv?3[\.0]?\b/i,
      /\bGNU\sAFFERO\sv?3\b/i,
      /\bhttps?:\/\/gnu\.org\/licenses\/agpl\.html\b/i,
      /\AAFFERO\sGENERAL\sPUBLIC\sLICENSE\s*\z/i,
      /^AFFERO\s*\z/i
    ],
    "Aladdin"   => [/\b[\(]?AFPL[\)]?\b/i, /\bAladdin\sFree\sPublic\sLicense\b/i],
    "Amazon"    => [/\bAmazon\sSoftware\sLicense\b/i],
    "Apache-1.0" => [/\bAPACHE[-|_|\s]?v?1[^\.]/i, /\bAPACHE[-|\s]?v?1\.0\b/i],
    "Apache-1.1" => [/\bAPACHE[-|_|\s]?v?1\.1\b/i],
    "Apache-2.0" => [
      /\bAPACHE\s+2\.0\b/i, /\bAPACHE[-|_|\s]?v?2\b/i,
      /\bApache\sOpen\sSource\sLicense\s2\.0\b/i,
      /\bAPACH[A|E]\s+Licen[c|s]e\s+[\(]?v?2\.0[\)]?\b/i,
      /\bAPACHE\s+LICENSE\,?\s+VERSION\s+2\.0\b/i,
      /\bApache\s+License\s+v?2\b/i,
      /\bApache\s+Software\sLicense\b/i,
      /\bApapche[-|\s|\_]?v?2\.0\b/i, /\bAL[-|\s|\_]2\.0\b/i,
      /\bAPL\s+2\.0\b/i, /\bAPL[\.|-|v]?2\b/i, /\bASL\s+2\.0\b/i,
      /\bASL[-|v|\s]?2\b/i, /\bALv2\b/i, /\bASF[-|\s]?2\.0\b/i,
      /\AAPACHE\s*\z/i, /\AASL\s*\z/i, /\bASL\s+v?\.2\.0\b/i, /\AASF\s*\z/i,
      /\AApache\s+license\s*\z/i,
    ],
    "APL-1.0" => [/\bapl[-|_|\s]?v?1\b/i, /\bAPL[-|_|\s]?v?1\.0\b/i, /^APL$/i],
    "APSL-1.0" => [/\bAPSL[-|_|\s]?v?1\.0\b/i, /\bAPSL[-|_|\s]?v?1(?!\.)\b/i, /\AAPPLE\s+PUBLIC\s+SOURCE\s*\z/i],
    "APSL-1.1" => [/\bAPSL[-|_|\s]?v?1\.1\b/i],
    "APSL-1.2" => [/\bAPSL[-|_|\s]?v?1\.2\b/i],
    "APSL-2.0" => [/\bAPSL[-|_|\s]?v?2\.0\b/i, /\bAPSL[-|_|\s]?v?2\b/i],

    "Artistic-1.0-Perl" => [/\bArtistic[-|_|\s]?v?1\.0\-Perl\b/i, /\bPerlArtistic\b/i],
    "Artistic-1.0" => [/\bartistic[-|_|\s]?v?1\.0(?!\-)\b/i, /\bartistic[-|_|\s]?v?1(?!\.)\b/i],
    "Artistic-2.0" => [/\bARTISTIC[-|_|\s]?v?2\.0\b/i, /\bartistic[-|_|\s]?v?2\b/i,
                       /\bArtistic.2.0\b/i,
                       /\bARTISTIC\s+LICENSE\b/i, /\AARTISTIC\s*\z/i],
    "Beerware" => [
      /\bBEERWARE\b/i, /\bBEER\s+LICEN[c|s]E\b/i,
      /\bBEER[-|\s]WARE\b/i, /^BEER\b/i,
      /\bBuy\ssnare\sa\sbeer\b/i,
      /\bFree\sas\sin\sbeer\b/i
    ],
    'BitTorrent-1.1' => [/\bBitTorrent\sOpen\sSource\sLicense\b/i],
    "0BSD" => [/\A0BSD\s*\z/i],
    "BSD-2-CLAUSE" => [
      /\bBSD[-|_|\s]?v?2\b/i, /^FREEBSD\b/i, /^OPENBSD\b/i,
      /\bBSDLv2\b/i
    ],
    "BSD-3-CLAUSE" => [/\bBSD[-|_|\s]?v?3\b/i, /\bBSD[-|\s]3[-\s]CLAUSE\b/i,
                       /\bBDS[-|_|\s]3[-|\s]CLAUSE\b/i,
                       /\bthree-clause\sBSD\slicen[s|c]e\b/i,
                       /\ABDS\s*\z/i, /^various\/BSDish\s*$/],
    "BSD-4-CLAUSE" => [
      /\bBSD[-|_|\s]?v?4/i, /\ABSD\s*\z/i, /\ABSD\s+LI[s|c]EN[S|C]E\s*\z/i,
      /\bBSD-4-CLAUSE\b/i,
      /\bhttps?:\/\/en\.wikipedia\.org\/wiki\/BSD_licenses\b/i
    ],
    "BSL-1.0" => [
      /\bBSL[-|_|\s]?v?1\.0\b/i, /\bbsl[-|_|\s]?v?1\b/i, /^BOOST\b/i,
      /\bBOOST\s+SOFTWARE\s+LICENSE\b/i,
      /\bBoost\sLicense\s1\.0\b/i
    ],
    "CC0-1.0" => [
      /\bCC0[-|_|\s]?v?1\.0\b/i, /\bCC0[-|_|\s]?v?1\b/i,
      /\bCC[-|\s]?[0|o]\b/i, /\bCreative\s+Commons\s+0\b/i,
      /\bhttps?:\/\/creativecommons\.org\/publicdomain\/zero\/1\.0[\/]?\b/i,
      /\bcc[-|\_]zero\b/i
    ],
    "CC-BY-1.0" => [/\bCC.BY.v?1\.0\b/i, /\bCC.BY.v?1\b/i, /^CC[-|_|\s]?BY$/i],
    "CC-BY-2.0" => [/\bCC.BY.v?2\.0\b/i, /\bCC.BY.v?2(?!\.)\b/i],
    "CC-BY-2.5" => [/\bCC.BY.v?2\.5\b/i],
    "CC-BY-3.0" => [
      /\bCC.BY.v?3\.0\b/i, /\b[\(]?CC.BY[\)]?.v?3\b/i,
      /\bCreative\sCommons\sBY\s3\.0\b/i,
      /\bhttps?:\/\/.+\/licenses\/by\/3\.0[\/]?/i
    ],
    "CC-BY-4.0" => [
      /^CC[-|\s]?BY[-|\s]?v?4\.0$/i, /\bCC.BY.v?4\b/i, /\bCC.BY.4\.0\b/i,
      /\bCREATIVE\s+COMMONS\s+ATTRIBUTION\s+[v]?4\.0\b/i,
      /\ACREATIVE\s+COMMONS\s+ATTRIBUTION\s*\z\b/i
    ],
    "CC-BY-SA-1.0" => [/\bCC[-|\s]BY.SA.v?1\.0\b/i, /\bCC[-|\s]BY.SA.v?1\b/i],
    "CC-BY-SA-2.0" => [/\bCC[-|\s]BY.SA.v?2\.0\b/i, /\bCC[-|\s]BY.SA.v?2(?!\.)\b/i],
    "CC-BY-SA-2.5" => [/\bCC[-|\s]BY.SA.v?2\.5\b/i],
    "CC-BY-SA-3.0" => [/\bCC[-|\s]BY.SA.v?3\.0\b/i, /\bCC[-|\s]BY.SA.v?3\b/i,
                       /\bCC3\.0[-|_|\s]BY.SA\b/i,
                       /\bhttps?:\/\/(www\.)?.+\/by.sa\/3\.0[\/]?/i],
    "CC-BY-SA-4.0" => [
      /CC[-|\s]BY.SA.v?4\.0$/i, /\bCC[-|\s]BY.SA.v?4\b/i,
      /CCSA-4\.0/i
    ],
    "CC-BY-NC-1.0" => [/\bCC[-|\s]BY.NC[-|\s]?v?1\.0\b/i, /\bCC[-|\s]BY.NC[-|\s]?v?1\b/i],
    "CC-BY-NC-2.0" => [/\bCC[-|\s]BY.NC[-|\s]?v?2\.0\b/i],
    "CC-BY-NC-2.5" => [/\bCC[-|\s]BY.NC[-|\s]?v?2\.5\b/i],
    "CC-BY-NC-3.0" => [/\bCC[-|\s]BY.NC[-|\s]?v?3\.0\b/i, /\bCC.BY.NC[-|\s]?v?3\b/i,
                       /\bCreative\s+Commons\s+Non[-]?Commercial[,]?\s+3\.0\b/i],
    # NOTE(review): the URL rule below targets by-nc/3.0 although the key is
    # CC-BY-NC-4.0 — confirm whether 4.0 was intended
    "CC-BY-NC-4.0" => [
      /\bCC[-|\s]BY.NC[-|\s|_]?v?4\.0\b/i, /\bCC.BY.NC[-|\s|_]?v?4\b/i,
      /\bhttps?:\/\/creativecommons\.org\/licenses\/by-nc\/3\.0[\/]?\b/i
    ],
    "CC-BY-NC-SA-1.0" => [ /\bCC[-|\s+]BY.NC.SA[-|\s+]v?1\.0\b/i,
                           /\bCC[-|\s+]BY.NC.SA[-|\s+]v?1\b/i
    ],
    "CC-BY-NC-SA-2.0" => [/\bCC[-|\s]?BY.NC.SA[-|\s]?v?2\.0\b/i],
    "CC-BY-NC-SA-2.5" => [/\bCC[-|\s]?BY.NC.SA[-|\s]?v?2\.5\b/i],
    "CC-BY-NC-SA-3.0" => [
      /\bCC[-|\s]?BY.NC.SA[-|\s]?v?3\.0\b/i,
      /\bCC[-|\s]?BY.NC.SA[-|\s]?v?3(?!\.)\b/i,
      /\bBY[-|\s]NC[-|\s]SA\sv?3\.0\b/i,
      /\bhttp:\/\/creativecommons.org\/licenses\/by-nc-sa\/3.0\/us[\/]?\b/i
    ],
    "CC-BY-NC-SA-4.0" => [/\bCC[-|\s]?BY.NC.SA[-|\s]?v?4\.0\b/i,
                          /\bCC[-|_|\s]BY.NC.SA[-|\s]?v?4(?!\.)\b/i,
                          /\bBY.NC.SA[-|\s|\_]v?4\.0\b/i],

    "CC-BY-ND-1.0" => [/\bCC[-|\s]BY.ND[-|\s]?v?1\.0\b/i],
    "CC-BY-ND-2.0" => [/\bCC[-|\s]BY.ND[-|\s]?v?2\.0\b/i],
    "CC-BY-ND-2.5" => [/\bCC[-|\s]BY.ND[-|\s]?v?2\.5\b/i],
    "CC-BY-ND-3.0" => [/\bCC[-|\s]BY.ND[-|\s]?v?3\.0\b/i],
    "CC-BY-ND-4.0" => [
      /\bCC[-|\s]BY.ND[-|\s]?v?4\.0\b/i,
      /\bCC\sBY.NC.ND\s4\.0/i
    ],

    "CC-BY-NC-ND-3.0" => [/\bCC.BY.NC.ND.3\.0\b/i],
    "CC-BY-NC-ND-4.0" => [/\bCC.BY.NC.ND.4\.0\b/i],
    "CDDL-1.0" => [/\bCDDL[-|_|\s]?v?1\.0\b/i, /\bCDDL[-|_|\s]?v?1\b/i, /^CDDL$/i,
                   /\bCDDL\s+LICEN[C|S]E\b/i,
                   /\bCOMMON\sDEVELOPMENT\sAND\sDISTRIBUTION\sLICENSE\b/i
    ],
    "CECILL-B" => [/\bCECILL[-|_|\s]?B\b/i],
    "CECILL-C" => [/\bCECILL[-|_|\s]?C\b/i],
    "CECILL-1.0" => [
      /\bCECILL[-|\s|_]?v?1\.0\b/i, /\bCECILL[-|\s|_]?v?1\b/i,
      /\ACECILL\s?\z/i, /\bCECILL\s+v?1\.2\b/i,
      /^http:\/\/www\.cecill\.info\/licences\/Licence_CeCILL-C_V1-en.html$/i,
      /\bhttp:\/\/www\.cecill\.info\b/i
    ],
    "CECILL-2.1" => [
      /\bCECILL[-|_|\s]?2\.1\b/i, /\bCECILL[\s|_|-]?v?2\b/i,
      /\bCECILL\sVERSION\s2\.1\b/i
    ],
    "CPL-1.0" => [
      /\bCPL[-|\s|_]?v?1\.0\b/i, /\bCPL[-|\s|_]?v?1\b/i,
      /\bCommon\s+Public\s+License\b/i, /\ACPL\s*\z/i
    ],
    "CPAL-1.0" => [
      /\bCommon\sPublic\sAttribution\sLicense\s1\.0\b/i,
      /[\(]?\bCPAL\b[\)]?/i
    ],
    "CUSTOM" => [ /\bCUSTOM\s+LICENSE\b/i ],
    "DBAD" => [
      /\bDONT\sBE\sA\sDICK\b/i, /\ADBAD\s*\z/i,
      /\bdbad[-|\s|\_]license\b/i, /\ADBAD-1\s*\z/i,
      /\ADBAP\b/i,
      /\bhttps?:\/\/www\.dbad-license\.org[\/]?\b/i
    ],
    "D-FSL-1.0" => [
      /\bD-?FSL[-|_|\s]?v?1\.0\b/i, /\bD-?FSL[-|\s|_]?v?1\b/,
      /\bGerman\sFREE\sSOFTWARE\b/i,
      /\bDeutsche\sFreie\sSoftware\sLizenz\b/i
    ],
    "ECL-1.0" => [ /\bECL[-|\s|_]?v?1\.0\b/i, /\bECL[-|\s|_]?v?1\b/i ],
    "ECL-2.0" => [
      /\bECL[-|\s|_]?v?2\.0\b/i, /\bECL[-|\s|_]?v?2\b/i,
      /\bEDUCATIONAL\s+COMMUNITY\s+LICENSE[,]?\sVERSION\s2\.0\b/i
    ],
    "EFL-1.0" => [/\bEFL[-|\s|_]?v?1\.0\b/i, /\bEFL[-|\s|_]?v?1\b/i ],
    "EFL-2.0" => [
      /\bEFL[-|\s|_]?v?2\.0\b/i, /\bEFL[-|\s|_]?v?2\b/i,
      /\bEiffel\sForum\sLicense,?\sversion\s2/i,
      /\bEiffel\sForum\sLicense\s2(?!\.)\b/i,
      /\bEiffel\sForum\sLicense\b/i
    ],
    "EPL-1.0" => [
      /\bEPL[-|\s|_]?v?1\.0\b/i, /\bEPL[-|\s|_]?v?1\b/i,
      /\bECLIPSE\s+PUBLIC\s+LICENSE\s+[v]?1\.0\b/i,
      /\bECLIPSE\s+PUBLIC\s+LICENSE\b/i,
      /^ECLIPSE$/i, /\AEPL\s*\z/
    ],
    "ESA-1.0" => [
      /\bESCL\s+[-|_]?\sType\s?1\b/,
      /\bESA\sSOFTWARE\sCommunity\sLICENSE.+TYPE\s?1\b/i
    ],
    "EUPL-1.0" => [/\b[\(]?EUPL[-|\s]?v?1\.0[\)]?\b/i],
    "EUPL-1.1" => [
      /\b[\(]?EUPL[-|\s]?v?1\.1[\)]?\b/i,
      /\bEUROPEAN\s+UNION\s+PUBLIC\s+LICENSE\s+1\.1\b/i,
      /\bEuropean\sUnion\sPublic\sLicense\b/i,
      /\bEUPL\s+V?\.?1\.1\b/i, /\AEUPL\s*\z/i
    ],
    "Fair" => [ /\bFAIR\s+LICENSE\b/i, /\AFair\s*\z/i],
    "FreeType" => [ /\bFreeType\s+LICENSE\b/i],
    "GFDL-1.0" => [
      /\bGNU\sFree\sDocumentation\sLicense\b/i,
      /\b[\(]?FDL[\)]?\b/
    ],
    "GPL-1.0" => [
      /\bGPL[-|\s|_]?v?1\.0\b/i, /\bGPL[-|\s|_]?v?1\b/i,
      /\bGNU\sPUBLIC\sLICEN[S|C]E\sv?1\b/i
    ],
    "GPL-2.0" => [
      /\bGPL[-|\s|_]?v?2\.0/i, /\bGPL[-|\s|_]?v?2\b/i, /\bGPL\s+[v]?2\b/i,
      /\bGNU\s+PUBLIC\s+LICENSE\s+v?2\.0\b/i,
      /\bGNU\s+PUBLIC\s+License\sV?2\b/i,
      /\bGNU\spublic\slicense\sversion\s2\b/i,
      /\bGNU\sGeneral\sPublic\sLicense\sv?2\.0\b/i,
      /\bGNU\sPublic\sLicense\s>=2\b/i,
      /\bGNU\s+GPL\s+v2\b/i, /^GNUv?2\b/i, /^GLPv2\b/,
      /\bWhatever\slicense\sPlone\sis\b/i
    ],
    "GPL-3.0" => [
      /\bGNU\s+GENERAL\s+PUBLIC\s+License\s+[v]?3\b/i,
      /\bGNU\s+General\s+Public\s+License[\,]?\sVersion\s3[\.0]?\b/i,
      /\bGNU\sPublic\sLicense\sv?3\.0\b/i,
      /\bGNU\s+PUBLIC\s+LICENSE\s+v?3\b/i,
      /\bGnu\sPublic\sLicense\sversion\s3\b/i,
      /\bGNU\sGeneral\sPublic\sLicense\sversion\s?3\b/i,
      /\bGPL[-|\s|_]?v?3\.0\b/i, /\bGPL[-|\s|_]?v?[\.]?3\b/i, /\bGPL\s+3\b/i,
      /\bGNU\s+PUBLIC\s+v3\+?\b/i,
      /\bGNUGPL[-|\s|\_]?v?3\b/i, /\bGNU\s+PL\s+[v]?3\b/i,
      /\bGLPv3\b/i, /\bGNU3\b/i, /GPvL3/i, /\bGNU\sGLP\sv?3\b/i,
      /\AGNU\sGENERAL\sPUBLIC\sLICENSE\s*\z/i, /\A[\(]?GPL[\)]?\s*\z/i
    ],

    "IDPL-1.0" => [
      /\bIDPL[-|\s|\_]?v?1\.0\b/,
      /\bhttps?:\/\/www\.firebirdsql\.org\/index\.php\?op=doc\&id=idpl\b/i
    ],
    "IPL-1.0" => [/\bIBM\sOpen\sSource\sLicense\b/i, /\bIBM\sPublic\sLicen[s|c]e\b/i],
    "ISC" => [/\bISC\s+LICENSE\b/i, /\b[\(]?ISCL[\)]?\b/i, /\bISC\b/i,
              /\AICS\s*\z/i],
    "JSON" => [/\bJSON\s+LICENSE\b/i],
    "KINDLY" => [/\bKINDLY\s+License\b/i],
    "LGPL-2.0" => [
      /\bLGPL[-|\s|_]?v?2\.0\b/i, /\bLGPL[-|\s|_]?v?2(?!\.)\b/i,
      /\bLesser\sGeneral\sPublic\sLicense\sv?2(?!\.)\b/i,
      /\bLPGL[-|\s|\_]?v?2(?!\.)\b/i
    ],
    "LGPL-2.1" => [
      /\bLGPL[-|\s|_]?v?2\.1\b/i,
      /\bLesser\sGeneral\sPublic\sLicense\s+\(LGPL\)\s+Version\s+2\.1\b/i,
      /\bLESSER\sGENERAL\sPUBLIC\sLICENSE[\,]?\sVersion\s2\.1[\,]?\b/i,
      /\bLESSER\sGENERAL\sPUBLIC\sLICENSE[\,]?\sv?2\.1\b/i
    ],
    "LGPL-3.0" => [/\bLGPL[-|\s|_]?v?3\.0\b/i, /\bLGPL[-|\s|_]?v?3[\+]?\b/i,
                   /\bLGLP[\s|-|v]?3\.0\b/i, /^LPLv3\s*$/, /\bLPGL[-|\s|_]?v?3[\+]?\b/i,
                   /\bLESSER\s+GENERAL\s+PUBLIC\s+License\s+[v]?3\b/i,
                   /\bLesser\sGeneral\sPublic\sLicense\sv?\.?\s+3\.0\b/i,
                   /\bhttps?:\/\/www\.gnu\.org\/copyleft\/lesser.html\b/i,
                   /\bLESSER\sGENERAL\sPUBLIC\sLICENSE\sVersion\s3\b/i,
                   /\bLesser\sGeneral\sPublic\sLicense[\,]?\sversion\s3\.0\b/i,
                   /\bLESSER\sGENERAL\sPUBLIC\sLICENSE.+?version\s?3/i,
                   /\A[\(]?LGPL[\)]?\s*\z/i
    ],
    "MirOS" => [/\bMirOS\b/i],
    "MIT" => [
      /\bMIT\s+LICEN[S|C]E\b/i, /\AMITL?\s*\z/i, /\bEXPAT\b/i,
      /\bMIT[-|\_]LICENSE\.\w{2,8}\b/i, /^MTI\b/i,
      /\bMIT[-|\s|\_]?v?2\.0\b/i, /\AM\.I\.T[\.]?\s*\z/,
      /\bMassachusetts-Institute-of-Technology-License/i
    ],
    "MITNFA" => [/\bMIT\s\+no\-false\-attribs\slicense\b/i],
    "MPL-1.0" => [
      /\bMPL[-|\s|\_]?v?1\.0\b/i, /\bMPL[-|\s|\_]?v?1(?!\.)\b/i,
      /\bMozilla\sPublic\sLicense\sv?1\.0\b/i,
    ],
    "MPL-1.1" => [
      /\bMozilla.Public.License\s+v?1\.1\b/i,
      /\bMPL[-|\s|\_]?v?1\.1\b/i,
    ],
    "MPL-2.0" => [
      /\bMPL[-|\s|\_]?v?2\.0\b/i, /\bMPL[-|\s|\_]?v?2\b/i,
      /\bMOZILLA\s+PUBLIC\s+LICENSE\s+2\.0\b/i,
      /\bMozilla\sPublic\sLicense[\,]?\s+v?[\.]?\s*2\.0\b/i,
      /\bMOZILLA\s+PUBLIC\s+LICENSE[,]?\s+version\s+2\.0\b/i,
      /\bMozilla\s+v?2\.0\b/i,
      /\b[\(]?MPL\s+2\.0[\)]?\b/, /\bMPL\b/i,
      /\AMozilla\sPublic\sLicense\s*\z/i
    ],
    "MS-PL" => [/\bMS-?PL\b/i],
    "MS-RL" => [/\bMS-?RL\b/i, /\bMSR\-LA\b/i],
    "ms_dotnet" => [/\bMICROSOFT\sSOFTWARE\sLICENSE\sTERMS\b/i],
    "NASA-1.3" => [/\bNASA[-|\_|\s]?v?1\.3\b/i,
                   /\bNASA\sOpen\sSource\sAgreement\sversion\s1\.3\b/i],
    "NCSA" => [/\bNCSA\s+License\b/i, /\bIllinois\/NCSA\sOpen\sSource\b/i, /\bNCSA\b/i ],
    "NGPL" => [/\bNGPL\b/i],
    "NOKIA" => [/\bNokia\sOpen\sSource\sLicense\b/i],
    "NPL-1.1" => [/\bNetscape\sPublic\sLicense\b/i, /\b[(]?NPL[\)]?\b/i],

    "NPOSL-3.0" => [/\bNPOSL[-|\s|\_]?v?3\.0\b/i, /\bNPOSL[-|\s|\_]?v?3\b/],
    "OFL-1.0" => [/\bOFL[-|\s|\_]?v?1\.0\b/i, /\bOFL[-|\s|\_]?v?1(?!\.)\b/i,
                  /\bSIL\s+OFL\s+1\.0\b/i, /\ASIL\sOFL\s*\z/i ],
    "OFL-1.1" => [
      /\bOFL[-|\s|\_]?v?1\.1\b/i, /\bSIL\s+OFL\s+1\.1\b/i,
      /\bSIL\sOpen\sFont\sLicense\b/i, /\bSIL\sOFL\s1\.1\b/i,
      /\bOpen\sFont\sLicense\b/i ],

    # NOTE(review): the second OSL-1.0 rule starts with `\O` (backslash before
    # the letter O) — presumably a typo for a plain `O`; confirm
    "OSL-1.0" => [/\bOSL[-|\s|\_]?v?1\.0\b/i, /\b\OSL[-|\s|\_]?v?1(?!\.)\b/i],
    "OSL-2.0" => [/\bOSL[-|\s|\_]?v?2\.0\b/i, /\bOSL[-|\s|\_]?v?2(?!\.)\b/i],
    "OSL-2.1" => [/\bOSL[-|\s|\_]?v?2\.1\b/i],
    "OSL-3.0" => [
      /\bOSL[-|\s|\_]?v?3\.0\b/i, /\bOSL[-|\s|\_]?v?3(?!\.)\b/i,
      /\bOpen\sSoftware\sLicen[c|s]e\sv?3\.0\b/i,
      /\bOSL[-|\s|\_]?v?\.?3\.[0|O]\b/i,
      /\bOpen\sSoftware\sLicense\sversion\s3\.0\b/i,
      /\AOSL\s*\z/i, /\bOpen-Software-License/i,
      /\b[\(]?OSL[\)]?\s+v\s+3\.0\b/i
    ],

    "PHP-3.0" => [/^PHP\s?\z/i, /\bPHP\sLicense\s3\.0\b/i, /\APHP[-|\s]LICEN[S|C]E\s*\z/i],
    "PHP-3.01" => [/\bPHP\sLicense\sversion\s3\.0\d\b/i],
    "PIL" => [/\bStandard\sPIL\sLicense\b/i, /\APIL\s*\z/i],
    "PostgreSQL" => [/\bPostgreSQL\b/i],
    "Public Domain" => [/\bPublic\s+Domain\b/i],
    "Python-2.0" => [
      /\bPython[-|\s|\_]?v?2\.0\b/i, /\bPython[-|\s|\_]?v?2(?!\.)\b/i,
      /\bPSF[-|\s|\_]?v?2\b/i, /\bPSFL\b/i, /\bPSF\b/i,
      /\bPython\s+Software\s+Foundation\b/i,
      /\APython\b/i, /\bPSL\b/i, /\bSAME\sAS\spython2\.3\b/i,
      /\bhttps?:\/\/www\.opensource\.org\/licenses\/PythonSoftFoundation\.php\b/i,
      /\bhttps?:\/\/opensource\.org\/licenses\/PythonSoftFoundation\.php\b/i
    ],
    "Repoze" => [/\bRepoze\sPublic\sLicense\b/i],
    "RPL-1.1" => [/\bRPL[-|\s|_]?v?1\.1\b/i, /\bRPL[-|\s|_]?v?1(?!\.)\b/i],
    "RPL-1.5" => [
      /\bRPL[-|\s|_]?v?1\.5\b/i, /\ARPL\s*\z/i,
      /\bhttps?:\/\/www\.opensource\.org\/licenses\/rpl\.php\b/i
    ],
    "Ruby" => [/\bRUBY\sLICEN[S|C]E\b/i, /\ARUBY\b/i, /\bRUBY\'s\b/i],
    "QPL-1.0" => [/\bQPL[-|\s|_]?v?1\.0\b/i,
                  /\bQT\sPublic\sLicen[c|s]e\b/i,
                  /\bPyQ\sGeneral\sLicense\b/i],
    "Sleepycat" => [/\bSleepyCat\b/i],
    "SPL-1.0" => [
      /\bSPL[-|\_|\s]?v?1\.0\b/i, /\bSun\sPublic\sLicense\b/i
    ],
    "W3C" => [/\bW3C\b/i],
    "OpenSSL" => [/\bOPENSSL\b/i],
    "Unicode-TOU" => [/\AUnicode-TOU[\s|\/|-]/i],
    "UPL-1.0" => [/\bUniversal\sPermissive\sLicense\b/i],
    "Unlicense" => [
      /\bUNLI[C|S]EN[S|C]E\b/i, /\AUnlicen[s|c]ed\s*\z/i, /^go\sfor\sit\b/i,
      /^Undecided\b/i,
      /\bNO\s+LICEN[C|S]E\b/i, /\bNON[\s|-|\_]?LICENSE\b/i
    ],
    "Whiskeyware" => [/\bWH?ISKEY[-|\s|\_]?WARE\b/i],
    "WTFPL" => [
      /\bWTF[P|G]?L\b/i, /\bWTFPL[-|v]?2\b/i, /^WTF\b/i, /\AWTFP\s*\z/i,
      /\bDo\s+whatever\s+you\s+want\b/i, /\bDWTFYW\b/i, /\AWTPFL\s*\z/i,
      /\bDo\s+What\s+the\s+Fuck\s+You\s+Want\b/i, /\ADWTFYWT\s*\z/i,
      /\ADo\sWHATEVER\b/i, /\ADWYW\b/i, /\bDWTFYWTP\b/i,
      /\ADWHTFYWTPL\s*\z/i, /\AWhatever\s*\z/i,
      /\bDO\s(THE\s)?FUCK\sWHAT\sYOU\sWANT\b/i
    ],
    "WXwindows" => [/\bwxWINDOWS\s+LIBRARY\sLICEN[C|S]E\b/i, /\AWXwindows\s*\z/i],
    "X11" => [/\bX11\b/i],
    "Zend-2.0" => [/\bZend\sFramework\b/i],
    "ZPL-1.1" => [/\bZPL[-|\s|\_]?v?1\.1\b/i, /\bZPL[-|\s|\_]?v?1(?!\.)\b/i,
                  /\bZPL[-|\s|\_]?1\.0\b/i],
    "ZPL-2.1" => [
      /\bZPL[-|\s|\/|_|]?v?2\.1\b/i, /\bZPL[-|\s|_]?v?2(?!\.)\b/i,
      /\bZPL\s+2\.\d\b/i, /\bZOPE\s+PUBLIC\s+LICENSE\b/i,
      /\bZPL\s?$/i
    ],
    "zlib-acknowledgement" => [/\bZLIB[\/|-|\s]LIBPNG\b/i],
    "ZLIB" => [/\bZLIB(?!\-|\/)\b/i]

  }
end
|
599
|
+
|
600
|
+
end
|
601
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'narray'
|
2
|
+
require 'tf-idf-similarity'
|
3
|
+
require 'msgpack'
|
4
|
+
|
5
|
+
module LicenseMatcher

  # Matches free-form license text against a corpus of indexed license
  # documents. Every document is turned into a BM25-weighted term vector and
  # the candidate text is compared with each of them by cosine similarity.
  class TFRubyMatcher
    include Preprocess

    attr_reader :corpus, :model, :spdx_ids

    DEFAULT_INDEX_PATH = 'data/index.msgpack'
    DEFAULT_MIN_CONFIDENCE = 0.9
    A_DOC_ROW = 3 # array index of the indexed-document rows inside the unpacked index

    # Loads the pre-built index and builds the BM25 model over its documents.
    #
    # @param index_path [String] path to the MessagePack-serialized index
    def initialize(index_path = DEFAULT_INDEX_PATH)
      @spdx_ids, @corpus = read_corpus(index_path)
      @model = TfIdfSimilarity::BM25Model.new(@corpus, :library => :narray)
    end

    # Finds the indexed document that is most similar to the given text.
    #
    # @param text [String] license text to classify
    # @param min_confidence [Float] the best score must be strictly greater
    #   than this value to count as a match
    # @param is_processed_text [Boolean] pass true when `text` has already been
    #   run through `preprocess_text`; only an explicit `false` triggers
    #   preprocessing
    # @return [String, Array] the id of the best matching document, `""` when
    #   nothing scores above `min_confidence`, and `[]` when `text` is empty
    #   (the mixed return types are kept for backward compatibility)
    def match_text(text, min_confidence = DEFAULT_MIN_CONFIDENCE, is_processed_text = false)
      return [] if text.to_s.empty?

      text = preprocess_text(text) if is_processed_text == false
      # read_corpus guards against preprocess_text returning nil, so do the same here
      return '' if text.nil?

      documents = @model.documents
      # without indexed documents there is nothing to compare against
      return '' if documents.empty?

      test_doc = TfIdfSimilarity::Document.new(text, {:id => "test"})

      # NOTE(review): reads the model's internal @matrix ivar directly, which
      # couples this class to tf-idf-similarity internals - confirm on upgrades
      corpus_matrix = @model.instance_variable_get(:@matrix)
      doc_vector = doc_tfidf_matrix(test_doc)

      scores = documents.each_index.map do |i|
        [i, cos_sim(corpus_matrix[i, true], doc_vector)]
      end
      best_index, best_score = scores.max_by { |_, score| score }

      best_score.to_f > min_confidence ? documents[best_index].id : ''
    end

    # Same as #match_text, but for HTML input, which is first reduced with
    # `preprocess_html`.
    def match_html(html_text, min_confidence = DEFAULT_MIN_CONFIDENCE)
      match_text(preprocess_html(html_text), min_confidence)
    end

    #-- helpers

    # Transforms a document into the TF-IDF vector used for comparison.
    # The vector has one entry per term known to the model, in model order.
    def doc_tfidf_matrix(doc)
      weights = @model.terms.map do |term|
        # score only the words that exist in both the test doc and the corpus
        if doc.term_count(term) > 0
          @model.idf(term) * @model.tf(doc, term)
        else
          0.0
        end
      end

      NArray[*weights]
    end

    # Calculates the cosine similarity between 2 TF-IDF vectors.
    # Returns 0.0 when either vector has zero magnitude.
    def cos_sim(mat1, mat2)
      dot_product = (mat1 * mat2).sum
      norm = Math.sqrt((mat1 ** 2).sum) * Math.sqrt((mat2 ** 2).sum)

      norm > 0 ? dot_product / norm : 0.0
    end

    # Reads the content of licenses from the pre-built index.
    # NB! it is sensitive to changes in the Fosslim/Index serialization.
    #
    # @param index_path [String] path to the MessagePack-serialized index
    # @return [Array(Array, Array)] the document ids and the matching
    #   TfIdfSimilarity documents, in index order
    def read_corpus(index_path)
      # the index is binary data, so it must not go through text-mode decoding
      idx = MessagePack.unpack(File.binread(index_path))
      spdx_ids = []
      docs = []

      idx[A_DOC_ROW].to_a.each do |doc_row|
        _, spdx_id, content, _ = doc_row
        txt = preprocess_text(content)
        next unless txt # skip rows whose content could not be preprocessed

        spdx_ids << spdx_id
        docs << TfIdfSimilarity::Document.new(txt, :id => spdx_id)
      end

      [spdx_ids, docs]
    end

  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
|
2
|
+
require 'json'

module LicenseMatcher

  # Resolves a license URL to a license id by checking, in order: a table of
  # well-known URLs, choosealicense.com and creativecommons.org URL patterns,
  # and the URLs listed in the licenses JSON file.
  class UrlMatcher
    attr_reader :url_index

    DEFAULT_LICENSE_JSON = 'data/spdx_licenses/licenses.json'

    # URLs that are mapped to a license id by exact comparison.
    KNOWN_URLS = {
      'http://jquery.org/license' => 'mit', # jQuery license page doesn't include any license text
      'https://www.mozilla.org/en-US/MPL/' => 'mpl-2.0',
      'http://fairlicense.org' => 'fair',
      'http://www.aforgenet.com/framework/license.html' => 'lgpl-3.0',
      'http://www.apache.org/licenses/' => 'apache-2.0',
      'http://aws.amazon.com/apache2.0/' => 'apache-2.0',
      'http://aws.amazon.com/asl/' => 'amazon',
      'https://choosealicense.com/no-license/' => 'no-license',
      'http://www.gzip.org/zlib/zlib_license.html' => 'zlib',
      'http://zlib.net/zlib-license.html' => 'zlib',
      'http://www.wtfpl.net/about/' => 'wtfpl'
    }.freeze

    # The second capture group holds the path that follows `/licenses/`.
    CHOOSEALICENSE_URL = /\bhttps?:\/\/(www\.)?choosealicense\.com\/licenses\/([\S|^\/]+)[\/]?\b/i
    CREATIVECOMMONS_URL = /\bhttps?:\/\/(www\.)?creativecommons\.org\/licenses\/([\S|^\/]+)[\/]?\b/i

    # @param license_json_file [String] path to the licenses JSON file
    # @raise [RuntimeError] when the file cannot be read or parsed
    def initialize(license_json_file = DEFAULT_LICENSE_JSON)
      licenses_json_doc = read_json_file license_json_file
      raise("Failed to read licenses.json") if licenses_json_doc.nil?

      @url_index = read_license_url_index(licenses_json_doc)
    end

    # Matches License.url with urls in Licenses.json and returns tuple [spdx_id, score]
    #
    # @param the_url [#to_s] URL to look up; surrounding whitespace is ignored
    # @return [Array] `[license_id, 1.0]` on a match, `[]` otherwise
    def match_url(the_url)
      the_url = the_url.to_s.strip

      known_id = KNOWN_URLS[the_url]
      return [known_id, 1.0] if known_id

      # does url match with choosealicense.com
      match = CHOOSEALICENSE_URL.match(the_url)
      return [match[2].to_s.downcase, 1.0] if match

      # creativecommons paths such as `by/4.0` become `cc-by-4.0`
      match = CREATIVECOMMONS_URL.match(the_url)
      return ["cc-#{match[2].to_s.gsub(/\//, '-')}", 1.0] if match

      # check through SPDX urls; the first indexed url that matches wins
      _, spdx_id = url_matchers.find { |matcher, _| matcher.match(the_url) }
      return [] if spdx_id.nil?

      [spdx_id, 1.0]
    end

    # Reads license urls from the license.json and builds a map {url : spdx_id}
    def read_license_url_index(spdx_licenses)
      url_index = {}
      spdx_licenses.each {|lic| url_index.merge! process_spdx_item(lic) }
      url_index
    end

    # Builds the {url : license_id} map for a single license item.
    # Items without an id and entries without a url are ignored.
    def process_spdx_item(lic)
      url_index = {}
      lic_id = lic[:id].to_s.strip.downcase

      return url_index if lic_id.empty?

      # presumably :links and :text are lists of hashes with a :url key - verify against the data file
      (lic[:links].to_a + lic[:text].to_a).each do |item|
        # a blank url would later turn into a pattern that matches any http(s) url
        next if item[:url].to_s.strip.empty?

        url_index[item[:url]] = lic_id
      end

      url_index
    end

    # Parses the JSON file with symbolized keys.
    # Returns nil (after printing a warning) when reading or parsing fails.
    def read_json_file(file_path)
      JSON.parse(File.read(file_path), {symbolize_names: true})
    rescue StandardError => e
      # this class has no logger, so report the failure through Kernel#warn
      warn "Failed to read json file `#{file_path}`: #{e.message}"
      nil
    end

    private

    # Compiles the indexed urls into [regexp, license_id] pairs, in index order.
    # Built once on first use and reused by every later #match_url call.
    def url_matchers
      @url_matchers ||= @url_index.each_with_object([]) do |(lic_url, lic_id), matchers|
        # normalizes urls in the file: drop the scheme and the `www.` prefix
        normalized = lic_url.to_s.strip.gsub(/https?:\/\//i, '').gsub(/www\./, '')
        next if normalized.empty?

        # escape the url so characters such as `?`, `+` or `(` are matched literally
        matcher = /https?:\/\/(www\.)?#{Regexp.escape(normalized)}/i
        matchers << [matcher, lic_id.to_s.downcase]
      end
    end

  end
end
|
data/lib/license_matcher.rb
CHANGED
@@ -1,7 +1,21 @@
|
|
1
1
|
require "helix_runtime"
|
2
2
|
|
3
3
|
begin
|
4
|
-
|
4
|
+
require "license_matcher/native"
|
5
5
|
rescue LoadError
|
6
|
-
|
6
|
+
warn "Unable to load license_matcher/native. Please run `rake build`"
|
7
|
+
end
|
8
|
+
|
9
|
+
require 'license_matcher/preprocess'
|
10
|
+
require 'license_matcher/url_matcher'
|
11
|
+
require 'license_matcher/rule_matcher'
|
12
|
+
require 'license_matcher/tf_ruby_matcher'
|
13
|
+
|
14
|
+
module LicenseMatcher

  class << self
    # Fallback for constants that are not defined inside this module:
    # the lookup is retried in the global namespace (Object).
    # Raises NameError when the constant does not exist there either.
    def const_missing(c)
      ::Object.const_get(c)
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: license_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Timo Sulg
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: helix_runtime
|
@@ -25,6 +25,48 @@ dependencies:
|
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: 0.6.0
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: narray
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: 0.6.1.2
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: 0.6.1.2
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: tf-idf-similarity
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 0.1.6
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 0.1.6
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: nokogiri
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 1.8.0
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.8.0
|
28
70
|
- !ruby/object:Gem::Dependency
|
29
71
|
name: bundler
|
30
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,6 +109,20 @@ dependencies:
|
|
67
109
|
- - "~>"
|
68
110
|
- !ruby/object:Gem::Version
|
69
111
|
version: '3.4'
|
112
|
+
- !ruby/object:Gem::Dependency
|
113
|
+
name: msgpack
|
114
|
+
requirement: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - "~>"
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: 1.1.0
|
119
|
+
type: :development
|
120
|
+
prerelease: false
|
121
|
+
version_requirements: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - "~>"
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.1.0
|
70
126
|
description: "\n LicenseMatcher is rubygem, which uses Fosslim to match various
|
71
127
|
OSS license\n with correct SPDX-id or EULA label.\n "
|
72
128
|
email:
|
@@ -86,6 +142,10 @@ files:
|
|
86
142
|
- Rakefile
|
87
143
|
- lib/license_matcher.rb
|
88
144
|
- lib/license_matcher/native.bundle
|
145
|
+
- lib/license_matcher/preprocess.rb
|
146
|
+
- lib/license_matcher/rule_matcher.rb
|
147
|
+
- lib/license_matcher/tf_ruby_matcher.rb
|
148
|
+
- lib/license_matcher/url_matcher.rb
|
89
149
|
- lib/tasks/helix_runtime.rake
|
90
150
|
homepage: https://www.github.com/fosslim
|
91
151
|
licenses: []
|
@@ -101,9 +161,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
101
161
|
version: '0'
|
102
162
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
163
|
requirements:
|
104
|
-
- - "
|
164
|
+
- - ">="
|
105
165
|
- !ruby/object:Gem::Version
|
106
|
-
version:
|
166
|
+
version: '0'
|
107
167
|
requirements: []
|
108
168
|
rubyforge_project:
|
109
169
|
rubygems_version: 2.5.2
|