license_matcher 0.1.0.pre.alpha → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +13 -1
- data/README.md +76 -5
- data/lib/license_matcher/native.bundle +0 -0
- data/lib/license_matcher/preprocess.rb +75 -0
- data/lib/license_matcher/rule_matcher.rb +601 -0
- data/lib/license_matcher/tf_ruby_matcher.rb +100 -0
- data/lib/license_matcher/url_matcher.rb +102 -0
- data/lib/license_matcher.rb +16 -2
- metadata +64 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b710a76802bc5254b2d0d9dd3c36376c613b7e5
|
4
|
+
data.tar.gz: fef8fe16e4028ed1c49710956dceb4ac13acee9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee54a1ae1b3258f9bc474a5a05c7221281be666976f6e1d547bb1673c4fa57507d0006a21b236d9ce02a55fa6e9e1ac9fe646a6c96d53c2d7255ab59d5e2c821
|
7
|
+
data.tar.gz: adb94097dea0e79e19ac4b2cf9285d42e190902f2d8f798e5eb7239d094e20ca12193efb7342ad9ee4065b2e5f2bc48ccf3cacc318419699d7a7f2b528fb6fab
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
license_matcher (0.1.0)
|
4
|
+
license_matcher (0.1.0.pre.alpha)
|
5
5
|
helix_runtime (~> 0.6.0)
|
6
|
+
narray (~> 0.6.1.2)
|
7
|
+
nokogiri (~> 1.8.0)
|
8
|
+
tf-idf-similarity (~> 0.1.6)
|
6
9
|
|
7
10
|
GEM
|
8
11
|
remote: https://rubygems.org/
|
@@ -13,6 +16,11 @@ GEM
|
|
13
16
|
rake (>= 10.0)
|
14
17
|
thor (~> 0.19.4)
|
15
18
|
toml (~> 0.1.2)
|
19
|
+
mini_portile2 (2.2.0)
|
20
|
+
msgpack (1.1.0)
|
21
|
+
narray (0.6.1.2)
|
22
|
+
nokogiri (1.8.0)
|
23
|
+
mini_portile2 (~> 2.2.0)
|
16
24
|
parslet (1.5.0)
|
17
25
|
blankslate (~> 2.0)
|
18
26
|
rake (10.5.0)
|
@@ -29,9 +37,12 @@ GEM
|
|
29
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
30
38
|
rspec-support (~> 3.6.0)
|
31
39
|
rspec-support (3.6.0)
|
40
|
+
tf-idf-similarity (0.1.6)
|
41
|
+
unicode_utils (~> 1.4)
|
32
42
|
thor (0.19.4)
|
33
43
|
toml (0.1.2)
|
34
44
|
parslet (~> 1.5.0)
|
45
|
+
unicode_utils (1.4.0)
|
35
46
|
|
36
47
|
PLATFORMS
|
37
48
|
ruby
|
@@ -39,6 +50,7 @@ PLATFORMS
|
|
39
50
|
DEPENDENCIES
|
40
51
|
bundler (~> 1.15)
|
41
52
|
license_matcher!
|
53
|
+
msgpack (~> 1.1.0)
|
42
54
|
rake (~> 10.0)
|
43
55
|
rspec (~> 3.4)
|
44
56
|
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# LicenseMatcher
|
2
2
|
|
3
|
-
LicenseMatcher is a rubygem that
|
3
|
+
LicenseMatcher is a rubygem that matches the full text of an open-source license to its SPDX id, so you don't have to guess whether it is a **BSD** or an **MIT** license; let `LicenseMatcher` do the heavy lifting for you.
|
4
4
|
|
5
5
|
|
6
6
|
It uses [Fosslim](https://github.com/Fosslim/fosslim/) library underneath, which gives remarkable performance with lower memory cost than pure Ruby implementation;
|
@@ -34,16 +34,87 @@ run `bundle exec irb` on your commandline to fire up Ruby REPL;
|
|
34
34
|
```
|
35
35
|
require 'license_matcher'
|
36
36
|
|
37
|
-
# build index
|
38
|
-
|
37
|
+
# download the pre-built index
|
38
|
+
curl -O https://github.com/Fosslim/license_matcher/blob/master/data/index.msgpack
|
39
|
+
|
40
|
+
# or build index from the SPDX data
|
41
|
+
LicenseMatcher::TFRustMatcher.build_index( "data/licenses", "data/index.msgpack")
|
39
42
|
|
40
43
|
# match license text
|
41
44
|
txt = File.read("fixtures/files/mit.txt");
|
42
|
-
|
43
|
-
lm.
|
45
|
+
|
46
|
+
lm = LicenseMatcher::TFRubyMatcher.new("data/index.msgpack")
|
47
|
+
lm.match_text(txt, 0.9)
|
48
|
+
|
49
|
+
|
50
|
+
```
|
51
|
+
|
52
|
+
|
53
|
+
## Matchers
|
54
|
+
|
55
|
+
It currently supports 4 different models:
|
56
|
+
|
57
|
+
* **UrlMatcher.match_url** - finds matching SPDX license by comparing URL with urls in the `licenses.json`
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
lm = LicenseMatcher::UrlMatcher.new
|
61
|
+
lm.match_url "https://opensource.org/licenses/AAL"
|
62
|
+
|
63
|
+
=> "AAL"
|
64
|
+
```
|
65
|
+
|
66
|
+
* **RuleMatcher.match_rules** - scans a text and returns the SPDX ids whose rules match it, ordered so that the rule matching the longest substring of the license text comes first
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
lm = LicenseMatcher::RuleMatcher.new
|
70
|
+
lm.match_rules "It is license under Apache 2.0 License."
|
71
|
+
|
72
|
+
=> "Apache-2.0"
|
73
|
+
```
|
74
|
+
|
75
|
+
* **TFRubyMatcher** - original Ruby implementation, uses TF/IDF and Cosine similarity;
|
44
76
|
|
45
77
|
```
|
78
|
+
lm = LicenseMatcher::TFRubyMatcher.new
|
46
79
|
|
80
|
+
txt = File.read "fixtures/files/mit.html"
|
81
|
+
clean_txt = lm.preprocess_html txt # NB! it may help to increase accuracy
|
82
|
+
lm.match_text clean_txt
|
83
|
+
```
|
84
|
+
|
85
|
+
* **TFRustMatcher** - uses simple Jaccard similarity;
|
86
|
+
|
87
|
+
```
|
88
|
+
lm2 = LicenseMatcher::TFRustMatcher.new
|
89
|
+
|
90
|
+
txt = File.read "fixtures/files/mit.txt"
|
91
|
+
lm2.match_text txt
|
92
|
+
```
|
93
|
+
|
94
|
+
## Benchmarks
|
95
|
+
|
96
|
+
* initialization: Ruby version 1 time, Rust version 1000 times
|
97
|
+
|
98
|
+
```
|
99
|
+
user system total real
|
100
|
+
TFRubyMatcher: 12.850000 0.180000 13.030000 ( 13.210955)
|
101
|
+
TFRustMatcher: 26.260000 9.400000 35.660000 ( 38.264632)
|
102
|
+
```
|
103
|
+
* matching preprocessed short [MIT](https://raw.githubusercontent.com/Fosslim/license_matcher/master/data/spdx_licenses/plain/MIT) text 1000 times
|
104
|
+
|
105
|
+
```
|
106
|
+
user system total real
|
107
|
+
TFRubyMatcher:102.410000 12.180000 114.590000 (116.308119)
|
108
|
+
TFRustMatcher: 7.170000 0.040000 7.210000 ( 7.266000)
|
109
|
+
```
|
110
|
+
|
111
|
+
* matching preprocessed long [AGPL-3.0](https://raw.githubusercontent.com/Fosslim/license_matcher/master/data/spdx_licenses/plain/AGPL-3.0) text 1000 times
|
112
|
+
|
113
|
+
```
|
114
|
+
user system total real
|
115
|
+
TFRubyMatcher:242.450000 21.960000 264.410000 (276.417704)
|
116
|
+
TFRustMatcher: 9.340000 0.070000 9.410000 ( 9.478597)
|
117
|
+
```
|
47
118
|
|
48
119
|
## Development
|
49
120
|
|
Binary file
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
# Text clean-up helpers shared by the license matchers.
#
# Intended to be mixed into a matcher class. Diagnostics are printed to
# stdout with Kernel#p.
module Preprocess
  # Normalises raw license text before it is matched.
  #
  # Re-encodes the text with #safe_encode, blanks out markdown links, removes
  # the word "the" (any case), blanks out CDATA sections and XML comments, and
  # finally collapses every whitespace run into a single space.
  #
  # @param text [#to_s] raw license text
  # @return [String] cleaned, stripped, single-spaced text
  def preprocess_text(text)
    text = safe_encode(text)

    # remove markdown url tags
    text = text.gsub(/\[.+?\]\(.+?\)/, ' ')

    # remove spam words
    text.gsub!(/\bTHE\b/i, '')

    # remove some XML garbage
    text = text.gsub(/\<\!\[CDATA.*?\]\]\>/, ' ').to_s
    text = text.gsub(/\<\!--.+?--\>/, ' ').to_s
    text = text.gsub(/<\!\[CDATA.+?\]>/, ' ').to_s

    text.to_s.strip.gsub(/\s+/, ' ')
  end

  # Extracts the text of the visible content tags of an HTML document.
  #
  # @param html_text [#to_s] HTML source
  # @return [String] extracted text, or "" when the document cannot be parsed
  def preprocess_html(html_text)
    html_doc = parse_html(html_text)
    return clean_html(html_doc) if html_doc

    p "match_html: failed to parse html document\n#{html_text}"
    ""
  end

  # Concatenates the text of the content-bearing elements of a parsed document.
  #
  # Falls back to the text of the whole <body> when the element filter yields
  # nothing.
  #
  # @param html_doc [#xpath] parsed document as returned by #parse_html
  # @return [String] stripped text content
  def clean_html(html_doc)
    body_elements = html_doc.xpath(
      '//p | //h1 | //h2 | //h3 | //h4 | //h5 | //h6 | //em | //strong | //b | //td | //pre
      | //li[not(@id) and not(@class) and not(a)] | //section//section[@class="project-info"]
      | //blockquote | //textarea'
    ).to_a

    # extract text from each html tag and separate them by a space
    body_text = body_elements.map { |el| ' ' + el.text.to_s }.join.strip

    # remove XML CDATA blocks like the ones opensource.org pages have
    body_text.gsub!(/\<\!\[CDATA.+?\]\]\>/i, ' ')

    if body_text.empty?
      p "match_html: document didnt pass noise filter, will use whole body content"
      body_text = html_doc.xpath('//body').text.to_s.strip
    end

    body_text
  end

  # Parses HTML source with Nokogiri.
  #
  # Only StandardError is rescued, so interrupts and exit requests still
  # propagate. The failure is reported with Kernel#p like the other
  # diagnostics in this module.
  #
  # @param html_text [#to_s] HTML source
  # @return [Object, nil] the value of Nokogiri.HTML, or nil when parsing raised
  def parse_html(html_text)
    Nokogiri.HTML(safe_encode(html_text))
  rescue StandardError
    p "failed to parse html doc: \n #{html_text}"
    nil
  end

  # Re-encodes text to UTF-8, treating the input bytes as binary and dropping
  # every byte that has no UTF-8 conversion.
  #
  # @param txt [#to_s] text to re-encode
  # @return [String] UTF-8 text, or "" when the conversion raised
  def safe_encode(txt)
    txt.to_s.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
  rescue
    p "Failed to encode text:\n #{txt}"
    ""
  end
end
|
@@ -0,0 +1,601 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module LicenseMatcher
|
4
|
+
|
5
|
+
class RuleMatcher
|
6
|
+
include Preprocess
|
7
|
+
|
8
|
+
attr_reader :licenses, :rules, :id_spdx_idx
|
9
|
+
|
10
|
+
DEFAULT_LICENSE_JSON = 'data/spdx_licenses/licenses.json'
|
11
|
+
|
12
|
+
|
13
|
+
# Loads the SPDX license list and builds the matching rules from it.
#
# @param license_json_file [String] path to the licenses JSON file
# @raise [RuntimeError] when the file cannot be read or parsed
def initialize(license_json_file = DEFAULT_LICENSE_JSON)
  licenses_json_doc = read_json_file(license_json_file)
  raise("Failed to read licenses.json") if licenses_json_doc.nil?

  # Keep the parsed document so the `licenses` reader returns it instead of nil.
  @licenses = licenses_json_doc
  @rules = init_rules(licenses_json_doc)
  # reverse index from downcased license id to the case-sensitive SPDX id
  @id_spdx_idx = init_id_idx(licenses_json_doc)
end
|
23
|
+
|
24
|
+
# Builds a lookup table from the downcased license id to the id exactly as it
# is written in the licenses document.
#
# @param licenses_json_doc [#to_a] parsed license entries, each responding to [:id]
# @return [Hash{String => Object}] downcased id => original id
def init_id_idx(licenses_json_doc)
  licenses_json_doc.to_a.each_with_object({}) do |entry, index|
    index[entry[:id].to_s.downcase] = entry[:id]
  end
end
|
33
|
+
|
34
|
+
# finds matching regex rules in the text and sorts matches by length of match
|
35
|
+
# ps: not very efficient, but good enough to handle special cases;
|
36
|
+
# @args:
|
37
|
+
# text - string, a name of license,
|
38
|
+
# @returns:
|
39
|
+
# [[spdx_id, score, matching_rule, matching_length],...]
|
40
|
+
# Finds every license whose rules match the text.
#
# The text is run through preprocess_text first. When the cleaned text,
# downcased, is itself a key of @rules, matching is skipped and the method
# returns [[downcased_text, 1.0]]. Otherwise each license contributes at most
# one entry (its first matching rule) and entries are ordered by descending
# match length.
#
# @param text [#to_s] text to scan (a license name or a longer text)
# @param early_exit [Boolean] pass true to stop after the first matching license
# @return [Array<Array>] [[spdx_id, 1.0, matching_rule, match_length], ...],
#   or [[spdx_id, 1.0]] for the shortcut case
def match_rules(text, early_exit = false)
  matches = []
  text = preprocess_text(text)

  # if the text is already an spdx id, shortcut the matching
  return [[text.downcase, 1.0]] if @rules.has_key?(text.downcase)

  text += ' ' # required to make the word-border matcher work with 1-word texts
  @rules.each do |spdx_id, rules|
    match_res = matches_any_rule?(rules, text)
    next if match_res.nil?

    matches << ([spdx_id, 1.0] + match_res)
    break if early_exit == true
  end

  # Every entry has four elements, so comparing the match length is enough.
  matches.sort { |a, b| b[3] <=> a[3] }
end
|
66
|
+
|
67
|
+
# if testable license text is in the ignore set return true
|
68
|
+
# Tells whether the license text matches one of the ignore rules.
#
# @param lic_text [#to_s] text to check
# @return [Boolean] true when any rule from get_ignore_rules matches
def ignore?(lic_text)
  !matches_any_rule?(get_ignore_rules, lic_text.to_s).nil?
end
|
73
|
+
|
74
|
+
# Returns the first rule that matches the text, together with the length of
# the matched substring.
#
# @param rules [Enumerable<Regexp>] rules tried in order
# @param license_name [#to_s] text to test
# @return [Array(Regexp, Integer), nil] [rule, match_length], or nil when no rule matches
def matches_any_rule?(rules, license_name)
  subject = license_name.to_s
  rules.each do |rule|
    hit = rule.match(subject)
    return [rule, hit[0].size] if hit
  end

  nil
end
|
86
|
+
|
87
|
+
|
88
|
+
#-- helpers
|
89
|
+
|
90
|
+
# Reads and parses a JSON file, symbolizing the object keys.
#
# Failures are reported on stderr with Kernel#warn, so a missing or invalid
# file yields nil rather than an exception.
#
# @param file_path [String] path of the JSON file
# @return [Object, nil] parsed document, or nil when reading or parsing raised
def read_json_file(file_path)
  JSON.parse(File.read(file_path), symbolize_names: true)
rescue StandardError => e
  warn "Failed to read json file `#{file_path}`: #{e.message}"
  nil
end
|
96
|
+
|
97
|
+
# combines SPDX rules with custom handwritten rules
|
98
|
+
# Merges the rules generated from the SPDX document with the handwritten ones.
#
# Custom rule ids are stripped and downcased before the merge; custom rules
# for an id that already has generated rules are appended after them.
#
# @param license_json_doc [Object] parsed licenses document, passed to build_rules_from_spdx_json
# @return [Hash{String => Array<Regexp>}] downcased id => rules
def init_rules(license_json_doc)
  combined = build_rules_from_spdx_json(license_json_doc)

  get_custom_rules.each do |raw_id, extra_rules|
    key = raw_id.to_s.strip.downcase

    if combined.has_key?(key)
      combined[key].concat(extra_rules)
    else
      combined[key] = extra_rules
    end
  end

  combined
end
|
114
|
+
|
115
|
+
|
116
|
+
# builds regex rules based on the LicenseJSON file
|
117
|
+
# rules are using urls, IDS, names and alternative names to build full string matching regexes
|
118
|
+
# Generates the rules of every license in the SPDX document.
#
# Entries are processed in order of their :id; the result is keyed by the
# downcased, stripped id.
#
# @param spdx_json [Enumerable] license entries responding to [:id]
# @return [Hash{String => Array}] id => result of build_spdx_item_rules
def build_rules_from_spdx_json(spdx_json)
  spdx_json.sort_by { |item| item[:id] }.each_with_object({}) do |item, by_id|
    by_id[item[:id].to_s.downcase.strip] = build_spdx_item_rules(item)
  end
end
|
129
|
+
|
130
|
+
# Builds the matching rules of a single SPDX license entry.
#
# Rules are emitted in this order: urls from :links, urls from :text, the
# license name, the license id, every :identifiers entry and every
# :other_names entry.
#
# @param spdx_item [Hash] license entry; reads :links, :text, :name, :id,
#   :identifiers and :other_names
# @return [Array<Regexp>] case-insensitive rules
def build_spdx_item_rules(spdx_item)
  rules = []

  # Turns a link entry into a rule matching its url with http or https, an
  # optional "www." prefix, an optional trailing slash and optional parentheses.
  url_rule = lambda do |link|
    # normalizes urls in the file
    lic_url = link[:url].to_s.strip.gsub(/https?:\/\//i, '').gsub(/www\./, '').gsub(/\./, '\\.')
    # "\\." keeps the backslash, so the dot after "www" is matched literally
    Regexp.new("\\b[\\(]?https?://(www\\.)?#{lic_url}[/]?[\\)]?\\b".gsub(/\s+/, ''), Regexp::IGNORECASE)
  end

  # links are first, because they give the highest confidence that the text is about the license
  spdx_item[:links].to_a.each { |link| rules << url_rule.call(link) }

  # include also links to license texts
  spdx_item[:text].to_a.each { |link| rules << url_rule.call(link) }

  spdx_name = preprocess_text(spdx_item[:name])
  spdx_name.gsub!(/\(.+?\)/, '')       # remove SPDX ids in the license names
  spdx_name.gsub!(/\./, '\\.')         # mark version dots as not regex selector
  spdx_name.gsub!(/[\*|\?|\+]/, '.')   # replace regex selectors with the any-char mark ~> WTFPL name
  spdx_name.gsub!(/\,/, '[\\,]?')      # make comma optional
  spdx_name.strip!
  # Block form inserts the replacement verbatim; a replacement *string* has its
  # backslash sequences interpreted by gsub and does not yield `\s+`.
  spdx_name.gsub!(/\s+/) { '\\s+' }    # replace spaces with the whitespace selector

  rules << Regexp.new("\\b#{spdx_name}\\b", Regexp::IGNORECASE)

  # use the spdx id anywhere in the text only if it is unambiguous (contains a
  # digit or dash, or ends with "ware"); ids like MIT, Fair, Glide must start the text
  spdx_id = spdx_item[:id].to_s.strip.downcase
  if spdx_id.match(/[\d|-]|ware\z/)
    rules << Regexp.new("\\b[\\(]?#{spdx_id}[\\)]?\\b".gsub(/\s+/, '\\s').gsub(/\./, '\\.'), Regexp::IGNORECASE)
  else
    rules << Regexp.new("\\A[\\(]?#{spdx_id}[\\)]?\\b", Regexp::IGNORECASE)
  end

  spdx_item[:identifiers].to_a.each do |id|
    rules << Regexp.new("\\b#{id[:identifier]}\\s".gsub(/\s+/, '\\s').gsub(/\./, '\\.'), Regexp::IGNORECASE)
  end

  spdx_item[:other_names].to_a.each do |alt|
    rules << Regexp.new("\\b#{alt[:name]}\\b".gsub(/\s+/, '\\s'), Regexp::IGNORECASE)
  end

  rules
end
|
175
|
+
|
176
|
+
|
177
|
+
# Rules describing texts that are not a license identification at all:
# placeholders, file names, bare copyright lines, repository urls and similar.
#
# Character classes list their members without `|` separators, because inside
# `[...]` a pipe is a literal character, not an alternation.
#
# @return [Array<Regexp>] a new array of ignore rules on every call
def get_ignore_rules
  [
    /\bProprietary\b/i, /\bOther\/Proprietary\b/i, /\ALICEN[CS]E\.\w{2,8}\b/i,
    /^LICEN[CS]ING\.\w{2,8}\b/i, /^COPYING\.\w{2,8}/i,
    /\ADFSG\s+APPROVED\b/i, /\ASee\slicense\sin\spackage\b/i,
    /\ASee LICENSE\b/i,
    /\AFree\s+for\s+non[-]?commercial\b/i, /\AFree\s+To\s+Use\b/i,
    /\AFree\sFor\sHome\sUse\b/i, /\AFree\s+For\s+Educational\b/i,
    /^Freely\s+Distributable\s*$/i, /^COPYRIGHT\s+\d{2,4}/i,
    /^Copyright\s+\(c\)\s+\d{2,4}\b/i, /^COPYRIGHT\s*$/i, /^COPYRIGHT\.\w{2,8}\b/i,
    /^\(c\)\s+\d{2,4}\d/,
    /^LICENSE\s*$/i, /^FREE\s*$/i, /\ASee\sLicense\s*\b/i, /^TODO\s*$/i, /^FREEWARE\s*$/i,
    /^All\srights\sreserved\s*$/i, /^COPYING\s*$/i, /^OTHER\s*$/i, /^NONE\s*$/i, /^DUAL\s*$/i,
    /^KEEP\s+IT\s+REAL\s*\b/i, /\ABE\s+REAL\s*\z/i, /\APrivate\s*\z/i, /\ACommercial\s*\z/i,
    /\ASee\s+LICENSE\s+file\b/i, /\ASee\sthe\sLICENSE\b/i, /\ALICEN[CS]E\s*\z/i,
    /^PUBLIC\s*$/i, /^see file LICENSE\s*$/i, /^__license__\s*$/i,
    /\bLIEULA\b/i, /\AEULA\s*\z/i, /^qQuickLicen[cs]e\b/i, /^For\sfun\b/i, /\AVarious\s*\z/i,
    /^GNU\s*$/i, /^GNU[-\s]?v3\s*$/i, /^OSI\s+Approved\s*$/i, /^OSI\s*$/i,
    /\AOPEN\sSOURCE\sLICENSE\s?\z/i, /\AOPEN\s*\z/i, /\Aunknown\s*\z/i,
    # the dot is escaped so only the real host names match
    /^https?:\/\/github\.com/i, /^https?:\/\/gitlab\.com/i
  ]
end
|
199
|
+
|
200
|
+
|
201
|
+
def get_custom_rules
|
202
|
+
{
|
203
|
+
"AAL" => [/\bAAL\b/i, /\bAAL\s+License\b/i,
|
204
|
+
/\bAttribution\s+Assurance\s+License\b/i],
|
205
|
+
"AFL-1.1" => [/\bAFL[-|v]?1[^\.]\b/i, /\bafl[-|v]?1\.1\b/i],
|
206
|
+
"AFL-1.2" => [/\bAFL[-|v]?1\.2\b/i],
|
207
|
+
"AFL-2.0" => [/\bAFL[-|v]?2[^\.]\b/i, /\bAFL[-|v]?2\.0\b/i],
|
208
|
+
"AFL-2.1" => [/\bAFL[-|v]?2\.1\b/i],
|
209
|
+
"AFL-3.0" => [
|
210
|
+
/\bAFL[-|\s|\_]?v?3\.0\b/i, /\bAFL[-|\s|\_]?v?3/i,
|
211
|
+
/\AAcademic\s+Free\s+License\s*\z/i, /^AFL\s?\z/i,
|
212
|
+
/\bhttps?:\/\/opensource\.org\/licenses\/academic\.php\b/i,
|
213
|
+
/\AAcademic[-|\s]Free[-|\s]License[-]?\s*\z/i,
|
214
|
+
/\bAcademic.Free.License.\(AFL\)/i
|
215
|
+
],
|
216
|
+
"AGPL-1.0" => [
|
217
|
+
/\bAGPL[-|v|_|\s]?1\.0\b/i,
|
218
|
+
/\bAGPL[-|v|_|\s]1\b/i , /\bAGPL[_|-|v]?2\b/i,
|
219
|
+
/\bAffero\s+General\s+Public\s+License\s+[v]?1\b/i,
|
220
|
+
/\bAGPL\s(?!(v|\d))/i #Matches only AGPL, but not AGPL v1, AGPL 1.0 etc
|
221
|
+
],
|
222
|
+
"AGPL-3.0" => [
|
223
|
+
/\bAGPL[-|_|\s]?v?3\.0\b/i, /\bAGPL[-|\s|_]?v?3[\+]?\b/i,
|
224
|
+
/\bAPGLv?3[\+]?\b/i, #some packages has typos
|
225
|
+
/\bGNU\s+Affero\s+General\s+Public\s+License\s+[v]?3/i,
|
226
|
+
/\bAFFERO\sGNU\sPUBLIC\sLICENSE\sv3\b/i,
|
227
|
+
/\bGnu\sAffero\sPublic\sLicense\sv3+?\b/i,
|
228
|
+
/\bAffero\sGeneral\sPublic\sLicen[s|c]e[\,]?\sversion\s+3[\.0]?\b/i,
|
229
|
+
/\bAffero\sGeneral\sPublic\sLicense\sv?3\b/i,
|
230
|
+
/\bAGPL\sversion\s3[\.]?\b/i,
|
231
|
+
/\bGNU\sAGPL\sv?3[\.0]?\b/i,
|
232
|
+
/\bGNU\sAFFERO\sv?3\b/i,
|
233
|
+
/\bhttps?:\/\/gnu\.org\/licenses\/agpl\.html\b/i,
|
234
|
+
/\AAFFERO\sGENERAL\sPUBLIC\sLICENSE\s*\z/i,
|
235
|
+
/^AFFERO\s*\z/i
|
236
|
+
],
|
237
|
+
"Aladdin" => [/\b[\(]?AFPL[\)]?\b/i, /\bAladdin\sFree\sPublic\sLicense\b/i],
|
238
|
+
"Amazon" => [/\bAmazon\sSoftware\sLicense\b/i],
|
239
|
+
"Apache-1.0" => [/\bAPACHE[-|_|\s]?v?1[^\.]/i, /\bAPACHE[-|\s]?v?1\.0\b/i],
|
240
|
+
"Apache-1.1" => [/\bAPACHE[-|_|\s]?v?1\.1\b/i],
|
241
|
+
"Apache-2.0" => [
|
242
|
+
/\bAPACHE\s+2\.0\b/i, /\bAPACHE[-|_|\s]?v?2\b/i,
|
243
|
+
/\bApache\sOpen\sSource\sLicense\s2\.0\b/i,
|
244
|
+
/\bAPACH[A|E]\s+Licen[c|s]e\s+[\(]?v?2\.0[\)]?\b/i,
|
245
|
+
/\bAPACHE\s+LICENSE\,?\s+VERSION\s+2\.0\b/i,
|
246
|
+
/\bApache\s+License\s+v?2\b/i,
|
247
|
+
/\bApache\s+Software\sLicense\b/i,
|
248
|
+
/\bApapche[-|\s|\_]?v?2\.0\b/i, /\bAL[-|\s|\_]2\.0\b/i,
|
249
|
+
/\bAPL\s+2\.0\b/i, /\bAPL[\.|-|v]?2\b/i, /\bASL\s+2\.0\b/i,
|
250
|
+
/\bASL[-|v|\s]?2\b/i, /\bALv2\b/i, /\bASF[-|\s]?2\.0\b/i,
|
251
|
+
/\AAPACHE\s*\z/i, /\AASL\s*\z/i, /\bASL\s+v?\.2\.0\b/i, /\AASF\s*\z/i,
|
252
|
+
/\AApache\s+license\s*\z/i,
|
253
|
+
],
|
254
|
+
"APL-1.0" => [/\bapl[-|_|\s]?v?1\b/i, /\bAPL[-|_|\s]?v?1\.0\b/i, /^APL$/i],
|
255
|
+
"APSL-1.0" => [/\bAPSL[-|_|\s]?v?1\.0\b/i, /\bAPSL[-|_|\s]?v?1(?!\.)\b/i, /\AAPPLE\s+PUBLIC\s+SOURCE\s*\z/i],
|
256
|
+
"APSL-1.1" => [/\bAPSL[-|_|\s]?v?1\.1\b/i],
|
257
|
+
"APSL-1.2" => [/\bAPSL[-|_|\s]?v?1\.2\b/i],
|
258
|
+
"APSL-2.0" => [/\bAPSL[-|_|\s]?v?2\.0\b/i, /\bAPSL[-|_|\s]?v?2\b/i],
|
259
|
+
|
260
|
+
"Artistic-1.0-Perl" => [/\bArtistic[-|_|\s]?v?1\.0\-Perl\b/i, /\bPerlArtistic\b/i],
|
261
|
+
"Artistic-1.0" => [/\bartistic[-|_|\s]?v?1\.0(?!\-)\b/i, /\bartistic[-|_|\s]?v?1(?!\.)\b/i],
|
262
|
+
"Artistic-2.0" => [/\bARTISTIC[-|_|\s]?v?2\.0\b/i, /\bartistic[-|_|\s]?v?2\b/i,
|
263
|
+
/\bArtistic.2.0\b/i,
|
264
|
+
/\bARTISTIC\s+LICENSE\b/i, /\AARTISTIC\s*\z/i],
|
265
|
+
"Beerware" => [
|
266
|
+
/\bBEERWARE\b/i, /\bBEER\s+LICEN[c|s]E\b/i,
|
267
|
+
/\bBEER[-|\s]WARE\b/i, /^BEER\b/i,
|
268
|
+
/\bBuy\ssnare\sa\sbeer\b/i,
|
269
|
+
/\bFree\sas\sin\sbeer\b/i
|
270
|
+
],
|
271
|
+
'BitTorrent-1.1' => [/\bBitTorrent\sOpen\sSource\sLicense\b/i],
|
272
|
+
"0BSD" => [/\A0BSD\s*\z/i],
|
273
|
+
"BSD-2-CLAUSE" => [
|
274
|
+
/\bBSD[-|_|\s]?v?2\b/i, /^FREEBSD\b/i, /^OPENBSD\b/i,
|
275
|
+
/\bBSDLv2\b/i
|
276
|
+
],
|
277
|
+
"BSD-3-CLAUSE" => [/\bBSD[-|_|\s]?v?3\b/i, /\bBSD[-|\s]3[-\s]CLAUSE\b/i,
|
278
|
+
/\bBDS[-|_|\s]3[-|\s]CLAUSE\b/i,
|
279
|
+
/\bthree-clause\sBSD\slicen[s|c]e\b/i,
|
280
|
+
/\ABDS\s*\z/i, /^various\/BSDish\s*$/],
|
281
|
+
"BSD-4-CLAUSE" => [
|
282
|
+
/\bBSD[-|_|\s]?v?4/i, /\ABSD\s*\z/i, /\ABSD\s+LI[s|c]EN[S|C]E\s*\z/i,
|
283
|
+
/\bBSD-4-CLAUSE\b/i,
|
284
|
+
/\bhttps?:\/\/en\.wikipedia\.org\/wiki\/BSD_licenses\b/i
|
285
|
+
],
|
286
|
+
"BSL-1.0" => [
|
287
|
+
/\bBSL[-|_|\s]?v?1\.0\b/i, /\bbsl[-|_|\s]?v?1\b/i, /^BOOST\b/i,
|
288
|
+
/\bBOOST\s+SOFTWARE\s+LICENSE\b/i,
|
289
|
+
/\bBoost\sLicense\s1\.0\b/i
|
290
|
+
],
|
291
|
+
"CC0-1.0" => [
|
292
|
+
/\bCC0[-|_|\s]?v?1\.0\b/i, /\bCC0[-|_|\s]?v?1\b/i,
|
293
|
+
/\bCC[-|\s]?[0|o]\b/i, /\bCreative\s+Commons\s+0\b/i,
|
294
|
+
/\bhttps?:\/\/creativecommons\.org\/publicdomain\/zero\/1\.0[\/]?\b/i,
|
295
|
+
/\bcc[-|\_]zero\b/i
|
296
|
+
],
|
297
|
+
"CC-BY-1.0" => [/\bCC.BY.v?1\.0\b/i, /\bCC.BY.v?1\b/i, /^CC[-|_|\s]?BY$/i],
|
298
|
+
"CC-BY-2.0" => [/\bCC.BY.v?2\.0\b/i, /\bCC.BY.v?2(?!\.)\b/i],
|
299
|
+
"CC-BY-2.5" => [/\bCC.BY.v?2\.5\b/i],
|
300
|
+
"CC-BY-3.0" => [
|
301
|
+
/\bCC.BY.v?3\.0\b/i, /\b[\(]?CC.BY[\)]?.v?3\b/i,
|
302
|
+
/\bCreative\sCommons\sBY\s3\.0\b/i,
|
303
|
+
/\bhttps?:\/\/.+\/licenses\/by\/3\.0[\/]?/i
|
304
|
+
],
|
305
|
+
"CC-BY-4.0" => [
|
306
|
+
/^CC[-|\s]?BY[-|\s]?v?4\.0$/i, /\bCC.BY.v?4\b/i, /\bCC.BY.4\.0\b/i,
|
307
|
+
/\bCREATIVE\s+COMMONS\s+ATTRIBUTION\s+[v]?4\.0\b/i,
|
308
|
+
/\ACREATIVE\s+COMMONS\s+ATTRIBUTION\s*\z\b/i
|
309
|
+
],
|
310
|
+
"CC-BY-SA-1.0" => [/\bCC[-|\s]BY.SA.v?1\.0\b/i, /\bCC[-|\s]BY.SA.v?1\b/i],
|
311
|
+
"CC-BY-SA-2.0" => [/\bCC[-|\s]BY.SA.v?2\.0\b/i, /\bCC[-|\s]BY.SA.v?2(?!\.)\b/i],
|
312
|
+
"CC-BY-SA-2.5" => [/\bCC[-|\s]BY.SA.v?2\.5\b/i],
|
313
|
+
"CC-BY-SA-3.0" => [/\bCC[-|\s]BY.SA.v?3\.0\b/i, /\bCC[-|\s]BY.SA.v?3\b/i,
|
314
|
+
/\bCC3\.0[-|_|\s]BY.SA\b/i,
|
315
|
+
/\bhttps?:\/\/(www\.)?.+\/by.sa\/3\.0[\/]?/i],
|
316
|
+
"CC-BY-SA-4.0" => [
|
317
|
+
/CC[-|\s]BY.SA.v?4\.0$/i, /\bCC[-|\s]BY.SA.v?4\b/i,
|
318
|
+
/CCSA-4\.0/i
|
319
|
+
],
|
320
|
+
"CC-BY-NC-1.0" => [/\bCC[-|\s]BY.NC[-|\s]?v?1\.0\b/i, /\bCC[-|\s]BY.NC[-|\s]?v?1\b/i],
|
321
|
+
"CC-BY-NC-2.0" => [/\bCC[-|\s]BY.NC[-|\s]?v?2\.0\b/i],
|
322
|
+
"CC-BY-NC-2.5" => [/\bCC[-|\s]BY.NC[-|\s]?v?2\.5\b/i],
|
323
|
+
"CC-BY-NC-3.0" => [/\bCC[-|\s]BY.NC[-|\s]?v?3\.0\b/i, /\bCC.BY.NC[-|\s]?v?3\b/i,
|
324
|
+
/\bCreative\s+Commons\s+Non[-]?Commercial[,]?\s+3\.0\b/i],
|
325
|
+
"CC-BY-NC-4.0" => [
|
326
|
+
/\bCC[-|\s]BY.NC[-|\s|_]?v?4\.0\b/i, /\bCC.BY.NC[-|\s|_]?v?4\b/i,
|
327
|
+
/\bhttps?:\/\/creativecommons\.org\/licenses\/by-nc\/3\.0[\/]?\b/i
|
328
|
+
],
|
329
|
+
"CC-BY-NC-SA-1.0" => [ /\bCC[-|\s+]BY.NC.SA[-|\s+]v?1\.0\b/i,
|
330
|
+
/\bCC[-|\s+]BY.NC.SA[-|\s+]v?1\b/i
|
331
|
+
],
|
332
|
+
"CC-BY-NC-SA-2.0" => [/\bCC[-|\s]?BY.NC.SA[-|\s]?v?2\.0\b/i],
|
333
|
+
"CC-BY-NC-SA-2.5" => [/\bCC[-|\s]?BY.NC.SA[-|\s]?v?2\.5\b/i],
|
334
|
+
"CC-BY-NC-SA-3.0" => [
|
335
|
+
/\bCC[-|\s]?BY.NC.SA[-|\s]?v?3\.0\b/i,
|
336
|
+
/\bCC[-|\s]?BY.NC.SA[-|\s]?v?3(?!\.)\b/i,
|
337
|
+
/\bBY[-|\s]NC[-|\s]SA\sv?3\.0\b/i,
|
338
|
+
/\bhttp:\/\/creativecommons.org\/licenses\/by-nc-sa\/3.0\/us[\/]?\b/i
|
339
|
+
],
|
340
|
+
"CC-BY-NC-SA-4.0" => [/\bCC[-|\s]?BY.NC.SA[-|\s]?v?4\.0\b/i,
|
341
|
+
/\bCC[-|_|\s]BY.NC.SA[-|\s]?v?4(?!\.)\b/i,
|
342
|
+
/\bBY.NC.SA[-|\s|\_]v?4\.0\b/i],
|
343
|
+
|
344
|
+
"CC-BY-ND-1.0" => [/\bCC[-|\s]BY.ND[-|\s]?v?1\.0\b/i],
|
345
|
+
"CC-BY-ND-2.0" => [/\bCC[-|\s]BY.ND[-|\s]?v?2\.0\b/i],
|
346
|
+
"CC-BY-ND-2.5" => [/\bCC[-|\s]BY.ND[-|\s]?v?2\.5\b/i],
|
347
|
+
"CC-BY-ND-3.0" => [/\bCC[-|\s]BY.ND[-|\s]?v?3\.0\b/i],
|
348
|
+
"CC-BY-ND-4.0" => [
|
349
|
+
/\bCC[-|\s]BY.ND[-|\s]?v?4\.0\b/i,
|
350
|
+
/\bCC\sBY.NC.ND\s4\.0/i
|
351
|
+
],
|
352
|
+
|
353
|
+
"CC-BY-NC-ND-3.0" => [/\bCC.BY.NC.ND.3\.0\b/i],
|
354
|
+
"CC-BY-NC-ND-4.0" => [/\bCC.BY.NC.ND.4\.0\b/i],
|
355
|
+
"CDDL-1.0" => [/\bCDDL[-|_|\s]?v?1\.0\b/i, /\bCDDL[-|_|\s]?v?1\b/i, /^CDDL$/i,
|
356
|
+
/\bCDDL\s+LICEN[C|S]E\b/i,
|
357
|
+
/\bCOMMON\sDEVELOPMENT\sAND\sDISTRIBUTION\sLICENSE\b/i
|
358
|
+
],
|
359
|
+
"CECILL-B" => [/\bCECILL[-|_|\s]?B\b/i],
|
360
|
+
"CECILL-C" => [/\bCECILL[-|_|\s]?C\b/i],
|
361
|
+
"CECILL-1.0" => [
|
362
|
+
/\bCECILL[-|\s|_]?v?1\.0\b/i, /\bCECILL[-|\s|_]?v?1\b/i,
|
363
|
+
/\ACECILL\s?\z/i, /\bCECILL\s+v?1\.2\b/i,
|
364
|
+
/^http:\/\/www\.cecill\.info\/licences\/Licence_CeCILL-C_V1-en.html$/i,
|
365
|
+
/\bhttp:\/\/www\.cecill\.info\b/i
|
366
|
+
],
|
367
|
+
"CECILL-2.1" => [
|
368
|
+
/\bCECILL[-|_|\s]?2\.1\b/i, /\bCECILL[\s|_|-]?v?2\b/i,
|
369
|
+
/\bCECILL\sVERSION\s2\.1\b/i
|
370
|
+
],
|
371
|
+
"CPL-1.0" => [
|
372
|
+
/\bCPL[-|\s|_]?v?1\.0\b/i, /\bCPL[-|\s|_]?v?1\b/i,
|
373
|
+
/\bCommon\s+Public\s+License\b/i, /\ACPL\s*\z/i
|
374
|
+
],
|
375
|
+
"CPAL-1.0" => [
|
376
|
+
/\bCommon\sPublic\sAttribution\sLicense\s1\.0\b/i,
|
377
|
+
/[\(]?\bCPAL\b[\)]?/i
|
378
|
+
],
|
379
|
+
"CUSTOM" => [ /\bCUSTOM\s+LICENSE\b/i ],
|
380
|
+
"DBAD" => [
|
381
|
+
/\bDONT\sBE\sA\sDICK\b/i, /\ADBAD\s*\z/i,
|
382
|
+
/\bdbad[-|\s|\_]license\b/i, /\ADBAD-1\s*\z/i,
|
383
|
+
/\ADBAP\b/i,
|
384
|
+
/\bhttps?:\/\/www\.dbad-license\.org[\/]?\b/i
|
385
|
+
],
|
386
|
+
"D-FSL-1.0" => [
|
387
|
+
/\bD-?FSL[-|_|\s]?v?1\.0\b/i, /\bD-?FSL[-|\s|_]?v?1\b/,
|
388
|
+
/\bGerman\sFREE\sSOFTWARE\b/i,
|
389
|
+
/\bDeutsche\sFreie\sSoftware\sLizenz\b/i
|
390
|
+
],
|
391
|
+
"ECL-1.0" => [ /\bECL[-|\s|_]?v?1\.0\b/i, /\bECL[-|\s|_]?v?1\b/i ],
|
392
|
+
"ECL-2.0" => [
|
393
|
+
/\bECL[-|\s|_]?v?2\.0\b/i, /\bECL[-|\s|_]?v?2\b/i,
|
394
|
+
/\bEDUCATIONAL\s+COMMUNITY\s+LICENSE[,]?\sVERSION\s2\.0\b/i
|
395
|
+
],
|
396
|
+
"EFL-1.0" => [/\bEFL[-|\s|_]?v?1\.0\b/i, /\bEFL[-|\s|_]?v?1\b/i ],
|
397
|
+
"EFL-2.0" => [
|
398
|
+
/\bEFL[-|\s|_]?v?2\.0\b/i, /\bEFL[-|\s|_]?v?2\b/i,
|
399
|
+
/\bEiffel\sForum\sLicense,?\sversion\s2/i,
|
400
|
+
/\bEiffel\sForum\sLicense\s2(?!\.)\b/i,
|
401
|
+
/\bEiffel\sForum\sLicense\b/i
|
402
|
+
],
|
403
|
+
"EPL-1.0" => [
|
404
|
+
/\bEPL[-|\s|_]?v?1\.0\b/i, /\bEPL[-|\s|_]?v?1\b/i,
|
405
|
+
/\bECLIPSE\s+PUBLIC\s+LICENSE\s+[v]?1\.0\b/i,
|
406
|
+
/\bECLIPSE\s+PUBLIC\s+LICENSE\b/i,
|
407
|
+
/^ECLIPSE$/i, /\AEPL\s*\z/
|
408
|
+
],
|
409
|
+
"ESA-1.0" => [
|
410
|
+
/\bESCL\s+[-|_]?\sType\s?1\b/,
|
411
|
+
/\bESA\sSOFTWARE\sCommunity\sLICENSE.+TYPE\s?1\b/i
|
412
|
+
],
|
413
|
+
"EUPL-1.0" => [/\b[\(]?EUPL[-|\s]?v?1\.0[\)]?\b/i],
|
414
|
+
"EUPL-1.1" => [
|
415
|
+
/\b[\(]?EUPL[-|\s]?v?1\.1[\)]?\b/i,
|
416
|
+
/\bEUROPEAN\s+UNION\s+PUBLIC\s+LICENSE\s+1\.1\b/i,
|
417
|
+
/\bEuropean\sUnion\sPublic\sLicense\b/i,
|
418
|
+
/\bEUPL\s+V?\.?1\.1\b/i, /\AEUPL\s*\z/i
|
419
|
+
],
|
420
|
+
"Fair" => [ /\bFAIR\s+LICENSE\b/i, /\AFair\s*\z/i],
|
421
|
+
"FreeType" => [ /\bFreeType\s+LICENSE\b/i],
|
422
|
+
"GFDL-1.0" => [
|
423
|
+
/\bGNU\sFree\sDocumentation\sLicense\b/i,
|
424
|
+
/\b[\(]?FDL[\)]?\b/
|
425
|
+
],
|
426
|
+
"GPL-1.0" => [
|
427
|
+
/\bGPL[-|\s|_]?v?1\.0\b/i, /\bGPL[-|\s|_]?v?1\b/i,
|
428
|
+
/\bGNU\sPUBLIC\sLICEN[S|C]E\sv?1\b/i
|
429
|
+
],
|
430
|
+
"GPL-2.0" => [
|
431
|
+
/\bGPL[-|\s|_]?v?2\.0/i, /\bGPL[-|\s|_]?v?2\b/i, /\bGPL\s+[v]?2\b/i,
|
432
|
+
/\bGNU\s+PUBLIC\s+LICENSE\s+v?2\.0\b/i,
|
433
|
+
/\bGNU\s+PUBLIC\s+License\sV?2\b/i,
|
434
|
+
/\bGNU\spublic\slicense\sversion\s2\b/i,
|
435
|
+
/\bGNU\sGeneral\sPublic\sLicense\sv?2\.0\b/i,
|
436
|
+
/\bGNU\sPublic\sLicense\s>=2\b/i,
|
437
|
+
/\bGNU\s+GPL\s+v2\b/i, /^GNUv?2\b/i, /^GLPv2\b/,
|
438
|
+
/\bWhatever\slicense\sPlone\sis\b/i
|
439
|
+
],
|
440
|
+
"GPL-3.0" => [
|
441
|
+
/\bGNU\s+GENERAL\s+PUBLIC\s+License\s+[v]?3\b/i,
|
442
|
+
/\bGNU\s+General\s+Public\s+License[\,]?\sVersion\s3[\.0]?\b/i,
|
443
|
+
/\bGNU\sPublic\sLicense\sv?3\.0\b/i,
|
444
|
+
/\bGNU\s+PUBLIC\s+LICENSE\s+v?3\b/i,
|
445
|
+
/\bGnu\sPublic\sLicense\sversion\s3\b/i,
|
446
|
+
/\bGNU\sGeneral\sPublic\sLicense\sversion\s?3\b/i,
|
447
|
+
/\bGPL[-|\s|_]?v?3\.0\b/i, /\bGPL[-|\s|_]?v?[\.]?3\b/i, /\bGPL\s+3\b/i,
|
448
|
+
/\bGNU\s+PUBLIC\s+v3\+?\b/i,
|
449
|
+
/\bGNUGPL[-|\s|\_]?v?3\b/i, /\bGNU\s+PL\s+[v]?3\b/i,
|
450
|
+
/\bGLPv3\b/i, /\bGNU3\b/i, /GPvL3/i, /\bGNU\sGLP\sv?3\b/i,
|
451
|
+
/\AGNU\sGENERAL\sPUBLIC\sLICENSE\s*\z/i, /\A[\(]?GPL[\)]?\s*\z/i
|
452
|
+
],
|
453
|
+
|
454
|
+
"IDPL-1.0" => [
|
455
|
+
/\bIDPL[-|\s|\_]?v?1\.0\b/,
|
456
|
+
/\bhttps?:\/\/www\.firebirdsql\.org\/index\.php\?op=doc\&id=idpl\b/i
|
457
|
+
],
|
458
|
+
"IPL-1.0" => [/\bIBM\sOpen\sSource\sLicense\b/i, /\bIBM\sPublic\sLicen[s|c]e\b/i],
|
459
|
+
"ISC" => [/\bISC\s+LICENSE\b/i, /\b[\(]?ISCL[\)]?\b/i, /\bISC\b/i,
|
460
|
+
/\AICS\s*\z/i],
|
461
|
+
"JSON" => [/\bJSON\s+LICENSE\b/i],
|
462
|
+
"KINDLY" => [/\bKINDLY\s+License\b/i],
|
463
|
+
"LGPL-2.0" => [
|
464
|
+
/\bLGPL[-|\s|_]?v?2\.0\b/i, /\bLGPL[-|\s|_]?v?2(?!\.)\b/i,
|
465
|
+
/\bLesser\sGeneral\sPublic\sLicense\sv?2(?!\.)\b/i,
|
466
|
+
/\bLPGL[-|\s|\_]?v?2(?!\.)\b/i
|
467
|
+
],
|
468
|
+
"LGPL-2.1" => [
|
469
|
+
/\bLGPL[-|\s|_]?v?2\.1\b/i,
|
470
|
+
/\bLesser\sGeneral\sPublic\sLicense\s+\(LGPL\)\s+Version\s+2\.1\b/i,
|
471
|
+
/\bLESSER\sGENERAL\sPUBLIC\sLICENSE[\,]?\sVersion\s2\.1[\,]?\b/i,
|
472
|
+
/\bLESSER\sGENERAL\sPUBLIC\sLICENSE[\,]?\sv?2\.1\b/i
|
473
|
+
],
|
474
|
+
"LGPL-3.0" => [/\bLGPL[-|\s|_]?v?3\.0\b/i, /\bLGPL[-|\s|_]?v?3[\+]?\b/i,
|
475
|
+
/\bLGLP[\s|-|v]?3\.0\b/i, /^LPLv3\s*$/, /\bLPGL[-|\s|_]?v?3[\+]?\b/i,
|
476
|
+
/\bLESSER\s+GENERAL\s+PUBLIC\s+License\s+[v]?3\b/i,
|
477
|
+
/\bLesser\sGeneral\sPublic\sLicense\sv?\.?\s+3\.0\b/i,
|
478
|
+
/\bhttps?:\/\/www\.gnu\.org\/copyleft\/lesser.html\b/i,
|
479
|
+
/\bLESSER\sGENERAL\sPUBLIC\sLICENSE\sVersion\s3\b/i,
|
480
|
+
/\bLesser\sGeneral\sPublic\sLicense[\,]?\sversion\s3\.0\b/i,
|
481
|
+
/\bLESSER\sGENERAL\sPUBLIC\sLICENSE.+?version\s?3/i,
|
482
|
+
/\A[\(]?LGPL[\)]?\s*\z/i
|
483
|
+
],
|
484
|
+
"MirOS" => [/\bMirOS\b/i],
|
485
|
+
"MIT" => [
|
486
|
+
/\bMIT\s+LICEN[S|C]E\b/i, /\AMITL?\s*\z/i, /\bEXPAT\b/i,
|
487
|
+
/\bMIT[-|\_]LICENSE\.\w{2,8}\b/i, /^MTI\b/i,
|
488
|
+
/\bMIT[-|\s|\_]?v?2\.0\b/i, /\AM\.I\.T[\.]?\s*\z/,
|
489
|
+
/\bMassachusetts-Institute-of-Technology-License/i
|
490
|
+
],
|
491
|
+
"MITNFA" => [/\bMIT\s\+no\-false\-attribs\slicense\b/i],
|
492
|
+
"MPL-1.0" => [
|
493
|
+
/\bMPL[-|\s|\_]?v?1\.0\b/i, /\bMPL[-|\s|\_]?v?1(?!\.)\b/i,
|
494
|
+
/\bMozilla\sPublic\sLicense\sv?1\.0\b/i,
|
495
|
+
],
|
496
|
+
"MPL-1.1" => [
|
497
|
+
/\bMozilla.Public.License\s+v?1\.1\b/i,
|
498
|
+
/\bMPL[-|\s|\_]?v?1\.1\b/i,
|
499
|
+
],
|
500
|
+
"MPL-2.0" => [
|
501
|
+
/\bMPL[-|\s|\_]?v?2\.0\b/i, /\bMPL[-|\s|\_]?v?2\b/i,
|
502
|
+
/\bMOZILLA\s+PUBLIC\s+LICENSE\s+2\.0\b/i,
|
503
|
+
/\bMozilla\sPublic\sLicense[\,]?\s+v?[\.]?\s*2\.0\b/i,
|
504
|
+
/\bMOZILLA\s+PUBLIC\s+LICENSE[,]?\s+version\s+2\.0\b/i,
|
505
|
+
/\bMozilla\s+v?2\.0\b/i,
|
506
|
+
/\b[\(]?MPL\s+2\.0[\)]?\b/, /\bMPL\b/i,
|
507
|
+
/\AMozilla\sPublic\sLicense\s*\z/i
|
508
|
+
],
|
509
|
+
"MS-PL" => [/\bMS-?PL\b/i],
|
510
|
+
"MS-RL" => [/\bMS-?RL\b/i, /\bMSR\-LA\b/i],
|
511
|
+
"ms_dotnet" => [/\bMICROSOFT\sSOFTWARE\sLICENSE\sTERMS\b/i],
|
512
|
+
"NASA-1.3" => [/\bNASA[-|\_|\s]?v?1\.3\b/i,
|
513
|
+
/\bNASA\sOpen\sSource\sAgreement\sversion\s1\.3\b/i],
|
514
|
+
"NCSA" => [/\bNCSA\s+License\b/i, /\bIllinois\/NCSA\sOpen\sSource\b/i, /\bNCSA\b/i ],
|
515
|
+
"NGPL" => [/\bNGPL\b/i],
|
516
|
+
"NOKIA" => [/\bNokia\sOpen\sSource\sLicense\b/i],
|
517
|
+
"NPL-1.1" => [/\bNetscape\sPublic\sLicense\b/i, /\b[(]?NPL[\)]?\b/i],
|
518
|
+
|
519
|
+
"NPOSL-3.0" => [/\bNPOSL[-|\s|\_]?v?3\.0\b/i, /\bNPOSL[-|\s|\_]?v?3\b/],
|
520
|
+
"OFL-1.0" => [/\bOFL[-|\s|\_]?v?1\.0\b/i, /\bOFL[-|\s|\_]?v?1(?!\.)\b/i,
|
521
|
+
/\bSIL\s+OFL\s+1\.0\b/i, /\ASIL\sOFL\s*\z/i ],
|
522
|
+
"OFL-1.1" => [
|
523
|
+
/\bOFL[-|\s|\_]?v?1\.1\b/i, /\bSIL\s+OFL\s+1\.1\b/i,
|
524
|
+
/\bSIL\sOpen\sFont\sLicense\b/i, /\bSIL\sOFL\s1\.1\b/i,
|
525
|
+
/\bOpen\sFont\sLicense\b/i ],
|
526
|
+
|
527
|
+
"OSL-1.0" => [/\bOSL[-|\s|\_]?v?1\.0\b/i, /\b\OSL[-|\s|\_]?v?1(?!\.)\b/i],
|
528
|
+
"OSL-2.0" => [/\bOSL[-|\s|\_]?v?2\.0\b/i, /\bOSL[-|\s|\_]?v?2(?!\.)\b/i],
|
529
|
+
"OSL-2.1" => [/\bOSL[-|\s|\_]?v?2\.1\b/i],
|
530
|
+
"OSL-3.0" => [
|
531
|
+
/\bOSL[-|\s|\_]?v?3\.0\b/i, /\bOSL[-|\s|\_]?v?3(?!\.)\b/i,
|
532
|
+
/\bOpen\sSoftware\sLicen[c|s]e\sv?3\.0\b/i,
|
533
|
+
/\bOSL[-|\s|\_]?v?\.?3\.[0|O]\b/i,
|
534
|
+
/\bOpen\sSoftware\sLicense\sversion\s3\.0\b/i,
|
535
|
+
/\AOSL\s*\z/i, /\bOpen-Software-License/i,
|
536
|
+
/\b[\(]?OSL[\)]?\s+v\s+3\.0\b/i
|
537
|
+
],
|
538
|
+
|
539
|
+
"PHP-3.0" => [/^PHP\s?\z/i, /\bPHP\sLicense\s3\.0\b/i, /\APHP[-|\s]LICEN[S|C]E\s*\z/i],
|
540
|
+
"PHP-3.01" => [/\bPHP\sLicense\sversion\s3\.0\d\b/i],
|
541
|
+
"PIL" => [/\bStandard\sPIL\sLicense\b/i, /\APIL\s*\z/i],
|
542
|
+
"PostgreSQL" => [/\bPostgreSQL\b/i],
|
543
|
+
"Public Domain" => [/\bPublic\s+Domain\b/i],
|
544
|
+
"Python-2.0" => [
|
545
|
+
/\bPython[-|\s|\_]?v?2\.0\b/i, /\bPython[-|\s|\_]?v?2(?!\.)\b/i,
|
546
|
+
/\bPSF[-|\s|\_]?v?2\b/i, /\bPSFL\b/i, /\bPSF\b/i,
|
547
|
+
/\bPython\s+Software\s+Foundation\b/i,
|
548
|
+
/\APython\b/i, /\bPSL\b/i, /\bSAME\sAS\spython2\.3\b/i,
|
549
|
+
/\bhttps?:\/\/www\.opensource\.org\/licenses\/PythonSoftFoundation\.php\b/i,
|
550
|
+
/\bhttps?:\/\/opensource\.org\/licenses\/PythonSoftFoundation\.php\b/i
|
551
|
+
],
|
552
|
+
"Repoze" => [/\bRepoze\sPublic\sLicense\b/i],
|
553
|
+
"RPL-1.1" => [/\bRPL[-|\s|_]?v?1\.1\b/i, /\bRPL[-|\s|_]?v?1(?!\.)\b/i],
|
554
|
+
"RPL-1.5" => [
|
555
|
+
/\bRPL[-|\s|_]?v?1\.5\b/i, /\ARPL\s*\z/i,
|
556
|
+
/\bhttps?:\/\/www\.opensource\.org\/licenses\/rpl\.php\b/i
|
557
|
+
],
|
558
|
+
"Ruby" => [/\bRUBY\sLICEN[S|C]E\b/i, /\ARUBY\b/i, /\bRUBY\'s\b/i],
|
559
|
+
"QPL-1.0" => [/\bQPL[-|\s|_]?v?1\.0\b/i,
|
560
|
+
/\bQT\sPublic\sLicen[c|s]e\b/i,
|
561
|
+
/\bPyQ\sGeneral\sLicense\b/i],
|
562
|
+
"Sleepycat" => [/\bSleepyCat\b/i],
|
563
|
+
"SPL-1.0" => [
|
564
|
+
/\bSPL[-|\_|\s]?v?1\.0\b/i, /\bSun\sPublic\sLicense\b/i
|
565
|
+
],
|
566
|
+
"W3C" => [/\bW3C\b/i],
|
567
|
+
"OpenSSL" => [/\bOPENSSL\b/i],
|
568
|
+
"Unicode-TOU" => [/\AUnicode-TOU[\s|\/|-]/i],
|
569
|
+
"UPL-1.0" => [/\bUniversal\sPermissive\sLicense\b/i],
|
570
|
+
"Unlicense" => [
|
571
|
+
/\bUNLI[C|S]EN[S|C]E\b/i, /\AUnlicen[s|c]ed\s*\z/i, /^go\sfor\sit\b/i,
|
572
|
+
/^Undecided\b/i,
|
573
|
+
/\bNO\s+LICEN[C|S]E\b/i, /\bNON[\s|-|\_]?LICENSE\b/i
|
574
|
+
],
|
575
|
+
"Whiskeyware" => [/\bWH?ISKEY[-|\s|\_]?WARE\b/i],
|
576
|
+
"WTFPL" => [
|
577
|
+
/\bWTF[P|G]?L\b/i, /\bWTFPL[-|v]?2\b/i, /^WTF\b/i, /\AWTFP\s*\z/i,
|
578
|
+
/\bDo\s+whatever\s+you\s+want\b/i, /\bDWTFYW\b/i, /\AWTPFL\s*\z/i,
|
579
|
+
/\bDo\s+What\s+the\s+Fuck\s+You\s+Want\b/i, /\ADWTFYWT\s*\z/i,
|
580
|
+
/\ADo\sWHATEVER\b/i, /\ADWYW\b/i, /\bDWTFYWTP\b/i,
|
581
|
+
/\ADWHTFYWTPL\s*\z/i, /\AWhatever\s*\z/i,
|
582
|
+
/\bDO\s(THE\s)?FUCK\sWHAT\sYOU\sWANT\b/i
|
583
|
+
],
|
584
|
+
"WXwindows" => [/\bwxWINDOWS\s+LIBRARY\sLICEN[C|S]E\b/i, /\AWXwindows\s*\z/i],
|
585
|
+
"X11" => [/\bX11\b/i],
|
586
|
+
"Zend-2.0" => [/\bZend\sFramework\b/i],
|
587
|
+
"ZPL-1.1" => [/\bZPL[-|\s|\_]?v?1\.1\b/i, /\bZPL[-|\s|\_]?v?1(?!\.)\b/i,
|
588
|
+
/\bZPL[-|\s|\_]?1\.0\b/i],
|
589
|
+
"ZPL-2.1" => [
|
590
|
+
/\bZPL[-|\s|\/|_|]?v?2\.1\b/i, /\bZPL[-|\s|_]?v?2(?!\.)\b/i,
|
591
|
+
/\bZPL\s+2\.\d\b/i, /\bZOPE\s+PUBLIC\s+LICENSE\b/i,
|
592
|
+
/\bZPL\s?$/i
|
593
|
+
],
|
594
|
+
"zlib-acknowledgement" => [/\bZLIB[\/|-|\s]LIBPNG\b/i],
|
595
|
+
"ZLIB" => [/\bZLIB(?!\-|\/)\b/i]
|
596
|
+
|
597
|
+
}
|
598
|
+
end
|
599
|
+
|
600
|
+
end
|
601
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'narray'
|
2
|
+
require 'tf-idf-similarity'
|
3
|
+
require 'msgpack'
|
4
|
+
|
5
|
+
module LicenseMatcher
|
6
|
+
|
7
|
+
class TFRubyMatcher
|
8
|
+
include Preprocess
|
9
|
+
|
10
|
+
attr_reader :corpus, :model, :spdx_ids
|
11
|
+
|
12
|
+
DEFAULT_INDEX_PATH = 'data/index.msgpack'
|
13
|
+
DEFAULT_MIN_CONFIDENCE = 0.9
|
14
|
+
A_DOC_ROW = 3 # a array index to find the rows of indexed documents
|
15
|
+
|
16
|
+
# Builds the matcher from a pre-built license index.
#
# index_path - path of the index file handed to read_corpus
#              (DEFAULT_INDEX_PATH when omitted)
#
# After construction `spdx_ids` holds the license ids, `corpus` the
# documents built from the license texts and `model` the BM25 model
# created over that corpus.
def initialize(index_path = DEFAULT_INDEX_PATH)
  # read_corpus returns the pair [ids, documents]
  @spdx_ids, @corpus = read_corpus(index_path)
  @model = TfIdfSimilarity::BM25Model.new(@corpus, :library => :narray)

  true
end
|
25
|
+
|
26
|
+
def match_text(text, min_confidence = DEFAULT_MIN_CONFIDENCE, is_processed_text = false)
|
27
|
+
return [] if text.to_s.empty?
|
28
|
+
|
29
|
+
text = preprocess_text(text) if is_processed_text == false
|
30
|
+
test_doc = TfIdfSimilarity::Document.new(text, {:id => "test"})
|
31
|
+
|
32
|
+
mat1 = @model.instance_variable_get(:@matrix)
|
33
|
+
mat2 = doc_tfidf_matrix(test_doc)
|
34
|
+
|
35
|
+
n_docs = @model.documents.size
|
36
|
+
dists = []
|
37
|
+
n_docs.times do |i|
|
38
|
+
dists << [i, cos_sim(mat1[i, true], mat2)]
|
39
|
+
end
|
40
|
+
|
41
|
+
doc_id, best_score = dists.sort {|a,b| b[1] <=> a[1]}.first
|
42
|
+
best_match = @model.documents[doc_id].id
|
43
|
+
|
44
|
+
if best_score.to_f > min_confidence
|
45
|
+
best_match
|
46
|
+
else
|
47
|
+
""
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Matches a license given as HTML.
#
# The markup is first passed through preprocess_html and the result is
# handed to match_text together with min_confidence; the return value is
# whatever match_text returns.
def match_html(html_text, min_confidence = DEFAULT_MIN_CONFIDENCE)
  extracted_text = preprocess_html(html_text)
  match_text(extracted_text, min_confidence)
end
|
54
|
+
|
55
|
+
#-- helpers
|
56
|
+
# Turns a document into the TF-IDF vector used for comparison.
#
# doc - document responding to term_count(term)
#
# Returns an NArray with one weight per model term, in the order of
# @model.terms. Terms that do not occur in `doc` get 0.0.
def doc_tfidf_matrix(doc)
  weights = @model.terms.map do |term|
    # only terms shared by the test doc and the license corpus are scored
    next 0.0 unless doc.term_count(term) > 0

    @model.idf(term) * @model.tf(doc, term)
  end

  NArray[*weights]
end
|
70
|
+
|
71
|
+
|
72
|
+
# Cosine similarity of two TF-IDF vectors.
#
# mat1, mat2 - vectors supporting element-wise `*`, `**` and `sum`
#
# Returns the dot product divided by the product of both magnitudes,
# or 0.0 when that product is not positive (e.g. one vector is all zeros).
def cos_sim(mat1, mat2)
  dot_product = (mat1 * mat2).sum
  magnitudes = Math.sqrt((mat1 ** 2).sum) * Math.sqrt((mat2 ** 2).sum)

  return 0.0 unless magnitudes > 0

  dot_product / magnitudes
end
|
79
|
+
|
80
|
+
# Reads the license texts from the pre-built index.
# NB! it is sensitive to changes in the Fosslim/Index serialization: the
# document rows are expected at position A_DOC_ROW of the unpacked index and
# every row is expected to carry the SPDX id and the license text as its
# second and third item.
#
# index_path - path of the MessagePack encoded index file
#
# Returns the pair [spdx_ids, docs]; rows whose text preprocesses to
# nil/false are left out of both lists.
def read_corpus(index_path)
  # MessagePack is a binary format: binread avoids newline translation and
  # encoding handling that text-mode File.read may apply
  idx = MessagePack.unpack(File.binread(index_path))
  spdx_ids = []
  docs = []

  idx[A_DOC_ROW].to_a.each do |doc_row|
    _, spdx_id, content = doc_row
    txt = preprocess_text(content)
    next unless txt

    spdx_ids << spdx_id
    docs << TfIdfSimilarity::Document.new(txt, :id => spdx_id)
  end

  [spdx_ids, docs]
end
|
98
|
+
|
99
|
+
end
|
100
|
+
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
|
2
|
+
module LicenseMatcher
|
3
|
+
|
4
|
+
class UrlMatcher
|
5
|
+
attr_reader :url_index
|
6
|
+
|
7
|
+
DEFAULT_LICENSE_JSON = 'data/spdx_licenses/licenses.json'
|
8
|
+
|
9
|
+
# Loads the license list and builds the url index from it.
#
# license_json_file - path of the licenses JSON file
#                     (DEFAULT_LICENSE_JSON when omitted)
#
# Raises RuntimeError when read_json_file returns nil for the file.
def initialize(license_json_file = DEFAULT_LICENSE_JSON)
  parsed_licenses = read_json_file(license_json_file)
  raise("Failed to read licenses.json") if parsed_licenses.nil?

  @url_index = read_license_url_index(parsed_licenses)
end
|
15
|
+
|
16
|
+
# Matches a license url against hand-picked pages, choosealicense.com,
# creativecommons.org and the urls indexed from licenses.json.
#
# the_url - url of a license page; nil and surrounding whitespace are accepted
#
# Returns the tuple [spdx_id, 1.0] (id in lower case) or [] when nothing
# matches.
def match_url(the_url)
  the_url = the_url.to_s.strip
  return [] if the_url.empty?

  # pages that are mapped by hand
  case the_url
  when 'http://jquery.org/license'
    return ['mit', 1.0] # jQuery license page doesn't include any license text
  when 'https://www.mozilla.org/en-US/MPL/'
    return ['mpl-2.0', 1.0]
  when 'http://fairlicense.org'
    return ['fair', 1.0]
  when 'http://www.aforgenet.com/framework/license.html'
    return ['lgpl-3.0', 1.0]
  when 'http://www.apache.org/licenses/', 'http://aws.amazon.com/apache2.0/'
    return ['apache-2.0', 1.0]
  when 'http://aws.amazon.com/asl/'
    return ['amazon', 1.0]
  when 'https://choosealicense.com/no-license/'
    return ['no-license', 1.0]
  when 'http://www.gzip.org/zlib/zlib_license.html', 'http://zlib.net/zlib-license.html'
    return ['zlib', 1.0]
  when 'http://www.wtfpl.net/about/'
    return ['wtfpl', 1.0]
  end

  # does url match with choosealicense.com; the id is the path after /licenses/
  match = the_url.match(%r{\bhttps?://(www\.)?choosealicense\.com/licenses/(\S+)/?\b}i)
  return [match[2].to_s.downcase, 1.0] if match

  # creativecommons.org: path segments such as `by/4.0` become `cc-by-4.0`
  match = the_url.match(%r{\bhttps?://(www\.)?creativecommons\.org/licenses/(\S+)/?\b}i)
  return ["cc-#{match[2].to_s.tr('/', '-').downcase}", 1.0] if match

  # check through SPDX urls
  @url_index.each do |lic_url, lic_id|
    # normalizes urls in the file: scheme and leading www. are matched as optional
    host_and_path = lic_url.to_s.strip.sub(%r{\Ahttps?://}i, '').sub(/\Awww\./i, '')

    # a blank url would produce a pattern that matches every http(s) url
    next if host_and_path.empty?

    # escape the url so that `.`, `?`, `(` etc. are matched literally
    matcher = Regexp.new("https?://(www\\.)?#{Regexp.escape(host_and_path)}", Regexp::IGNORECASE)
    return [lic_id.to_s.downcase, 1.0] if matcher.match(the_url)
  end

  []
end
|
72
|
+
|
73
|
+
# Builds the lookup map {url => spdx_id} from the parsed licenses list.
#
# spdx_licenses - enumerable of license items; each one is converted by
#                 process_spdx_item
#
# Returns one Hash holding the entries of all items; when a url occurs more
# than once the later item wins.
def read_license_url_index(spdx_licenses)
  spdx_licenses.each_with_object({}) do |lic, merged_index|
    merged_index.merge!(process_spdx_item(lic))
  end
end
|
79
|
+
|
80
|
+
|
81
|
+
# Collects the urls of a single license item.
#
# lic - Hash with the keys :id, :links and :text; :links and :text are lists
#       of Hashes carrying a :url (missing lists are treated as empty)
#
# Returns a Hash {url => lower-cased id}. It is empty when the item has no
# id. Entries without a url are skipped, because a blank key would later be
# turned into a pattern that matches any url.
def process_spdx_item(lic)
  url_index = {}
  lic_id = lic[:id].to_s.strip.downcase

  return url_index if lic_id.empty?

  (lic[:links].to_a + lic[:text].to_a).each do |entry|
    url = entry[:url]
    next if url.to_s.strip.empty?

    url_index[url] = lic_id
  end

  url_index
end
|
92
|
+
|
93
|
+
# Reads and parses a JSON file.
#
# file_path - path of the file to read
#
# Returns the parsed document with symbolized keys, or nil when the file
# can not be read or parsed; the failure is reported on stderr.
def read_json_file(file_path)
  JSON.parse(File.read(file_path), {symbolize_names: true})
rescue StandardError => e
  # Kernel#warn is always available; no `log` helper is defined for this
  # class in this file, so calling it here would raise instead of returning nil
  warn "Failed to read json file `#{file_path}`: #{e.message}"
  nil
end
|
99
|
+
|
100
|
+
|
101
|
+
end
|
102
|
+
end
|
data/lib/license_matcher.rb
CHANGED
@@ -1,7 +1,21 @@
|
|
1
1
|
require "helix_runtime"
|
2
2
|
|
3
3
|
begin
|
4
|
-
|
4
|
+
require "license_matcher/native"
|
5
5
|
rescue LoadError
|
6
|
-
|
6
|
+
warn "Unable to load license_matcher/native. Please run `rake build`"
|
7
|
+
end
|
8
|
+
|
9
|
+
require 'license_matcher/preprocess'
|
10
|
+
require 'license_matcher/url_matcher'
|
11
|
+
require 'license_matcher/rule_matcher'
|
12
|
+
require 'license_matcher/tf_ruby_matcher'
|
13
|
+
|
14
|
+
module LicenseMatcher
|
15
|
+
|
16
|
+
# Fallback for constants that can not be resolved on this module:
# the lookup is repeated on Object, i.e. in the global namespace.
#
# c - name of the missing constant
def self.const_missing(c)
  ::Object.const_get(c)
end
|
7
21
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: license_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Timo Sulg
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: helix_runtime
|
@@ -25,6 +25,48 @@ dependencies:
|
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: 0.6.0
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: narray
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - "~>"
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: 0.6.1.2
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - "~>"
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: 0.6.1.2
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: tf-idf-similarity
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - "~>"
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 0.1.6
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - "~>"
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: 0.1.6
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: nokogiri
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 1.8.0
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.8.0
|
28
70
|
- !ruby/object:Gem::Dependency
|
29
71
|
name: bundler
|
30
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,6 +109,20 @@ dependencies:
|
|
67
109
|
- - "~>"
|
68
110
|
- !ruby/object:Gem::Version
|
69
111
|
version: '3.4'
|
112
|
+
- !ruby/object:Gem::Dependency
|
113
|
+
name: msgpack
|
114
|
+
requirement: !ruby/object:Gem::Requirement
|
115
|
+
requirements:
|
116
|
+
- - "~>"
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
version: 1.1.0
|
119
|
+
type: :development
|
120
|
+
prerelease: false
|
121
|
+
version_requirements: !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - "~>"
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: 1.1.0
|
70
126
|
description: "\n LicenseMatcher is rubygem, which uses Fosslim to match various
|
71
127
|
OSS license\n with correct SPDX-id or EULA label.\n "
|
72
128
|
email:
|
@@ -86,6 +142,10 @@ files:
|
|
86
142
|
- Rakefile
|
87
143
|
- lib/license_matcher.rb
|
88
144
|
- lib/license_matcher/native.bundle
|
145
|
+
- lib/license_matcher/preprocess.rb
|
146
|
+
- lib/license_matcher/rule_matcher.rb
|
147
|
+
- lib/license_matcher/tf_ruby_matcher.rb
|
148
|
+
- lib/license_matcher/url_matcher.rb
|
89
149
|
- lib/tasks/helix_runtime.rake
|
90
150
|
homepage: https://www.github.com/fosslim
|
91
151
|
licenses: []
|
@@ -101,9 +161,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
101
161
|
version: '0'
|
102
162
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
163
|
requirements:
|
104
|
-
- - "
|
164
|
+
- - ">="
|
105
165
|
- !ruby/object:Gem::Version
|
106
|
-
version:
|
166
|
+
version: '0'
|
107
167
|
requirements: []
|
108
168
|
rubyforge_project:
|
109
169
|
rubygems_version: 2.5.2
|