license_matcher 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +11 -4
- data/Cargo.toml +2 -2
- data/Gemfile.lock +1 -1
- data/README.md +23 -12
- data/lib/license_matcher/native.bundle +0 -0
- data/lib/license_matcher/tf_ruby_matcher.rb +5 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 75f4cc4338e87a4244875d5172770867b63588cb
|
4
|
+
data.tar.gz: 3bb514e6e029140949ed3c78c1b32a1a04333a04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1b99befeaf51c94e2f85acdc991abd02486de41ea3f7506a2e7a6402f08e4c0765b322cb480db98703da01ef286bf9751bc6fc458356e1c93c4142f611a26457
|
7
|
+
data.tar.gz: 67b3d8e0265f91ebd9d1818983dfea906f543dffcc08d41d3cb50aeb66d56fe76714409085e741513a36229cbaefef388e0ffe691e20e06ec558796eef6f4ac0
|
data/Cargo.lock
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
[root]
|
2
2
|
name = "license_matcher"
|
3
|
-
version = "0.
|
3
|
+
version = "0.2.0"
|
4
4
|
dependencies = [
|
5
|
-
"fosslim 0.0.2
|
5
|
+
"fosslim 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
6
6
|
"helix 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
7
7
|
]
|
8
8
|
|
@@ -28,10 +28,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
28
28
|
|
29
29
|
[[package]]
|
30
30
|
name = "fosslim"
|
31
|
-
version = "0.0.2
|
31
|
+
version = "0.0.2"
|
32
32
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
33
33
|
dependencies = [
|
34
34
|
"rmp-serde 0.13.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
35
|
+
"seahash 3.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
35
36
|
"serde 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
36
37
|
"serde_derive 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
37
38
|
"serde_json 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
@@ -95,6 +96,11 @@ dependencies = [
|
|
95
96
|
"serde 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
96
97
|
]
|
97
98
|
|
99
|
+
[[package]]
|
100
|
+
name = "seahash"
|
101
|
+
version = "3.0.5"
|
102
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
103
|
+
|
98
104
|
[[package]]
|
99
105
|
name = "serde"
|
100
106
|
version = "1.0.14"
|
@@ -158,7 +164,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
158
164
|
"checksum cslice 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0f8cb7306107e4b10e64994de6d3274bd08996a7c1322a27b86482392f96be0a"
|
159
165
|
"checksum cstr-macro 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db53fddba18cdd35477a7213a3ef6acfbfa333c31b42ce019e544c4a1420a06f"
|
160
166
|
"checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab"
|
161
|
-
"checksum fosslim 0.0.2
|
167
|
+
"checksum fosslim 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3024a8d550d9125c111b24ea5f36f8804e9b3b4a15143b2fc1a86f663edeb749"
|
162
168
|
"checksum helix 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "e131c0e8557e78f4d992b87e97cec90f8503c4b5f3af24e1183e4822736e8079"
|
163
169
|
"checksum itoa 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ac17257442c2ed77dbc9fd555cf83c58b0c7f7d0e8f2ae08c0ac05c72842e1f6"
|
164
170
|
"checksum libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)" = "2370ca07ec338939e356443dac2296f581453c35fe1e3a3ed06023c49435f915"
|
@@ -167,6 +173,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
167
173
|
"checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a"
|
168
174
|
"checksum rmp 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a3d45d7afc9b132b34a2479648863aa95c5c88e98b32285326a6ebadc80ec5c9"
|
169
175
|
"checksum rmp-serde 0.13.7 (registry+https://github.com/rust-lang/crates.io-index)" = "011e1d58446e9fa3af7cdc1fb91295b10621d3ac4cb3a85cc86385ee9ca50cd3"
|
176
|
+
"checksum seahash 3.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e048636bed25842fcdc36e5ad1ec6295b72d4b5b8a4b759b64915a4ce2b9d09d"
|
170
177
|
"checksum serde 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)" = "bcb6a7637a47663ee073391a139ed07851f27ed2532c2abc88c6bf27a16cdf34"
|
171
178
|
"checksum serde_derive 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)" = "812ff66056fd9a9a5b7c119714243b0862cf98340e7d4b5ee05a932c40d5ea6c"
|
172
179
|
"checksum serde_derive_internals 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bd381f6d01a6616cdba8530492d453b7761b456ba974e98768a18cad2cd76f58"
|
data/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "license_matcher"
|
3
|
-
version = "0.
|
3
|
+
version = "0.2.0"
|
4
4
|
authors = ["Timo Sulg <timgluz@gmail.com>"]
|
5
5
|
|
6
6
|
[lib]
|
@@ -8,4 +8,4 @@ crate-type = ["rlib", "cdylib"]
|
|
8
8
|
|
9
9
|
[dependencies]
|
10
10
|
helix = "^0.6.0"
|
11
|
-
fosslim = "0.0.2
|
11
|
+
fosslim = "0.0.2"
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -38,14 +38,14 @@ require 'license_matcher'
|
|
38
38
|
curl -O https://github.com/Fosslim/license_matcher/blob/master/data/index.msgpack
|
39
39
|
|
40
40
|
# or build index from the SPDX data
|
41
|
-
LicenseMatcher::
|
41
|
+
LicenseMatcher::IndexBuilder.build_index( "data/licenses", "data/index.msgpack")
|
42
42
|
|
43
43
|
# match license text
|
44
44
|
txt = File.read("fixtures/files/mit.txt");
|
45
45
|
|
46
46
|
lm = LicenseMatcher::TFRubyMatcher.new("data/index.msgpack")
|
47
|
-
lm.match_text(txt, 0.9)
|
48
|
-
|
47
|
+
m = lm.match_text(txt, 0.9)
|
48
|
+
p "spdx id: #{m.get_label()}, confidence: #{m.get_score()}"
|
49
49
|
|
50
50
|
```
|
51
51
|
|
@@ -91,29 +91,40 @@ txt = File.read "fixtures/files/mit.txt"
|
|
91
91
|
lm2.match_text txt
|
92
92
|
```
|
93
93
|
|
94
|
+
* **FingerprintMatcher** - uses hashes of 5-word n-grams to build fingerprints of the license files;
|
95
|
+
|
96
|
+
```
|
97
|
+
lm3 = File.read "fixtures/files/mit.txt"
|
98
|
+
lm3.match_text txt
|
99
|
+
```
|
100
|
+
|
94
101
|
## Benchmarks
|
95
102
|
|
96
|
-
* initialization
|
103
|
+
* initialization 1x
|
97
104
|
|
98
105
|
```
|
99
|
-
|
100
|
-
TFRubyMatcher:
|
101
|
-
TFRustMatcher:
|
106
|
+
user system total real
|
107
|
+
TFRubyMatcher: 12.970000 0.170000 13.140000 ( 13.361568)
|
108
|
+
TFRustMatcher: 0.030000 0.010000 0.040000 ( 0.033793)
|
109
|
+
FingerprintMatcher: 0.340000 0.010000 0.350000 ( 0.368786)
|
102
110
|
```
|
111
|
+
|
103
112
|
* matching preprocessed short [MIT](https://raw.githubusercontent.com/Fosslim/license_matcher/master/data/spdx_licenses/plain/MIT) text 1000x times
|
104
113
|
|
105
114
|
```
|
106
|
-
|
107
|
-
TFRubyMatcher:102.
|
108
|
-
TFRustMatcher: 7.
|
115
|
+
user system total real
|
116
|
+
TFRubyMatcher:102.380000 6.730000 109.110000 (113.526434)
|
117
|
+
TFRustMatcher: 7.920000 0.100000 8.020000 ( 8.248314)
|
118
|
+
FingerMatcher: 4.750000 0.060000 4.810000 ( 5.187512)
|
109
119
|
```
|
110
120
|
|
111
121
|
* matching preprocessed long [AGPL-3.0](https://raw.githubusercontent.com/Fosslim/license_matcher/master/data/spdx_licenses/plain/AGPL-3.0) text 1000x times
|
112
122
|
|
113
123
|
```
|
114
124
|
user system total real
|
115
|
-
TFRubyMatcher:
|
116
|
-
TFRustMatcher: 9.
|
125
|
+
TFRubyMatcher:217.270000 9.770000 227.040000 (232.190339)
|
126
|
+
TFRustMatcher: 9.330000 0.120000 9.450000 ( 9.654545)
|
127
|
+
FingerMatcher: 23.650000 0.250000 23.900000 ( 24.311123)
|
117
128
|
```
|
118
129
|
|
119
130
|
## Development
|
Binary file
|
@@ -23,6 +23,9 @@ module LicenseMatcher
|
|
23
23
|
true
|
24
24
|
end
|
25
25
|
|
26
|
+
# matches given text with SPDX licenses and returns Match object
|
27
|
+
# returns:
|
28
|
+
# match - Match {label: String, score: float}
|
26
29
|
def match_text(text, min_confidence = DEFAULT_MIN_CONFIDENCE, is_processed_text = false)
|
27
30
|
return [] if text.to_s.empty?
|
28
31
|
|
@@ -42,9 +45,9 @@ module LicenseMatcher
|
|
42
45
|
best_match = @model.documents[doc_id].id
|
43
46
|
|
44
47
|
if best_score.to_f > min_confidence
|
45
|
-
best_match
|
48
|
+
Match.new(best_match, best_score)
|
46
49
|
else
|
47
|
-
""
|
50
|
+
Match.new("", 0.0)
|
48
51
|
end
|
49
52
|
end
|
50
53
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: license_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Timo Sulg
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: helix_runtime
|
@@ -148,7 +148,8 @@ files:
|
|
148
148
|
- lib/license_matcher/url_matcher.rb
|
149
149
|
- lib/tasks/helix_runtime.rake
|
150
150
|
homepage: https://www.github.com/fosslim
|
151
|
-
licenses:
|
151
|
+
licenses:
|
152
|
+
- MIT
|
152
153
|
metadata: {}
|
153
154
|
post_install_message:
|
154
155
|
rdoc_options: []
|