rbbt-dm 1.1.51 → 1.1.52
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b24501336450d017789e30104fd8681c1fc6c20163c96e134fddfbbe84930da4
+  data.tar.gz: b4b15f1b3f94d07c40b779be25919f6809d2750885be59dab0d0f67749329b30
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8f83514ee5511773598c04675ac03db34e9593cc65c67dfe84a933b107f3137355025de2c79f03c3afa1173f01128c4c7e36776fb549991032c546e7a72bab71
+  data.tar.gz: e84f508a5ee4a68163e8d409c897cc8c3716d5da6ac20ea5aa9eb159c7baf9b8cc8cc8feec0fcd4c05f9ca43f1686a2e5b32f381c716c825198fbcbafa0d6055
share/spaCy/cpu/textcat_accuracy.conf ADDED
@@ -0,0 +1,86 @@
+# This is an auto-generated partial config. To use it with 'spacy train'
+# you can run spacy init fill-config to auto-fill all default settings:
+# python -m spacy init fill-config ./base_config.cfg ./config.cfg
+[paths]
+train = null
+dev = null
+
+[system]
+gpu_allocator = null
+
+[nlp]
+lang = "en"
+pipeline = ["tok2vec","textcat"]
+batch_size = 1000
+
+[components]
+
+[components.tok2vec]
+factory = "tok2vec"
+
+[components.tok2vec.model]
+@architectures = "spacy.Tok2Vec.v2"
+
+[components.tok2vec.model.embed]
+@architectures = "spacy.MultiHashEmbed.v2"
+width = ${components.tok2vec.model.encode.width}
+attrs = ["ORTH", "SHAPE"]
+rows = [5000, 2500]
+include_static_vectors = true
+
+[components.tok2vec.model.encode]
+@architectures = "spacy.MaxoutWindowEncoder.v2"
+width = 256
+depth = 8
+window_size = 1
+maxout_pieces = 3
+
+[components.textcat]
+factory = "textcat"
+
+[components.textcat.model]
+@architectures = "spacy.TextCatEnsemble.v2"
+nO = null
+
+[components.textcat.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+
+[components.textcat.model.linear_model]
+@architectures = "spacy.TextCatBOW.v1"
+exclusive_classes = true
+ngram_size = 1
+no_output_layer = false
+
+[corpora]
+
+[corpora.train]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+max_length = 2000
+
+[corpora.dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+max_length = 0
+
+[training]
+dev_corpus = "corpora.dev"
+train_corpus = "corpora.train"
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+
+[training.batcher]
+@batchers = "spacy.batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+
+[initialize]
+vectors = "en_core_web_lg"
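The header comment in this config describes the intended workflow. As a minimal sketch (assuming spaCy 3.x and the en_core_web_lg vectors are installed, and that ./train.spacy and ./dev.spacy are placeholder paths to corpora prepared as DocBin files), filling the partial config and training with it would look roughly like:

    # hypothetical paths; download vectors, fill in defaults, then train
    python -m spacy download en_core_web_lg
    python -m spacy init fill-config ./base_config.cfg ./config.cfg
    python -m spacy train ./config.cfg --output ./output --paths.train ./train.spacy --paths.dev ./dev.spacy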
share/spaCy/cpu/textcat_efficiency.conf ADDED
@@ -0,0 +1,78 @@
+# This is an auto-generated partial config. To use it with 'spacy train'
+# you can run spacy init fill-config to auto-fill all default settings:
+# python -m spacy init fill-config ./base_config.cfg ./config.cfg
+[paths]
+train = null
+dev = null
+
+[system]
+gpu_allocator = null
+
+[nlp]
+lang = "en"
+pipeline = ["tok2vec","textcat"]
+batch_size = 1000
+
+[components]
+
+[components.tok2vec]
+factory = "tok2vec"
+
+[components.tok2vec.model]
+@architectures = "spacy.Tok2Vec.v2"
+
+[components.tok2vec.model.embed]
+@architectures = "spacy.MultiHashEmbed.v2"
+width = ${components.tok2vec.model.encode.width}
+attrs = ["ORTH", "SHAPE"]
+rows = [5000, 2500]
+include_static_vectors = false
+
+[components.tok2vec.model.encode]
+@architectures = "spacy.MaxoutWindowEncoder.v2"
+width = 96
+depth = 4
+window_size = 1
+maxout_pieces = 3
+
+[components.textcat]
+factory = "textcat"
+
+[components.textcat.model]
+@architectures = "spacy.TextCatBOW.v1"
+exclusive_classes = true
+ngram_size = 1
+no_output_layer = false
+
+[corpora]
+
+[corpora.train]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+max_length = 2000
+
+[corpora.dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+max_length = 0
+
+[training]
+dev_corpus = "corpora.dev"
+train_corpus = "corpora.train"
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+
+[training.batcher]
+@batchers = "spacy.batch_by_words.v1"
+discard_oversize = false
+tolerance = 0.2
+
+[training.batcher.size]
+@schedules = "compounding.v1"
+start = 100
+stop = 1000
+compound = 1.001
+
+[initialize]
+vectors = null
share/spaCy/gpu/textcat_accuracy.conf ADDED
@@ -0,0 +1,84 @@
+# This is an auto-generated partial config. To use it with 'spacy train'
+# you can run spacy init fill-config to auto-fill all default settings:
+# python -m spacy init fill-config ./base_config.cfg ./config.cfg
+[paths]
+train = null
+dev = null
+
+[system]
+gpu_allocator = "pytorch"
+
+[nlp]
+lang = "en"
+pipeline = ["transformer","textcat"]
+batch_size = 128
+
+[components]
+
+[components.transformer]
+factory = "transformer"
+
+[components.transformer.model]
+@architectures = "spacy-transformers.TransformerModel.v1"
+name = "roberta-base"
+tokenizer_config = {"use_fast": true}
+
+[components.transformer.model.get_spans]
+@span_getters = "spacy-transformers.strided_spans.v1"
+window = 128
+stride = 96
+
+[components.textcat]
+factory = "textcat"
+
+[components.textcat.model]
+@architectures = "spacy.TextCatEnsemble.v2"
+nO = null
+
+[components.textcat.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.textcat.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
+
+[components.textcat.model.linear_model]
+@architectures = "spacy.TextCatBOW.v1"
+exclusive_classes = true
+ngram_size = 1
+no_output_layer = false
+
+[corpora]
+
+[corpora.train]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+max_length = 500
+
+[corpora.dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+max_length = 0
+
+[training]
+accumulate_gradient = 3
+dev_corpus = "corpora.dev"
+train_corpus = "corpora.train"
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+
+[training.optimizer.learn_rate]
+@schedules = "warmup_linear.v1"
+warmup_steps = 250
+total_steps = 20000
+initial_rate = 5e-5
+
+[training.batcher]
+@batchers = "spacy.batch_by_padded.v1"
+discard_oversize = true
+size = 2000
+buffer = 256
+
+[initialize]
+vectors = null
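This config sets gpu_allocator = "pytorch" and a transformer pipeline, so training is normally run on a GPU. A minimal sketch, assuming spacy-transformers and a CUDA-enabled PyTorch are installed, with placeholder paths and GPU id:

    # hypothetical paths; fill in defaults, then train on GPU 0
    python -m spacy init fill-config ./base_config.cfg ./config.cfg
    python -m spacy train ./config.cfg --gpu-id 0 --output ./output --paths.train ./train.spacy --paths.dev ./dev.spacy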
share/spaCy/gpu/textcat_efficiency.conf ADDED
@@ -0,0 +1,73 @@
+# This is an auto-generated partial config. To use it with 'spacy train'
+# you can run spacy init fill-config to auto-fill all default settings:
+# python -m spacy init fill-config ./base_config.cfg ./config.cfg
+[paths]
+train = null
+dev = null
+
+[system]
+gpu_allocator = "pytorch"
+
+[nlp]
+lang = "en"
+pipeline = ["transformer","textcat"]
+batch_size = 128
+
+[components]
+
+[components.transformer]
+factory = "transformer"
+
+[components.transformer.model]
+@architectures = "spacy-transformers.TransformerModel.v1"
+name = "roberta-base"
+tokenizer_config = {"use_fast": true}
+
+[components.transformer.model.get_spans]
+@span_getters = "spacy-transformers.strided_spans.v1"
+window = 128
+stride = 96
+
+[components.textcat]
+factory = "textcat"
+
+[components.textcat.model]
+@architectures = "spacy.TextCatBOW.v1"
+exclusive_classes = true
+ngram_size = 1
+no_output_layer = false
+
+[corpora]
+
+[corpora.train]
+@readers = "spacy.Corpus.v1"
+path = ${paths.train}
+max_length = 500
+
+[corpora.dev]
+@readers = "spacy.Corpus.v1"
+path = ${paths.dev}
+max_length = 0
+
+[training]
+accumulate_gradient = 3
+dev_corpus = "corpora.dev"
+train_corpus = "corpora.train"
+
+[training.optimizer]
+@optimizers = "Adam.v1"
+
+[training.optimizer.learn_rate]
+@schedules = "warmup_linear.v1"
+warmup_steps = 250
+total_steps = 20000
+initial_rate = 5e-5
+
+[training.batcher]
+@batchers = "spacy.batch_by_padded.v1"
+discard_oversize = true
+size = 2000
+buffer = 256
+
+[initialize]
+vectors = null
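Each of these configs trains a textcat pipeline whose predictions end up in doc.cats. A sketch of checking a trained model against held-out data, with ./output/model-best and ./dev.spacy as placeholder paths:

    # hypothetical paths; report textcat metrics on the dev set
    python -m spacy evaluate ./output/model-best ./dev.spacy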
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-dm
 version: !ruby/object:Gem::Version
-  version: 1.1.51
+  version: 1.1.52
 platform: ruby
 authors:
 - Miguel Vazquez
@@ -113,6 +113,10 @@ files:
 - share/R/MA.R
 - share/R/barcode.R
 - share/R/heatmap.3.R
+- share/spaCy/cpu/textcat_accuracy.conf
+- share/spaCy/cpu/textcat_efficiency.conf
+- share/spaCy/gpu/textcat_accuracy.conf
+- share/spaCy/gpu/textcat_efficiency.conf
 - test/rbbt/matrix/test_barcode.rb
 - test/rbbt/network/test_paths.rb
 - test/rbbt/statistics/test_fdr.rb