rbbt-dm 1.1.51 → 1.1.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5327d1c2a46283b95fa380a73be418cf7e5a3afd2aca6002bd9ca591ab3f6df8
4
- data.tar.gz: 1066345322e342c8f642b89825c1a8555c6bfa1d09985705d1b382654d91653f
3
+ metadata.gz: b24501336450d017789e30104fd8681c1fc6c20163c96e134fddfbbe84930da4
4
+ data.tar.gz: b4b15f1b3f94d07c40b779be25919f6809d2750885be59dab0d0f67749329b30
5
5
  SHA512:
6
- metadata.gz: c1a3cf2ec93909993b290c7c6cb0b6e9c6090155657403c705b93b74a538cbe91ff23dead14c33453dde0c31ba681099b3e5c93f2699a471c19c299b43d0f304
7
- data.tar.gz: e5b456330625bb57a494fb9e5fc9757e96c134da8f410fabe8f7e9d06169d09f0c4fa3c7e5a375870e6f45b8f5ffdf7855b8260719fb1a32846a2f24c18e8853
6
+ metadata.gz: 8f83514ee5511773598c04675ac03db34e9593cc65c67dfe84a933b107f3137355025de2c79f03c3afa1173f01128c4c7e36776fb549991032c546e7a72bab71
7
+ data.tar.gz: e84f508a5ee4a68163e8d409c897cc8c3716d5da6ac20ea5aa9eb159c7baf9b8cc8cc8feec0fcd4c05f9ca43f1686a2e5b32f381c716c825198fbcbafa0d6055
share/spaCy/cpu/textcat_accuracy.conf ADDED
@@ -0,0 +1,86 @@
1
+ # This is an auto-generated partial config. To use it with 'spacy train'
2
+ # you can run spacy init fill-config to auto-fill all default settings:
3
+ # python -m spacy init fill-config ./base_config.cfg ./config.cfg
4
+ [paths]
5
+ train = null
6
+ dev = null
7
+
8
+ [system]
9
+ gpu_allocator = null
10
+
11
+ [nlp]
12
+ lang = "en"
13
+ pipeline = ["tok2vec","textcat"]
14
+ batch_size = 1000
15
+
16
+ [components]
17
+
18
+ [components.tok2vec]
19
+ factory = "tok2vec"
20
+
21
+ [components.tok2vec.model]
22
+ @architectures = "spacy.Tok2Vec.v2"
23
+
24
+ [components.tok2vec.model.embed]
25
+ @architectures = "spacy.MultiHashEmbed.v2"
26
+ width = ${components.tok2vec.model.encode.width}
27
+ attrs = ["ORTH", "SHAPE"]
28
+ rows = [5000, 2500]
29
+ include_static_vectors = true
30
+
31
+ [components.tok2vec.model.encode]
32
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
33
+ width = 256
34
+ depth = 8
35
+ window_size = 1
36
+ maxout_pieces = 3
37
+
38
+ [components.textcat]
39
+ factory = "textcat"
40
+
41
+ [components.textcat.model]
42
+ @architectures = "spacy.TextCatEnsemble.v2"
43
+ nO = null
44
+
45
+ [components.textcat.model.tok2vec]
46
+ @architectures = "spacy.Tok2VecListener.v1"
47
+ width = ${components.tok2vec.model.encode.width}
48
+
49
+ [components.textcat.model.linear_model]
50
+ @architectures = "spacy.TextCatBOW.v1"
51
+ exclusive_classes = true
52
+ ngram_size = 1
53
+ no_output_layer = false
54
+
55
+ [corpora]
56
+
57
+ [corpora.train]
58
+ @readers = "spacy.Corpus.v1"
59
+ path = ${paths.train}
60
+ max_length = 2000
61
+
62
+ [corpora.dev]
63
+ @readers = "spacy.Corpus.v1"
64
+ path = ${paths.dev}
65
+ max_length = 0
66
+
67
+ [training]
68
+ dev_corpus = "corpora.dev"
69
+ train_corpus = "corpora.train"
70
+
71
+ [training.optimizer]
72
+ @optimizers = "Adam.v1"
73
+
74
+ [training.batcher]
75
+ @batchers = "spacy.batch_by_words.v1"
76
+ discard_oversize = false
77
+ tolerance = 0.2
78
+
79
+ [training.batcher.size]
80
+ @schedules = "compounding.v1"
81
+ start = 100
82
+ stop = 1000
83
+ compound = 1.001
84
+
85
+ [initialize]
86
+ vectors = "en_core_web_lg"
share/spaCy/cpu/textcat_efficiency.conf ADDED
@@ -0,0 +1,78 @@
1
+ # This is an auto-generated partial config. To use it with 'spacy train'
2
+ # you can run spacy init fill-config to auto-fill all default settings:
3
+ # python -m spacy init fill-config ./base_config.cfg ./config.cfg
4
+ [paths]
5
+ train = null
6
+ dev = null
7
+
8
+ [system]
9
+ gpu_allocator = null
10
+
11
+ [nlp]
12
+ lang = "en"
13
+ pipeline = ["tok2vec","textcat"]
14
+ batch_size = 1000
15
+
16
+ [components]
17
+
18
+ [components.tok2vec]
19
+ factory = "tok2vec"
20
+
21
+ [components.tok2vec.model]
22
+ @architectures = "spacy.Tok2Vec.v2"
23
+
24
+ [components.tok2vec.model.embed]
25
+ @architectures = "spacy.MultiHashEmbed.v2"
26
+ width = ${components.tok2vec.model.encode.width}
27
+ attrs = ["ORTH", "SHAPE"]
28
+ rows = [5000, 2500]
29
+ include_static_vectors = false
30
+
31
+ [components.tok2vec.model.encode]
32
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
33
+ width = 96
34
+ depth = 4
35
+ window_size = 1
36
+ maxout_pieces = 3
37
+
38
+ [components.textcat]
39
+ factory = "textcat"
40
+
41
+ [components.textcat.model]
42
+ @architectures = "spacy.TextCatBOW.v1"
43
+ exclusive_classes = true
44
+ ngram_size = 1
45
+ no_output_layer = false
46
+
47
+ [corpora]
48
+
49
+ [corpora.train]
50
+ @readers = "spacy.Corpus.v1"
51
+ path = ${paths.train}
52
+ max_length = 2000
53
+
54
+ [corpora.dev]
55
+ @readers = "spacy.Corpus.v1"
56
+ path = ${paths.dev}
57
+ max_length = 0
58
+
59
+ [training]
60
+ dev_corpus = "corpora.dev"
61
+ train_corpus = "corpora.train"
62
+
63
+ [training.optimizer]
64
+ @optimizers = "Adam.v1"
65
+
66
+ [training.batcher]
67
+ @batchers = "spacy.batch_by_words.v1"
68
+ discard_oversize = false
69
+ tolerance = 0.2
70
+
71
+ [training.batcher.size]
72
+ @schedules = "compounding.v1"
73
+ start = 100
74
+ stop = 1000
75
+ compound = 1.001
76
+
77
+ [initialize]
78
+ vectors = null
share/spaCy/gpu/textcat_accuracy.conf ADDED
@@ -0,0 +1,84 @@
1
+ # This is an auto-generated partial config. To use it with 'spacy train'
2
+ # you can run spacy init fill-config to auto-fill all default settings:
3
+ # python -m spacy init fill-config ./base_config.cfg ./config.cfg
4
+ [paths]
5
+ train = null
6
+ dev = null
7
+
8
+ [system]
9
+ gpu_allocator = "pytorch"
10
+
11
+ [nlp]
12
+ lang = "en"
13
+ pipeline = ["transformer","textcat"]
14
+ batch_size = 128
15
+
16
+ [components]
17
+
18
+ [components.transformer]
19
+ factory = "transformer"
20
+
21
+ [components.transformer.model]
22
+ @architectures = "spacy-transformers.TransformerModel.v1"
23
+ name = "roberta-base"
24
+ tokenizer_config = {"use_fast": true}
25
+
26
+ [components.transformer.model.get_spans]
27
+ @span_getters = "spacy-transformers.strided_spans.v1"
28
+ window = 128
29
+ stride = 96
30
+
31
+ [components.textcat]
32
+ factory = "textcat"
33
+
34
+ [components.textcat.model]
35
+ @architectures = "spacy.TextCatEnsemble.v2"
36
+ nO = null
37
+
38
+ [components.textcat.model.tok2vec]
39
+ @architectures = "spacy-transformers.TransformerListener.v1"
40
+ grad_factor = 1.0
41
+
42
+ [components.textcat.model.tok2vec.pooling]
43
+ @layers = "reduce_mean.v1"
44
+
45
+ [components.textcat.model.linear_model]
46
+ @architectures = "spacy.TextCatBOW.v1"
47
+ exclusive_classes = true
48
+ ngram_size = 1
49
+ no_output_layer = false
50
+
51
+ [corpora]
52
+
53
+ [corpora.train]
54
+ @readers = "spacy.Corpus.v1"
55
+ path = ${paths.train}
56
+ max_length = 500
57
+
58
+ [corpora.dev]
59
+ @readers = "spacy.Corpus.v1"
60
+ path = ${paths.dev}
61
+ max_length = 0
62
+
63
+ [training]
64
+ accumulate_gradient = 3
65
+ dev_corpus = "corpora.dev"
66
+ train_corpus = "corpora.train"
67
+
68
+ [training.optimizer]
69
+ @optimizers = "Adam.v1"
70
+
71
+ [training.optimizer.learn_rate]
72
+ @schedules = "warmup_linear.v1"
73
+ warmup_steps = 250
74
+ total_steps = 20000
75
+ initial_rate = 5e-5
76
+
77
+ [training.batcher]
78
+ @batchers = "spacy.batch_by_padded.v1"
79
+ discard_oversize = true
80
+ size = 2000
81
+ buffer = 256
82
+
83
+ [initialize]
84
+ vectors = null
share/spaCy/gpu/textcat_efficiency.conf ADDED
@@ -0,0 +1,73 @@
1
+ # This is an auto-generated partial config. To use it with 'spacy train'
2
+ # you can run spacy init fill-config to auto-fill all default settings:
3
+ # python -m spacy init fill-config ./base_config.cfg ./config.cfg
4
+ [paths]
5
+ train = null
6
+ dev = null
7
+
8
+ [system]
9
+ gpu_allocator = "pytorch"
10
+
11
+ [nlp]
12
+ lang = "en"
13
+ pipeline = ["transformer","textcat"]
14
+ batch_size = 128
15
+
16
+ [components]
17
+
18
+ [components.transformer]
19
+ factory = "transformer"
20
+
21
+ [components.transformer.model]
22
+ @architectures = "spacy-transformers.TransformerModel.v1"
23
+ name = "roberta-base"
24
+ tokenizer_config = {"use_fast": true}
25
+
26
+ [components.transformer.model.get_spans]
27
+ @span_getters = "spacy-transformers.strided_spans.v1"
28
+ window = 128
29
+ stride = 96
30
+
31
+ [components.textcat]
32
+ factory = "textcat"
33
+
34
+ [components.textcat.model]
35
+ @architectures = "spacy.TextCatBOW.v1"
36
+ exclusive_classes = true
37
+ ngram_size = 1
38
+ no_output_layer = false
39
+
40
+ [corpora]
41
+
42
+ [corpora.train]
43
+ @readers = "spacy.Corpus.v1"
44
+ path = ${paths.train}
45
+ max_length = 500
46
+
47
+ [corpora.dev]
48
+ @readers = "spacy.Corpus.v1"
49
+ path = ${paths.dev}
50
+ max_length = 0
51
+
52
+ [training]
53
+ accumulate_gradient = 3
54
+ dev_corpus = "corpora.dev"
55
+ train_corpus = "corpora.train"
56
+
57
+ [training.optimizer]
58
+ @optimizers = "Adam.v1"
59
+
60
+ [training.optimizer.learn_rate]
61
+ @schedules = "warmup_linear.v1"
62
+ warmup_steps = 250
63
+ total_steps = 20000
64
+ initial_rate = 5e-5
65
+
66
+ [training.batcher]
67
+ @batchers = "spacy.batch_by_padded.v1"
68
+ discard_oversize = true
69
+ size = 2000
70
+ buffer = 256
71
+
72
+ [initialize]
73
+ vectors = null
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.51
4
+ version: 1.1.52
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
@@ -113,6 +113,10 @@ files:
113
113
  - share/R/MA.R
114
114
  - share/R/barcode.R
115
115
  - share/R/heatmap.3.R
116
+ - share/spaCy/cpu/textcat_accuracy.conf
117
+ - share/spaCy/cpu/textcat_efficiency.conf
118
+ - share/spaCy/gpu/textcat_accuracy.conf
119
+ - share/spaCy/gpu/textcat_efficiency.conf
116
120
  - test/rbbt/matrix/test_barcode.rb
117
121
  - test/rbbt/network/test_paths.rb
118
122
  - test/rbbt/statistics/test_fdr.rb