dasein-core 0.2.6__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dasein/api.py +1219 -133
- dasein/capture.py +2379 -1803
- dasein/microturn.py +475 -0
- dasein/models/en_core_web_sm/en_core_web_sm/__init__.py +10 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE +19 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES +66 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md +47 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/accuracy.json +330 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/attribute_ruler/patterns +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/config.cfg +269 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/lemmatizer/lookups/lookups.bin +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/meta.json +521 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/cfg +13 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/moves +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/cfg +13 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/moves +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/cfg +57 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tokenizer +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/key2row +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/lookups.bin +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/strings.json +84782 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors.cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/meta.json +521 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSE +19 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSES_SOURCES +66 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/METADATA +59 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/RECORD +35 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/WHEEL +5 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/entry_points.txt +2 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/top_level.txt +1 -0
- dasein/pipecleaner.py +1917 -0
- dasein/services/post_run_client.py +4 -2
- dasein/services/service_adapter.py +4 -2
- dasein/wrappers.py +314 -0
- {dasein_core-0.2.6.dist-info → dasein_core-0.2.9.dist-info}/METADATA +4 -1
- dasein_core-0.2.9.dist-info/RECORD +59 -0
- dasein_core-0.2.6.dist-info/RECORD +0 -21
- {dasein_core-0.2.6.dist-info → dasein_core-0.2.9.dist-info}/WHEEL +0 -0
- {dasein_core-0.2.6.dist-info → dasein_core-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {dasein_core-0.2.6.dist-info → dasein_core-0.2.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,330 @@
|
|
1
|
+
{
|
2
|
+
"token_acc": 0.9986194413,
|
3
|
+
"token_p": 0.9956819193,
|
4
|
+
"token_r": 0.9957659295,
|
5
|
+
"token_f": 0.9957239226,
|
6
|
+
"tag_acc": 0.97246532,
|
7
|
+
"sents_p": 0.9201877934,
|
8
|
+
"sents_r": 0.8921432812,
|
9
|
+
"sents_f": 0.9059485531,
|
10
|
+
"dep_uas": 0.9175304332,
|
11
|
+
"dep_las": 0.89874821,
|
12
|
+
"dep_las_per_type": {
|
13
|
+
"prep": {
|
14
|
+
"p": 0.853521338,
|
15
|
+
"r": 0.8635932461,
|
16
|
+
"f": 0.8585277532
|
17
|
+
},
|
18
|
+
"det": {
|
19
|
+
"p": 0.9763930156,
|
20
|
+
"r": 0.9781048683,
|
21
|
+
"f": 0.9772481923
|
22
|
+
},
|
23
|
+
"pobj": {
|
24
|
+
"p": 0.9613764045,
|
25
|
+
"r": 0.967681131,
|
26
|
+
"f": 0.9645184649
|
27
|
+
},
|
28
|
+
"nsubj": {
|
29
|
+
"p": 0.9565737052,
|
30
|
+
"r": 0.9467250821,
|
31
|
+
"f": 0.9516239128
|
32
|
+
},
|
33
|
+
"aux": {
|
34
|
+
"p": 0.9815061794,
|
35
|
+
"r": 0.9827294578,
|
36
|
+
"f": 0.9821174377
|
37
|
+
},
|
38
|
+
"advmod": {
|
39
|
+
"p": 0.8548033091,
|
40
|
+
"r": 0.8519266364,
|
41
|
+
"f": 0.8533625485
|
42
|
+
},
|
43
|
+
"relcl": {
|
44
|
+
"p": 0.7571736011,
|
45
|
+
"r": 0.7659651669,
|
46
|
+
"f": 0.7615440115
|
47
|
+
},
|
48
|
+
"root": {
|
49
|
+
"p": 0.9195942266,
|
50
|
+
"r": 0.8910218352,
|
51
|
+
"f": 0.9050825879
|
52
|
+
},
|
53
|
+
"xcomp": {
|
54
|
+
"p": 0.8836222144,
|
55
|
+
"r": 0.8966259871,
|
56
|
+
"f": 0.8900766079
|
57
|
+
},
|
58
|
+
"amod": {
|
59
|
+
"p": 0.9174389766,
|
60
|
+
"r": 0.9107223842,
|
61
|
+
"f": 0.9140683422
|
62
|
+
},
|
63
|
+
"compound": {
|
64
|
+
"p": 0.9126489559,
|
65
|
+
"r": 0.9298284696,
|
66
|
+
"f": 0.9211586207
|
67
|
+
},
|
68
|
+
"poss": {
|
69
|
+
"p": 0.9739583333,
|
70
|
+
"r": 0.9786634461,
|
71
|
+
"f": 0.9763052209
|
72
|
+
},
|
73
|
+
"ccomp": {
|
74
|
+
"p": 0.7671207315,
|
75
|
+
"r": 0.8372708758,
|
76
|
+
"f": 0.8006621872
|
77
|
+
},
|
78
|
+
"attr": {
|
79
|
+
"p": 0.899837794,
|
80
|
+
"r": 0.93313709,
|
81
|
+
"f": 0.9161849711
|
82
|
+
},
|
83
|
+
"case": {
|
84
|
+
"p": 0.9787549407,
|
85
|
+
"r": 0.9914914915,
|
86
|
+
"f": 0.9850820487
|
87
|
+
},
|
88
|
+
"mark": {
|
89
|
+
"p": 0.9068783069,
|
90
|
+
"r": 0.9083200848,
|
91
|
+
"f": 0.9075986232
|
92
|
+
},
|
93
|
+
"intj": {
|
94
|
+
"p": 0.6717131474,
|
95
|
+
"r": 0.6175824176,
|
96
|
+
"f": 0.6435114504
|
97
|
+
},
|
98
|
+
"advcl": {
|
99
|
+
"p": 0.6633986928,
|
100
|
+
"r": 0.6645681189,
|
101
|
+
"f": 0.6639828909
|
102
|
+
},
|
103
|
+
"cc": {
|
104
|
+
"p": 0.8323511726,
|
105
|
+
"r": 0.8277717976,
|
106
|
+
"f": 0.8300551691
|
107
|
+
},
|
108
|
+
"neg": {
|
109
|
+
"p": 0.9466865969,
|
110
|
+
"r": 0.9533366784,
|
111
|
+
"f": 0.95
|
112
|
+
},
|
113
|
+
"conj": {
|
114
|
+
"p": 0.7567333828,
|
115
|
+
"r": 0.7710221551,
|
116
|
+
"f": 0.763810949
|
117
|
+
},
|
118
|
+
"nsubjpass": {
|
119
|
+
"p": 0.9182939363,
|
120
|
+
"r": 0.9164102564,
|
121
|
+
"f": 0.9173511294
|
122
|
+
},
|
123
|
+
"auxpass": {
|
124
|
+
"p": 0.9501335708,
|
125
|
+
"r": 0.9722095672,
|
126
|
+
"f": 0.9610448097
|
127
|
+
},
|
128
|
+
"dobj": {
|
129
|
+
"p": 0.9229805886,
|
130
|
+
"r": 0.9396764682,
|
131
|
+
"f": 0.9312537019
|
132
|
+
},
|
133
|
+
"nummod": {
|
134
|
+
"p": 0.9379292801,
|
135
|
+
"r": 0.9310606061,
|
136
|
+
"f": 0.9344823216
|
137
|
+
},
|
138
|
+
"npadvmod": {
|
139
|
+
"p": 0.7629658087,
|
140
|
+
"r": 0.7055062167,
|
141
|
+
"f": 0.7331118494
|
142
|
+
},
|
143
|
+
"prt": {
|
144
|
+
"p": 0.8118323747,
|
145
|
+
"r": 0.8853046595,
|
146
|
+
"f": 0.8469781397
|
147
|
+
},
|
148
|
+
"pcomp": {
|
149
|
+
"p": 0.8835714286,
|
150
|
+
"r": 0.8662464986,
|
151
|
+
"f": 0.8748231966
|
152
|
+
},
|
153
|
+
"expl": {
|
154
|
+
"p": 0.9851380042,
|
155
|
+
"r": 0.9935760171,
|
156
|
+
"f": 0.9893390192
|
157
|
+
},
|
158
|
+
"acl": {
|
159
|
+
"p": 0.742010459,
|
160
|
+
"r": 0.6966721222,
|
161
|
+
"f": 0.7186268993
|
162
|
+
},
|
163
|
+
"agent": {
|
164
|
+
"p": 0.9034482759,
|
165
|
+
"r": 0.9390681004,
|
166
|
+
"f": 0.920913884
|
167
|
+
},
|
168
|
+
"dative": {
|
169
|
+
"p": 0.8,
|
170
|
+
"r": 0.6972477064,
|
171
|
+
"f": 0.7450980392
|
172
|
+
},
|
173
|
+
"acomp": {
|
174
|
+
"p": 0.9020594966,
|
175
|
+
"r": 0.893877551,
|
176
|
+
"f": 0.8979498861
|
177
|
+
},
|
178
|
+
"dep": {
|
179
|
+
"p": 0.4147286822,
|
180
|
+
"r": 0.1737012987,
|
181
|
+
"f": 0.2448512586
|
182
|
+
},
|
183
|
+
"csubj": {
|
184
|
+
"p": 0.6983240223,
|
185
|
+
"r": 0.7396449704,
|
186
|
+
"f": 0.7183908046
|
187
|
+
},
|
188
|
+
"quantmod": {
|
189
|
+
"p": 0.8727436823,
|
190
|
+
"r": 0.7855402112,
|
191
|
+
"f": 0.8268490808
|
192
|
+
},
|
193
|
+
"nmod": {
|
194
|
+
"p": 0.7498033045,
|
195
|
+
"r": 0.5807434491,
|
196
|
+
"f": 0.654532967
|
197
|
+
},
|
198
|
+
"appos": {
|
199
|
+
"p": 0.7048498845,
|
200
|
+
"r": 0.6620390456,
|
201
|
+
"f": 0.6827740492
|
202
|
+
},
|
203
|
+
"predet": {
|
204
|
+
"p": 0.8299595142,
|
205
|
+
"r": 0.8798283262,
|
206
|
+
"f": 0.8541666667
|
207
|
+
},
|
208
|
+
"preconj": {
|
209
|
+
"p": 0.5544554455,
|
210
|
+
"r": 0.6511627907,
|
211
|
+
"f": 0.5989304813
|
212
|
+
},
|
213
|
+
"oprd": {
|
214
|
+
"p": 0.8013245033,
|
215
|
+
"r": 0.7223880597,
|
216
|
+
"f": 0.759811617
|
217
|
+
},
|
218
|
+
"parataxis": {
|
219
|
+
"p": 0.6428571429,
|
220
|
+
"r": 0.4880694143,
|
221
|
+
"f": 0.5548705302
|
222
|
+
},
|
223
|
+
"meta": {
|
224
|
+
"p": 0.3770491803,
|
225
|
+
"r": 0.4423076923,
|
226
|
+
"f": 0.407079646
|
227
|
+
},
|
228
|
+
"csubjpass": {
|
229
|
+
"p": 0.5555555556,
|
230
|
+
"r": 0.8333333333,
|
231
|
+
"f": 0.6666666667
|
232
|
+
}
|
233
|
+
},
|
234
|
+
"ents_p": 0.8454836771,
|
235
|
+
"ents_r": 0.8456530449,
|
236
|
+
"ents_f": 0.8455683525,
|
237
|
+
"ents_per_type": {
|
238
|
+
"DATE": {
|
239
|
+
"p": 0.8603213844,
|
240
|
+
"r": 0.8838095238,
|
241
|
+
"f": 0.8719072972
|
242
|
+
},
|
243
|
+
"GPE": {
|
244
|
+
"p": 0.9146932953,
|
245
|
+
"r": 0.8942817294,
|
246
|
+
"f": 0.9043723554
|
247
|
+
},
|
248
|
+
"ORG": {
|
249
|
+
"p": 0.7955942623,
|
250
|
+
"r": 0.8234358431,
|
251
|
+
"f": 0.8092756644
|
252
|
+
},
|
253
|
+
"CARDINAL": {
|
254
|
+
"p": 0.8149171271,
|
255
|
+
"r": 0.8769322235,
|
256
|
+
"f": 0.8447880871
|
257
|
+
},
|
258
|
+
"PERSON": {
|
259
|
+
"p": 0.8617758186,
|
260
|
+
"r": 0.8932767624,
|
261
|
+
"f": 0.8772435897
|
262
|
+
},
|
263
|
+
"NORP": {
|
264
|
+
"p": 0.8957006369,
|
265
|
+
"r": 0.9,
|
266
|
+
"f": 0.8978451716
|
267
|
+
},
|
268
|
+
"ORDINAL": {
|
269
|
+
"p": 0.7844827586,
|
270
|
+
"r": 0.847826087,
|
271
|
+
"f": 0.8149253731
|
272
|
+
},
|
273
|
+
"QUANTITY": {
|
274
|
+
"p": 0.8529411765,
|
275
|
+
"r": 0.6373626374,
|
276
|
+
"f": 0.7295597484
|
277
|
+
},
|
278
|
+
"LOC": {
|
279
|
+
"p": 0.7210884354,
|
280
|
+
"r": 0.6751592357,
|
281
|
+
"f": 0.6973684211
|
282
|
+
},
|
283
|
+
"FAC": {
|
284
|
+
"p": 0.358490566,
|
285
|
+
"r": 0.2923076923,
|
286
|
+
"f": 0.3220338983
|
287
|
+
},
|
288
|
+
"TIME": {
|
289
|
+
"p": 0.7413793103,
|
290
|
+
"r": 0.7543859649,
|
291
|
+
"f": 0.747826087
|
292
|
+
},
|
293
|
+
"PRODUCT": {
|
294
|
+
"p": 0.5591397849,
|
295
|
+
"r": 0.2464454976,
|
296
|
+
"f": 0.3421052632
|
297
|
+
},
|
298
|
+
"WORK_OF_ART": {
|
299
|
+
"p": 0.4885496183,
|
300
|
+
"r": 0.3298969072,
|
301
|
+
"f": 0.3938461538
|
302
|
+
},
|
303
|
+
"EVENT": {
|
304
|
+
"p": 0.6428571429,
|
305
|
+
"r": 0.3103448276,
|
306
|
+
"f": 0.4186046512
|
307
|
+
},
|
308
|
+
"MONEY": {
|
309
|
+
"p": 0.9071428571,
|
310
|
+
"r": 0.8996458087,
|
311
|
+
"f": 0.9033787789
|
312
|
+
},
|
313
|
+
"LAW": {
|
314
|
+
"p": 0.5454545455,
|
315
|
+
"r": 0.46875,
|
316
|
+
"f": 0.5042016807
|
317
|
+
},
|
318
|
+
"PERCENT": {
|
319
|
+
"p": 0.9184,
|
320
|
+
"r": 0.8790199081,
|
321
|
+
"f": 0.8982785603
|
322
|
+
},
|
323
|
+
"LANGUAGE": {
|
324
|
+
"p": 0.8,
|
325
|
+
"r": 0.625,
|
326
|
+
"f": 0.701754386
|
327
|
+
}
|
328
|
+
},
|
329
|
+
"speed": 7920.0598120459
|
330
|
+
}
|
Binary file
|
@@ -0,0 +1,269 @@
|
|
1
|
+
[paths]
|
2
|
+
train = null
|
3
|
+
dev = null
|
4
|
+
vectors = null
|
5
|
+
init_tok2vec = null
|
6
|
+
|
7
|
+
[system]
|
8
|
+
gpu_allocator = null
|
9
|
+
seed = 0
|
10
|
+
|
11
|
+
[nlp]
|
12
|
+
lang = "en"
|
13
|
+
pipeline = ["tok2vec","tagger","parser","senter","attribute_ruler","lemmatizer","ner"]
|
14
|
+
disabled = ["senter"]
|
15
|
+
before_creation = null
|
16
|
+
after_creation = null
|
17
|
+
after_pipeline_creation = null
|
18
|
+
batch_size = 256
|
19
|
+
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
20
|
+
vectors = {"@vectors":"spacy.Vectors.v1"}
|
21
|
+
|
22
|
+
[components]
|
23
|
+
|
24
|
+
[components.attribute_ruler]
|
25
|
+
factory = "attribute_ruler"
|
26
|
+
scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
|
27
|
+
validate = false
|
28
|
+
|
29
|
+
[components.lemmatizer]
|
30
|
+
factory = "lemmatizer"
|
31
|
+
mode = "rule"
|
32
|
+
model = null
|
33
|
+
overwrite = false
|
34
|
+
scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
|
35
|
+
|
36
|
+
[components.ner]
|
37
|
+
factory = "ner"
|
38
|
+
incorrect_spans_key = null
|
39
|
+
moves = null
|
40
|
+
scorer = {"@scorers":"spacy.ner_scorer.v1"}
|
41
|
+
update_with_oracle_cut_size = 100
|
42
|
+
|
43
|
+
[components.ner.model]
|
44
|
+
@architectures = "spacy.TransitionBasedParser.v2"
|
45
|
+
state_type = "ner"
|
46
|
+
extra_state_tokens = false
|
47
|
+
hidden_width = 64
|
48
|
+
maxout_pieces = 2
|
49
|
+
use_upper = true
|
50
|
+
nO = null
|
51
|
+
|
52
|
+
[components.ner.model.tok2vec]
|
53
|
+
@architectures = "spacy.Tok2Vec.v2"
|
54
|
+
|
55
|
+
[components.ner.model.tok2vec.embed]
|
56
|
+
@architectures = "spacy.MultiHashEmbed.v2"
|
57
|
+
width = 96
|
58
|
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
|
59
|
+
rows = [5000,1000,2500,2500]
|
60
|
+
include_static_vectors = false
|
61
|
+
|
62
|
+
[components.ner.model.tok2vec.encode]
|
63
|
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
64
|
+
width = 96
|
65
|
+
depth = 4
|
66
|
+
window_size = 1
|
67
|
+
maxout_pieces = 3
|
68
|
+
|
69
|
+
[components.parser]
|
70
|
+
factory = "parser"
|
71
|
+
learn_tokens = false
|
72
|
+
min_action_freq = 30
|
73
|
+
moves = null
|
74
|
+
scorer = {"@scorers":"spacy.parser_scorer.v1"}
|
75
|
+
update_with_oracle_cut_size = 100
|
76
|
+
|
77
|
+
[components.parser.model]
|
78
|
+
@architectures = "spacy.TransitionBasedParser.v2"
|
79
|
+
state_type = "parser"
|
80
|
+
extra_state_tokens = false
|
81
|
+
hidden_width = 64
|
82
|
+
maxout_pieces = 2
|
83
|
+
use_upper = true
|
84
|
+
nO = null
|
85
|
+
|
86
|
+
[components.parser.model.tok2vec]
|
87
|
+
@architectures = "spacy.Tok2VecListener.v1"
|
88
|
+
width = ${components.tok2vec.model.encode:width}
|
89
|
+
upstream = "tok2vec"
|
90
|
+
|
91
|
+
[components.senter]
|
92
|
+
factory = "senter"
|
93
|
+
overwrite = false
|
94
|
+
scorer = {"@scorers":"spacy.senter_scorer.v1"}
|
95
|
+
|
96
|
+
[components.senter.model]
|
97
|
+
@architectures = "spacy.Tagger.v2"
|
98
|
+
nO = null
|
99
|
+
normalize = false
|
100
|
+
|
101
|
+
[components.senter.model.tok2vec]
|
102
|
+
@architectures = "spacy.Tok2Vec.v2"
|
103
|
+
|
104
|
+
[components.senter.model.tok2vec.embed]
|
105
|
+
@architectures = "spacy.MultiHashEmbed.v2"
|
106
|
+
width = 16
|
107
|
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY"]
|
108
|
+
rows = [1000,500,500,500,50]
|
109
|
+
include_static_vectors = false
|
110
|
+
|
111
|
+
[components.senter.model.tok2vec.encode]
|
112
|
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
113
|
+
width = 16
|
114
|
+
depth = 2
|
115
|
+
window_size = 1
|
116
|
+
maxout_pieces = 2
|
117
|
+
|
118
|
+
[components.tagger]
|
119
|
+
factory = "tagger"
|
120
|
+
label_smoothing = 0.0
|
121
|
+
neg_prefix = "!"
|
122
|
+
overwrite = false
|
123
|
+
scorer = {"@scorers":"spacy.tagger_scorer.v1"}
|
124
|
+
|
125
|
+
[components.tagger.model]
|
126
|
+
@architectures = "spacy.Tagger.v2"
|
127
|
+
nO = null
|
128
|
+
normalize = false
|
129
|
+
|
130
|
+
[components.tagger.model.tok2vec]
|
131
|
+
@architectures = "spacy.Tok2VecListener.v1"
|
132
|
+
width = ${components.tok2vec.model.encode:width}
|
133
|
+
upstream = "tok2vec"
|
134
|
+
|
135
|
+
[components.tok2vec]
|
136
|
+
factory = "tok2vec"
|
137
|
+
|
138
|
+
[components.tok2vec.model]
|
139
|
+
@architectures = "spacy.Tok2Vec.v2"
|
140
|
+
|
141
|
+
[components.tok2vec.model.embed]
|
142
|
+
@architectures = "spacy.MultiHashEmbed.v2"
|
143
|
+
width = ${components.tok2vec.model.encode:width}
|
144
|
+
attrs = ["NORM","PREFIX","SUFFIX","SHAPE","SPACY","IS_SPACE"]
|
145
|
+
rows = [5000,1000,2500,2500,50,50]
|
146
|
+
include_static_vectors = false
|
147
|
+
|
148
|
+
[components.tok2vec.model.encode]
|
149
|
+
@architectures = "spacy.MaxoutWindowEncoder.v2"
|
150
|
+
width = 96
|
151
|
+
depth = 4
|
152
|
+
window_size = 1
|
153
|
+
maxout_pieces = 3
|
154
|
+
|
155
|
+
[corpora]
|
156
|
+
|
157
|
+
[corpora.dev]
|
158
|
+
@readers = "spacy.Corpus.v1"
|
159
|
+
path = ${paths.dev}
|
160
|
+
gold_preproc = false
|
161
|
+
max_length = 0
|
162
|
+
limit = 0
|
163
|
+
augmenter = null
|
164
|
+
|
165
|
+
[corpora.train]
|
166
|
+
@readers = "spacy.Corpus.v1"
|
167
|
+
path = ${paths.train}
|
168
|
+
gold_preproc = false
|
169
|
+
max_length = 0
|
170
|
+
limit = 0
|
171
|
+
augmenter = null
|
172
|
+
|
173
|
+
[training]
|
174
|
+
train_corpus = "corpora.train"
|
175
|
+
dev_corpus = "corpora.dev"
|
176
|
+
seed = ${system:seed}
|
177
|
+
gpu_allocator = ${system:gpu_allocator}
|
178
|
+
dropout = 0.1
|
179
|
+
accumulate_gradient = 1
|
180
|
+
patience = 5000
|
181
|
+
max_epochs = 0
|
182
|
+
max_steps = 100000
|
183
|
+
eval_frequency = 1000
|
184
|
+
frozen_components = []
|
185
|
+
before_to_disk = null
|
186
|
+
annotating_components = []
|
187
|
+
before_update = null
|
188
|
+
|
189
|
+
[training.batcher]
|
190
|
+
@batchers = "spacy.batch_by_words.v1"
|
191
|
+
discard_oversize = false
|
192
|
+
tolerance = 0.2
|
193
|
+
get_length = null
|
194
|
+
|
195
|
+
[training.batcher.size]
|
196
|
+
@schedules = "compounding.v1"
|
197
|
+
start = 100
|
198
|
+
stop = 1000
|
199
|
+
compound = 1.001
|
200
|
+
t = 0.0
|
201
|
+
|
202
|
+
[training.logger]
|
203
|
+
@loggers = "spacy.ConsoleLogger.v1"
|
204
|
+
progress_bar = false
|
205
|
+
|
206
|
+
[training.optimizer]
|
207
|
+
@optimizers = "Adam.v1"
|
208
|
+
beta1 = 0.9
|
209
|
+
beta2 = 0.999
|
210
|
+
L2_is_weight_decay = true
|
211
|
+
L2 = 0.01
|
212
|
+
grad_clip = 1.0
|
213
|
+
use_averages = true
|
214
|
+
eps = 0.00000001
|
215
|
+
learn_rate = 0.001
|
216
|
+
|
217
|
+
[training.score_weights]
|
218
|
+
tag_acc = 0.16
|
219
|
+
dep_uas = 0.0
|
220
|
+
dep_las = 0.16
|
221
|
+
dep_las_per_type = null
|
222
|
+
sents_p = null
|
223
|
+
sents_r = null
|
224
|
+
sents_f = 0.02
|
225
|
+
lemma_acc = 0.5
|
226
|
+
ents_f = 0.16
|
227
|
+
ents_p = 0.0
|
228
|
+
ents_r = 0.0
|
229
|
+
ents_per_type = null
|
230
|
+
speed = 0.0
|
231
|
+
|
232
|
+
[pretraining]
|
233
|
+
|
234
|
+
[initialize]
|
235
|
+
vocab_data = null
|
236
|
+
vectors = ${paths.vectors}
|
237
|
+
init_tok2vec = ${paths.init_tok2vec}
|
238
|
+
before_init = null
|
239
|
+
after_init = null
|
240
|
+
|
241
|
+
[initialize.components]
|
242
|
+
|
243
|
+
[initialize.components.ner]
|
244
|
+
|
245
|
+
[initialize.components.ner.labels]
|
246
|
+
@readers = "spacy.read_labels.v1"
|
247
|
+
path = "corpus/labels/ner.json"
|
248
|
+
require = false
|
249
|
+
|
250
|
+
[initialize.components.parser]
|
251
|
+
|
252
|
+
[initialize.components.parser.labels]
|
253
|
+
@readers = "spacy.read_labels.v1"
|
254
|
+
path = "corpus/labels/parser.json"
|
255
|
+
require = false
|
256
|
+
|
257
|
+
[initialize.components.tagger]
|
258
|
+
|
259
|
+
[initialize.components.tagger.labels]
|
260
|
+
@readers = "spacy.read_labels.v1"
|
261
|
+
path = "corpus/labels/tagger.json"
|
262
|
+
require = false
|
263
|
+
|
264
|
+
[initialize.lookups]
|
265
|
+
@misc = "spacy.LookupsDataLoader.v1"
|
266
|
+
lang = ${nlp.lang}
|
267
|
+
tables = ["lexeme_norm"]
|
268
|
+
|
269
|
+
[initialize.tokenizer]
|