dasein-core 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dasein/api.py +1202 -133
- dasein/capture.py +2379 -1803
- dasein/microturn.py +475 -0
- dasein/models/en_core_web_sm/en_core_web_sm/__init__.py +10 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE +19 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES +66 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md +47 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/accuracy.json +330 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/attribute_ruler/patterns +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/config.cfg +269 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/lemmatizer/lookups/lookups.bin +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/meta.json +521 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/cfg +13 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/moves +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/cfg +13 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/moves +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/cfg +57 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tokenizer +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/key2row +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/lookups.bin +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/strings.json +84782 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors.cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/meta.json +521 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSE +19 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSES_SOURCES +66 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/METADATA +59 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/RECORD +35 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/WHEEL +5 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/entry_points.txt +2 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/top_level.txt +1 -0
- dasein/pipecleaner.py +1917 -0
- dasein/wrappers.py +314 -0
- {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/METADATA +4 -1
- dasein_core-0.2.9.dist-info/RECORD +59 -0
- dasein_core-0.2.7.dist-info/RECORD +0 -21
- {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/WHEEL +0 -0
- {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,521 @@
|
|
1
|
+
{
|
2
|
+
"lang":"en",
|
3
|
+
"name":"core_web_sm",
|
4
|
+
"version":"3.7.1",
|
5
|
+
"description":"English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.",
|
6
|
+
"author":"Explosion",
|
7
|
+
"email":"contact@explosion.ai",
|
8
|
+
"url":"https://explosion.ai",
|
9
|
+
"license":"MIT",
|
10
|
+
"spacy_version":">=3.7.2,<3.8.0",
|
11
|
+
"spacy_git_version":"bd2c17e20",
|
12
|
+
"vectors":{
|
13
|
+
"width":0,
|
14
|
+
"vectors":0,
|
15
|
+
"keys":0,
|
16
|
+
"name":null
|
17
|
+
},
|
18
|
+
"labels":{
|
19
|
+
"tok2vec":[
|
20
|
+
|
21
|
+
],
|
22
|
+
"tagger":[
|
23
|
+
"$",
|
24
|
+
"''",
|
25
|
+
",",
|
26
|
+
"-LRB-",
|
27
|
+
"-RRB-",
|
28
|
+
".",
|
29
|
+
":",
|
30
|
+
"ADD",
|
31
|
+
"AFX",
|
32
|
+
"CC",
|
33
|
+
"CD",
|
34
|
+
"DT",
|
35
|
+
"EX",
|
36
|
+
"FW",
|
37
|
+
"HYPH",
|
38
|
+
"IN",
|
39
|
+
"JJ",
|
40
|
+
"JJR",
|
41
|
+
"JJS",
|
42
|
+
"LS",
|
43
|
+
"MD",
|
44
|
+
"NFP",
|
45
|
+
"NN",
|
46
|
+
"NNP",
|
47
|
+
"NNPS",
|
48
|
+
"NNS",
|
49
|
+
"PDT",
|
50
|
+
"POS",
|
51
|
+
"PRP",
|
52
|
+
"PRP$",
|
53
|
+
"RB",
|
54
|
+
"RBR",
|
55
|
+
"RBS",
|
56
|
+
"RP",
|
57
|
+
"SYM",
|
58
|
+
"TO",
|
59
|
+
"UH",
|
60
|
+
"VB",
|
61
|
+
"VBD",
|
62
|
+
"VBG",
|
63
|
+
"VBN",
|
64
|
+
"VBP",
|
65
|
+
"VBZ",
|
66
|
+
"WDT",
|
67
|
+
"WP",
|
68
|
+
"WP$",
|
69
|
+
"WRB",
|
70
|
+
"XX",
|
71
|
+
"_SP",
|
72
|
+
"``"
|
73
|
+
],
|
74
|
+
"parser":[
|
75
|
+
"ROOT",
|
76
|
+
"acl",
|
77
|
+
"acomp",
|
78
|
+
"advcl",
|
79
|
+
"advmod",
|
80
|
+
"agent",
|
81
|
+
"amod",
|
82
|
+
"appos",
|
83
|
+
"attr",
|
84
|
+
"aux",
|
85
|
+
"auxpass",
|
86
|
+
"case",
|
87
|
+
"cc",
|
88
|
+
"ccomp",
|
89
|
+
"compound",
|
90
|
+
"conj",
|
91
|
+
"csubj",
|
92
|
+
"csubjpass",
|
93
|
+
"dative",
|
94
|
+
"dep",
|
95
|
+
"det",
|
96
|
+
"dobj",
|
97
|
+
"expl",
|
98
|
+
"intj",
|
99
|
+
"mark",
|
100
|
+
"meta",
|
101
|
+
"neg",
|
102
|
+
"nmod",
|
103
|
+
"npadvmod",
|
104
|
+
"nsubj",
|
105
|
+
"nsubjpass",
|
106
|
+
"nummod",
|
107
|
+
"oprd",
|
108
|
+
"parataxis",
|
109
|
+
"pcomp",
|
110
|
+
"pobj",
|
111
|
+
"poss",
|
112
|
+
"preconj",
|
113
|
+
"predet",
|
114
|
+
"prep",
|
115
|
+
"prt",
|
116
|
+
"punct",
|
117
|
+
"quantmod",
|
118
|
+
"relcl",
|
119
|
+
"xcomp"
|
120
|
+
],
|
121
|
+
"attribute_ruler":[
|
122
|
+
|
123
|
+
],
|
124
|
+
"lemmatizer":[
|
125
|
+
|
126
|
+
],
|
127
|
+
"ner":[
|
128
|
+
"CARDINAL",
|
129
|
+
"DATE",
|
130
|
+
"EVENT",
|
131
|
+
"FAC",
|
132
|
+
"GPE",
|
133
|
+
"LANGUAGE",
|
134
|
+
"LAW",
|
135
|
+
"LOC",
|
136
|
+
"MONEY",
|
137
|
+
"NORP",
|
138
|
+
"ORDINAL",
|
139
|
+
"ORG",
|
140
|
+
"PERCENT",
|
141
|
+
"PERSON",
|
142
|
+
"PRODUCT",
|
143
|
+
"QUANTITY",
|
144
|
+
"TIME",
|
145
|
+
"WORK_OF_ART"
|
146
|
+
]
|
147
|
+
},
|
148
|
+
"pipeline":[
|
149
|
+
"tok2vec",
|
150
|
+
"tagger",
|
151
|
+
"parser",
|
152
|
+
"attribute_ruler",
|
153
|
+
"lemmatizer",
|
154
|
+
"ner"
|
155
|
+
],
|
156
|
+
"components":[
|
157
|
+
"tok2vec",
|
158
|
+
"tagger",
|
159
|
+
"parser",
|
160
|
+
"senter",
|
161
|
+
"attribute_ruler",
|
162
|
+
"lemmatizer",
|
163
|
+
"ner"
|
164
|
+
],
|
165
|
+
"disabled":[
|
166
|
+
"senter"
|
167
|
+
],
|
168
|
+
"performance":{
|
169
|
+
"token_acc":0.9986194413,
|
170
|
+
"token_p":0.9956819193,
|
171
|
+
"token_r":0.9957659295,
|
172
|
+
"token_f":0.9957239226,
|
173
|
+
"tag_acc":0.97246532,
|
174
|
+
"sents_p":0.9201877934,
|
175
|
+
"sents_r":0.8921432812,
|
176
|
+
"sents_f":0.9059485531,
|
177
|
+
"dep_uas":0.9175304332,
|
178
|
+
"dep_las":0.89874821,
|
179
|
+
"dep_las_per_type":{
|
180
|
+
"prep":{
|
181
|
+
"p":0.853521338,
|
182
|
+
"r":0.8635932461,
|
183
|
+
"f":0.8585277532
|
184
|
+
},
|
185
|
+
"det":{
|
186
|
+
"p":0.9763930156,
|
187
|
+
"r":0.9781048683,
|
188
|
+
"f":0.9772481923
|
189
|
+
},
|
190
|
+
"pobj":{
|
191
|
+
"p":0.9613764045,
|
192
|
+
"r":0.967681131,
|
193
|
+
"f":0.9645184649
|
194
|
+
},
|
195
|
+
"nsubj":{
|
196
|
+
"p":0.9565737052,
|
197
|
+
"r":0.9467250821,
|
198
|
+
"f":0.9516239128
|
199
|
+
},
|
200
|
+
"aux":{
|
201
|
+
"p":0.9815061794,
|
202
|
+
"r":0.9827294578,
|
203
|
+
"f":0.9821174377
|
204
|
+
},
|
205
|
+
"advmod":{
|
206
|
+
"p":0.8548033091,
|
207
|
+
"r":0.8519266364,
|
208
|
+
"f":0.8533625485
|
209
|
+
},
|
210
|
+
"relcl":{
|
211
|
+
"p":0.7571736011,
|
212
|
+
"r":0.7659651669,
|
213
|
+
"f":0.7615440115
|
214
|
+
},
|
215
|
+
"root":{
|
216
|
+
"p":0.9195942266,
|
217
|
+
"r":0.8910218352,
|
218
|
+
"f":0.9050825879
|
219
|
+
},
|
220
|
+
"xcomp":{
|
221
|
+
"p":0.8836222144,
|
222
|
+
"r":0.8966259871,
|
223
|
+
"f":0.8900766079
|
224
|
+
},
|
225
|
+
"amod":{
|
226
|
+
"p":0.9174389766,
|
227
|
+
"r":0.9107223842,
|
228
|
+
"f":0.9140683422
|
229
|
+
},
|
230
|
+
"compound":{
|
231
|
+
"p":0.9126489559,
|
232
|
+
"r":0.9298284696,
|
233
|
+
"f":0.9211586207
|
234
|
+
},
|
235
|
+
"poss":{
|
236
|
+
"p":0.9739583333,
|
237
|
+
"r":0.9786634461,
|
238
|
+
"f":0.9763052209
|
239
|
+
},
|
240
|
+
"ccomp":{
|
241
|
+
"p":0.7671207315,
|
242
|
+
"r":0.8372708758,
|
243
|
+
"f":0.8006621872
|
244
|
+
},
|
245
|
+
"attr":{
|
246
|
+
"p":0.899837794,
|
247
|
+
"r":0.93313709,
|
248
|
+
"f":0.9161849711
|
249
|
+
},
|
250
|
+
"case":{
|
251
|
+
"p":0.9787549407,
|
252
|
+
"r":0.9914914915,
|
253
|
+
"f":0.9850820487
|
254
|
+
},
|
255
|
+
"mark":{
|
256
|
+
"p":0.9068783069,
|
257
|
+
"r":0.9083200848,
|
258
|
+
"f":0.9075986232
|
259
|
+
},
|
260
|
+
"intj":{
|
261
|
+
"p":0.6717131474,
|
262
|
+
"r":0.6175824176,
|
263
|
+
"f":0.6435114504
|
264
|
+
},
|
265
|
+
"advcl":{
|
266
|
+
"p":0.6633986928,
|
267
|
+
"r":0.6645681189,
|
268
|
+
"f":0.6639828909
|
269
|
+
},
|
270
|
+
"cc":{
|
271
|
+
"p":0.8323511726,
|
272
|
+
"r":0.8277717976,
|
273
|
+
"f":0.8300551691
|
274
|
+
},
|
275
|
+
"neg":{
|
276
|
+
"p":0.9466865969,
|
277
|
+
"r":0.9533366784,
|
278
|
+
"f":0.95
|
279
|
+
},
|
280
|
+
"conj":{
|
281
|
+
"p":0.7567333828,
|
282
|
+
"r":0.7710221551,
|
283
|
+
"f":0.763810949
|
284
|
+
},
|
285
|
+
"nsubjpass":{
|
286
|
+
"p":0.9182939363,
|
287
|
+
"r":0.9164102564,
|
288
|
+
"f":0.9173511294
|
289
|
+
},
|
290
|
+
"auxpass":{
|
291
|
+
"p":0.9501335708,
|
292
|
+
"r":0.9722095672,
|
293
|
+
"f":0.9610448097
|
294
|
+
},
|
295
|
+
"dobj":{
|
296
|
+
"p":0.9229805886,
|
297
|
+
"r":0.9396764682,
|
298
|
+
"f":0.9312537019
|
299
|
+
},
|
300
|
+
"nummod":{
|
301
|
+
"p":0.9379292801,
|
302
|
+
"r":0.9310606061,
|
303
|
+
"f":0.9344823216
|
304
|
+
},
|
305
|
+
"npadvmod":{
|
306
|
+
"p":0.7629658087,
|
307
|
+
"r":0.7055062167,
|
308
|
+
"f":0.7331118494
|
309
|
+
},
|
310
|
+
"prt":{
|
311
|
+
"p":0.8118323747,
|
312
|
+
"r":0.8853046595,
|
313
|
+
"f":0.8469781397
|
314
|
+
},
|
315
|
+
"pcomp":{
|
316
|
+
"p":0.8835714286,
|
317
|
+
"r":0.8662464986,
|
318
|
+
"f":0.8748231966
|
319
|
+
},
|
320
|
+
"expl":{
|
321
|
+
"p":0.9851380042,
|
322
|
+
"r":0.9935760171,
|
323
|
+
"f":0.9893390192
|
324
|
+
},
|
325
|
+
"acl":{
|
326
|
+
"p":0.742010459,
|
327
|
+
"r":0.6966721222,
|
328
|
+
"f":0.7186268993
|
329
|
+
},
|
330
|
+
"agent":{
|
331
|
+
"p":0.9034482759,
|
332
|
+
"r":0.9390681004,
|
333
|
+
"f":0.920913884
|
334
|
+
},
|
335
|
+
"dative":{
|
336
|
+
"p":0.8,
|
337
|
+
"r":0.6972477064,
|
338
|
+
"f":0.7450980392
|
339
|
+
},
|
340
|
+
"acomp":{
|
341
|
+
"p":0.9020594966,
|
342
|
+
"r":0.893877551,
|
343
|
+
"f":0.8979498861
|
344
|
+
},
|
345
|
+
"dep":{
|
346
|
+
"p":0.4147286822,
|
347
|
+
"r":0.1737012987,
|
348
|
+
"f":0.2448512586
|
349
|
+
},
|
350
|
+
"csubj":{
|
351
|
+
"p":0.6983240223,
|
352
|
+
"r":0.7396449704,
|
353
|
+
"f":0.7183908046
|
354
|
+
},
|
355
|
+
"quantmod":{
|
356
|
+
"p":0.8727436823,
|
357
|
+
"r":0.7855402112,
|
358
|
+
"f":0.8268490808
|
359
|
+
},
|
360
|
+
"nmod":{
|
361
|
+
"p":0.7498033045,
|
362
|
+
"r":0.5807434491,
|
363
|
+
"f":0.654532967
|
364
|
+
},
|
365
|
+
"appos":{
|
366
|
+
"p":0.7048498845,
|
367
|
+
"r":0.6620390456,
|
368
|
+
"f":0.6827740492
|
369
|
+
},
|
370
|
+
"predet":{
|
371
|
+
"p":0.8299595142,
|
372
|
+
"r":0.8798283262,
|
373
|
+
"f":0.8541666667
|
374
|
+
},
|
375
|
+
"preconj":{
|
376
|
+
"p":0.5544554455,
|
377
|
+
"r":0.6511627907,
|
378
|
+
"f":0.5989304813
|
379
|
+
},
|
380
|
+
"oprd":{
|
381
|
+
"p":0.8013245033,
|
382
|
+
"r":0.7223880597,
|
383
|
+
"f":0.759811617
|
384
|
+
},
|
385
|
+
"parataxis":{
|
386
|
+
"p":0.6428571429,
|
387
|
+
"r":0.4880694143,
|
388
|
+
"f":0.5548705302
|
389
|
+
},
|
390
|
+
"meta":{
|
391
|
+
"p":0.3770491803,
|
392
|
+
"r":0.4423076923,
|
393
|
+
"f":0.407079646
|
394
|
+
},
|
395
|
+
"csubjpass":{
|
396
|
+
"p":0.5555555556,
|
397
|
+
"r":0.8333333333,
|
398
|
+
"f":0.6666666667
|
399
|
+
}
|
400
|
+
},
|
401
|
+
"ents_p":0.8454836771,
|
402
|
+
"ents_r":0.8456530449,
|
403
|
+
"ents_f":0.8455683525,
|
404
|
+
"ents_per_type":{
|
405
|
+
"DATE":{
|
406
|
+
"p":0.8603213844,
|
407
|
+
"r":0.8838095238,
|
408
|
+
"f":0.8719072972
|
409
|
+
},
|
410
|
+
"GPE":{
|
411
|
+
"p":0.9146932953,
|
412
|
+
"r":0.8942817294,
|
413
|
+
"f":0.9043723554
|
414
|
+
},
|
415
|
+
"ORG":{
|
416
|
+
"p":0.7955942623,
|
417
|
+
"r":0.8234358431,
|
418
|
+
"f":0.8092756644
|
419
|
+
},
|
420
|
+
"CARDINAL":{
|
421
|
+
"p":0.8149171271,
|
422
|
+
"r":0.8769322235,
|
423
|
+
"f":0.8447880871
|
424
|
+
},
|
425
|
+
"PERSON":{
|
426
|
+
"p":0.8617758186,
|
427
|
+
"r":0.8932767624,
|
428
|
+
"f":0.8772435897
|
429
|
+
},
|
430
|
+
"NORP":{
|
431
|
+
"p":0.8957006369,
|
432
|
+
"r":0.9,
|
433
|
+
"f":0.8978451716
|
434
|
+
},
|
435
|
+
"ORDINAL":{
|
436
|
+
"p":0.7844827586,
|
437
|
+
"r":0.847826087,
|
438
|
+
"f":0.8149253731
|
439
|
+
},
|
440
|
+
"QUANTITY":{
|
441
|
+
"p":0.8529411765,
|
442
|
+
"r":0.6373626374,
|
443
|
+
"f":0.7295597484
|
444
|
+
},
|
445
|
+
"LOC":{
|
446
|
+
"p":0.7210884354,
|
447
|
+
"r":0.6751592357,
|
448
|
+
"f":0.6973684211
|
449
|
+
},
|
450
|
+
"FAC":{
|
451
|
+
"p":0.358490566,
|
452
|
+
"r":0.2923076923,
|
453
|
+
"f":0.3220338983
|
454
|
+
},
|
455
|
+
"TIME":{
|
456
|
+
"p":0.7413793103,
|
457
|
+
"r":0.7543859649,
|
458
|
+
"f":0.747826087
|
459
|
+
},
|
460
|
+
"PRODUCT":{
|
461
|
+
"p":0.5591397849,
|
462
|
+
"r":0.2464454976,
|
463
|
+
"f":0.3421052632
|
464
|
+
},
|
465
|
+
"WORK_OF_ART":{
|
466
|
+
"p":0.4885496183,
|
467
|
+
"r":0.3298969072,
|
468
|
+
"f":0.3938461538
|
469
|
+
},
|
470
|
+
"EVENT":{
|
471
|
+
"p":0.6428571429,
|
472
|
+
"r":0.3103448276,
|
473
|
+
"f":0.4186046512
|
474
|
+
},
|
475
|
+
"MONEY":{
|
476
|
+
"p":0.9071428571,
|
477
|
+
"r":0.8996458087,
|
478
|
+
"f":0.9033787789
|
479
|
+
},
|
480
|
+
"LAW":{
|
481
|
+
"p":0.5454545455,
|
482
|
+
"r":0.46875,
|
483
|
+
"f":0.5042016807
|
484
|
+
},
|
485
|
+
"PERCENT":{
|
486
|
+
"p":0.9184,
|
487
|
+
"r":0.8790199081,
|
488
|
+
"f":0.8982785603
|
489
|
+
},
|
490
|
+
"LANGUAGE":{
|
491
|
+
"p":0.8,
|
492
|
+
"r":0.625,
|
493
|
+
"f":0.701754386
|
494
|
+
}
|
495
|
+
},
|
496
|
+
"speed":7920.0598120459
|
497
|
+
},
|
498
|
+
"sources":[
|
499
|
+
{
|
500
|
+
"name":"OntoNotes 5",
|
501
|
+
"url":"https://catalog.ldc.upenn.edu/LDC2013T19",
|
502
|
+
"license":"commercial (licensed by Explosion)",
|
503
|
+
"author":"Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston"
|
504
|
+
},
|
505
|
+
{
|
506
|
+
"name":"ClearNLP Constituent-to-Dependency Conversion",
|
507
|
+
"url":"https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md",
|
508
|
+
"license":"Citation provided for reference, no code packaged with model",
|
509
|
+
"author":"Emory University"
|
510
|
+
},
|
511
|
+
{
|
512
|
+
"name":"WordNet 3.0",
|
513
|
+
"url":"https://wordnet.princeton.edu/",
|
514
|
+
"author":"Princeton University",
|
515
|
+
"license":"WordNet 3.0 License"
|
516
|
+
}
|
517
|
+
],
|
518
|
+
"requirements":[
|
519
|
+
|
520
|
+
]
|
521
|
+
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright 2021 ExplosionAI GmbH
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
4
|
+
this software and associated documentation files (the "Software"), to deal in
|
5
|
+
the Software without restriction, including without limitation the rights to
|
6
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
7
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
8
|
+
so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# OntoNotes 5
|
2
|
+
|
3
|
+
* Author: Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston
|
4
|
+
* URL: https://catalog.ldc.upenn.edu/LDC2013T19
|
5
|
+
* License: commercial (licensed by Explosion)
|
6
|
+
|
7
|
+
```
|
8
|
+
```
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
# ClearNLP Constituent-to-Dependency Conversion
|
14
|
+
|
15
|
+
* Author: Emory University
|
16
|
+
* URL: https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md
|
17
|
+
* License: Citation provided for reference, no code packaged with model
|
18
|
+
|
19
|
+
```
|
20
|
+
```
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
# WordNet 3.0
|
26
|
+
|
27
|
+
* Author: Princeton University
|
28
|
+
* URL: https://wordnet.princeton.edu/
|
29
|
+
* License: WordNet 3.0 License
|
30
|
+
|
31
|
+
```
|
32
|
+
WordNet Release 3.0
|
33
|
+
|
34
|
+
This software and database is being provided to you, the LICENSEE, by
|
35
|
+
Princeton University under the following license. By obtaining, using
|
36
|
+
and/or copying this software and database, you agree that you have
|
37
|
+
read, understood, and will comply with these terms and conditions.:
|
38
|
+
|
39
|
+
Permission to use, copy, modify and distribute this software and
|
40
|
+
database and its documentation for any purpose and without fee or
|
41
|
+
royalty is hereby granted, provided that you agree to comply with
|
42
|
+
the following copyright notice and statements, including the disclaimer,
|
43
|
+
and that the same appear on ALL copies of the software, database and
|
44
|
+
documentation, including modifications that you make for internal
|
45
|
+
use or for distribution.
|
46
|
+
|
47
|
+
WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
|
48
|
+
|
49
|
+
THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
|
50
|
+
UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
|
51
|
+
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
|
52
|
+
UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
|
53
|
+
ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
|
54
|
+
OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
|
55
|
+
INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
|
56
|
+
OTHER RIGHTS.
|
57
|
+
|
58
|
+
The name of Princeton University or Princeton may not be used in
|
59
|
+
advertising or publicity pertaining to distribution of the software
|
60
|
+
and/or database. Title to copyright in this software, database and
|
61
|
+
any associated documentation shall at all times remain with
|
62
|
+
Princeton University and LICENSEE agrees to preserve same.```
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: en-core-web-sm
|
3
|
+
Version: 3.7.1
|
4
|
+
Summary: English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.
|
5
|
+
Home-page: https://explosion.ai
|
6
|
+
Author: Explosion
|
7
|
+
Author-email: contact@explosion.ai
|
8
|
+
License: MIT
|
9
|
+
License-File: LICENSE
|
10
|
+
License-File: LICENSES_SOURCES
|
11
|
+
Requires-Dist: spacy <3.8.0,>=3.7.2
|
12
|
+
|
13
|
+
### Details: https://spacy.io/models/en#en_core_web_sm
|
14
|
+
|
15
|
+
English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.
|
16
|
+
|
17
|
+
| Feature | Description |
|
18
|
+
| --- | --- |
|
19
|
+
| **Name** | `en_core_web_sm` |
|
20
|
+
| **Version** | `3.7.1` |
|
21
|
+
| **spaCy** | `>=3.7.2,<3.8.0` |
|
22
|
+
| **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
|
23
|
+
| **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
|
24
|
+
| **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
|
25
|
+
| **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br />[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br />[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University) |
|
26
|
+
| **License** | `MIT` |
|
27
|
+
| **Author** | [Explosion](https://explosion.ai) |
|
28
|
+
|
29
|
+
### Label Scheme
|
30
|
+
|
31
|
+
<details>
|
32
|
+
|
33
|
+
<summary>View label scheme (113 labels for 3 components)</summary>
|
34
|
+
|
35
|
+
| Component | Labels |
|
36
|
+
| --- | --- |
|
37
|
+
| **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, `_SP`, ``` `` ``` |
|
38
|
+
| **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
|
39
|
+
| **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |
|
40
|
+
|
41
|
+
</details>
|
42
|
+
|
43
|
+
### Accuracy
|
44
|
+
|
45
|
+
| Type | Score |
|
46
|
+
| --- | --- |
|
47
|
+
| `TOKEN_ACC` | 99.86 |
|
48
|
+
| `TOKEN_P` | 99.57 |
|
49
|
+
| `TOKEN_R` | 99.58 |
|
50
|
+
| `TOKEN_F` | 99.57 |
|
51
|
+
| `TAG_ACC` | 97.25 |
|
52
|
+
| `SENTS_P` | 92.02 |
|
53
|
+
| `SENTS_R` | 89.21 |
|
54
|
+
| `SENTS_F` | 90.59 |
|
55
|
+
| `DEP_UAS` | 91.75 |
|
56
|
+
| `DEP_LAS` | 89.87 |
|
57
|
+
| `ENTS_P` | 84.55 |
|
58
|
+
| `ENTS_R` | 84.57 |
|
59
|
+
| `ENTS_F` | 84.56 |
|