dasein-core 0.2.7__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. dasein/api.py +1144 -133
  2. dasein/capture.py +2325 -1803
  3. dasein/microturn.py +475 -0
  4. dasein/models/en_core_web_sm/en_core_web_sm/__init__.py +10 -0
  5. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE +19 -0
  6. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES +66 -0
  7. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md +47 -0
  8. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/accuracy.json +330 -0
  9. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/attribute_ruler/patterns +0 -0
  10. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/config.cfg +269 -0
  11. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/lemmatizer/lookups/lookups.bin +1 -0
  12. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/meta.json +521 -0
  13. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/cfg +13 -0
  14. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/model +0 -0
  15. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/moves +1 -0
  16. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/cfg +13 -0
  17. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/model +0 -0
  18. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/moves +1 -0
  19. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/cfg +3 -0
  20. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/model +0 -0
  21. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/cfg +57 -0
  22. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/model +0 -0
  23. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/cfg +3 -0
  24. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/model +0 -0
  25. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tokenizer +3 -0
  26. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/key2row +1 -0
  27. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/lookups.bin +0 -0
  28. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/strings.json +84782 -0
  29. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors +0 -0
  30. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors.cfg +3 -0
  31. dasein/models/en_core_web_sm/en_core_web_sm/meta.json +521 -0
  32. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSE +19 -0
  33. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSES_SOURCES +66 -0
  34. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/METADATA +59 -0
  35. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/RECORD +35 -0
  36. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/WHEEL +5 -0
  37. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/entry_points.txt +2 -0
  38. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/top_level.txt +1 -0
  39. dasein/pipecleaner.py +1917 -0
  40. dasein/wrappers.py +315 -0
  41. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/METADATA +4 -1
  42. dasein_core-0.2.10.dist-info/RECORD +59 -0
  43. dasein_core-0.2.7.dist-info/RECORD +0 -21
  44. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/WHEEL +0 -0
  45. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/licenses/LICENSE +0 -0
  46. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,521 @@
1
+ {
2
+ "lang":"en",
3
+ "name":"core_web_sm",
4
+ "version":"3.7.1",
5
+ "description":"English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.",
6
+ "author":"Explosion",
7
+ "email":"contact@explosion.ai",
8
+ "url":"https://explosion.ai",
9
+ "license":"MIT",
10
+ "spacy_version":">=3.7.2,<3.8.0",
11
+ "spacy_git_version":"bd2c17e20",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "tok2vec":[
20
+
21
+ ],
22
+ "tagger":[
23
+ "$",
24
+ "''",
25
+ ",",
26
+ "-LRB-",
27
+ "-RRB-",
28
+ ".",
29
+ ":",
30
+ "ADD",
31
+ "AFX",
32
+ "CC",
33
+ "CD",
34
+ "DT",
35
+ "EX",
36
+ "FW",
37
+ "HYPH",
38
+ "IN",
39
+ "JJ",
40
+ "JJR",
41
+ "JJS",
42
+ "LS",
43
+ "MD",
44
+ "NFP",
45
+ "NN",
46
+ "NNP",
47
+ "NNPS",
48
+ "NNS",
49
+ "PDT",
50
+ "POS",
51
+ "PRP",
52
+ "PRP$",
53
+ "RB",
54
+ "RBR",
55
+ "RBS",
56
+ "RP",
57
+ "SYM",
58
+ "TO",
59
+ "UH",
60
+ "VB",
61
+ "VBD",
62
+ "VBG",
63
+ "VBN",
64
+ "VBP",
65
+ "VBZ",
66
+ "WDT",
67
+ "WP",
68
+ "WP$",
69
+ "WRB",
70
+ "XX",
71
+ "_SP",
72
+ "``"
73
+ ],
74
+ "parser":[
75
+ "ROOT",
76
+ "acl",
77
+ "acomp",
78
+ "advcl",
79
+ "advmod",
80
+ "agent",
81
+ "amod",
82
+ "appos",
83
+ "attr",
84
+ "aux",
85
+ "auxpass",
86
+ "case",
87
+ "cc",
88
+ "ccomp",
89
+ "compound",
90
+ "conj",
91
+ "csubj",
92
+ "csubjpass",
93
+ "dative",
94
+ "dep",
95
+ "det",
96
+ "dobj",
97
+ "expl",
98
+ "intj",
99
+ "mark",
100
+ "meta",
101
+ "neg",
102
+ "nmod",
103
+ "npadvmod",
104
+ "nsubj",
105
+ "nsubjpass",
106
+ "nummod",
107
+ "oprd",
108
+ "parataxis",
109
+ "pcomp",
110
+ "pobj",
111
+ "poss",
112
+ "preconj",
113
+ "predet",
114
+ "prep",
115
+ "prt",
116
+ "punct",
117
+ "quantmod",
118
+ "relcl",
119
+ "xcomp"
120
+ ],
121
+ "attribute_ruler":[
122
+
123
+ ],
124
+ "lemmatizer":[
125
+
126
+ ],
127
+ "ner":[
128
+ "CARDINAL",
129
+ "DATE",
130
+ "EVENT",
131
+ "FAC",
132
+ "GPE",
133
+ "LANGUAGE",
134
+ "LAW",
135
+ "LOC",
136
+ "MONEY",
137
+ "NORP",
138
+ "ORDINAL",
139
+ "ORG",
140
+ "PERCENT",
141
+ "PERSON",
142
+ "PRODUCT",
143
+ "QUANTITY",
144
+ "TIME",
145
+ "WORK_OF_ART"
146
+ ]
147
+ },
148
+ "pipeline":[
149
+ "tok2vec",
150
+ "tagger",
151
+ "parser",
152
+ "attribute_ruler",
153
+ "lemmatizer",
154
+ "ner"
155
+ ],
156
+ "components":[
157
+ "tok2vec",
158
+ "tagger",
159
+ "parser",
160
+ "senter",
161
+ "attribute_ruler",
162
+ "lemmatizer",
163
+ "ner"
164
+ ],
165
+ "disabled":[
166
+ "senter"
167
+ ],
168
+ "performance":{
169
+ "token_acc":0.9986194413,
170
+ "token_p":0.9956819193,
171
+ "token_r":0.9957659295,
172
+ "token_f":0.9957239226,
173
+ "tag_acc":0.97246532,
174
+ "sents_p":0.9201877934,
175
+ "sents_r":0.8921432812,
176
+ "sents_f":0.9059485531,
177
+ "dep_uas":0.9175304332,
178
+ "dep_las":0.89874821,
179
+ "dep_las_per_type":{
180
+ "prep":{
181
+ "p":0.853521338,
182
+ "r":0.8635932461,
183
+ "f":0.8585277532
184
+ },
185
+ "det":{
186
+ "p":0.9763930156,
187
+ "r":0.9781048683,
188
+ "f":0.9772481923
189
+ },
190
+ "pobj":{
191
+ "p":0.9613764045,
192
+ "r":0.967681131,
193
+ "f":0.9645184649
194
+ },
195
+ "nsubj":{
196
+ "p":0.9565737052,
197
+ "r":0.9467250821,
198
+ "f":0.9516239128
199
+ },
200
+ "aux":{
201
+ "p":0.9815061794,
202
+ "r":0.9827294578,
203
+ "f":0.9821174377
204
+ },
205
+ "advmod":{
206
+ "p":0.8548033091,
207
+ "r":0.8519266364,
208
+ "f":0.8533625485
209
+ },
210
+ "relcl":{
211
+ "p":0.7571736011,
212
+ "r":0.7659651669,
213
+ "f":0.7615440115
214
+ },
215
+ "root":{
216
+ "p":0.9195942266,
217
+ "r":0.8910218352,
218
+ "f":0.9050825879
219
+ },
220
+ "xcomp":{
221
+ "p":0.8836222144,
222
+ "r":0.8966259871,
223
+ "f":0.8900766079
224
+ },
225
+ "amod":{
226
+ "p":0.9174389766,
227
+ "r":0.9107223842,
228
+ "f":0.9140683422
229
+ },
230
+ "compound":{
231
+ "p":0.9126489559,
232
+ "r":0.9298284696,
233
+ "f":0.9211586207
234
+ },
235
+ "poss":{
236
+ "p":0.9739583333,
237
+ "r":0.9786634461,
238
+ "f":0.9763052209
239
+ },
240
+ "ccomp":{
241
+ "p":0.7671207315,
242
+ "r":0.8372708758,
243
+ "f":0.8006621872
244
+ },
245
+ "attr":{
246
+ "p":0.899837794,
247
+ "r":0.93313709,
248
+ "f":0.9161849711
249
+ },
250
+ "case":{
251
+ "p":0.9787549407,
252
+ "r":0.9914914915,
253
+ "f":0.9850820487
254
+ },
255
+ "mark":{
256
+ "p":0.9068783069,
257
+ "r":0.9083200848,
258
+ "f":0.9075986232
259
+ },
260
+ "intj":{
261
+ "p":0.6717131474,
262
+ "r":0.6175824176,
263
+ "f":0.6435114504
264
+ },
265
+ "advcl":{
266
+ "p":0.6633986928,
267
+ "r":0.6645681189,
268
+ "f":0.6639828909
269
+ },
270
+ "cc":{
271
+ "p":0.8323511726,
272
+ "r":0.8277717976,
273
+ "f":0.8300551691
274
+ },
275
+ "neg":{
276
+ "p":0.9466865969,
277
+ "r":0.9533366784,
278
+ "f":0.95
279
+ },
280
+ "conj":{
281
+ "p":0.7567333828,
282
+ "r":0.7710221551,
283
+ "f":0.763810949
284
+ },
285
+ "nsubjpass":{
286
+ "p":0.9182939363,
287
+ "r":0.9164102564,
288
+ "f":0.9173511294
289
+ },
290
+ "auxpass":{
291
+ "p":0.9501335708,
292
+ "r":0.9722095672,
293
+ "f":0.9610448097
294
+ },
295
+ "dobj":{
296
+ "p":0.9229805886,
297
+ "r":0.9396764682,
298
+ "f":0.9312537019
299
+ },
300
+ "nummod":{
301
+ "p":0.9379292801,
302
+ "r":0.9310606061,
303
+ "f":0.9344823216
304
+ },
305
+ "npadvmod":{
306
+ "p":0.7629658087,
307
+ "r":0.7055062167,
308
+ "f":0.7331118494
309
+ },
310
+ "prt":{
311
+ "p":0.8118323747,
312
+ "r":0.8853046595,
313
+ "f":0.8469781397
314
+ },
315
+ "pcomp":{
316
+ "p":0.8835714286,
317
+ "r":0.8662464986,
318
+ "f":0.8748231966
319
+ },
320
+ "expl":{
321
+ "p":0.9851380042,
322
+ "r":0.9935760171,
323
+ "f":0.9893390192
324
+ },
325
+ "acl":{
326
+ "p":0.742010459,
327
+ "r":0.6966721222,
328
+ "f":0.7186268993
329
+ },
330
+ "agent":{
331
+ "p":0.9034482759,
332
+ "r":0.9390681004,
333
+ "f":0.920913884
334
+ },
335
+ "dative":{
336
+ "p":0.8,
337
+ "r":0.6972477064,
338
+ "f":0.7450980392
339
+ },
340
+ "acomp":{
341
+ "p":0.9020594966,
342
+ "r":0.893877551,
343
+ "f":0.8979498861
344
+ },
345
+ "dep":{
346
+ "p":0.4147286822,
347
+ "r":0.1737012987,
348
+ "f":0.2448512586
349
+ },
350
+ "csubj":{
351
+ "p":0.6983240223,
352
+ "r":0.7396449704,
353
+ "f":0.7183908046
354
+ },
355
+ "quantmod":{
356
+ "p":0.8727436823,
357
+ "r":0.7855402112,
358
+ "f":0.8268490808
359
+ },
360
+ "nmod":{
361
+ "p":0.7498033045,
362
+ "r":0.5807434491,
363
+ "f":0.654532967
364
+ },
365
+ "appos":{
366
+ "p":0.7048498845,
367
+ "r":0.6620390456,
368
+ "f":0.6827740492
369
+ },
370
+ "predet":{
371
+ "p":0.8299595142,
372
+ "r":0.8798283262,
373
+ "f":0.8541666667
374
+ },
375
+ "preconj":{
376
+ "p":0.5544554455,
377
+ "r":0.6511627907,
378
+ "f":0.5989304813
379
+ },
380
+ "oprd":{
381
+ "p":0.8013245033,
382
+ "r":0.7223880597,
383
+ "f":0.759811617
384
+ },
385
+ "parataxis":{
386
+ "p":0.6428571429,
387
+ "r":0.4880694143,
388
+ "f":0.5548705302
389
+ },
390
+ "meta":{
391
+ "p":0.3770491803,
392
+ "r":0.4423076923,
393
+ "f":0.407079646
394
+ },
395
+ "csubjpass":{
396
+ "p":0.5555555556,
397
+ "r":0.8333333333,
398
+ "f":0.6666666667
399
+ }
400
+ },
401
+ "ents_p":0.8454836771,
402
+ "ents_r":0.8456530449,
403
+ "ents_f":0.8455683525,
404
+ "ents_per_type":{
405
+ "DATE":{
406
+ "p":0.8603213844,
407
+ "r":0.8838095238,
408
+ "f":0.8719072972
409
+ },
410
+ "GPE":{
411
+ "p":0.9146932953,
412
+ "r":0.8942817294,
413
+ "f":0.9043723554
414
+ },
415
+ "ORG":{
416
+ "p":0.7955942623,
417
+ "r":0.8234358431,
418
+ "f":0.8092756644
419
+ },
420
+ "CARDINAL":{
421
+ "p":0.8149171271,
422
+ "r":0.8769322235,
423
+ "f":0.8447880871
424
+ },
425
+ "PERSON":{
426
+ "p":0.8617758186,
427
+ "r":0.8932767624,
428
+ "f":0.8772435897
429
+ },
430
+ "NORP":{
431
+ "p":0.8957006369,
432
+ "r":0.9,
433
+ "f":0.8978451716
434
+ },
435
+ "ORDINAL":{
436
+ "p":0.7844827586,
437
+ "r":0.847826087,
438
+ "f":0.8149253731
439
+ },
440
+ "QUANTITY":{
441
+ "p":0.8529411765,
442
+ "r":0.6373626374,
443
+ "f":0.7295597484
444
+ },
445
+ "LOC":{
446
+ "p":0.7210884354,
447
+ "r":0.6751592357,
448
+ "f":0.6973684211
449
+ },
450
+ "FAC":{
451
+ "p":0.358490566,
452
+ "r":0.2923076923,
453
+ "f":0.3220338983
454
+ },
455
+ "TIME":{
456
+ "p":0.7413793103,
457
+ "r":0.7543859649,
458
+ "f":0.747826087
459
+ },
460
+ "PRODUCT":{
461
+ "p":0.5591397849,
462
+ "r":0.2464454976,
463
+ "f":0.3421052632
464
+ },
465
+ "WORK_OF_ART":{
466
+ "p":0.4885496183,
467
+ "r":0.3298969072,
468
+ "f":0.3938461538
469
+ },
470
+ "EVENT":{
471
+ "p":0.6428571429,
472
+ "r":0.3103448276,
473
+ "f":0.4186046512
474
+ },
475
+ "MONEY":{
476
+ "p":0.9071428571,
477
+ "r":0.8996458087,
478
+ "f":0.9033787789
479
+ },
480
+ "LAW":{
481
+ "p":0.5454545455,
482
+ "r":0.46875,
483
+ "f":0.5042016807
484
+ },
485
+ "PERCENT":{
486
+ "p":0.9184,
487
+ "r":0.8790199081,
488
+ "f":0.8982785603
489
+ },
490
+ "LANGUAGE":{
491
+ "p":0.8,
492
+ "r":0.625,
493
+ "f":0.701754386
494
+ }
495
+ },
496
+ "speed":7920.0598120459
497
+ },
498
+ "sources":[
499
+ {
500
+ "name":"OntoNotes 5",
501
+ "url":"https://catalog.ldc.upenn.edu/LDC2013T19",
502
+ "license":"commercial (licensed by Explosion)",
503
+ "author":"Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston"
504
+ },
505
+ {
506
+ "name":"ClearNLP Constituent-to-Dependency Conversion",
507
+ "url":"https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md",
508
+ "license":"Citation provided for reference, no code packaged with model",
509
+ "author":"Emory University"
510
+ },
511
+ {
512
+ "name":"WordNet 3.0",
513
+ "url":"https://wordnet.princeton.edu/",
514
+ "author":"Princeton University",
515
+ "license":"WordNet 3.0 License"
516
+ }
517
+ ],
518
+ "requirements":[
519
+
520
+ ]
521
+ }
@@ -0,0 +1,19 @@
1
+ Copyright 2021 ExplosionAI GmbH
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7
+ of the Software, and to permit persons to whom the Software is furnished to do
8
+ so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -0,0 +1,66 @@
1
+ # OntoNotes 5
2
+
3
+ * Author: Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston
4
+ * URL: https://catalog.ldc.upenn.edu/LDC2013T19
5
+ * License: commercial (licensed by Explosion)
6
+
7
+ ```
8
+ ```
9
+
10
+
11
+
12
+
13
+ # ClearNLP Constituent-to-Dependency Conversion
14
+
15
+ * Author: Emory University
16
+ * URL: https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md
17
+ * License: Citation provided for reference, no code packaged with model
18
+
19
+ ```
20
+ ```
21
+
22
+
23
+
24
+
25
+ # WordNet 3.0
26
+
27
+ * Author: Princeton University
28
+ * URL: https://wordnet.princeton.edu/
29
+ * License: WordNet 3.0 License
30
+
31
+ ```
32
+ WordNet Release 3.0
33
+
34
+ This software and database is being provided to you, the LICENSEE, by
35
+ Princeton University under the following license. By obtaining, using
36
+ and/or copying this software and database, you agree that you have
37
+ read, understood, and will comply with these terms and conditions.:
38
+
39
+ Permission to use, copy, modify and distribute this software and
40
+ database and its documentation for any purpose and without fee or
41
+ royalty is hereby granted, provided that you agree to comply with
42
+ the following copyright notice and statements, including the disclaimer,
43
+ and that the same appear on ALL copies of the software, database and
44
+ documentation, including modifications that you make for internal
45
+ use or for distribution.
46
+
47
+ WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
48
+
49
+ THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
50
+ UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
51
+ IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
52
+ UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
53
+ ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
54
+ OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
55
+ INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
56
+ OTHER RIGHTS.
57
+
58
+ The name of Princeton University or Princeton may not be used in
59
+ advertising or publicity pertaining to distribution of the software
60
+ and/or database. Title to copyright in this software, database and
61
+ any associated documentation shall at all times remain with
62
+ Princeton University and LICENSEE agrees to preserve same.```
63
+
64
+
65
+
66
+
@@ -0,0 +1,59 @@
1
+ Metadata-Version: 2.1
2
+ Name: en-core-web-sm
3
+ Version: 3.7.1
4
+ Summary: English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.
5
+ Home-page: https://explosion.ai
6
+ Author: Explosion
7
+ Author-email: contact@explosion.ai
8
+ License: MIT
9
+ License-File: LICENSE
10
+ License-File: LICENSES_SOURCES
11
+ Requires-Dist: spacy <3.8.0,>=3.7.2
12
+
13
+ ### Details: https://spacy.io/models/en#en_core_web_sm
14
+
15
+ English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.
16
+
17
+ | Feature | Description |
18
+ | --- | --- |
19
+ | **Name** | `en_core_web_sm` |
20
+ | **Version** | `3.7.1` |
21
+ | **spaCy** | `>=3.7.2,<3.8.0` |
22
+ | **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
23
+ | **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
24
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
25
+ | **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br />[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br />[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University) |
26
+ | **License** | `MIT` |
27
+ | **Author** | [Explosion](https://explosion.ai) |
28
+
29
+ ### Label Scheme
30
+
31
+ <details>
32
+
33
+ <summary>View label scheme (113 labels for 3 components)</summary>
34
+
35
+ | Component | Labels |
36
+ | --- | --- |
37
+ | **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, `_SP`, ``` `` ``` |
38
+ | **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
39
+ | **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |
40
+
41
+ </details>
42
+
43
+ ### Accuracy
44
+
45
+ | Type | Score |
46
+ | --- | --- |
47
+ | `TOKEN_ACC` | 99.86 |
48
+ | `TOKEN_P` | 99.57 |
49
+ | `TOKEN_R` | 99.58 |
50
+ | `TOKEN_F` | 99.57 |
51
+ | `TAG_ACC` | 97.25 |
52
+ | `SENTS_P` | 92.02 |
53
+ | `SENTS_R` | 89.21 |
54
+ | `SENTS_F` | 90.59 |
55
+ | `DEP_UAS` | 91.75 |
56
+ | `DEP_LAS` | 89.87 |
57
+ | `ENTS_P` | 84.55 |
58
+ | `ENTS_R` | 84.57 |
59
+ | `ENTS_F` | 84.56 |