en-mof-chem-ner 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. en_mof_chem_ner-0.0.1/LICENSE +21 -0
  2. en_mof_chem_ner-0.0.1/MANIFEST.in +4 -0
  3. en_mof_chem_ner-0.0.1/PKG-INFO +54 -0
  4. en_mof_chem_ner-0.0.1/README.md +35 -0
  5. en_mof_chem_ner-0.0.1/en_mof_chem_ner/__init__.py +10 -0
  6. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/LICENSE +21 -0
  7. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/README.md +35 -0
  8. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/config.cfg +145 -0
  9. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/meta.json +90 -0
  10. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/ner/cfg +13 -0
  11. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/ner/model +0 -0
  12. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/ner/moves +1 -0
  13. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/tok2vec/cfg +3 -0
  14. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/tok2vec/model +0 -0
  15. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/tokenizer +3 -0
  16. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/vocab/key2row +1 -0
  17. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/vocab/lookups.bin +1 -0
  18. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/vocab/strings.json +170699 -0
  19. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/vocab/vectors +0 -0
  20. en_mof_chem_ner-0.0.1/en_mof_chem_ner/en_mof_chem_ner-0.0.1/vocab/vectors.cfg +3 -0
  21. en_mof_chem_ner-0.0.1/en_mof_chem_ner/meta.json +90 -0
  22. en_mof_chem_ner-0.0.1/en_mof_chem_ner.egg-info/PKG-INFO +54 -0
  23. en_mof_chem_ner-0.0.1/en_mof_chem_ner.egg-info/SOURCES.txt +29 -0
  24. en_mof_chem_ner-0.0.1/en_mof_chem_ner.egg-info/dependency_links.txt +1 -0
  25. en_mof_chem_ner-0.0.1/en_mof_chem_ner.egg-info/entry_points.txt +2 -0
  26. en_mof_chem_ner-0.0.1/en_mof_chem_ner.egg-info/not-zip-safe +1 -0
  27. en_mof_chem_ner-0.0.1/en_mof_chem_ner.egg-info/requires.txt +1 -0
  28. en_mof_chem_ner-0.0.1/en_mof_chem_ner.egg-info/top_level.txt +1 -0
  29. en_mof_chem_ner-0.0.1/meta.json +90 -0
  30. en_mof_chem_ner-0.0.1/setup.cfg +4 -0
  31. en_mof_chem_ner-0.0.1/setup.py +72 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Dinga Wonanke
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,4 @@
1
+ include meta.json
2
+ include LICENSE
3
+ include LICENSES_SOURCES
4
+ include README.md
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: en_mof_chem_ner
3
+ Version: 0.0.1
4
+ Summary: A spaCy named entity recogniser to extract MOF reagents from text
5
+ Home-page: https://github.com/bafgreat/mofsyncondition.git
6
+ Author: Dinga Wonanke
7
+ Author-email: dak52@uclive.ac.nz
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Requires-Dist: spacy<3.8.0,>=3.7.5
11
+ Dynamic: author
12
+ Dynamic: author-email
13
+ Dynamic: description
14
+ Dynamic: home-page
15
+ Dynamic: license
16
+ Dynamic: license-file
17
+ Dynamic: requires-dist
18
+ Dynamic: summary
19
+
20
+ A spaCy named entity recogniser to extract MOF reagents from text
21
+
22
+ | Feature | Description |
23
+ | --- | --- |
24
+ | **Name** | `en_mof_chem_ner` |
25
+ | **Version** | `0.0.1` |
26
+ | **spaCy** | `>=3.7.5,<3.8.0` |
27
+ | **Default Pipeline** | `tok2vec`, `ner` |
28
+ | **Components** | `tok2vec`, `ner` |
29
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
30
+ | **Sources** | n/a |
31
+ | **License** | `MIT` |
32
+ | **Author** | [Dinga Wonanke](https://github.com/bafgreat/mofsyncondition.git) |
33
+
34
+ ### Label Scheme
35
+
36
+ <details>
37
+
38
+ <summary>View label scheme (7 labels for 1 component)</summary>
39
+
40
+ | Component | Labels |
41
+ | --- | --- |
42
+ | **`ner`** | `ATMOSPHERE`, `METAL_SALT`, `MODULATOR`, `MOF`, `ORGANIC_LIGAND`, `SOLVENT`, `SYNTH_METHOD` |
43
+
44
+ </details>
45
+
46
+ ### Accuracy
47
+
48
+ | Type | Score |
49
+ | --- | --- |
50
+ | `ENTS_F` | 91.66 |
51
+ | `ENTS_P` | 92.78 |
52
+ | `ENTS_R` | 90.56 |
53
+ | `TOK2VEC_LOSS` | 26365.16 |
54
+ | `NER_LOSS` | 78555.25 |
@@ -0,0 +1,35 @@
1
+ A spaCy named entity recogniser to extract MOF reagents from text
2
+
3
+ | Feature | Description |
4
+ | --- | --- |
5
+ | **Name** | `en_mof_chem_ner` |
6
+ | **Version** | `0.0.1` |
7
+ | **spaCy** | `>=3.7.5,<3.8.0` |
8
+ | **Default Pipeline** | `tok2vec`, `ner` |
9
+ | **Components** | `tok2vec`, `ner` |
10
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
11
+ | **Sources** | n/a |
12
+ | **License** | `MIT` |
13
+ | **Author** | [Dinga Wonanke](https://github.com/bafgreat/mofsyncondition.git) |
14
+
15
+ ### Label Scheme
16
+
17
+ <details>
18
+
19
+ <summary>View label scheme (7 labels for 1 component)</summary>
20
+
21
+ | Component | Labels |
22
+ | --- | --- |
23
+ | **`ner`** | `ATMOSPHERE`, `METAL_SALT`, `MODULATOR`, `MOF`, `ORGANIC_LIGAND`, `SOLVENT`, `SYNTH_METHOD` |
24
+
25
+ </details>
26
+
27
+ ### Accuracy
28
+
29
+ | Type | Score |
30
+ | --- | --- |
31
+ | `ENTS_F` | 91.66 |
32
+ | `ENTS_P` | 92.78 |
33
+ | `ENTS_R` | 90.56 |
34
+ | `TOK2VEC_LOSS` | 26365.16 |
35
+ | `NER_LOSS` | 78555.25 |
@@ -0,0 +1,10 @@
1
+ from pathlib import Path
2
+ from spacy.util import load_model_from_init_py, get_model_meta
3
+
4
+
5
+
6
+ __version__ = get_model_meta(Path(__file__).parent)['version']
7
+
8
+
9
+ def load(**overrides):
10
+ return load_model_from_init_py(__file__, **overrides)
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Dinga Wonanke
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,35 @@
1
+ A spaCy named entity recogniser to extract MOF reagents from text
2
+
3
+ | Feature | Description |
4
+ | --- | --- |
5
+ | **Name** | `en_mof_chem_ner` |
6
+ | **Version** | `0.0.1` |
7
+ | **spaCy** | `>=3.7.5,<3.8.0` |
8
+ | **Default Pipeline** | `tok2vec`, `ner` |
9
+ | **Components** | `tok2vec`, `ner` |
10
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
11
+ | **Sources** | n/a |
12
+ | **License** | `MIT` |
13
+ | **Author** | [Dinga Wonanke](https://github.com/bafgreat/mofsyncondition.git) |
14
+
15
+ ### Label Scheme
16
+
17
+ <details>
18
+
19
+ <summary>View label scheme (7 labels for 1 component)</summary>
20
+
21
+ | Component | Labels |
22
+ | --- | --- |
23
+ | **`ner`** | `ATMOSPHERE`, `METAL_SALT`, `MODULATOR`, `MOF`, `ORGANIC_LIGAND`, `SOLVENT`, `SYNTH_METHOD` |
24
+
25
+ </details>
26
+
27
+ ### Accuracy
28
+
29
+ | Type | Score |
30
+ | --- | --- |
31
+ | `ENTS_F` | 91.66 |
32
+ | `ENTS_P` | 92.78 |
33
+ | `ENTS_R` | 90.56 |
34
+ | `TOK2VEC_LOSS` | 26365.16 |
35
+ | `NER_LOSS` | 78555.25 |
@@ -0,0 +1,145 @@
1
+ [paths]
2
+ train = "../../data/trainin_data/chem_train.spacy"
3
+ dev = "../../data/trainin_data/chem_dev.spacy"
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ gpu_allocator = null
9
+ seed = 0
10
+
11
+ [nlp]
12
+ lang = "en"
13
+ pipeline = ["tok2vec","ner"]
14
+ batch_size = 1000
15
+ disabled = []
16
+ before_creation = null
17
+ after_creation = null
18
+ after_pipeline_creation = null
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
+
22
+ [components]
23
+
24
+ [components.ner]
25
+ factory = "ner"
26
+ incorrect_spans_key = null
27
+ moves = null
28
+ scorer = {"@scorers":"spacy.ner_scorer.v1"}
29
+ update_with_oracle_cut_size = 100
30
+
31
+ [components.ner.model]
32
+ @architectures = "spacy.TransitionBasedParser.v2"
33
+ state_type = "ner"
34
+ extra_state_tokens = false
35
+ hidden_width = 64
36
+ maxout_pieces = 2
37
+ use_upper = true
38
+ nO = null
39
+
40
+ [components.ner.model.tok2vec]
41
+ @architectures = "spacy.Tok2VecListener.v1"
42
+ width = ${components.tok2vec.model.encode.width}
43
+ upstream = "*"
44
+
45
+ [components.tok2vec]
46
+ factory = "tok2vec"
47
+
48
+ [components.tok2vec.model]
49
+ @architectures = "spacy.Tok2Vec.v2"
50
+
51
+ [components.tok2vec.model.embed]
52
+ @architectures = "spacy.MultiHashEmbed.v2"
53
+ width = ${components.tok2vec.model.encode.width}
54
+ attrs = ["NORM","PREFIX","SUFFIX","SHAPE"]
55
+ rows = [5000,1000,2500,2500]
56
+ include_static_vectors = false
57
+
58
+ [components.tok2vec.model.encode]
59
+ @architectures = "spacy.MaxoutWindowEncoder.v2"
60
+ width = 96
61
+ depth = 4
62
+ window_size = 1
63
+ maxout_pieces = 3
64
+
65
+ [corpora]
66
+
67
+ [corpora.dev]
68
+ @readers = "spacy.Corpus.v1"
69
+ path = ${paths.dev}
70
+ max_length = 0
71
+ gold_preproc = false
72
+ limit = 0
73
+ augmenter = null
74
+
75
+ [corpora.train]
76
+ @readers = "spacy.Corpus.v1"
77
+ path = ${paths.train}
78
+ max_length = 0
79
+ gold_preproc = false
80
+ limit = 0
81
+ augmenter = null
82
+
83
+ [training]
84
+ dev_corpus = "corpora.dev"
85
+ train_corpus = "corpora.train"
86
+ seed = ${system.seed}
87
+ gpu_allocator = ${system.gpu_allocator}
88
+ dropout = 0.1
89
+ accumulate_gradient = 1
90
+ patience = 1600
91
+ max_epochs = 0
92
+ max_steps = 10000000
93
+ eval_frequency = 200
94
+ frozen_components = []
95
+ annotating_components = []
96
+ before_to_disk = null
97
+ before_update = null
98
+
99
+ [training.batcher]
100
+ @batchers = "spacy.batch_by_words.v1"
101
+ discard_oversize = false
102
+ tolerance = 0.2
103
+ get_length = null
104
+
105
+ [training.batcher.size]
106
+ @schedules = "compounding.v1"
107
+ start = 100
108
+ stop = 1000
109
+ compound = 1.001
110
+ t = 0.0
111
+
112
+ [training.logger]
113
+ @loggers = "spacy.ConsoleLogger.v1"
114
+ progress_bar = false
115
+
116
+ [training.optimizer]
117
+ @optimizers = "Adam.v1"
118
+ beta1 = 0.9
119
+ beta2 = 0.999
120
+ L2_is_weight_decay = true
121
+ L2 = 0.01
122
+ grad_clip = 1.0
123
+ use_averages = false
124
+ eps = 0.00000001
125
+ learn_rate = 0.0001
126
+
127
+ [training.score_weights]
128
+ ents_f = 1.0
129
+ ents_p = 0.0
130
+ ents_r = 0.0
131
+ ents_per_type = null
132
+
133
+ [pretraining]
134
+
135
+ [initialize]
136
+ vectors = ${paths.vectors}
137
+ init_tok2vec = ${paths.init_tok2vec}
138
+ vocab_data = null
139
+ lookups = null
140
+ before_init = null
141
+ after_init = null
142
+
143
+ [initialize.components]
144
+
145
+ [initialize.tokenizer]
@@ -0,0 +1,90 @@
1
+ {
2
+ "lang":"en",
3
+ "name":"mof_chem_ner",
4
+ "version":"0.0.1",
5
+ "description":"A spaCy named entity recogniser to extract MOF reagents from text",
6
+ "author":"Dinga Wonanke",
7
+ "email":"dak52@uclive.ac.nz",
8
+ "url":"https://github.com/bafgreat/mofsyncondition.git",
9
+ "license":"MIT",
10
+ "spacy_version":">=3.7.5,<3.8.0",
11
+ "spacy_git_version":"a6d0fc360",
12
+ "vectors":{
13
+ "width":0,
14
+ "vectors":0,
15
+ "keys":0,
16
+ "name":null
17
+ },
18
+ "labels":{
19
+ "tok2vec":[
20
+
21
+ ],
22
+ "ner":[
23
+ "ATMOSPHERE",
24
+ "METAL_SALT",
25
+ "MODULATOR",
26
+ "MOF",
27
+ "ORGANIC_LIGAND",
28
+ "SOLVENT",
29
+ "SYNTH_METHOD"
30
+ ]
31
+ },
32
+ "pipeline":[
33
+ "tok2vec",
34
+ "ner"
35
+ ],
36
+ "components":[
37
+ "tok2vec",
38
+ "ner"
39
+ ],
40
+ "disabled":[
41
+
42
+ ],
43
+ "performance":{
44
+ "ents_f":0.9165606814,
45
+ "ents_p":0.9277522041,
46
+ "ents_r":0.9056359482,
47
+ "ents_per_type":{
48
+ "METAL_SALT":{
49
+ "p":0.9291938998,
50
+ "r":0.9081714134,
51
+ "f":0.9185623906
52
+ },
53
+ "ORGANIC_LIGAND":{
54
+ "p":0.7599510104,
55
+ "r":0.7156862745,
56
+ "f":0.7371547372
57
+ },
58
+ "SOLVENT":{
59
+ "p":0.9814999005,
60
+ "r":0.9899678973,
61
+ "f":0.9857157127
62
+ },
63
+ "MODULATOR":{
64
+ "p":0.9721886336,
65
+ "r":0.9560047562,
66
+ "f":0.964028777
67
+ },
68
+ "ATMOSPHERE":{
69
+ "p":0.9715224535,
70
+ "r":0.9662309368,
71
+ "f":0.9688694702
72
+ },
73
+ "SYNTH_METHOD":{
74
+ "p":0.9970238095,
75
+ "r":0.9940652819,
76
+ "f":0.9955423477
77
+ },
78
+ "MOF":{
79
+ "p":0.6797066015,
80
+ "r":0.4973166369,
81
+ "f":0.5743801653
82
+ }
83
+ },
84
+ "tok2vec_loss":263.6515560191,
85
+ "ner_loss":785.552518154
86
+ },
87
+ "requirements":[
88
+
89
+ ]
90
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "moves":null,
3
+ "update_with_oracle_cut_size":100,
4
+ "multitasks":[
5
+
6
+ ],
7
+ "min_action_freq":1,
8
+ "learn_tokens":false,
9
+ "beam_width":1,
10
+ "beam_density":0.0,
11
+ "beam_update_prob":0.0,
12
+ "incorrect_spans_key":null
13
+ }
@@ -0,0 +1 @@
1
+ ��moves� {"0":{},"1":{"SOLVENT":30481,"METAL_SALT":29278,"ORGANIC_LIGAND":22060,"ATMOSPHERE":9849,"MOF":6516,"MODULATOR":5561,"SYNTH_METHOD":2794},"2":{"SOLVENT":30481,"METAL_SALT":29278,"ORGANIC_LIGAND":22060,"ATMOSPHERE":9849,"MOF":6516,"MODULATOR":5561,"SYNTH_METHOD":2794},"3":{"SOLVENT":30481,"METAL_SALT":29278,"ORGANIC_LIGAND":22060,"ATMOSPHERE":9849,"MOF":6516,"MODULATOR":5561,"SYNTH_METHOD":2794},"4":{"SOLVENT":30481,"METAL_SALT":29278,"ORGANIC_LIGAND":22060,"ATMOSPHERE":9849,"MOF":6516,"MODULATOR":5561,"SYNTH_METHOD":2794,"":1},"5":{"":1}}�cfg��neg_key�