OntoLearner 1.4.10__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ontolearner/VERSION +1 -1
- ontolearner/base/learner.py +41 -18
- ontolearner/evaluation/metrics.py +72 -32
- ontolearner/learner/__init__.py +3 -2
- ontolearner/learner/label_mapper.py +5 -4
- ontolearner/learner/llm.py +257 -0
- ontolearner/learner/prompt.py +40 -5
- ontolearner/learner/rag/__init__.py +14 -0
- ontolearner/learner/{rag.py → rag/rag.py} +7 -2
- ontolearner/learner/retriever/__init__.py +1 -1
- ontolearner/learner/retriever/{llm_retriever.py → augmented_retriever.py} +48 -39
- ontolearner/learner/retriever/learner.py +3 -4
- ontolearner/learner/taxonomy_discovery/alexbek.py +632 -310
- ontolearner/learner/taxonomy_discovery/skhnlp.py +216 -156
- ontolearner/learner/text2onto/__init__.py +1 -1
- ontolearner/learner/text2onto/alexbek.py +484 -1105
- ontolearner/learner/text2onto/sbunlp.py +498 -493
- ontolearner/ontology/biology.py +2 -3
- ontolearner/ontology/chemistry.py +16 -18
- ontolearner/ontology/ecology_environment.py +2 -3
- ontolearner/ontology/general.py +4 -6
- ontolearner/ontology/material_science_engineering.py +64 -45
- ontolearner/ontology/medicine.py +2 -3
- ontolearner/ontology/scholarly_knowledge.py +6 -9
- ontolearner/processor.py +3 -3
- ontolearner/text2onto/splitter.py +69 -6
- {ontolearner-1.4.10.dist-info → ontolearner-1.5.0.dist-info}/METADATA +2 -2
- {ontolearner-1.4.10.dist-info → ontolearner-1.5.0.dist-info}/RECORD +30 -29
- {ontolearner-1.4.10.dist-info → ontolearner-1.5.0.dist-info}/WHEEL +1 -1
- {ontolearner-1.4.10.dist-info → ontolearner-1.5.0.dist-info}/licenses/LICENSE +0 -0
ontolearner/ontology/biology.py
CHANGED
|
@@ -70,15 +70,14 @@ class GO(BaseOntology):
|
|
|
70
70
|
format = "OWL"
|
|
71
71
|
download_url = "https://geneontology.org/docs/download-ontology/"
|
|
72
72
|
|
|
73
|
-
|
|
74
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
73
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
75
74
|
"""Override to handle GO-specific blank nodes."""
|
|
76
75
|
# Check the general patterns from the parent class
|
|
77
76
|
# GO-specific patterns
|
|
78
77
|
if label.startswith('GO_'):
|
|
79
78
|
return True
|
|
80
79
|
|
|
81
|
-
if
|
|
80
|
+
if super()._is_anonymous_id(label):
|
|
82
81
|
return True
|
|
83
82
|
|
|
84
83
|
return False
|
|
@@ -57,15 +57,15 @@ class ChEBI(BaseOntology):
|
|
|
57
57
|
format = "OWL"
|
|
58
58
|
download_url = "https://www.ebi.ac.uk/chebi/"
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
60
|
+
|
|
61
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
62
62
|
"""Override to handle ChEBI-specific blank nodes."""
|
|
63
63
|
# ChEBI-specific patterns
|
|
64
64
|
if re.match(r'^CHEBI_[0-9]+$', label):
|
|
65
65
|
return True
|
|
66
66
|
|
|
67
67
|
# Check the general patterns from the parent class
|
|
68
|
-
if
|
|
68
|
+
if super()._is_anonymous_id(label):
|
|
69
69
|
return True
|
|
70
70
|
|
|
71
71
|
return False
|
|
@@ -89,15 +89,15 @@ class CHEMINF(BaseOntology):
|
|
|
89
89
|
format = "OWL"
|
|
90
90
|
download_url = "https://terminology.tib.eu/ts/ontologies/CHEMINF"
|
|
91
91
|
|
|
92
|
-
|
|
93
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
92
|
+
|
|
93
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
94
94
|
"""Override to handle CHEMINF-specific blank nodes."""
|
|
95
95
|
# CHEMINF-specific patterns
|
|
96
96
|
if re.match(r'^CHEMINF_[0-9]+$', label):
|
|
97
97
|
return True
|
|
98
98
|
|
|
99
99
|
# Check the general patterns from the parent class
|
|
100
|
-
if
|
|
100
|
+
if super()._is_anonymous_id(label):
|
|
101
101
|
return True
|
|
102
102
|
|
|
103
103
|
return False
|
|
@@ -145,15 +145,15 @@ class ChMO(BaseOntology):
|
|
|
145
145
|
format = "OWL"
|
|
146
146
|
download_url = "https://github.com/rsc-ontologies/rsc-cmo"
|
|
147
147
|
|
|
148
|
-
|
|
149
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
148
|
+
|
|
149
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
150
150
|
"""Override to handle ChMO-specific blank nodes."""
|
|
151
151
|
# ChMO-specific patterns
|
|
152
152
|
if re.match(r'^CHMO_[0-9]+$', label):
|
|
153
153
|
return True
|
|
154
154
|
|
|
155
155
|
# Check the general patterns from the parent class
|
|
156
|
-
if
|
|
156
|
+
if super()._is_anonymous_id(label):
|
|
157
157
|
return True
|
|
158
158
|
|
|
159
159
|
return False
|
|
@@ -190,15 +190,15 @@ class MassSpectrometry(BaseOntology):
|
|
|
190
190
|
format = "OWL"
|
|
191
191
|
download_url = "https://terminology.tib.eu/ts/ontologies/MS"
|
|
192
192
|
|
|
193
|
-
|
|
194
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
193
|
+
|
|
194
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
195
195
|
"""Override to handle MassSpectrometry-specific blank nodes."""
|
|
196
196
|
# MassSpectrometry-specific patterns
|
|
197
197
|
if re.match(r'^PEFF_[0-9]+$', label):
|
|
198
198
|
return True
|
|
199
199
|
|
|
200
200
|
# Check the general patterns from the parent class
|
|
201
|
-
if
|
|
201
|
+
if super()._is_anonymous_id(label):
|
|
202
202
|
return True
|
|
203
203
|
|
|
204
204
|
return False
|
|
@@ -220,8 +220,7 @@ class MOP(BaseOntology):
|
|
|
220
220
|
format = "OWL"
|
|
221
221
|
download_url = "https://terminology.tib.eu/ts/ontologies/MOP"
|
|
222
222
|
|
|
223
|
-
|
|
224
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
223
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
225
224
|
"""Override to handle MOP-specific blank nodes."""
|
|
226
225
|
# MOP-specific patterns
|
|
227
226
|
if re.match(r'^MOP_[0-9]+$', label):
|
|
@@ -234,7 +233,7 @@ class MOP(BaseOntology):
|
|
|
234
233
|
return True
|
|
235
234
|
|
|
236
235
|
# Check the general patterns from the parent class
|
|
237
|
-
if
|
|
236
|
+
if super()._is_anonymous_id(label):
|
|
238
237
|
return True
|
|
239
238
|
|
|
240
239
|
return False
|
|
@@ -357,8 +356,7 @@ class RXNO(BaseOntology):
|
|
|
357
356
|
format = "OWL"
|
|
358
357
|
download_url = "https://github.com/rsc-ontologies/rxno"
|
|
359
358
|
|
|
360
|
-
|
|
361
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
359
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
362
360
|
"""Override to handle RXNO-specific blank nodes."""
|
|
363
361
|
# RXNO-specific patterns
|
|
364
362
|
if re.match(r'^RXNO_[0-9]+$', label):
|
|
@@ -368,7 +366,7 @@ class RXNO(BaseOntology):
|
|
|
368
366
|
return True
|
|
369
367
|
|
|
370
368
|
# Check the general patterns from the parent class
|
|
371
|
-
if
|
|
369
|
+
if super()._is_anonymous_id(label):
|
|
372
370
|
return True
|
|
373
371
|
|
|
374
372
|
return False
|
|
@@ -35,15 +35,14 @@ class ENVO(BaseOntology):
|
|
|
35
35
|
format = "OWL"
|
|
36
36
|
download_url = "https://obofoundry.org/ontology/envo.html"
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
38
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
40
39
|
"""Override to handle ENVO-specific blank nodes."""
|
|
41
40
|
# ENVO-specific patterns
|
|
42
41
|
if re.match(r'^PATO_[0-9]+$', label):
|
|
43
42
|
return True
|
|
44
43
|
|
|
45
44
|
# Check the general patterns from the parent class
|
|
46
|
-
if
|
|
45
|
+
if super()._is_anonymous_id(label):
|
|
47
46
|
return True
|
|
48
47
|
|
|
49
48
|
return False
|
ontolearner/ontology/general.py
CHANGED
|
@@ -53,15 +53,14 @@ class DBpedia(BaseOntology):
|
|
|
53
53
|
format = "OWL"
|
|
54
54
|
download_url = "https://wiki.dbpedia.org/"
|
|
55
55
|
|
|
56
|
-
|
|
57
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
56
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
58
57
|
"""Override to handle DBpedia/Wikidata-specific blank nodes."""
|
|
59
58
|
# DBpedia/Wikidata-specific patterns
|
|
60
59
|
if re.match(r'^Q[0-9]+$', label):
|
|
61
60
|
return True
|
|
62
61
|
|
|
63
62
|
# Check the general patterns from the parent class
|
|
64
|
-
if
|
|
63
|
+
if super()._is_anonymous_id(label):
|
|
65
64
|
return True
|
|
66
65
|
|
|
67
66
|
return False
|
|
@@ -226,15 +225,14 @@ class UMBEL(BaseOntology):
|
|
|
226
225
|
format = "n3"
|
|
227
226
|
download_url = "https://github.com/structureddynamics/UMBEL/tree/master/Ontology"
|
|
228
227
|
|
|
229
|
-
|
|
230
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
228
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
231
229
|
"""Override to handle UMBEL-specific blank nodes."""
|
|
232
230
|
# UMBEL-specific patterns
|
|
233
231
|
if re.match(r'^f5295f96ac3e649dcb1740b0d93d3e6c2b[0-9a-f]+$', label): # Long hexadecimal identifiers
|
|
234
232
|
return True
|
|
235
233
|
|
|
236
234
|
# Check the general patterns from the parent class
|
|
237
|
-
if
|
|
235
|
+
if super()._is_anonymous_id(label):
|
|
238
236
|
return True
|
|
239
237
|
|
|
240
238
|
return False
|
|
@@ -74,15 +74,14 @@ class Atomistic(BaseOntology):
|
|
|
74
74
|
format = "TTL"
|
|
75
75
|
download_url = "https://github.com/emmo-repo/domain-atomistic"
|
|
76
76
|
|
|
77
|
-
|
|
78
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
77
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
79
78
|
"""Override to handle Atomistic-specific blank nodes."""
|
|
80
79
|
# EMMO-specific patterns (UUID format) in Atomistic
|
|
81
80
|
if re.match(r'^EMMO_[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$', label):
|
|
82
81
|
return True
|
|
83
82
|
|
|
84
83
|
# Check the general patterns from the parent class
|
|
85
|
-
if
|
|
84
|
+
if super()._is_anonymous_id(label):
|
|
86
85
|
return True
|
|
87
86
|
|
|
88
87
|
return False
|
|
@@ -106,8 +105,8 @@ class BattINFO(BaseOntology):
|
|
|
106
105
|
format = "TTL"
|
|
107
106
|
download_url = "https://github.com/BIG-MAP/BattINFO"
|
|
108
107
|
|
|
109
|
-
|
|
110
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
108
|
+
|
|
109
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
111
110
|
"""Override to handle BattINFO-specific blank nodes."""
|
|
112
111
|
# UUID pattern for various prefixes
|
|
113
112
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -123,7 +122,7 @@ class BattINFO(BaseOntology):
|
|
|
123
122
|
return True
|
|
124
123
|
|
|
125
124
|
# Check the general patterns from the parent class
|
|
126
|
-
if
|
|
125
|
+
if super()._is_anonymous_id(label):
|
|
127
126
|
return True
|
|
128
127
|
|
|
129
128
|
return False
|
|
@@ -168,8 +167,7 @@ class BVCO(BaseOntology):
|
|
|
168
167
|
format = "TTL"
|
|
169
168
|
download_url = "https://github.com/Battery-Value-Chain-Ontology/ontology"
|
|
170
169
|
|
|
171
|
-
|
|
172
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
170
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
173
171
|
"""Override to handle BVCO-specific blank nodes."""
|
|
174
172
|
# UUID pattern for various prefixes
|
|
175
173
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -184,7 +182,7 @@ class BVCO(BaseOntology):
|
|
|
184
182
|
return True
|
|
185
183
|
|
|
186
184
|
# Check the general patterns from the parent class
|
|
187
|
-
if
|
|
185
|
+
if super()._is_anonymous_id(label):
|
|
188
186
|
return True
|
|
189
187
|
|
|
190
188
|
return False
|
|
@@ -277,8 +275,7 @@ class CHAMEO(BaseOntology):
|
|
|
277
275
|
format = "TTL"
|
|
278
276
|
download_url = "https://github.com/emmo-repo/domain-characterisation-methodology"
|
|
279
277
|
|
|
280
|
-
|
|
281
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
278
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
282
279
|
"""Override to handle CHAMEO-specific blank nodes."""
|
|
283
280
|
# UUID pattern for various prefixes
|
|
284
281
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -287,7 +284,7 @@ class CHAMEO(BaseOntology):
|
|
|
287
284
|
return True
|
|
288
285
|
|
|
289
286
|
# Check the general patterns from the parent class
|
|
290
|
-
if
|
|
287
|
+
if super()._is_anonymous_id(label):
|
|
291
288
|
return True
|
|
292
289
|
|
|
293
290
|
return False
|
|
@@ -400,8 +397,7 @@ class EMMOCrystallography(BaseOntology):
|
|
|
400
397
|
format = "TTL"
|
|
401
398
|
download_url = "https://github.com/emmo-repo/domain-crystallography"
|
|
402
399
|
|
|
403
|
-
|
|
404
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
400
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
405
401
|
"""Override to handle EMMOCrystallography-specific blank nodes."""
|
|
406
402
|
# UUID pattern for various prefixes
|
|
407
403
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -410,7 +406,7 @@ class EMMOCrystallography(BaseOntology):
|
|
|
410
406
|
return True
|
|
411
407
|
|
|
412
408
|
# Check the general patterns from the parent class
|
|
413
|
-
if
|
|
409
|
+
if super()._is_anonymous_id(label):
|
|
414
410
|
return True
|
|
415
411
|
|
|
416
412
|
return False
|
|
@@ -451,8 +447,7 @@ class GPO(BaseOntology):
|
|
|
451
447
|
format = "TTL"
|
|
452
448
|
download_url = "https://github.com/General-Process-Ontology/ontology"
|
|
453
449
|
|
|
454
|
-
|
|
455
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
450
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
456
451
|
"""Override to handle GPO-specific blank nodes."""
|
|
457
452
|
# UUID pattern for various prefixes
|
|
458
453
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -463,7 +458,7 @@ class GPO(BaseOntology):
|
|
|
463
458
|
return True
|
|
464
459
|
|
|
465
460
|
# Check the general patterns from the parent class
|
|
466
|
-
if
|
|
461
|
+
if super()._is_anonymous_id(label):
|
|
467
462
|
return True
|
|
468
463
|
|
|
469
464
|
return False
|
|
@@ -697,8 +692,7 @@ class MechanicalTesting(BaseOntology):
|
|
|
697
692
|
format = "OWL"
|
|
698
693
|
download_url = "https://github.com/emmo-repo/domain-mechanical-testing"
|
|
699
694
|
|
|
700
|
-
|
|
701
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
695
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
702
696
|
"""Override to handle MechanicalTesting-specific blank nodes."""
|
|
703
697
|
# UUID pattern for various prefixes
|
|
704
698
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -707,7 +701,7 @@ class MechanicalTesting(BaseOntology):
|
|
|
707
701
|
return True
|
|
708
702
|
|
|
709
703
|
# Check the general patterns from the parent class
|
|
710
|
-
if
|
|
704
|
+
if super()._is_anonymous_id(label):
|
|
711
705
|
return True
|
|
712
706
|
|
|
713
707
|
return False
|
|
@@ -731,8 +725,7 @@ class MicroStructures(BaseOntology):
|
|
|
731
725
|
format = "OWL"
|
|
732
726
|
download_url = "https://github.com/jesper-friis/emmo-microstructure"
|
|
733
727
|
|
|
734
|
-
|
|
735
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
728
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
736
729
|
"""Override to handle MicroStructures-specific blank nodes."""
|
|
737
730
|
# UUID pattern for various prefixes
|
|
738
731
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -741,7 +734,7 @@ class MicroStructures(BaseOntology):
|
|
|
741
734
|
return True
|
|
742
735
|
|
|
743
736
|
# Check the general patterns from the parent class
|
|
744
|
-
if
|
|
737
|
+
if super()._is_anonymous_id(label):
|
|
745
738
|
return True
|
|
746
739
|
|
|
747
740
|
return False
|
|
@@ -868,8 +861,7 @@ class OIECharacterisation(BaseOntology):
|
|
|
868
861
|
format = "TTL"
|
|
869
862
|
download_url = "https://github.com/emmo-repo/OIE-Ontologies/"
|
|
870
863
|
|
|
871
|
-
|
|
872
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
864
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
873
865
|
"""Override to handle OIECharacterisation-specific blank nodes."""
|
|
874
866
|
# UUID pattern for various prefixes
|
|
875
867
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -878,7 +870,7 @@ class OIECharacterisation(BaseOntology):
|
|
|
878
870
|
return True
|
|
879
871
|
|
|
880
872
|
# Check the general patterns from the parent class
|
|
881
|
-
if
|
|
873
|
+
if super()._is_anonymous_id(label):
|
|
882
874
|
return True
|
|
883
875
|
|
|
884
876
|
return False
|
|
@@ -901,8 +893,7 @@ class OIEManufacturing(BaseOntology):
|
|
|
901
893
|
format = "TTL"
|
|
902
894
|
download_url = "https://github.com/emmo-repo/OIE-Ontologies/"
|
|
903
895
|
|
|
904
|
-
|
|
905
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
896
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
906
897
|
"""Override to handle OIEManufacturing-specific blank nodes."""
|
|
907
898
|
# UUID pattern for various prefixes
|
|
908
899
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -911,7 +902,7 @@ class OIEManufacturing(BaseOntology):
|
|
|
911
902
|
return True
|
|
912
903
|
|
|
913
904
|
# Check the general patterns from the parent class
|
|
914
|
-
if
|
|
905
|
+
if super()._is_anonymous_id(label):
|
|
915
906
|
return True
|
|
916
907
|
|
|
917
908
|
return False
|
|
@@ -934,8 +925,7 @@ class OIEMaterials(BaseOntology):
|
|
|
934
925
|
format = "TTL"
|
|
935
926
|
download_url = "https://github.com/emmo-repo/OIE-Ontologies/"
|
|
936
927
|
|
|
937
|
-
|
|
938
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
928
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
939
929
|
"""Override to handle OIEMaterials-specific blank nodes."""
|
|
940
930
|
# UUID pattern for various prefixes
|
|
941
931
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -944,7 +934,7 @@ class OIEMaterials(BaseOntology):
|
|
|
944
934
|
return True
|
|
945
935
|
|
|
946
936
|
# Check the general patterns from the parent class
|
|
947
|
-
if
|
|
937
|
+
if super()._is_anonymous_id(label):
|
|
948
938
|
return True
|
|
949
939
|
|
|
950
940
|
return False
|
|
@@ -967,8 +957,7 @@ class OIEModels(BaseOntology):
|
|
|
967
957
|
format = "TTL"
|
|
968
958
|
download_url = "https://github.com/emmo-repo/OIE-Ontologies/"
|
|
969
959
|
|
|
970
|
-
|
|
971
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
960
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
972
961
|
"""Override to handle OIEModels-specific blank nodes."""
|
|
973
962
|
# UUID pattern for various prefixes
|
|
974
963
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -977,7 +966,7 @@ class OIEModels(BaseOntology):
|
|
|
977
966
|
return True
|
|
978
967
|
|
|
979
968
|
# Check the general patterns from the parent class
|
|
980
|
-
if
|
|
969
|
+
if super()._is_anonymous_id(label):
|
|
981
970
|
return True
|
|
982
971
|
|
|
983
972
|
return False
|
|
@@ -999,8 +988,7 @@ class OIESoftware(BaseOntology):
|
|
|
999
988
|
format = "TTL"
|
|
1000
989
|
download_url = "https://github.com/emmo-repo/OIE-Ontologies/"
|
|
1001
990
|
|
|
1002
|
-
|
|
1003
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
991
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
1004
992
|
"""Override to handle OIESoftware-specific blank nodes."""
|
|
1005
993
|
# UUID pattern for various prefixes
|
|
1006
994
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -1009,7 +997,7 @@ class OIESoftware(BaseOntology):
|
|
|
1009
997
|
return True
|
|
1010
998
|
|
|
1011
999
|
# Check the general patterns from the parent class
|
|
1012
|
-
if
|
|
1000
|
+
if super()._is_anonymous_id(label):
|
|
1013
1001
|
return True
|
|
1014
1002
|
|
|
1015
1003
|
return False
|
|
@@ -1067,8 +1055,7 @@ class Photovoltaics(BaseOntology):
|
|
|
1067
1055
|
format = "TTL"
|
|
1068
1056
|
download_url = "https://github.com/emmo-repo/domain-photovoltaics"
|
|
1069
1057
|
|
|
1070
|
-
|
|
1071
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
1058
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
1072
1059
|
"""Override to handle Photovoltaics-specific blank nodes."""
|
|
1073
1060
|
# UUID pattern for various prefixes
|
|
1074
1061
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -1077,7 +1064,7 @@ class Photovoltaics(BaseOntology):
|
|
|
1077
1064
|
return True
|
|
1078
1065
|
|
|
1079
1066
|
# Check the general patterns from the parent class
|
|
1080
|
-
if
|
|
1067
|
+
if super()._is_anonymous_id(label):
|
|
1081
1068
|
return True
|
|
1082
1069
|
|
|
1083
1070
|
return False
|
|
@@ -1219,8 +1206,8 @@ class VIMMP(BaseOntology):
|
|
|
1219
1206
|
format = "OWL"
|
|
1220
1207
|
download_url = "https://matportal.org/ontologies/VIMMP_ONTOLOGIES"
|
|
1221
1208
|
|
|
1222
|
-
|
|
1223
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
1209
|
+
|
|
1210
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
1224
1211
|
"""Override to handle VIMMP-specific blank nodes."""
|
|
1225
1212
|
# UUID pattern for various prefixes
|
|
1226
1213
|
uuid_pattern = r'[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}$'
|
|
@@ -1232,7 +1219,39 @@ class VIMMP(BaseOntology):
|
|
|
1232
1219
|
return True
|
|
1233
1220
|
|
|
1234
1221
|
# Check the general patterns from the parent class
|
|
1235
|
-
if
|
|
1222
|
+
if super()._is_anonymous_id(label):
|
|
1236
1223
|
return True
|
|
1237
1224
|
|
|
1238
1225
|
return False
|
|
1226
|
+
|
|
1227
|
+
|
|
1228
|
+
class MDSOnto(BaseOntology):
    """
    MDS-Onto is a domain (low) level ontology that describes terms in Materials Data Science. It is divided into six
    large modules: BuiltEnv, Exposure, Chemistry, Manufacture, Characterization, and Geospatial. Under each module,
    there are multiple sub-modules such as FTIR, AFM, Chem-Rxn, PV-Module, Accelerated Exposure, etc.
    """
    # Descriptive metadata for this ontology entry.
    ontology_id = "MDSOnto"
    ontology_full_name = "The Modular Ontology for Materials and Data Science (MDS-Onto)"
    domain = "Materials Science and Engineering"
    category = "Materials Data Science"
    version = "0.3.1.16"
    last_updated = "2026-02-03"
    creator = "SDLE Research Center"
    license = "CC BY-SA 4.0"
    format = "OWL"
    download_url = "https://cwrusdle.bitbucket.io/files/MDS_Onto/index-en.html"

    def _is_anonymous_id(self, label: str) -> bool:
        """Report whether ``label`` should be treated as an anonymous identifier.

        A label is anonymous when it begins with ``ont0`` (for example
        ``ont00000562``) or when the parent class's check accepts it.
        """
        # The MDSOnto-specific prefix test runs first; the parent's check is
        # only consulted when the prefix does not match.
        return bool(re.match(r'^ont0', label) or super()._is_anonymous_id(label))
|
|
1253
|
+
|
|
1254
|
+
|
|
1255
|
+
# def contains_imports(self) -> bool:
|
|
1256
|
+
# """Hook: Check if the ontology contains imports."""
|
|
1257
|
+
# return True # Set to True if your ontology imports other ontologies
|
ontolearner/ontology/medicine.py
CHANGED
|
@@ -33,14 +33,13 @@ class BTO(BaseOntology):
|
|
|
33
33
|
format = "OWL"
|
|
34
34
|
download_url = "https://terminology.tib.eu/ts/ontologies/BTO"
|
|
35
35
|
|
|
36
|
-
|
|
37
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
36
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
38
37
|
"""Override to handle BTO-specific blank nodes."""
|
|
39
38
|
if re.match(r'^BTO_[0-9]+$', label):
|
|
40
39
|
return True
|
|
41
40
|
|
|
42
41
|
# Check the general patterns from the parent class
|
|
43
|
-
if
|
|
42
|
+
if super()._is_anonymous_id(label):
|
|
44
43
|
return True
|
|
45
44
|
|
|
46
45
|
return False
|
|
@@ -181,8 +181,7 @@ class DUO(BaseOntology):
|
|
|
181
181
|
format = "OWL"
|
|
182
182
|
download_url = "https://terminology.tib.eu/ts/ontologies/DUO/"
|
|
183
183
|
|
|
184
|
-
|
|
185
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
184
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
186
185
|
"""Override to handle DUO-specific blank nodes."""
|
|
187
186
|
if re.match(r'^APOLLO_SV_[0-9]+$', label):
|
|
188
187
|
return True
|
|
@@ -191,7 +190,7 @@ class DUO(BaseOntology):
|
|
|
191
190
|
return True
|
|
192
191
|
|
|
193
192
|
# Check the general patterns from the parent class
|
|
194
|
-
if
|
|
193
|
+
if super()._is_anonymous_id(label):
|
|
195
194
|
return True
|
|
196
195
|
|
|
197
196
|
return False
|
|
@@ -327,14 +326,13 @@ class Metadata4Ing(BaseOntology):
|
|
|
327
326
|
format = "TTL"
|
|
328
327
|
download_url = "https://git.rwth-aachen.de/nfdi4ing/metadata4ing/metadata4ing"
|
|
329
328
|
|
|
330
|
-
|
|
331
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
329
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
332
330
|
"""Override to handle Metadata4Ing-specific blank nodes."""
|
|
333
331
|
if re.match(r'^\d{4}-\d{4}-\d{4}-\d{4}$', label):
|
|
334
332
|
return True
|
|
335
333
|
|
|
336
334
|
# Check the general patterns from the parent class
|
|
337
|
-
if
|
|
335
|
+
if super()._is_anonymous_id(label):
|
|
338
336
|
return True
|
|
339
337
|
|
|
340
338
|
return False
|
|
@@ -517,14 +515,13 @@ class SWO(BaseOntology):
|
|
|
517
515
|
format = "OWL"
|
|
518
516
|
download_url = "https://terminology.tib.eu/ts/ontologies/SWO"
|
|
519
517
|
|
|
520
|
-
|
|
521
|
-
def _is_anonymous_id(label: str) -> bool:
|
|
518
|
+
def _is_anonymous_id(self, label: str) -> bool:
|
|
522
519
|
"""Override to handle SWO-specific blank nodes."""
|
|
523
520
|
if re.match(r'^SWO_[0-9]+$', label):
|
|
524
521
|
return True
|
|
525
522
|
|
|
526
523
|
# Check the general patterns from the parent class
|
|
527
|
-
if
|
|
524
|
+
if super()._is_anonymous_id(label):
|
|
528
525
|
return True
|
|
529
526
|
|
|
530
527
|
return False
|
ontolearner/processor.py
CHANGED
|
@@ -850,13 +850,13 @@ If you find our work helpful, feel free to give us a cite.
|
|
|
850
850
|
|
|
851
851
|
# Commit and push ontology repository
|
|
852
852
|
repo.git_add(auto_lfs_track=True)
|
|
853
|
-
commit_message = f"
|
|
853
|
+
commit_message = f"✨ Added {self.processed_ontology['ontology_id']} ontology!"
|
|
854
854
|
repo.git_commit(commit_message)
|
|
855
855
|
repo.git_push()
|
|
856
856
|
|
|
857
857
|
# Commit and push metrics repository
|
|
858
858
|
metrics_repo.git_add(auto_lfs_track=True)
|
|
859
|
-
metrics_commit_message = f"
|
|
859
|
+
metrics_commit_message = f"📝 Update metrics for {self.processed_ontology['ontology_id']}"
|
|
860
860
|
metrics_repo.git_commit(metrics_commit_message)
|
|
861
861
|
metrics_repo.git_push()
|
|
862
862
|
|
|
@@ -864,7 +864,7 @@ If you find our work helpful, feel free to give us a cite.
|
|
|
864
864
|
"status": "success",
|
|
865
865
|
"repository": repo_id,
|
|
866
866
|
"metrics_repository": metrics_repo_id,
|
|
867
|
-
"ontology_id": self.ontology_id,
|
|
867
|
+
"ontology_id": self.processed_ontology['ontology_id'],
|
|
868
868
|
"url": f"https://huggingface.co/datasets/{repo_id}",
|
|
869
869
|
"metrics_url": f"https://huggingface.co/spaces/{metrics_repo_id}"
|
|
870
870
|
}
|
|
@@ -200,10 +200,73 @@ class SyntheticDataSplitter:
|
|
|
200
200
|
|
|
201
201
|
return terms_splits, types_splits, docs_split, types2docs_splits
|
|
202
202
|
|
|
203
|
-
def
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
203
|
+
def split_fine_grained(self, doc_ids):
    """
    Build a single split bundle for the given documents.

    Args:
        doc_ids: Iterable of document ids. Every id is converted with
            ``str`` before any lookup. ``None`` or an empty iterable
            produces an empty bundle.

    Returns:
        A dict with exactly these keys:
        - "documents": list of ``{"id", "title", "text"}`` dicts, ordered
          by id string so the output is reproducible between runs
        - "terms": sorted terms collected from the selected documents
        - "types": sorted types collected from the selected documents
        - "terms2docs": term -> sorted ids of the selected documents that
          ``self.term_to_doc_id`` lists for that term
        - "terms2types": term -> the ``self.child_to_parent`` entry for
          that term (``[]`` when the term has no entry)

    Raises:
        KeyError: if a requested id is missing from ``self.doc_id_to_doc``
            (or from ``self.doc_id_to_terms`` / ``self.doc_id_to_types``,
            unless those mappings supply their own defaults).
    """
    # Normalize to string ids; presumably this matches how the lookup
    # tables are keyed -- confirm against the constructor.
    doc_ids = {str(d) for d in (doc_ids or [])}

    docs = []
    terms_set = set()
    types_set = set()

    # Iterate in sorted order: plain set iteration over strings changes
    # between interpreter runs (hash randomization), which would make the
    # "documents" list order non-reproducible.
    for doc_id in sorted(doc_ids):
        doc = self.doc_id_to_doc[doc_id]
        docs.append({"id": str(doc.id), "title": doc.title, "text": doc.text})

        terms_set.update(self.doc_id_to_terms[doc_id])
        types_set.update(self.doc_id_to_types[doc_id])

    terms = sorted(terms_set)
    types = sorted(types_set)

    # terms2docs: restrict each term's known documents to this split's ids.
    terms2docs = {
        term: sorted(self.term_to_doc_id.get(term, set()) & doc_ids)
        for term in terms
    }

    # terms2types: ontology lookup (term -> parent types).
    # NOTE(review): the stored value is returned as-is, not copied.
    terms2types = {term: self.child_to_parent.get(term, []) for term in terms}

    return {
        "documents": docs,
        "terms": terms,
        "types": types,
        "terms2docs": terms2docs,
        "terms2types": terms2types,
    }
|
|
246
|
+
|
|
247
|
+
def train_test_val_split(self, train: float = 0.8, val: float = 0.1, test: float = 0.1):
    """
    Split the data into train, validation and test bundles.

    Args:
        train: Value forwarded as ``train_percentage`` to
            ``set_train_val_test_sizes``.
        val: Value forwarded as ``val_percentage``.
        test: Value forwarded as ``test_percentage``.

    Returns:
        train_split, val_split, test_split

        Each split is whatever ``split_fine_grained`` returns for that
        split's document ids, i.e. a dict with keys:
        - "documents"
        - "terms"
        - "types"
        - "terms2docs"
        - "terms2types"
    """
    # Compute which docs go to which split.
    split_targets, split_docs_targets = self.set_train_val_test_sizes(
        train_percentage=train,
        val_percentage=val,
        test_percentage=test,
    )
    split_docs = self.create_train_val_test_splits(split_targets, split_docs_targets)
    # split_docs: {"train": set(doc_ids), "val": set(doc_ids), "test": set(doc_ids)}

    # A split name missing from split_docs falls back to an empty id set,
    # which yields an empty bundle rather than an error.
    train_split = self.split_fine_grained(split_docs.get("train", set()))
    val_split = self.split_fine_grained(split_docs.get("val", set()))
    test_split = self.split_fine_grained(split_docs.get("test", set()))

    return train_split, val_split, test_split
|