OntoLearner 1.4.7-py3-none-any.whl → 1.4.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ontolearner/VERSION +1 -1
- ontolearner/base/learner.py +15 -12
- ontolearner/learner/__init__.py +1 -1
- ontolearner/learner/label_mapper.py +1 -1
- ontolearner/learner/retriever/__init__.py +19 -0
- ontolearner/learner/retriever/crossencoder.py +129 -0
- ontolearner/learner/retriever/embedding.py +229 -0
- ontolearner/learner/retriever/learner.py +217 -0
- ontolearner/learner/retriever/llm_retriever.py +356 -0
- ontolearner/learner/retriever/ngram.py +123 -0
- ontolearner/learner/taxonomy_discovery/__init__.py +18 -0
- ontolearner/learner/taxonomy_discovery/alexbek.py +500 -0
- ontolearner/learner/taxonomy_discovery/rwthdbis.py +1082 -0
- ontolearner/learner/taxonomy_discovery/sbunlp.py +402 -0
- ontolearner/learner/taxonomy_discovery/skhnlp.py +1138 -0
- ontolearner/learner/term_typing/__init__.py +17 -0
- ontolearner/learner/term_typing/alexbek.py +1262 -0
- ontolearner/learner/term_typing/rwthdbis.py +379 -0
- ontolearner/learner/term_typing/sbunlp.py +478 -0
- ontolearner/learner/text2onto/__init__.py +16 -0
- ontolearner/learner/text2onto/alexbek.py +1219 -0
- ontolearner/learner/text2onto/sbunlp.py +598 -0
- {ontolearner-1.4.7.dist-info → ontolearner-1.4.9.dist-info}/METADATA +16 -12
- {ontolearner-1.4.7.dist-info → ontolearner-1.4.9.dist-info}/RECORD +26 -9
- ontolearner/learner/retriever.py +0 -101
- {ontolearner-1.4.7.dist-info → ontolearner-1.4.9.dist-info}/WHEEL +0 -0
- {ontolearner-1.4.7.dist-info → ontolearner-1.4.9.dist-info}/licenses/LICENSE +0 -0
{ontolearner-1.4.7.dist-info → ontolearner-1.4.9.dist-info}/RECORD
@@ -1,9 +1,9 @@
-ontolearner/VERSION,sha256=
+ontolearner/VERSION,sha256=x-xbkXEIv48hifmVFcVtJDdZj6d_bmXwy3Lp4d5pPVY,6
 ontolearner/__init__.py,sha256=E4yukFv2PV4uyztTPDWljCySY9AVDcDDzabuvxfabYE,1889
 ontolearner/_learner.py,sha256=2CRQvpsz8akIOdxTs2-KLJ-MssULrjpK-QDD3QXUJXI,5297
 ontolearner/_ontology.py,sha256=W1mp195SImqLKwaj4ueEaBWuLJg2jUdx1JT20Ds3fmQ,6950
 ontolearner/base/__init__.py,sha256=5pf-ltxzGp32xhEcPdbtm11wXJrYJMUeWG-mbcAYD8Q,705
-ontolearner/base/learner.py,sha256=
+ontolearner/base/learner.py,sha256=latiGv8p3nyPrxMp7g5B2MSF-JEInRwIlbOn09uh7io,18899
 ontolearner/base/ontology.py,sha256=JbMJ1-WUyHWQiNJL-DeaqcriUimLdqN3_ESROgqOPTQ,24772
 ontolearner/base/text2onto.py,sha256=iUXYZoqnwgebQuQzM-XSGTVRfHLlhjUK_z5XUvhRICc,5388
 ontolearner/data_structure/__init__.py,sha256=1HiKvk8FKjhYeI92RHnJXxyQbUJBi3JFytjQjthsY_s,599
@@ -12,12 +12,29 @@ ontolearner/data_structure/metric.py,sha256=4QKkZ5L1YK6hDTU-N5Z9I9Ha99DVHmGfYxK7
 ontolearner/evaluation/__init__.py,sha256=4BZr3BUXjQDTj4Aqlqy4THa80lZPsMuh1EBTCyi9Wig,842
 ontolearner/evaluation/evaluate.py,sha256=NYCVcmPqpyIxYZrMAim37gL-erdh698RD3t3eNTTgZc,1163
 ontolearner/evaluation/metrics.py,sha256=3Aw6ycJ3_Q6xfj4tMBJP6QcexUei0G16H0ZQWt87aRU,6286
-ontolearner/learner/__init__.py,sha256=
-ontolearner/learner/label_mapper.py,sha256
+ontolearner/learner/__init__.py,sha256=RKREPrrjzQ5KYvcOwC_2l7yFKwFBd6HoCwhX2H6Spg8,798
+ontolearner/learner/label_mapper.py,sha256=YMPeFKzJxoCYNU5z7QRYPbB88sWdu1iT6iBDpPsjn-4,3792
 ontolearner/learner/llm.py,sha256=3kq_IrwEPTFgeNVKZH9Er_OydJuDpRBtM3YXNNa8_KA,10343
 ontolearner/learner/prompt.py,sha256=0ckH7xphIDKczPe7G-rwiOxFGZ7RsLnpPlNW92b-31U,1574
 ontolearner/learner/rag.py,sha256=eysB2RvcWkVo53s8-kSbZtJv904YVTmdtxplM4ukUKM,4283
-ontolearner/learner/retriever.py,sha256=
+ontolearner/learner/retriever/__init__.py,sha256=G5XuJcTblqXVWboVW9StJ2Vo2xACp_kG5_w2nrueqlc,854
+ontolearner/learner/retriever/crossencoder.py,sha256=yurzGE4zydlBSwUefi1CugsWv34HEZ61qADG_-nILbo,4996
+ontolearner/learner/retriever/embedding.py,sha256=Lp9oA7LiOYaSWDvzG779KMv5keNl6Xv7hw0WpeaepDE,7875
+ontolearner/learner/retriever/learner.py,sha256=VcarTwwR8HNddJCh0loCQejDzZ_GO4NkdQUjEhLVy48,11181
+ontolearner/learner/retriever/llm_retriever.py,sha256=goInWYxrD9PSo_EsSKbNV8wEaSPvWY3LEC8XM7jlH64,12917
+ontolearner/learner/retriever/ngram.py,sha256=XgS1OeheKEIi7wfJHZgS8mWxKv9MQrP0apOJD_XSOnM,4575
+ontolearner/learner/taxonomy_discovery/__init__.py,sha256=-Hb5Dl6_6c4l1uIT2zWtyBWMq5cjVD4PNjxt5qJePl4,747
+ontolearner/learner/taxonomy_discovery/alexbek.py,sha256=kFEDvoKxLf-sB7-d5REkcC0DqXZpcA6ZSJ2QHrNoC5E,19010
+ontolearner/learner/taxonomy_discovery/rwthdbis.py,sha256=698Gze2cR-QIhpTbuaOFm7Q4p0lCbdWz3rO6rewJZ1s,41644
+ontolearner/learner/taxonomy_discovery/sbunlp.py,sha256=hyTxPMCdS2BIb9R61OQgT9ibZYmPd-vaj7KBCRCAggk,14987
+ontolearner/learner/taxonomy_discovery/skhnlp.py,sha256=nEsA1MJueEs25IC5B-4OAOn5R6mOfz_7C4xIUC6hNN4,45516
+ontolearner/learner/term_typing/__init__.py,sha256=2rBbgp8683GNVgB58T4xe76l4m-NTqL7MwpAnux0IDY,691
+ontolearner/learner/term_typing/alexbek.py,sha256=SzWQbndkhAjxETVbrJ4uyH7ykL_TMIwHozSS08zwjoM,46684
+ontolearner/learner/term_typing/rwthdbis.py,sha256=F6Jr1SrsbDOIe0Ee_FkDVGTG4wRWpM-R2YqrqEQiex0,14576
+ontolearner/learner/term_typing/sbunlp.py,sha256=Xd3UqMO3m_Skn_2geTN22MGQmSD6R8bYfPgubZre3IE,19820
+ontolearner/learner/text2onto/__init__.py,sha256=4-G6iel0Nxcj4nzPxUDqtFf9CMCzi8LghooOSAnbNfc,641
+ontolearner/learner/text2onto/alexbek.py,sha256=MySzxJUR0F3UyeS5rPIN988xxtPaoAxDFkBc-Q0vFTE,45494
+ontolearner/learner/text2onto/sbunlp.py,sha256=5p-s2Ixtntws5eO3gOUyYLpfZpCbOE0hG5gEcCwKHz4,24177
 ontolearner/ontology/__init__.py,sha256=F9Ta1qCX9mOxIK5CPRypEoglQNkpJ6SJpqziz73xKQE,1328
 ontolearner/ontology/agriculture.py,sha256=ZaXHNEFjbtsMH8M7HQ8ypnfJS4TUQy_as16fwv-kOKA,5903
 ontolearner/ontology/arts_humanities.py,sha256=K4ceDJL6PfIfSJZ86uQUkUXOVoiERG6ItgvVE2lhLKk,3996
@@ -53,7 +70,7 @@ ontolearner/tools/visualizer.py,sha256=cwijl4yYaS1SCLM5wbvRTEcbQj9Bjo4fHzZR6q6o8
 ontolearner/utils/__init__.py,sha256=pSEyU3dlPMADBqygqaaid44RdWf0Lo3Fvz-K_rQ7_Bw,733
 ontolearner/utils/io.py,sha256=3DqGK2p7c0onKi0Xxs16WB08uHfHUId3bW0dDKwyS0g,2110
 ontolearner/utils/train_test_split.py,sha256=Zlm42eT6QGWwlySyomCPIiTGmGqeN_h4z4xBY2EAOR8,11530
-ontolearner-1.4.
-ontolearner-1.4.
-ontolearner-1.4.
-ontolearner-1.4.
+ontolearner-1.4.9.dist-info/METADATA,sha256=c_V_1mUkxAhzJz04u1wRYU7xodpZQdiJXBVFzUCIMK8,11444
+ontolearner-1.4.9.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+ontolearner-1.4.9.dist-info/licenses/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
+ontolearner-1.4.9.dist-info/RECORD,,
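Each RECORD line above has the form `path,sha256=<digest>,<size-in-bytes>`, where the digest is the URL-safe base64 encoding of the file's SHA-256 hash with padding stripped. The snippet below is a minimal, generic sketch (not part of OntoLearner) for reproducing such an entry from an unpacked wheel; the local path in the example is only an illustration.

```python
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    """Format a wheel RECORD line: path,sha256=<urlsafe-b64 digest, no padding>,<size>."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode("ascii")
    return f"{path},sha256={digest},{len(data)}"

# Hypothetical usage: after unpacking the 1.4.9 wheel, this should reproduce the
# "+ontolearner/VERSION,sha256=...,6" line shown above.
print(record_entry("ontolearner/VERSION"))
```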
ontolearner/learner/retriever.py
DELETED
@@ -1,101 +0,0 @@
-# Copyright (c) 2025 SciKnowOrg
-#
-# Licensed under the MIT License (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://opensource.org/licenses/MIT
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ..base import AutoRetriever, AutoLearner
-from typing import Any, Optional
-import warnings
-
-class AutoRetrieverLearner(AutoLearner):
-    def __init__(self, base_retriever: Any = AutoRetriever(), top_k: int = 5, batch_size: int = -1):
-        super().__init__()
-        self.retriever = base_retriever
-        self.top_k = top_k
-        self._is_term_typing_fit = False
-        self._batch_size = batch_size
-
-    def load(self, model_id: str = "sentence-transformers/all-MiniLM-L6-v2"):
-        self.retriever.load(model_id=model_id)
-
-    def _retriever_fit(self, data: Any):
-        if isinstance(data, list) and all(isinstance(item, str) for item in data):
-            self.retriever.index(inputs=data)
-        else:
-            raise TypeError("Expected a list of strings for retriever at term-typing task.")
-
-    def _retriever_predict(self, data: Any, top_k: int) -> Any:
-        if isinstance(data, list):
-            return self.retriever.retrieve(query=data, top_k=top_k, batch_size=self._batch_size)
-        if isinstance(data, str):
-            return self.retriever.retrieve(query=[data], top_k=top_k)
-        raise TypeError(f"Unsupported data type {type(data)}. You should pass a List[str] or a str.")
-
-    def _term_typing(self, data: Any, test: bool = False) -> Optional[Any]:
-        """
-        during training: data = ["type-1", ...],
-        during testing: data = ['term-1', ...]
-        """
-        if test:
-            if self._is_term_typing_fit:
-                types = self._retriever_predict(data=data, top_k=self.top_k)
-                return [{"term": term, "types": type} for term, type in zip(data, types)]
-            else:
-                raise RuntimeError("Term typing model must be fit before prediction.")
-        else:
-            self._retriever_fit(data=data)
-            self._is_term_typing_fit = True
-
-    def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[Any]:
-        """
-        during training: data = ['type-1', ...],
-        during testing (same data): data = ['type-1', ...]
-        """
-        if test:
-            self._retriever_fit(data=data)
-            candidates_lst = self._retriever_predict(data=data, top_k=self.top_k + 1)
-            taxonomic_pairs = [{"parent": candidate, "child": query}
-                               for query, candidates in zip(data, candidates_lst)
-                               for candidate in candidates if candidate.lower() != query.lower()]
-            return taxonomic_pairs
-        else:
-            warnings.warn("No requirement for fitting the taxonomy discovery model; the predict module will use the input data to do the fit as well.")
-
-    def _non_taxonomic_re(self, data: Any, test: bool = False) -> Optional[Any]:
-        """
-        during training: data = ['type-1', ...],
-        during testing: {'types': [...], 'relations': [...]}
-        """
-        if test:
-            # print(data)
-            if 'types' not in data or 'relations' not in data:
-                raise ValueError("The non-taxonomic RE predict should take {'types': [...], 'relations': [...]}")
-            if len(data['types']) == 0:
-                warnings.warn("No `types` available to do the non-taxonomic RE prediction.")
-                return None
-            self._retriever_fit(data=data['types'])
-            candidates_lst = self._retriever_predict(data=data['types'], top_k=self.top_k + 1)
-            taxonomic_pairs = []
-            taxonomic_pairs_query = []
-            for query, candidates in zip(data['types'], candidates_lst):
-                for candidate in candidates:
-                    if candidate != query:
-                        taxonomic_pairs.append((query, candidate))
-                        taxonomic_pairs_query.append(f"Head: {query} \n Tail: {candidate}")
-            self._retriever_fit(data=data['relations'])
-            candidate_relations_lst = self._retriever_predict(data=taxonomic_pairs_query, top_k=self.top_k)
-            non_taxonomic_re = [{"head": head, "tail": tail, "relation": relation}
-                                for (head, tail), candidate_relations in zip(taxonomic_pairs, candidate_relations_lst)
-                                for relation in candidate_relations]
-            return non_taxonomic_re
-        else:
-            warnings.warn("No requirement for fitting the non-taxonomic RE model; the predict module will use the input data to do the fit as well.")
{ontolearner-1.4.7.dist-info → ontolearner-1.4.9.dist-info}/WHEEL
File without changes

{ontolearner-1.4.7.dist-info → ontolearner-1.4.9.dist-info}/licenses/LICENSE
File without changes
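For context on the removed module: the deleted AutoRetrieverLearner wrapped a retriever for term typing, taxonomy discovery, and non-taxonomic relation extraction. The sketch below is based only on the deleted code shown above; the import path reflects the pre-1.4.9 layout (in 1.4.9 the retriever code moves under ontolearner/learner/retriever/), the example terms and types are made up, and in normal use these task methods are presumably invoked through the AutoLearner fit/predict interface rather than called directly.

```python
# Minimal sketch against the pre-1.4.9 layout shown above (assumptions noted in the lead-in).
from ontolearner.base import AutoRetriever
from ontolearner.learner.retriever import AutoRetrieverLearner  # old path; removed in 1.4.9

learner = AutoRetrieverLearner(base_retriever=AutoRetriever(), top_k=5)
learner.load(model_id="sentence-transformers/all-MiniLM-L6-v2")

# Term typing: index the candidate types first ("fit"), then retrieve types per term.
learner._term_typing(["material", "process", "property"], test=False)     # hypothetical types
predictions = learner._term_typing(["graphene", "annealing"], test=True)  # hypothetical terms
# -> [{"term": "graphene", "types": [...]}, {"term": "annealing", "types": [...]}]

# Taxonomy discovery indexes and queries the same list of types at prediction time.
pairs = learner._taxonomy_discovery(["material", "metal", "alloy"], test=True)
# -> [{"parent": ..., "child": ...}, ...]
```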