OntoLearner 1.4.8__py3-none-any.whl → 1.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ontolearner/VERSION CHANGED
@@ -1 +1 @@
1
- 1.4.8
1
+ 1.4.9
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from .llm import AutoLLMLearner, FalconLLM, MistralLLM
16
- from .retriever import AutoRetrieverLearner
16
+ from .retriever import AutoRetrieverLearner, LLMAugmentedRetrieverLearner
17
17
  from .rag import AutoRAGLearner
18
18
  from .prompt import StandardizedPrompting
19
19
  from .label_mapper import LabelMapper
@@ -0,0 +1,19 @@
1
+ # Copyright (c) 2025 SciKnowOrg
2
+ #
3
+ # Licensed under the MIT License (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://opensource.org/licenses/MIT
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .crossencoder import CrossEncoderRetriever
16
+ from .embedding import GloveRetriever, Word2VecRetriever
17
+ from .ngram import NgramRetriever
18
+ from .learner import AutoRetrieverLearner, LLMAugmentedRetrieverLearner
19
+ from .llm_retriever import LLMAugmenterGenerator, LLMAugmenter, LLMAugmentedRetriever
@@ -0,0 +1,129 @@
1
+ # Copyright (c) 2025 SciKnowOrg
2
+ #
3
+ # Licensed under the MIT License (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://opensource.org/licenses/MIT
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ from typing import List
16
+ from sentence_transformers import CrossEncoder, SentenceTransformer, util
17
+ from tqdm import tqdm
18
+ import numpy as np
19
+
20
+ from ...base import AutoRetriever
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class CrossEncoderRetriever(AutoRetriever):
    """
    Two-stage dense retriever: a BiEncoder proposes candidates quickly and a
    CrossEncoder reranks them for accuracy.

    Stage 1 (BiEncoder): documents and queries are embedded once, and candidate
    documents are found via approximate nearest-neighbor vector search.
    Stage 2 (CrossEncoder): each (query, candidate) pair is scored jointly and
    the candidates are reordered, keeping only the best results.

    This keeps most of the quality of a pure CrossEncoder while staying close
    to BiEncoder speed, since the expensive pairwise scoring only runs on a
    small candidate pool.
    """

    def __init__(self, bi_encoder_model_id: str = None) -> None:
        """
        Initialize the retriever.

        Args:
            bi_encoder_model_id (str, optional):
                Model ID of the first-stage BiEncoder. When omitted, the
                CrossEncoder model ID given to :meth:`load` is reused for the
                BiEncoder as well.
        """
        super().__init__()
        self.bi_encoder_model_id = bi_encoder_model_id

    def load(self, model_id: str):
        """
        Instantiate both stage models.

        Args:
            model_id (str):
                CrossEncoder (reranker) model ID. Also used for the BiEncoder
                when no explicit BiEncoder ID was supplied at construction.

        Notes:
            - The BiEncoder handles fast vector similarity search.
            - The CrossEncoder handles slow but accurate reranking.
        """
        if not self.bi_encoder_model_id:
            self.bi_encoder_model_id = model_id
        self.bi_encoder = SentenceTransformer(self.bi_encoder_model_id)
        self.cross_encoder = CrossEncoder(model_id)

    def index(self, inputs: List[str]):
        """
        Pre-encode the document collection with the BiEncoder.

        Args:
            inputs (List[str]): Documents to index.

        Stores:
            - `self.documents`: the raw input documents.
            - `self.document_embeddings`: tensor of BiEncoder embeddings.
        """
        self.documents = inputs
        self.document_embeddings = self.bi_encoder.encode(inputs, convert_to_tensor=True, show_progress_bar=True)

    def retrieve(self, query: List[str], top_k: int = 5, rerank_k: int = 100, batch_size: int = 32) -> List[List[str]]:
        """
        Two-stage retrieval: fetch `rerank_k` candidates per query with the
        BiEncoder, then keep the `top_k` best after CrossEncoder reranking.

        Args:
            query (List[str]): User query strings.
            top_k (int): Number of documents returned per query after reranking.
            rerank_k (int): Size of the candidate pool fed to the reranker.
            batch_size (int): Batch size for CrossEncoder inference.

        Returns:
            List[List[str]]: One list of top-k reranked documents per query.
        """
        # Stage 1: embed the queries and collect candidate pools.
        query_embeddings = self.bi_encoder.encode(
            query, convert_to_tensor=True, show_progress_bar=True
        )
        hits_batch = util.semantic_search(query_embeddings, self.document_embeddings, top_k=rerank_k)

        # Stage 2: jointly score every (query, candidate) pair and reorder.
        results: List[List[str]] = []
        for q_idx, hits in enumerate(tqdm(hits_batch, desc="Reranking")):
            candidates = [self.documents[hit["corpus_id"]] for hit in hits]
            pair_scores = self.cross_encoder.predict(
                [(query[q_idx], doc) for doc in candidates],
                batch_size=batch_size,
                show_progress_bar=False,
            )
            order = np.argsort(pair_scores)[::-1][:top_k]
            results.append([candidates[pos] for pos in order])

        return results
@@ -0,0 +1,229 @@
1
+ # Copyright (c) 2025 SciKnowOrg
2
+ #
3
+ # Licensed under the MIT License (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://opensource.org/licenses/MIT
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ import torch
16
+ import torch.nn.functional as F
17
+ import numpy as np
18
+
19
+ from tqdm import tqdm
20
+ from typing import List, Optional
21
+ from sklearn.metrics.pairwise import cosine_similarity
22
+ from gensim.models import KeyedVectors
23
+ from gensim.utils import simple_preprocess
24
+
25
+ from ...base import AutoRetriever
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class Word2VecRetriever(AutoRetriever):
    """
    Bag-of-embeddings retriever built on Word2Vec-style vectors.

    Each document (and query) is represented by the mean of its in-vocabulary
    word embeddings; ranking is done by cosine similarity between the
    L2-normalized query and document vectors.
    """

    def __init__(self) -> None:
        """
        Create an empty retriever. Call :meth:`load` before :meth:`index`
        or :meth:`retrieve`.
        """
        super().__init__()
        self.embedding_model: Optional[KeyedVectors] = None
        self.documents: List[str] = []
        self.embeddings: Optional[torch.Tensor] = None

    def load(self, model_id: str) -> None:
        """
        Load pre-trained Word2Vec vectors.

        Args:
            model_id (str):
                Path to a Word2Vec vector file (loaded in binary format).
        """
        self.embedding_model = KeyedVectors.load_word2vec_format(model_id, binary=True)

    def _encode_text(self, text: str) -> np.ndarray:
        """
        Encode *text* as the mean of its in-vocabulary word vectors.

        Args:
            text (str): Input text string.

        Returns:
            np.ndarray: Averaged embedding, or a zero vector of the model's
            dimensionality when no token is in the vocabulary.
        """
        if self.embedding_model is None:
            raise RuntimeError("Word2Vec model must be loaded before encoding.")

        tokens = simple_preprocess(text)
        hits = [self.embedding_model[tok] for tok in tokens if tok in self.embedding_model]

        if hits:
            return np.mean(hits, axis=0)
        return np.zeros(self.embedding_model.vector_size)

    def index(self, inputs: List[str]) -> None:
        """
        Encode and index the document collection.

        Args:
            inputs (List[str]): Documents to index.

        Stores:
            - self.documents: the input documents.
            - self.embeddings: L2-normalized document embeddings.
        """
        self.documents = inputs
        doc_matrix = np.stack([self._encode_text(doc) for doc in tqdm(inputs)])
        self.embeddings = F.normalize(torch.tensor(doc_matrix), p=2, dim=1)

    def retrieve(self, query: List[str], top_k: int = 5, batch_size: int = -1) -> List[List[str]]:
        """
        Return the top-k most similar documents for each query.

        Args:
            query (List[str]): Query texts.
            top_k (int): Number of results per query.
            batch_size (int): Query batch size; -1 processes all at once.

        Returns:
            List[List[str]]: One list of top-k matching documents per query.
        """
        if self.embeddings is None:
            raise RuntimeError("Documents must be indexed before retrieval.")

        q_matrix = np.stack([self._encode_text(q) for q in query])
        q_matrix = F.normalize(torch.tensor(q_matrix), p=2, dim=1)

        step = len(query) if batch_size == -1 else batch_size

        results: List[List[str]] = []
        for start in tqdm(range(0, len(query), step)):
            sims = cosine_similarity(q_matrix[start:start + step], self.embeddings)
            best = np.argsort(sims, axis=1)[:, ::-1][:, :top_k]
            results.extend([self.documents[doc_idx] for doc_idx in row] for row in best)

        return results
126
+
127
+
128
class GloveRetriever(AutoRetriever):
    """
    Retriever backed by GloVe word vectors.

    Each document is encoded by averaging the embeddings of all of its words
    that exist in the GloVe vocabulary; ranking uses cosine similarity over
    L2-normalized vectors.
    """

    def __init__(self) -> None:
        """
        Create an empty retriever; :meth:`load` must be called before use.
        """
        super().__init__()
        self.embedding_model: Optional[dict] = None
        self.documents: List[str] = []
        self.embeddings: Optional[torch.Tensor] = None

    def load(self, model_id: str) -> None:
        """
        Read GloVe embeddings from a plain-text vector file.

        Args:
            model_id (str):
                Path to a GloVe `.txt` file, e.g. `glove.6B.300d.txt`.
        """
        logger.info(f"Loading GloVe embeddings from {model_id} ...")
        self.embedding_model = {}

        with open(model_id, "r", encoding="utf8") as handle:
            for line in handle:
                # Each line is: <word> <float> <float> ... <float>
                token, *components = line.split()
                self.embedding_model[token] = [float(c) for c in components]

        logger.info(f"Loaded {len(self.embedding_model)} GloVe words.")

    def _encode_text(self, text: str) -> np.ndarray:
        """
        Encode *text* as the mean of its in-vocabulary GloVe vectors.

        Args:
            text (str): Input text.

        Returns:
            np.ndarray: Averaged embedding; a zero vector when no word matches.
        """
        if self.embedding_model is None:
            raise RuntimeError("GloVe model must be loaded before encoding.")

        hits = [self.embedding_model[tok] for tok in text.lower().split() if tok in self.embedding_model]

        if hits:
            return np.mean(hits, axis=0)

        # Dimensionality is taken from an arbitrary stored vector.
        dim = len(next(iter(self.embedding_model.values())))
        return np.zeros(dim)

    def index(self, inputs: List[str]) -> None:
        """
        Encode and normalize the document collection.

        Args:
            inputs (List[str]): Documents to index.
        """
        if self.embedding_model is None:
            raise RuntimeError("You must load a GloVe model before indexing.")

        self.documents = inputs
        doc_matrix = np.stack([self._encode_text(doc) for doc in tqdm(inputs)])
        self.embeddings = F.normalize(torch.tensor(doc_matrix), p=2, dim=1)

    def retrieve(self, query: List[str], top_k: int = 5, batch_size: int = -1) -> List[List[str]]:
        """
        Return the top-k most similar documents for each query.

        Args:
            query (List[str]): Query texts.
            top_k (int): Number of results per query.
            batch_size (int): Query batch size; -1 processes all at once.

        Returns:
            List[List[str]]: One list of top-k matching documents per query.
        """
        if self.embeddings is None:
            raise RuntimeError("Documents must be indexed before retrieval.")

        q_matrix = np.stack([self._encode_text(q) for q in query])
        q_matrix = F.normalize(torch.tensor(q_matrix), p=2, dim=1)

        step = len(query) if batch_size == -1 else batch_size

        results: List[List[str]] = []
        for start in tqdm(range(0, len(query), step)):
            sims = cosine_similarity(q_matrix[start:start + step], self.embeddings)
            best = np.argsort(sims, axis=1)[:, ::-1][:, :top_k]
            results.extend([self.documents[doc_idx] for doc_idx in row] for row in best)

        return results
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from ..base import AutoRetriever, AutoLearner
15
+ from ...base import AutoRetriever, AutoLearner
16
16
  from typing import Any, Optional
17
17
  import warnings
18
18
 
@@ -120,3 +120,98 @@ class AutoRetrieverLearner(AutoLearner):
120
120
  return non_taxonomic_re
121
121
  else:
122
122
  warnings.warn("No requirement for fiting the non-taxonomic RE model, the predict module will use the input data to do the fit as well..")
123
+
124
+
125
+
126
class LLMAugmentedRetrieverLearner(AutoRetrieverLearner):
    """
    Retriever learner whose underlying retriever expands queries with
    LLM-generated augmentations before retrieval.

    Differs from ``AutoRetrieverLearner`` by passing a ``task`` hint to every
    retrieval call, so a task-aware retriever (e.g. ``LLMAugmentedRetriever``)
    can decide whether and how to augment the queries.

    NOTE(review): relies on attributes/methods inherited from
    ``AutoRetrieverLearner`` (``self.retriever``, ``self.top_k``,
    ``self._batch_size``, ``self._retriever_fit``,
    ``self._is_term_typing_fit``) that are defined outside this class.
    """

    def set_augmenter(self, augmenter):
        # Delegate to the wrapped retriever, which owns the augmenter.
        self.retriever.set_augmenter(augmenter=augmenter)

    def _retriever_predict(self, data: Any, top_k: int, task: str) -> Any:
        """
        Run retrieval for a batch (list) or a single query string, forwarding
        the ``task`` identifier so the retriever can branch on it.

        Raises:
            TypeError: if ``data`` is neither a list nor a string.
        """
        if isinstance(data, list):
            return self.retriever.retrieve(query=data, top_k=top_k, batch_size=self._batch_size, task=task)
        if isinstance(data, str):
            return self.retriever.retrieve(query=[data], top_k=top_k, task=task)
        raise TypeError(f"Unsupported data type {type(data)}. You should pass a List[str] or a str.")

    def _term_typing(self, data: Any, test: bool = False) -> Optional[Any]:
        """
        Term typing.

        during training: data = ["type-1", .... ],
        during testing: data = ['term-1', ...]

        Returns:
            In test mode, a list of ``{"term": ..., "types": [...]}`` dicts;
            training is delegated to the parent class.
        """
        if test:
            if self._is_term_typing_fit:
                types = self._retriever_predict(data=data, top_k=self.top_k, task='term-typing')
                return [{"term": term, "types": type} for term, type in zip(data, types)]
            else:
                raise RuntimeError("Term typing model must be fit before prediction.")
        else:
            # Training path: parent handles fitting; no value is returned.
            super()._term_typing(data=data, test=test)

    def _taxonomy_discovery(self, data: Any, test: bool = False) -> Optional[Any]:
        """
        Taxonomy discovery.

        during training: data = ['type-1', ...],
        during testing (same data): data= ['type-1', ...]

        Returns:
            In test mode, a deduplicated list of directional
            ``{"parent": ..., "child": ...}`` pairs; training is delegated to
            the parent class.
        """
        if test:
            # Re-fit on the test types so candidates are drawn from this set.
            self._retriever_fit(data=data)
            # top_k + 1 because a query typically retrieves itself as well.
            candidates_lst = self._retriever_predict(data=data, top_k=self.top_k + 1, task='taxonomy-discovery')
            # Emit both orientations for every (query, candidate) pair,
            # skipping self-matches (case-insensitive).
            taxonomic_pairs = [{"parent": candidate, "child": query}
                               for query, candidates in zip(data, candidates_lst)
                               for candidate in candidates if candidate.lower() != query.lower()]
            taxonomic_pairs += [{"parent": query, "child": candidate}
                                for query, candidates in zip(data, candidates_lst)
                                for candidate in candidates if candidate.lower() != query.lower()]
            unique_taxonomic_pairs, seen = [], set()
            for pair in taxonomic_pairs:
                key = (pair["parent"].lower(), pair["child"].lower())  # Directional key (parent, child)
                if key not in seen:
                    seen.add(key)
                    unique_taxonomic_pairs.append(pair)
            return unique_taxonomic_pairs
        else:
            # Training path delegated to the parent class.
            super()._taxonomy_discovery(data=data, test=test)

    def _non_taxonomic_re(self, data: Any, test: bool = False) -> Optional[Any]:
        """
        Non-taxonomic relation extraction.

        during training: data = ['type-1', ...],
        during testing: {'types': [...], 'relations': [... ]}

        Returns:
            In test mode, a list of ``{"head": ..., "tail": ..., "relation": ...}``
            dicts (one per retrieved relation per directional type pair), or
            ``None`` when no types are available; training is delegated to the
            parent class.
        """
        if test:
            # print(data)
            if 'types' not in data or 'relations' not in data:
                raise ValueError("The non-taxonomic re predict should take {'types': [...], 'relations': [... ]}")
            if len(data['types']) == 0:
                warnings.warn("No `types` avaliable to do the non-taxonomic re-prediction.")
                return None
            # Step 1: index the types and retrieve candidate partners per type.
            self._retriever_fit(data=data['types'])
            candidates_lst = self._retriever_predict(data=data['types'], top_k=self.top_k + 1, task='non-taxonomic-re')
            taxonomic_pairs = []
            taxonomic_pairs_query = []
            seen = set()
            # Build both directional (head, tail) pairs, deduplicated
            # case-insensitively, plus a textual query form for each pair.
            for query, candidates in zip(data['types'], candidates_lst):
                for candidate in candidates:
                    if candidate != query:
                        # Directional pair 1: query -> candidate
                        key1 = (query.lower(), candidate.lower())
                        if key1 not in seen:
                            seen.add(key1)
                            taxonomic_pairs.append((query, candidate))
                            taxonomic_pairs_query.append(f"Head: {query}\nTail: {candidate}")
                        # Directional pair 2: candidate -> query
                        key2 = (candidate.lower(), query.lower())
                        if key2 not in seen:
                            seen.add(key2)
                            taxonomic_pairs.append((candidate, query))
                            taxonomic_pairs_query.append(f"Head: {candidate}\nTail: {query}")

            # Step 2: re-index on the relation labels and retrieve candidate
            # relations for every pair query.
            self._retriever_fit(data=data['relations'])
            candidate_relations_lst = self._retriever_predict(data=taxonomic_pairs_query, top_k=self.top_k,
                                                              task='non-taxonomic-re')
            non_taxonomic_re = [{"head": head, "tail": tail, "relation": relation}
                                for (head, tail), candidate_relations in zip(taxonomic_pairs, candidate_relations_lst)
                                for relation in candidate_relations]
            return non_taxonomic_re
        else:
            # Training path delegated to the parent class.
            super()._non_taxonomic_re(data=data, test=test)
@@ -0,0 +1,356 @@
1
+ # Copyright (c) 2025 SciKnowOrg
2
+ #
3
+ # Licensed under the MIT License (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://opensource.org/licenses/MIT
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC
16
+ from typing import Any, List, Dict
17
+ from openai import OpenAI
18
+ import time
19
+ from tqdm import tqdm
20
+
21
+ from ...base import AutoRetriever
22
+ from ...utils import load_json
23
+
24
+
25
+ class LLMAugmenterGenerator(ABC):
26
+ """
27
+ A generator class responsible for creating augmented query candidates using LLMs
28
+ such as GPT-4 and GPT-3.5. This class provides augmentation support for
29
+ three ontology-learning tasks:
30
+
31
+ - term-typing
32
+ - taxonomy-discovery
33
+ - non-taxonomic relation extraction
34
+
35
+ For taxonomy discovery, it invokes a function-calling LLM that returns
36
+ candidate parent classes for each query term.
37
+
38
+ Attributes:
39
+ client (OpenAI): OpenAI API client used for LLM inference.
40
+ model_id (str): The LLM model identifier.
41
+ term_typing_function (list): Function call schema for term typing (currently unused).
42
+ taxonomy_discovery_function (list): Function call schema for taxonomy discovery.
43
+ non_taxonomic_re_function (list): Function call schema for non-taxonomic relation extraction.
44
+ top_n_candidate (int): Number of augmented candidates to generate per query.
45
+ term_typing_prompt (str): Prompt template used for term typing tasks.
46
+ taxonomy_discovery_prompt (str): Prompt template used for taxonomy discovery.
47
+ non_taxonomic_re_prompt (str): Prompt template for non-taxonomic RE.
48
+ """
49
+
50
+ def __init__(self, model_id: str = 'gpt-4.1-mini', token: str = '', top_n_candidate: int = 5) -> None:
51
+ """
52
+ Initialize the LLM augmenter generator.
53
+
54
+ Args:
55
+ model_id (str): Name of the OpenAI model to use.
56
+ token (str): API key for authentication.
57
+ top_n_candidate (int): Number of generated candidate parents per query.
58
+ """
59
+ self.client = OpenAI(api_key=token)
60
+
61
+ self.model_id = model_id
62
+
63
+ self.term_typing_function = []
64
+ self.taxonomy_discovery_function = [
65
+ {
66
+ "name": "discover_taxonomy_parents",
67
+ "description": "Given a specific type or class (the query), identify potential parent classes that form valid hierarchical (is-a) relationships within a taxonomy.",
68
+ "parameters": {
69
+ "type": "object",
70
+ "properties": {
71
+ "candidate_parents": {
72
+ "type": "array",
73
+ "items": {"type": "string"},
74
+ "description": "A ranked list of candidate parent classes representing higher-level categories."
75
+ }
76
+ },
77
+ "required": ["candidate_parents"]
78
+ }
79
+ }
80
+ ]
81
+
82
+ self.non_taxonomic_re_function = []
83
+ self.top_n_candidate = top_n_candidate
84
+
85
+ self.term_typing_prompt = ""
86
+ self.taxonomy_discovery_prompt = (
87
+ "Given a type (or class) {query}, generate a list of the top {top_n_candidate} candidate classes "
88
+ "that can form hierarchical (is-a) relationships, where each of these classes is a parent of {query}."
89
+ )
90
+ self.non_taxonomic_re_prompt = ""
91
+
92
+ def get_config(self) -> Dict[str, Any]:
93
+ """
94
+ Get augmenter configuration metadata.
95
+
96
+ Returns:
97
+ dict: Dictionary containing the augmentation configuration.
98
+ """
99
+ return {
100
+ "top_n_candidate": self.top_n_candidate,
101
+ "augmenter_model": self.model_id
102
+ }
103
+
104
+ def generate(self, conversation, function):
105
+ """
106
+ Call an LLM to produce augmented candidates using function-calling.
107
+
108
+ Args:
109
+ conversation (list): Dialogue messages to send to the LLM.
110
+ function (list): Function schemas supplied to the model.
111
+
112
+ Returns:
113
+ list[str]: A list of top-k generated candidates.
114
+ """
115
+ while True:
116
+ try:
117
+ completion = self.client.chat.completions.create(
118
+ model=self.model_id,
119
+ messages=conversation,
120
+ functions=function
121
+ )
122
+ inference = eval(completion.choices[0].message.function_call.arguments)['candidate_parents'][:self.top_n_candidate]
123
+ assert len(inference) == self.top_n_candidate
124
+ break
125
+ except Exception:
126
+ print("sleep for 5 seconds")
127
+ time.sleep(5)
128
+
129
+ return inference
130
+
131
+ def tasks_data_former(self, data: Any, task: str) -> List[str] | Dict[str, List[str]]:
132
+ """
133
+ Convert raw dataset input into query lists depending on the ontology-learning task.
134
+
135
+ Args:
136
+ data (Any): Input dataset object.
137
+ task (str): One of {'term-typing', 'taxonomy-discovery', 'non-taxonomic-re'}.
138
+
139
+ Returns:
140
+ List[str] or Dict[str, List[str]]: Formatted query inputs.
141
+ """
142
+ formatted_data = []
143
+ if task == "term-typing":
144
+ for typing in data.term_typings:
145
+ formatted_data.append(typing.term)
146
+ formatted_data = list(set(formatted_data))
147
+
148
+ if task == "taxonomy-discovery":
149
+ for taxonomic_pairs in data.type_taxonomies.taxonomies:
150
+ formatted_data.append(taxonomic_pairs.parent)
151
+ formatted_data.append(taxonomic_pairs.child)
152
+ formatted_data = list(set(formatted_data))
153
+
154
+ if task == "non-taxonomic-re":
155
+ non_taxonomic_types = []
156
+ non_taxonomic_res = []
157
+ for triplet in data.type_non_taxonomic_relations.non_taxonomies:
158
+ non_taxonomic_types.extend([triplet.head, triplet.tail])
159
+ non_taxonomic_res.append(triplet.relation)
160
+ formatted_data = {"types": list(set(non_taxonomic_types)), "relations": list(set(non_taxonomic_res))}
161
+
162
+ return formatted_data
163
+
164
+ def _augment(self, query, conversations, function):
165
+ """
166
+ Internal helper to generate augmented candidates for a batch of queries.
167
+
168
+ Args:
169
+ query (list[str]): Input query terms.
170
+ conversations (list): LLM conversation blocks for each query.
171
+ function (list): Function-calling schemas.
172
+
173
+ Returns:
174
+ dict[str, list[str]]: Mapping from query → list of augmented candidates.
175
+ """
176
+ results = {}
177
+ for qu, conversation in tqdm(zip(query, conversations)):
178
+ results[qu] = self.generate(conversation=conversation, function=function)
179
+ return results
180
+
181
+ def augment_term_typing(self, query: List[str]) -> List[str]:
182
+ """
183
+ Augment term-typing queries.
184
+
185
+ Currently a passthrough: no augmentation is performed.
186
+
187
+ Args:
188
+ query (list[str]): Query terms.
189
+
190
+ Returns:
191
+ list[str]: Unmodified query terms.
192
+ """
193
+ return query
194
+
195
+ def augment_non_taxonomic_re(self, query: List[str]) -> List[str]:
196
+ """
197
+ Augment non-taxonomic relation extraction queries.
198
+
199
+ Currently a passthrough.
200
+
201
+ Args:
202
+ query (list[str]): Query terms.
203
+
204
+ Returns:
205
+ list[str]: Unmodified query terms.
206
+ """
207
+ return query
208
+
209
+ def augment_taxonomy_discovery(self, query: List[str]) -> Dict[str, List[str]]:
210
+ """
211
+ Generate augmented candidates for taxonomy discovery.
212
+
213
+ Args:
214
+ query (list[str]): List of type/class names to augment.
215
+
216
+ Returns:
217
+ dict[str, list[str]]: Mapping of original query → list of candidate parents.
218
+ """
219
+ conversations = []
220
+ for qu in query:
221
+ prompt = self.taxonomy_discovery_prompt.format(query=qu, top_n_candidate=self.top_n_candidate)
222
+ conversation = [
223
+ {"role": "system", "content": "Discover possible taxonomy parents."},
224
+ {"role": "user", "content": prompt}
225
+ ]
226
+ conversations.append(conversation)
227
+
228
+ return self._augment(query=query, conversations=conversations, function=self.taxonomy_discovery_function)
229
+
230
+ def augment(self, data: Any, task: str):
231
+ """
232
+ Main entry point for all augmentation modes.
233
+
234
+ Args:
235
+ data (Any): Dataset object to format and augment.
236
+ task (str): Task type.
237
+
238
+ Returns:
239
+ Any: Augmented output suitable for a retriever.
240
+
241
+ Raises:
242
+ ValueError: If an invalid task type is given.
243
+ """
244
+ data = self.tasks_data_former(data=data, task=task)
245
+ if task == 'term-typing':
246
+ return self.augment_term_typing(data)
247
+ elif task == 'taxonomy-discovery':
248
+ return self.augment_taxonomy_discovery(data)
249
+ elif task == 'non-taxonomic-re':
250
+ return self.augment_non_taxonomic_re(data)
251
+ else:
252
+ raise ValueError(f"{task} is not a valid task.")
253
+
254
+
255
class LLMAugmenter:
    """
    A lightweight augmenter that serves precomputed augmentations loaded
    from a JSON file, avoiding live LLM calls.

    Attributes:
        augments (dict): Loaded augmentation data (keyed by task, then query).
        top_n_candidate (int): Number of augmentation candidates per query,
            taken from the file's 'config' section.
    """

    def __init__(self, path: str) -> None:
        """
        Build an augmenter from offline augmentation data.

        Args:
            path (str): Path to a JSON file containing saved augmentations.
        """
        self.augments = load_json(path)
        self.top_n_candidate = self.augments['config']['top_n_candidate']

    def transform(self, query: str, task: str) -> List[str]:
        """
        Look up the augmented variants of *query* for *task*.

        Args:
            query (str): Input query term.
            task (str): Task identifier.

        Returns:
            list[str]: Augmented candidates for taxonomy discovery; for any
            other task (or unknown query) the query itself, unchanged.
        """
        if task != 'taxonomy-discovery':
            return [query]
        return self.augments[task].get(query, [query])
289
+
290
+
291
class LLMAugmentedRetriever(AutoRetriever):
    """
    A retriever that enhances queries using LLM-based augmentation before retrieving documents.

    Supports special augmentation logic for taxonomy discovery where each input query
    is expanded into several augmented variants; results across variants are
    merged (deduplicated) per original query.

    Attributes:
        augmenter: An augmenter instance that provides transform() and top_n_candidate.
    """

    def __init__(self) -> None:
        """
        Initialize the augmented retriever with no augmenter attached.
        """
        super().__init__()
        self.augmenter = None

    def set_augmenter(self, augmenter):
        """
        Attach an augmenter instance.

        Args:
            augmenter: An object providing `transform(query, task)` and `top_n_candidate`.
        """
        self.augmenter = augmenter

    def retrieve(self, query: List[str], top_k: int = 5, batch_size: int = -1, task: str = None) -> List[List[str]]:
        """
        Retrieve documents for a batch of queries, optionally using query augmentation.

        Args:
            query (list[str]): List of input query terms.
            top_k (int): Number of documents to retrieve per variant.
            batch_size (int): Batch size for retrieval.
            task (str): Optional task identifier; augmentation is applied only
                for 'taxonomy-discovery'.

        Returns:
            list[list[str]]: A list of document lists, one per input query
            (deduplicated union across variants for taxonomy discovery).

        Raises:
            RuntimeError: if taxonomy-discovery retrieval is requested before
                an augmenter has been attached via :meth:`set_augmenter`.
        """
        parent_retrieve = super(LLMAugmentedRetriever, self).retrieve

        if task != 'taxonomy-discovery':
            return parent_retrieve(query=query, top_k=top_k, batch_size=batch_size)

        # Clear error instead of an AttributeError on `self.augmenter.top_n_candidate`.
        if self.augmenter is None:
            raise RuntimeError(
                "An augmenter must be attached via set_augmenter() before "
                "taxonomy-discovery retrieval."
            )

        # Build one query set per augmentation slot. An augmenter may return
        # fewer than `top_n_candidate` variants (e.g. the offline LLMAugmenter
        # falls back to [query] for unknown terms), so clamp the slot index to
        # the last available variant instead of raising IndexError.
        query_sets = []
        for idx in range(self.augmenter.top_n_candidate):
            query_set = []
            for qu in query:
                variants = self.augmenter.transform(qu, task=task)
                query_set.append(variants[min(idx, len(variants) - 1)])
            query_sets.append(query_set)

        retrieves = [
            parent_retrieve(query=query_set, top_k=top_k, batch_size=batch_size)
            for query_set in query_sets
        ]

        # Merge the per-slot results for each original query, deduplicating.
        results = []
        for qu_idx in range(len(query)):
            qu_result = []
            for top_idx in range(self.augmenter.top_n_candidate):
                qu_result += retrieves[top_idx][qu_idx]
            results.append(list(set(qu_result)))

        return results
@@ -0,0 +1,123 @@
1
+ # Copyright (c) 2025 SciKnowOrg
2
+ #
3
+ # Licensed under the MIT License (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://opensource.org/licenses/MIT
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import logging
15
+ import numpy as np
16
+ from typing import List
17
+ from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
18
+ from sklearn.metrics.pairwise import cosine_similarity
19
+ from tqdm import tqdm
20
+
21
+ from ...base import AutoRetriever
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class NgramRetriever(AutoRetriever):
    """
    A retriever based on traditional n-gram vectorization methods such as TF-IDF
    and CountVectorizer.

    This retriever converts documents and queries into sparse bag-of-ngrams
    vectors and ranks documents using cosine similarity. It is simple,
    interpretable, and suitable for small-scale baselines or non-semantic
    text matching.
    """

    def __init__(self, **vectorizer_kwargs) -> None:
        """
        Initialize the n-gram retriever.

        Args:
            **vectorizer_kwargs: Additional keyword arguments passed directly
                to the scikit-learn vectorizer (e.g., ngram_range, stop_words).
        """
        super().__init__()
        self.vectorizer_kwargs = vectorizer_kwargs
        self.vectorizer = None
        self.embeddings = None
        # Populated by index(); declared here so the attribute always exists.
        self.documents = None

    def load(self, model_id) -> None:
        """
        Load and initialize the vectorizer based on `model_id`.

        Args:
            model_id (str): Either `"tfidf"` for TF-IDF or `"count"` for
                CountVectorizer.

        Raises:
            ValueError: If the model_id is not one of the supported options.
        """
        if model_id == "tfidf":
            self.vectorizer = TfidfVectorizer(**self.vectorizer_kwargs)
        elif model_id == "count":
            self.vectorizer = CountVectorizer(**self.vectorizer_kwargs)
        else:
            # Message names the actual parameter (`model_id`), not "mode".
            raise ValueError(f"Invalid model_id '{model_id}'. Choose from ['tfidf', 'count'].")

    def index(self, inputs: List[str]) -> None:
        """
        Fit the vectorizer and index (vectorize) the input documents.

        Args:
            inputs (List[str]): List of text documents to index.

        Notes:
            This method must be run before calling `retrieve()`. It creates the
            document embedding matrix used for similarity search.
        """
        if self.vectorizer is None:
            # Default to TF-IDF if the user never called `load()`
            self.load(model_id="tfidf")

        self.documents = inputs
        logger.info("Fitting vectorizer and transforming documents...")
        self.embeddings = self.vectorizer.fit_transform(inputs)
        logger.info(f"Document embeddings created with shape: {self.embeddings.shape}")

    def retrieve(self, query: List[str], top_k: int = 5, batch_size: int = -1) -> List[List[str]]:
        """
        Retrieve the most similar documents for each query string.

        Args:
            query (List[str]): A list of query strings.
            top_k (int): Number of most similar documents to return per query.
            batch_size (int): Number of queries to process at once.
                Use `-1` to process all queries in a single batch.

        Returns:
            List[List[str]]: For each query, a list containing the top-k
                matching documents.

        Raises:
            RuntimeError: If retrieval is attempted before indexing.
        """
        if self.embeddings is None:
            raise RuntimeError("Retriever must index documents before calling `retrieve()`.")

        # Guard the empty-query case: with batch_size == -1 it would
        # otherwise become a zero step for range() and raise ValueError.
        if not query:
            return []

        logger.info("Vectorizing query text...")
        query_vec = self.vectorizer.transform(query)
        logger.info(f"Query vectors created with shape: {query_vec.shape}")

        results = []
        if batch_size == -1:
            batch_size = len(query)

        for i in tqdm(range(0, len(query), batch_size)):
            q_batch = query_vec[i : i + batch_size]
            sim = cosine_similarity(q_batch, self.embeddings)
            # Descending similarity; keep only the top_k column indices.
            topk_idx = np.argsort(sim, axis=1)[:, ::-1][:, :top_k]
            for row_indices in topk_idx:
                results.append([self.documents[j] for j in row_indices])

        return results
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: OntoLearner
3
- Version: 1.4.8
3
+ Version: 1.4.9
4
4
  Summary: OntoLearner: A Modular Python Library for Ontology Learning with LLMs.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -17,6 +17,7 @@ Requires-Dist: Levenshtein
17
17
  Requires-Dist: bitsandbytes (>=0.45.1,<0.46.0)
18
18
  Requires-Dist: dspy (>=2.6.14,<3.0.0)
19
19
  Requires-Dist: g4f
20
+ Requires-Dist: gensim
20
21
  Requires-Dist: huggingface-hub (>=0.34.4,<0.35.0)
21
22
  Requires-Dist: matplotlib
22
23
  Requires-Dist: mistral-common[sentencepiece] (>=1.8.5,<2.0.0)
@@ -80,16 +81,16 @@ Please refer to [Installation](https://ontolearner.readthedocs.io/installation.h
80
81
 
81
82
  ## 🔗 Essential Resources
82
83
 
83
- | Resource | Info |
84
- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
85
- | **[📚 OntoLearner Documentation](https://ontolearner.readthedocs.io/)** | OntoLearner's extensive documentation website. |
86
- | **[🤗 Datasets on Hugging Face](https://huggingface.co/collections/SciKnowOrg/ontolearner-benchmarking-6823bcd051300c210b7ef68a)** | Access curated, machine-readable ontologies. |
87
- | **Quick Tour on OntoLearner** [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1DuElAyEFzd1vtqTjDEXWcc0zCbiV2Yee?usp=sharing) ``version=1.2.1`` | OntoLearner hands-on Colab tutorials. |
88
- | **[🚀 Quickstart](https://ontolearner.readthedocs.io/quickstart.html)** | Get started quickly with OntoLearner’s main features and workflow. |
89
- | **[🕸️ Learning Tasks](https://ontolearner.readthedocs.io/learning_tasks/learning_tasks.html)** | Explore supported ontology learning tasks like LLMs4OL Paradigm tasks and Text2Onto. | |
90
- | **[🧠 Learner Models](https://ontolearner.readthedocs.io/learners/llm.html)** | Browse and configure various learner models, including LLMs, Retrieval, or RAG approaches. |
91
- | **[📚 Ontologies Documentations](https://ontolearner.readthedocs.io/benchmarking/benchmark.html)** | Review benchmark ontologies and datasets used for evaluation and training. |
92
- | **[🧩 How to work with Ontologizer?](https://ontolearner.readthedocs.io/ontologizer/ontology_modularization.html)** | Learn how to modularize and preprocess ontologies using the Ontologizer module. |
84
+ | Resource | Info |
85
+ |:-----------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------|
86
+ | **[📚 OntoLearner Documentation](https://ontolearner.readthedocs.io/)** | OntoLearner's extensive documentation website. |
87
+ | **[🤗 Datasets on Hugging Face](https://huggingface.co/collections/SciKnowOrg/ontolearner-benchmarking-6823bcd051300c210b7ef68a)** | Access curated, machine-readable ontologies. |
88
+ | **[🚀 Quickstart](https://ontolearner.readthedocs.io/quickstart.html)** | Get started quickly with OntoLearner’s main features and workflow. |
89
+ | **[🕸️ Learning Tasks](https://ontolearner.readthedocs.io/learning_tasks/learning_tasks.html)**                                     | Explore supported ontology learning tasks like LLMs4OL Paradigm tasks and Text2Onto.        |
90
+ | **[🧠 Learner Models](https://ontolearner.readthedocs.io/learners/llm.html)** | Browse and configure various learner models, including LLMs, Retrieval, or RAG approaches. |
91
+ | **[📚 Ontologies Documentations](https://ontolearner.readthedocs.io/benchmarking/benchmark.html)** | Review benchmark ontologies and datasets used for evaluation and training. |
92
+ | **[🧩 How to work with Ontologizer?](https://ontolearner.readthedocs.io/ontologizer/ontology_modularization.html)** | Learn how to modularize and preprocess ontologies using the Ontologizer module. |
93
+ | **[🤗 Ontology Metrics Dashboard](https://huggingface.co/spaces/SciKnowOrg/OntoLearner-Benchmark-Metrics)** | Benchmark ontologies with their metrics and complexity scores. |
93
94
 
94
95
  ## 🚀 Quick Tour
95
96
  Get started with OntoLearner in just a few lines of code. This guide demonstrates how to initialize ontologies, load datasets, and train an LLM-assisted learner for ontology engineering tasks.
@@ -135,7 +136,7 @@ task = 'non-taxonomic-re'
135
136
  ret_learner = AutoRetrieverLearner(top_k=5)
136
137
  ret_learner.load(model_id='sentence-transformers/all-MiniLM-L6-v2')
137
138
 
138
- # 5. Fit the model to training data and do the predict
139
+ # 5. Fit the model to training data and then predict over the test data
139
140
  ret_learner.fit(train_data, task=task)
140
141
  predicts = ret_learner.predict(test_data, task=task)
141
142
 
@@ -1,4 +1,4 @@
1
- ontolearner/VERSION,sha256=ffNZhUJ2DAagaoMtfLwSvSCz_Rvq5cjAOZ6WQvcHNxc,6
1
+ ontolearner/VERSION,sha256=x-xbkXEIv48hifmVFcVtJDdZj6d_bmXwy3Lp4d5pPVY,6
2
2
  ontolearner/__init__.py,sha256=E4yukFv2PV4uyztTPDWljCySY9AVDcDDzabuvxfabYE,1889
3
3
  ontolearner/_learner.py,sha256=2CRQvpsz8akIOdxTs2-KLJ-MssULrjpK-QDD3QXUJXI,5297
4
4
  ontolearner/_ontology.py,sha256=W1mp195SImqLKwaj4ueEaBWuLJg2jUdx1JT20Ds3fmQ,6950
@@ -12,12 +12,17 @@ ontolearner/data_structure/metric.py,sha256=4QKkZ5L1YK6hDTU-N5Z9I9Ha99DVHmGfYxK7
12
12
  ontolearner/evaluation/__init__.py,sha256=4BZr3BUXjQDTj4Aqlqy4THa80lZPsMuh1EBTCyi9Wig,842
13
13
  ontolearner/evaluation/evaluate.py,sha256=NYCVcmPqpyIxYZrMAim37gL-erdh698RD3t3eNTTgZc,1163
14
14
  ontolearner/evaluation/metrics.py,sha256=3Aw6ycJ3_Q6xfj4tMBJP6QcexUei0G16H0ZQWt87aRU,6286
15
- ontolearner/learner/__init__.py,sha256=L54a3uvOeS6hbID6-BFd6fo9rH4WK2Q1XecpXMkEru0,768
15
+ ontolearner/learner/__init__.py,sha256=RKREPrrjzQ5KYvcOwC_2l7yFKwFBd6HoCwhX2H6Spg8,798
16
16
  ontolearner/learner/label_mapper.py,sha256=YMPeFKzJxoCYNU5z7QRYPbB88sWdu1iT6iBDpPsjn-4,3792
17
17
  ontolearner/learner/llm.py,sha256=3kq_IrwEPTFgeNVKZH9Er_OydJuDpRBtM3YXNNa8_KA,10343
18
18
  ontolearner/learner/prompt.py,sha256=0ckH7xphIDKczPe7G-rwiOxFGZ7RsLnpPlNW92b-31U,1574
19
19
  ontolearner/learner/rag.py,sha256=eysB2RvcWkVo53s8-kSbZtJv904YVTmdtxplM4ukUKM,4283
20
- ontolearner/learner/retriever.py,sha256=PNDAwsLIOBD3aQW2Ez0q6PqE3CB7d_GN-yLKJ9_D04s,6204
20
+ ontolearner/learner/retriever/__init__.py,sha256=G5XuJcTblqXVWboVW9StJ2Vo2xACp_kG5_w2nrueqlc,854
21
+ ontolearner/learner/retriever/crossencoder.py,sha256=yurzGE4zydlBSwUefi1CugsWv34HEZ61qADG_-nILbo,4996
22
+ ontolearner/learner/retriever/embedding.py,sha256=Lp9oA7LiOYaSWDvzG779KMv5keNl6Xv7hw0WpeaepDE,7875
23
+ ontolearner/learner/retriever/learner.py,sha256=VcarTwwR8HNddJCh0loCQejDzZ_GO4NkdQUjEhLVy48,11181
24
+ ontolearner/learner/retriever/llm_retriever.py,sha256=goInWYxrD9PSo_EsSKbNV8wEaSPvWY3LEC8XM7jlH64,12917
25
+ ontolearner/learner/retriever/ngram.py,sha256=XgS1OeheKEIi7wfJHZgS8mWxKv9MQrP0apOJD_XSOnM,4575
21
26
  ontolearner/learner/taxonomy_discovery/__init__.py,sha256=-Hb5Dl6_6c4l1uIT2zWtyBWMq5cjVD4PNjxt5qJePl4,747
22
27
  ontolearner/learner/taxonomy_discovery/alexbek.py,sha256=kFEDvoKxLf-sB7-d5REkcC0DqXZpcA6ZSJ2QHrNoC5E,19010
23
28
  ontolearner/learner/taxonomy_discovery/rwthdbis.py,sha256=698Gze2cR-QIhpTbuaOFm7Q4p0lCbdWz3rO6rewJZ1s,41644
@@ -65,7 +70,7 @@ ontolearner/tools/visualizer.py,sha256=cwijl4yYaS1SCLM5wbvRTEcbQj9Bjo4fHzZR6q6o8
65
70
  ontolearner/utils/__init__.py,sha256=pSEyU3dlPMADBqygqaaid44RdWf0Lo3Fvz-K_rQ7_Bw,733
66
71
  ontolearner/utils/io.py,sha256=3DqGK2p7c0onKi0Xxs16WB08uHfHUId3bW0dDKwyS0g,2110
67
72
  ontolearner/utils/train_test_split.py,sha256=Zlm42eT6QGWwlySyomCPIiTGmGqeN_h4z4xBY2EAOR8,11530
68
- ontolearner-1.4.8.dist-info/METADATA,sha256=B_ULVAw849kBqCpF0-oX8lUuq2d7GFMLPPESYQdWnp8,14158
69
- ontolearner-1.4.8.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
70
- ontolearner-1.4.8.dist-info/licenses/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
71
- ontolearner-1.4.8.dist-info/RECORD,,
73
+ ontolearner-1.4.9.dist-info/METADATA,sha256=c_V_1mUkxAhzJz04u1wRYU7xodpZQdiJXBVFzUCIMK8,11444
74
+ ontolearner-1.4.9.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
75
+ ontolearner-1.4.9.dist-info/licenses/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
76
+ ontolearner-1.4.9.dist-info/RECORD,,