PyPI - OntoLearner - Versions diffs - 1.4.4__py3-none-any.whl → 1.4.6__py3-none-any.whl - Mend

OntoLearner 1.4.4__py3-none-any.whl → 1.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

ontolearner/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 1.4.4
1	+ 1.4.6

ontolearner/base/learner.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 from abc import ABC
-from typing import Any, List, Optional
+from typing import Any, List, Optional, Dict
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import torch.nn.functional as F
@@ -147,7 +147,7 @@ class AutoLearner(ABC):
     def _non_taxonomic_re(self, data: Any, test: bool = False) -> Optional[Any]:
         pass
-    def tasks_data_former(self, data: Any, task: str, test: bool = False) -> Any:
+    def tasks_data_former(self, data: Any, task: str, test: bool = False) -> List[str | Dict[str, str]]:
         formatted_data = []
         if task == "term-typing":
             for typing in data.term_typings:
@@ -173,7 +173,7 @@ class AutoLearner(ABC):
             formatted_data = {"types": non_taxonomic_types, "relations": non_taxonomic_res}
         return formatted_data
-    def tasks_ground_truth_former(self, data: Any, task: str) -> Any:
+    def tasks_ground_truth_former(self, data: Any, task: str) -> List[Dict[str, str]]:
         formatted_data = []
         if task == "term-typing":
             for typing in data.term_typings:
@@ -350,7 +350,7 @@ class AutoRetriever(ABC):
         self.documents = inputs
         self.embeddings = self.embedding_model.encode(inputs, convert_to_tensor=True)
-    def retrieve(self, query: List[str], top_k: int = 5) -> List[List[str]]:
+    def retrieve(self, query: List[str], top_k: int = 5, batch_size: int = -1) -> List[List[str]]:
         """
         Retrieve the top-k most similar examples for each query in a list of queries.
@@ -363,33 +363,37 @@ class AutoRetriever(ABC):
         """
         if self.embeddings is None:
             raise RuntimeError("Retriever model must index documents before prediction.")
-        # Encode all queries at once
         query_embeddings = self.embedding_model.encode(query, convert_to_tensor=True)  # shape: [num_queries, dim]
         if query_embeddings.shape[-1] != self.embeddings.shape[-1]:
             raise ValueError(
                 f"Embedding dimension mismatch: query embedding dim={query_embeddings.shape[-1]}, "
                 f"document embedding dim={self.embeddings.shape[-1]}"
             )
-        # Normalize embeddings for cosine similarity
-        query_norm = F.normalize(query_embeddings, p=2, dim=1)
         doc_norm = F.normalize(self.embeddings, p=2, dim=1)
+        if batch_size == -1:
+            results = self._retrieve(query_embeddings=query_embeddings, doc_norm=doc_norm, top_k=top_k)
+        else:
+            results = self._batch_retrieve(query_embeddings=query_embeddings, doc_norm=doc_norm, top_k=top_k, batch_size=batch_size)
+        return results
-        # Compute cosine similarity: [num_queries, num_docs]
-        similarity_matrix = torch.matmul(query_norm, doc_norm.T)
-        # Get top-k indices for each query
-        top_k = min(top_k, len(self.documents))
-        topk_similarities, topk_indices = torch.topk(similarity_matrix, k=top_k, dim=1)
-        # Retrieve documents for each query
+    def _retrieve(self, query_embeddings, doc_norm, top_k: int = 5) -> List[List[str]]:
+        query_norm = F.normalize(query_embeddings, p=2, dim=1)
+        similarity_matrix = torch.matmul(query_norm, doc_norm.T)
+        current_top_k = min(top_k, len(self.documents))
+        topk_similarities, topk_indices = torch.topk(similarity_matrix, k=current_top_k, dim=1)
         results = [[self.documents[i] for i in indices] for indices in topk_indices]
         return results
+    def _batch_retrieve(self, query_embeddings, doc_norm, top_k: int = 5, batch_size: int = 1024) -> List[List[str]]:
+        results = []
+        for i in range(0, query_embeddings.size(0), batch_size):
+            batch_queries = query_embeddings[i:i + batch_size]
+            batch_results = self._retrieve(batch_queries, doc_norm, top_k=top_k)
+            results.extend(batch_results)
+        return results
 class AutoPrompt(ABC):
     """
     Abstract base class for prompt formatting components.

ontolearner/evaluation/metrics.py CHANGED Viewed

@@ -11,13 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Dict
+from typing import List, Dict, Tuple, Set
 SYMMETRIC_RELATIONS = {"equivalentclass", "sameas", "disjointwith"}
-def text2onto_metrics(y_true, y_pred, similarity_threshold: float = 0.8) -> Dict:
-    def jaccard_similarity(a, b):
+def text2onto_metrics(y_true: List[str], y_pred: List[str], similarity_threshold: float = 0.8) -> Dict[str, float | int]:
+    def jaccard_similarity(a: str, b: str) -> float:
         set_a = set(a.lower().split())
         set_b = set(b.lower().split())
         if not set_a and not set_b:
@@ -46,10 +45,13 @@ def text2onto_metrics(y_true, y_pred, similarity_threshold: float = 0.8) -> Dict
     return {
         "f1_score": f1_score,
         "precision": precision,
-        "recall": recall
+        "recall": recall,
+        "total_correct": total_correct,
+        "total_predicted": total_predicted,
+        "total_ground_truth": total_ground_truth
     }
-def term_typing_metrics(y_true, y_pred) -> Dict:
+def term_typing_metrics(y_true: List[Dict[str, List[str]]], y_pred: List[Dict[str, List[str]]]) -> Dict[str, float | int]:
     """
     Compute precision, recall, and F1-score for term typing
     using (term, type) pair-level matching instead of ID-based lookups.
@@ -77,13 +79,17 @@ def term_typing_metrics(y_true, y_pred) -> Dict:
     precision = total_correct / total_predicted if total_predicted > 0 else 0.0
     recall = total_correct / total_ground_truth if total_ground_truth > 0 else 0.0
     f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
     return {
         "f1_score": f1_score,
         "precision": precision,
-        "recall": recall
+        "recall": recall,
+        "total_correct": total_correct,
+        "total_predicted": total_predicted,
+        "total_ground_truth": total_ground_truth
     }
-def taxonomy_discovery_metrics(y_true, y_pred) -> Dict:
+def taxonomy_discovery_metrics(y_true: List[Dict[str, str]], y_pred: List[Dict[str, str]]) -> Dict[str, float | int]:
     total_predicted = len(y_pred)
     total_ground_truth = len(y_true)
     # Convert ground truth and predictions to sets of tuples for easy comparison
@@ -102,18 +108,22 @@ def taxonomy_discovery_metrics(y_true, y_pred) -> Dict:
     return {
         "f1_score": f1_score,
         "precision": precision,
-        "recall": recall
+        "recall": recall,
+        "total_correct": total_correct,
+        "total_predicted": total_predicted,
+        "total_ground_truth": total_ground_truth
     }
-def non_taxonomic_re_metrics(y_true, y_pred) -> Dict:
-    def normalize_triple(item):
+def non_taxonomic_re_metrics(y_true: List[Dict[str, str]], y_pred: List[Dict[str, str]]) -> Dict[str, float | int]:
+    def normalize_triple(item: Dict[str, str]) -> Tuple[str, str, str]:
         return (
             item["head"].strip().lower(),
             item["relation"].strip().lower(),
             item["tail"].strip().lower()
         )
-    def expand_symmetric(triples):
+    def expand_symmetric(triples: Set[Tuple[str, str, str]]) -> Set[Tuple[str, str, str]]:
         expanded = set()
         for h, r, t in triples:
             expanded.add((h, r, t))
@@ -136,5 +146,8 @@ def non_taxonomic_re_metrics(y_true, y_pred) -> Dict:
     return {
         "f1_score": f1_score,
         "precision": precision,
-        "recall": recall
+        "recall": recall,
+        "total_correct": total_correct,
+        "total_predicted": total_predicted,
+        "total_ground_truth": total_ground_truth
     }

ontolearner/learner/retriever.py CHANGED Viewed

@@ -17,12 +17,12 @@ from typing import Any, Optional
 import warnings
 class AutoRetrieverLearner(AutoLearner):
-    def __init__(self, base_retriever: Any = AutoRetriever(), top_k: int = 5):
+    def __init__(self, base_retriever: Any = AutoRetriever(), top_k: int = 5, batch_size: int = -1):
         super().__init__()
         self.retriever = base_retriever
         self.top_k = top_k
         self._is_term_typing_fit = False
-        self._is_taxonomy_discovery_fit = False
+        self._batch_size = batch_size
     def load(self, model_id: str = "sentence-transformers/all-MiniLM-L6-v2"):
         self.retriever.load(model_id=model_id)
@@ -35,7 +35,7 @@ class AutoRetrieverLearner(AutoLearner):
     def _retriever_predict(self, data:Any, top_k: int) -> Any:
         if isinstance(data, list):
-            return self.retriever.retrieve(query=data, top_k=top_k)
+            return self.retriever.retrieve(query=data, top_k=top_k, batch_size=self._batch_size)
         if isinstance(data, str):
             return self.retriever.retrieve(query=[data], top_k=top_k)
         raise TypeError(f"Unsupported data type {type(data)}. You should pass a List[str] or a str.")
@@ -63,9 +63,9 @@ class AutoRetrieverLearner(AutoLearner):
         if test:
             self._retriever_fit(data=data)
             candidates_lst =  self._retriever_predict(data=data, top_k=self.top_k + 1)
-            taxonomic_pairs = [{"parent": query, "child": candidate}
+            taxonomic_pairs = [{"parent": candidate, "child": query}
                                for query, candidates in zip(data, candidates_lst)
-                               for candidate in candidates if candidate != query]
+                               for candidate in candidates if candidate.lower() != query.lower()]
             return taxonomic_pairs
         else:
             warnings.warn("No requirement for fiting the taxonomy discovery model, the predict module will use the input data to do the fit as well.")

{ontolearner-1.4.4.dist-info → ontolearner-1.4.6.dist-info}/METADATA RENAMED Viewed

@@ -1,8 +1,9 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: OntoLearner
-Version: 1.4.4
+Version: 1.4.6
 Summary: OntoLearner: A Modular Python Library for Ontology Learning with LLMs.
 License: MIT
+License-File: LICENSE
 Author: Hamed Babaei Giglou
 Author-email: hamedbabaeigiglou@gmail.com
 Requires-Python: >=3.10,<3.14.0

{ontolearner-1.4.4.dist-info → ontolearner-1.4.6.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-ontolearner/VERSION,sha256=0D2LJotBcTKKwkWM6MqC1pof3jhGnU5PquT4-o2KXjU,6
+ontolearner/VERSION,sha256=Vj1cgMVOd-L4RU7NXBQ4j5qzr3ftxztvRzatpkUnlSw,6
 ontolearner/__init__.py,sha256=E4yukFv2PV4uyztTPDWljCySY9AVDcDDzabuvxfabYE,1889
 ontolearner/_learner.py,sha256=2CRQvpsz8akIOdxTs2-KLJ-MssULrjpK-QDD3QXUJXI,5297
 ontolearner/_ontology.py,sha256=W1mp195SImqLKwaj4ueEaBWuLJg2jUdx1JT20Ds3fmQ,6950
 ontolearner/base/__init__.py,sha256=5pf-ltxzGp32xhEcPdbtm11wXJrYJMUeWG-mbcAYD8Q,705
-ontolearner/base/learner.py,sha256=DVWp7OHlhTYU3Es7Q6CWCOeL7Y5LTbjWilTri_DNExs,17897
+ontolearner/base/learner.py,sha256=J9-Oi2P_UA5Jdbh8muBN0VgH8HGi1uyhEi2LZmCv_rk,18543
 ontolearner/base/ontology.py,sha256=JbMJ1-WUyHWQiNJL-DeaqcriUimLdqN3_ESROgqOPTQ,24772
 ontolearner/base/text2onto.py,sha256=iUXYZoqnwgebQuQzM-XSGTVRfHLlhjUK_z5XUvhRICc,5388
 ontolearner/data_structure/__init__.py,sha256=1HiKvk8FKjhYeI92RHnJXxyQbUJBi3JFytjQjthsY_s,599
@@ -11,13 +11,13 @@ ontolearner/data_structure/data.py,sha256=jUUDfqsOZcEqIR83SRboiKibPdA_JquI1uOEiQ
 ontolearner/data_structure/metric.py,sha256=4QKkZ5L1YK6hDTU-N5Z9I9Ha99DVHmGfYxK7N2qdhfc,7589
 ontolearner/evaluation/__init__.py,sha256=4BZr3BUXjQDTj4Aqlqy4THa80lZPsMuh1EBTCyi9Wig,842
 ontolearner/evaluation/evaluate.py,sha256=NYCVcmPqpyIxYZrMAim37gL-erdh698RD3t3eNTTgZc,1163
-ontolearner/evaluation/metrics.py,sha256=jk-80kQZfWldYV9Lzhq3lZvWE8YT5ywqtzhIfmTm664,5378
+ontolearner/evaluation/metrics.py,sha256=3Aw6ycJ3_Q6xfj4tMBJP6QcexUei0G16H0ZQWt87aRU,6286
 ontolearner/learner/__init__.py,sha256=ZS816XCPb2K7azTlK2032A6ozZNoijlPLDOwcgu3-8g,745
 ontolearner/learner/label_mapper.py,sha256=-XW8MHafm4ix3e9u-RRwDePJ71D804DNuKzdf1zudtk,3789
 ontolearner/learner/llm.py,sha256=bwCoeR7z3YgYrkKyjDM-MRHZAuDzpUt8f-A0bDUbtGM,7151
 ontolearner/learner/prompt.py,sha256=0ckH7xphIDKczPe7G-rwiOxFGZ7RsLnpPlNW92b-31U,1574
 ontolearner/learner/rag.py,sha256=eysB2RvcWkVo53s8-kSbZtJv904YVTmdtxplM4ukUKM,4283
-ontolearner/learner/retriever.py,sha256=FIsvutDXvrr9N6AMu35TNJHdiQGbmRQ4TTGfRRdHdYo,4931
+ontolearner/learner/retriever.py,sha256=GDXr6l0m_prxnctxQzBpm75xL4jW2Q4b91iyePFcDAs,4988
 ontolearner/ontology/__init__.py,sha256=F9Ta1qCX9mOxIK5CPRypEoglQNkpJ6SJpqziz73xKQE,1328
 ontolearner/ontology/agriculture.py,sha256=ZaXHNEFjbtsMH8M7HQ8ypnfJS4TUQy_as16fwv-kOKA,5903
 ontolearner/ontology/arts_humanities.py,sha256=K4ceDJL6PfIfSJZ86uQUkUXOVoiERG6ItgvVE2lhLKk,3996
@@ -53,7 +53,7 @@ ontolearner/tools/visualizer.py,sha256=cwijl4yYaS1SCLM5wbvRTEcbQj9Bjo4fHzZR6q6o8
 ontolearner/utils/__init__.py,sha256=pSEyU3dlPMADBqygqaaid44RdWf0Lo3Fvz-K_rQ7_Bw,733
 ontolearner/utils/io.py,sha256=3DqGK2p7c0onKi0Xxs16WB08uHfHUId3bW0dDKwyS0g,2110
 ontolearner/utils/train_test_split.py,sha256=Zlm42eT6QGWwlySyomCPIiTGmGqeN_h4z4xBY2EAOR8,11530
-ontolearner-1.4.4.dist-info/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
-ontolearner-1.4.4.dist-info/METADATA,sha256=Uf9twY6zgfxNZbgmtCqIhYJ1nnzzwe-TzC5_bztEV_U,13999
-ontolearner-1.4.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-ontolearner-1.4.4.dist-info/RECORD,,
+ontolearner-1.4.6.dist-info/METADATA,sha256=S756f3Kes6TKDwU59ft3TU3GSkffQh6sjptJswr5orw,14021
+ontolearner-1.4.6.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+ontolearner-1.4.6.dist-info/licenses/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
+ontolearner-1.4.6.dist-info/RECORD,,

{ontolearner-1.4.4.dist-info → ontolearner-1.4.6.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.1.3
+Generator: poetry-core 2.2.1
 Root-Is-Purelib: true
 Tag: py3-none-any

{ontolearner-1.4.4.dist-info → ontolearner-1.4.6.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

OntoLearner 1.4.4__py3-none-any.whl → 1.4.6__py3-none-any.whl

OntoLearner 1.4.4__py3-none-any.whl → 1.4.6__py3-none-any.whl