PyPI - OntoLearner - Versions diffs - 1.4.4__tar.gz → 1.4.6__tar.gz - Mend

OntoLearner 1.4.4__tar.gz → 1.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

{ontolearner-1.4.4 → ontolearner-1.4.6}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,9 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: OntoLearner
-Version: 1.4.4
+Version: 1.4.6
 Summary: OntoLearner: A Modular Python Library for Ontology Learning with LLMs.
 License: MIT
+License-File: LICENSE
 Author: Hamed Babaei Giglou
 Author-email: hamedbabaeigiglou@gmail.com
 Requires-Python: >=3.10,<3.14.0

ontolearner-1.4.6/ontolearner/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 1.4.6

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/base/learner.py RENAMED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 from abc import ABC
-from typing import Any, List, Optional
+from typing import Any, List, Optional, Dict
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import torch.nn.functional as F
@@ -147,7 +147,7 @@ class AutoLearner(ABC):
     def _non_taxonomic_re(self, data: Any, test: bool = False) -> Optional[Any]:
         pass
-    def tasks_data_former(self, data: Any, task: str, test: bool = False) -> Any:
+    def tasks_data_former(self, data: Any, task: str, test: bool = False) -> List[str | Dict[str, str]]:
         formatted_data = []
         if task == "term-typing":
             for typing in data.term_typings:
@@ -173,7 +173,7 @@ class AutoLearner(ABC):
             formatted_data = {"types": non_taxonomic_types, "relations": non_taxonomic_res}
         return formatted_data
-    def tasks_ground_truth_former(self, data: Any, task: str) -> Any:
+    def tasks_ground_truth_former(self, data: Any, task: str) -> List[Dict[str, str]]:
         formatted_data = []
         if task == "term-typing":
             for typing in data.term_typings:
@@ -350,7 +350,7 @@ class AutoRetriever(ABC):
         self.documents = inputs
         self.embeddings = self.embedding_model.encode(inputs, convert_to_tensor=True)
-    def retrieve(self, query: List[str], top_k: int = 5) -> List[List[str]]:
+    def retrieve(self, query: List[str], top_k: int = 5, batch_size: int = -1) -> List[List[str]]:
         """
         Retrieve the top-k most similar examples for each query in a list of queries.
@@ -363,33 +363,37 @@ class AutoRetriever(ABC):
         """
         if self.embeddings is None:
             raise RuntimeError("Retriever model must index documents before prediction.")
-        # Encode all queries at once
         query_embeddings = self.embedding_model.encode(query, convert_to_tensor=True)  # shape: [num_queries, dim]
         if query_embeddings.shape[-1] != self.embeddings.shape[-1]:
             raise ValueError(
                 f"Embedding dimension mismatch: query embedding dim={query_embeddings.shape[-1]}, "
                 f"document embedding dim={self.embeddings.shape[-1]}"
             )
-        # Normalize embeddings for cosine similarity
-        query_norm = F.normalize(query_embeddings, p=2, dim=1)
         doc_norm = F.normalize(self.embeddings, p=2, dim=1)
+        if batch_size == -1:
+            results = self._retrieve(query_embeddings=query_embeddings, doc_norm=doc_norm, top_k=top_k)
+        else:
+            results = self._batch_retrieve(query_embeddings=query_embeddings, doc_norm=doc_norm, top_k=top_k, batch_size=batch_size)
+        return results
-        # Compute cosine similarity: [num_queries, num_docs]
-        similarity_matrix = torch.matmul(query_norm, doc_norm.T)
-        # Get top-k indices for each query
-        top_k = min(top_k, len(self.documents))
-        topk_similarities, topk_indices = torch.topk(similarity_matrix, k=top_k, dim=1)
-        # Retrieve documents for each query
+    def _retrieve(self, query_embeddings, doc_norm, top_k: int = 5) -> List[List[str]]:
+        query_norm = F.normalize(query_embeddings, p=2, dim=1)
+        similarity_matrix = torch.matmul(query_norm, doc_norm.T)
+        current_top_k = min(top_k, len(self.documents))
+        topk_similarities, topk_indices = torch.topk(similarity_matrix, k=current_top_k, dim=1)
         results = [[self.documents[i] for i in indices] for indices in topk_indices]
         return results
+    def _batch_retrieve(self, query_embeddings, doc_norm, top_k: int = 5, batch_size: int = 1024) -> List[List[str]]:
+        results = []
+        for i in range(0, query_embeddings.size(0), batch_size):
+            batch_queries = query_embeddings[i:i + batch_size]
+            batch_results = self._retrieve(batch_queries, doc_norm, top_k=top_k)
+            results.extend(batch_results)
+        return results
 class AutoPrompt(ABC):
     """
     Abstract base class for prompt formatting components.

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/evaluation/metrics.py RENAMED Viewed

@@ -11,13 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Dict
+from typing import List, Dict, Tuple, Set
 SYMMETRIC_RELATIONS = {"equivalentclass", "sameas", "disjointwith"}
-def text2onto_metrics(y_true, y_pred, similarity_threshold: float = 0.8) -> Dict:
-    def jaccard_similarity(a, b):
+def text2onto_metrics(y_true: List[str], y_pred: List[str], similarity_threshold: float = 0.8) -> Dict[str, float | int]:
+    def jaccard_similarity(a: str, b: str) -> float:
         set_a = set(a.lower().split())
         set_b = set(b.lower().split())
         if not set_a and not set_b:
@@ -46,10 +45,13 @@ def text2onto_metrics(y_true, y_pred, similarity_threshold: float = 0.8) -> Dict
     return {
         "f1_score": f1_score,
         "precision": precision,
-        "recall": recall
+        "recall": recall,
+        "total_correct": total_correct,
+        "total_predicted": total_predicted,
+        "total_ground_truth": total_ground_truth
     }
-def term_typing_metrics(y_true, y_pred) -> Dict:
+def term_typing_metrics(y_true: List[Dict[str, List[str]]], y_pred: List[Dict[str, List[str]]]) -> Dict[str, float | int]:
     """
     Compute precision, recall, and F1-score for term typing
     using (term, type) pair-level matching instead of ID-based lookups.
@@ -77,13 +79,17 @@ def term_typing_metrics(y_true, y_pred) -> Dict:
     precision = total_correct / total_predicted if total_predicted > 0 else 0.0
     recall = total_correct / total_ground_truth if total_ground_truth > 0 else 0.0
     f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
     return {
         "f1_score": f1_score,
         "precision": precision,
-        "recall": recall
+        "recall": recall,
+        "total_correct": total_correct,
+        "total_predicted": total_predicted,
+        "total_ground_truth": total_ground_truth
     }
-def taxonomy_discovery_metrics(y_true, y_pred) -> Dict:
+def taxonomy_discovery_metrics(y_true: List[Dict[str, str]], y_pred: List[Dict[str, str]]) -> Dict[str, float | int]:
     total_predicted = len(y_pred)
     total_ground_truth = len(y_true)
     # Convert ground truth and predictions to sets of tuples for easy comparison
@@ -102,18 +108,22 @@ def taxonomy_discovery_metrics(y_true, y_pred) -> Dict:
     return {
         "f1_score": f1_score,
         "precision": precision,
-        "recall": recall
+        "recall": recall,
+        "total_correct": total_correct,
+        "total_predicted": total_predicted,
+        "total_ground_truth": total_ground_truth
     }
-def non_taxonomic_re_metrics(y_true, y_pred) -> Dict:
-    def normalize_triple(item):
+def non_taxonomic_re_metrics(y_true: List[Dict[str, str]], y_pred: List[Dict[str, str]]) -> Dict[str, float | int]:
+    def normalize_triple(item: Dict[str, str]) -> Tuple[str, str, str]:
         return (
             item["head"].strip().lower(),
             item["relation"].strip().lower(),
             item["tail"].strip().lower()
         )
-    def expand_symmetric(triples):
+    def expand_symmetric(triples: Set[Tuple[str, str, str]]) -> Set[Tuple[str, str, str]]:
         expanded = set()
         for h, r, t in triples:
             expanded.add((h, r, t))
@@ -136,5 +146,8 @@ def non_taxonomic_re_metrics(y_true, y_pred) -> Dict:
     return {
         "f1_score": f1_score,
         "precision": precision,
-        "recall": recall
+        "recall": recall,
+        "total_correct": total_correct,
+        "total_predicted": total_predicted,
+        "total_ground_truth": total_ground_truth
     }

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/learner/retriever.py RENAMED Viewed

@@ -17,12 +17,12 @@ from typing import Any, Optional
 import warnings
 class AutoRetrieverLearner(AutoLearner):
-    def __init__(self, base_retriever: Any = AutoRetriever(), top_k: int = 5):
+    def __init__(self, base_retriever: Any = AutoRetriever(), top_k: int = 5, batch_size: int = -1):
         super().__init__()
         self.retriever = base_retriever
         self.top_k = top_k
         self._is_term_typing_fit = False
-        self._is_taxonomy_discovery_fit = False
+        self._batch_size = batch_size
     def load(self, model_id: str = "sentence-transformers/all-MiniLM-L6-v2"):
         self.retriever.load(model_id=model_id)
@@ -35,7 +35,7 @@ class AutoRetrieverLearner(AutoLearner):
     def _retriever_predict(self, data:Any, top_k: int) -> Any:
         if isinstance(data, list):
-            return self.retriever.retrieve(query=data, top_k=top_k)
+            return self.retriever.retrieve(query=data, top_k=top_k, batch_size=self._batch_size)
         if isinstance(data, str):
             return self.retriever.retrieve(query=[data], top_k=top_k)
         raise TypeError(f"Unsupported data type {type(data)}. You should pass a List[str] or a str.")
@@ -63,9 +63,9 @@ class AutoRetrieverLearner(AutoLearner):
         if test:
             self._retriever_fit(data=data)
             candidates_lst =  self._retriever_predict(data=data, top_k=self.top_k + 1)
-            taxonomic_pairs = [{"parent": query, "child": candidate}
+            taxonomic_pairs = [{"parent": candidate, "child": query}
                                for query, candidates in zip(data, candidates_lst)
-                               for candidate in candidates if candidate != query]
+                               for candidate in candidates if candidate.lower() != query.lower()]
             return taxonomic_pairs
         else:
             warnings.warn("No requirement for fiting the taxonomy discovery model, the predict module will use the input data to do the fit as well.")

{ontolearner-1.4.4 → ontolearner-1.4.6}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "OntoLearner"
-version = "1.4.4"
+version = "1.4.6"
 description = "OntoLearner: A Modular Python Library for Ontology Learning with LLMs."
 authors = ["Hamed Babaei Giglou <hamedbabaeigiglou@gmail.com>", "Andrei C. Aioanei <andrei.c.aioanei@gmail.com>"]
 license = "MIT License"

ontolearner-1.4.4/ontolearner/VERSION DELETED Viewed

	@@ -1 +0,0 @@
1	- 1.4.4

{ontolearner-1.4.4 → ontolearner-1.4.6}/LICENSE RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/README.md RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/images/logo.png RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/__init__.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/_learner.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/_ontology.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/base/__init__.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/base/ontology.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/base/text2onto.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/data_structure/__init__.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/data_structure/data.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/data_structure/metric.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/evaluation/__init__.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/evaluation/evaluate.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/learner/__init__.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/learner/label_mapper.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/learner/llm.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/learner/prompt.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/learner/rag.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/__init__.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/agriculture.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/arts_humanities.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/biology.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/chemistry.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/ecology_environment.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/education.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/events.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/finance.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/food_beverage.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/general.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/geography.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/industry.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/law.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/library_cultural_heritage.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/material_science_engineering.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/medicine.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/news_media.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/scholarly_knowledge.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/social_sciences.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/units_measurements.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/upper_ontologies.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/ontology/web.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/processor.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/text2onto/__init__.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/text2onto/batchifier.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/text2onto/general.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/text2onto/splitter.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/text2onto/synthesizer.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/tools/__init__.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/tools/analyzer.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/tools/visualizer.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/utils/__init__.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/utils/io.py RENAMED Viewed

File without changes

{ontolearner-1.4.4 → ontolearner-1.4.6}/ontolearner/utils/train_test_split.py RENAMED Viewed

File without changes

OntoLearner 1.4.4__tar.gz → 1.4.6__tar.gz

OntoLearner 1.4.4__tar.gz → 1.4.6__tar.gz