PyPI - OntoLearner - Versions diffs - 1.4.9__py3-none-any.whl → 1.4.11__py3-none-any.whl - Mend

OntoLearner 1.4.9__py3-none-any.whl → 1.4.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

ontolearner/VERSION +1 -1
ontolearner/base/learner.py +38 -17
ontolearner/base/ontology.py +2 -2
ontolearner/evaluation/metrics.py +72 -32
ontolearner/learner/__init__.py +1 -1
ontolearner/learner/label_mapper.py +1 -1
ontolearner/learner/prompt.py +40 -5
ontolearner/learner/rag/__init__.py +14 -0
ontolearner/learner/{rag.py → rag/rag.py} +7 -2
ontolearner/learner/retriever/__init__.py +1 -1
ontolearner/learner/retriever/{llm_retriever.py → augmented_retriever.py} +48 -39
ontolearner/learner/retriever/learner.py +3 -4
ontolearner/learner/text2onto/__init__.py +1 -1
ontolearner/learner/text2onto/alexbek.py +484 -1105
ontolearner/learner/text2onto/sbunlp.py +498 -493
ontolearner/text2onto/splitter.py +69 -6
ontolearner/tools/analyzer.py +51 -0
{ontolearner-1.4.9.dist-info → ontolearner-1.4.11.dist-info}/METADATA +2 -2
{ontolearner-1.4.9.dist-info → ontolearner-1.4.11.dist-info}/RECORD +21 -20
{ontolearner-1.4.9.dist-info → ontolearner-1.4.11.dist-info}/WHEEL +0 -0
{ontolearner-1.4.9.dist-info → ontolearner-1.4.11.dist-info}/licenses/LICENSE +0 -0

ontolearner/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 1.4.9
1	+ 1.4.11

ontolearner/base/learner.py CHANGED Viewed

@@ -18,6 +18,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import torch.nn.functional as F
 from sentence_transformers import SentenceTransformer
+from collections import defaultdict
 class AutoLearner(ABC):
     """
@@ -70,6 +71,7 @@ class AutoLearner(ABC):
                  - "term-typing": Predict semantic types for terms
                  - "taxonomy-discovery": Identify hierarchical relationships
                  - "non-taxonomy-discovery": Identify non-hierarchical relationships
+                 - "text2onto" : Extract ontology terms and their semantic types from documents
         Raises:
             NotImplementedError: If not implemented by concrete class.
@@ -81,6 +83,8 @@ class AutoLearner(ABC):
             self._taxonomy_discovery(train_data, test=False)
         elif task == 'non-taxonomic-re':
             self._non_taxonomic_re(train_data, test=False)
+        elif task == 'text2onto':
+            self._text2onto(train_data, test=False)
         else:
             raise ValueError(f"{task} is not a valid task.")
@@ -103,6 +107,7 @@ class AutoLearner(ABC):
             - term-typing: List of predicted types for each term
             - taxonomy-discovery: Boolean predictions for relationships
             - non-taxonomy-discovery: Predicted relation types
+            - text2onto : Extract ontology terms and their semantic types from documents
         Raises:
             NotImplementedError: If not implemented by concrete class.
@@ -115,6 +120,8 @@ class AutoLearner(ABC):
             return self._taxonomy_discovery(eval_data, test=True)
         elif task == 'non-taxonomic-re':
             return self._non_taxonomic_re(eval_data, test=True)
+        elif task == 'text2onto':
+            return self._text2onto(eval_data, test=True)
         else:
             raise ValueError(f"{task} is not a valid task.")
@@ -147,6 +154,9 @@ class AutoLearner(ABC):
     def _non_taxonomic_re(self, data: Any, test: bool = False) -> Optional[Any]:
         pass
+    def _text2onto(self, data: Any, test: bool = False) -> Optional[Any]:
+        pass
     def tasks_data_former(self, data: Any, task: str, test: bool = False) -> List[str | Dict[str, str]]:
         formatted_data = []
         if task == "term-typing":
@@ -171,6 +181,7 @@ class AutoLearner(ABC):
             non_taxonomic_types = list(set(non_taxonomic_types))
             non_taxonomic_res = list(set(non_taxonomic_res))
             formatted_data = {"types": non_taxonomic_types, "relations": non_taxonomic_res}
         return formatted_data
     def tasks_ground_truth_former(self, data: Any, task: str) -> List[Dict[str, str]]:
@@ -186,6 +197,26 @@ class AutoLearner(ABC):
                 formatted_data.append({"head": non_taxonomic_triplets.head,
                                        "tail": non_taxonomic_triplets.tail,
                                        "relation": non_taxonomic_triplets.relation})
+        if task == "text2onto":
+            terms2docs = data.get("terms2docs", {}) or {}
+            terms2types = data.get("terms2types", {}) or {}
+            # gold doc→terms
+            gold_terms = []
+            for term, doc_ids in terms2docs.items():
+                for doc_id in doc_ids or []:
+                    gold_terms.append({"doc_id": doc_id, "term": term})
+            # gold doc→types derived via doc→terms + term→types
+            doc2types = defaultdict(set)
+            for term, doc_ids in terms2docs.items():
+                for doc_id in doc_ids or []:
+                    for ty in (terms2types.get(term, []) or []):
+                        if isinstance(ty, str) and ty.strip():
+                            doc2types[doc_id].add(ty.strip())
+            gold_types = [{"doc_id": doc_id, "type": ty} for doc_id, tys in doc2types.items() for ty in tys]
+            return {"terms": gold_terms, "types": gold_types}
         return formatted_data
 class AutoLLM(ABC):
@@ -201,7 +232,7 @@ class AutoLLM(ABC):
         tokenizer: The tokenizer associated with the model.
     """
-    def __init__(self, label_mapper: Any, device: str='cpu', token: str="") -> None:
+    def __init__(self, label_mapper: Any, device: str='cpu', token: str="", max_length: int = 256) -> None:
         """
         Initialize the LLM component.
@@ -213,6 +244,7 @@ class AutoLLM(ABC):
         self.device=device
         self.model: Optional[Any] = None
         self.tokenizer: Optional[Any] = None
+        self.max_length = max_length
     def load(self, model_id: str) -> None:
@@ -236,10 +268,8 @@ class AutoLLM(ABC):
         self.tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left', token=self.token)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         if self.device == "cpu":
-            # device_map = "cpu"
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_id,
-                # device_map=device_map,
                 torch_dtype=torch.bfloat16,
                 token=self.token
             )
@@ -248,8 +278,8 @@ class AutoLLM(ABC):
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_id,
                 device_map=device_map,
-                torch_dtype=torch.bfloat16,
-                token=self.token
+                token=self.token,
+                trust_remote_code=True,
             )
         self.label_mapper.fit()
@@ -276,29 +306,20 @@ class AutoLLM(ABC):
             List of generated text responses, one for each input prompt.
             Responses include the original input plus generated continuation.
         """
-        # Tokenize inputs and move to device
         encoded_inputs = self.tokenizer(inputs,
                                         return_tensors="pt",
-                                        padding=True,
+                                        max_length=self.max_length,
                                         truncation=True).to(self.model.device)
         input_ids = encoded_inputs["input_ids"]
         input_length = input_ids.shape[1]
-        # Generate output
         outputs = self.model.generate(
             **encoded_inputs,
             max_new_tokens=max_new_tokens,
-            pad_token_id=self.tokenizer.eos_token_id
+            pad_token_id=self.tokenizer.eos_token_id,
+            eos_token_id=self.tokenizer.eos_token_id
         )
-        # Extract only the newly generated tokens (excluding prompt)
         generated_tokens = outputs[:, input_length:]
-        # Decode only the generated part
         decoded_outputs = [self.tokenizer.decode(g, skip_special_tokens=True).strip() for g in generated_tokens]
-        print(decoded_outputs)
-        print(self.label_mapper.predict(decoded_outputs))
-        # Map the decoded text to labels
         return self.label_mapper.predict(decoded_outputs)
 class AutoRetriever(ABC):

ontolearner/base/ontology.py CHANGED Viewed

@@ -372,7 +372,7 @@ class BaseOntology(ABC):
         # Save updated metrics
         df.to_excel(metrics_file_path, index=False)
-    def is_valid_label(label: str) -> Any:
+    def is_valid_label(self, label: str) -> Any:
         invalids = ['root', 'thing']
         if label.lower() in invalids:
             return None
@@ -522,7 +522,7 @@ class BaseOntology(ABC):
                 return True
         return False
-    def _is_anonymous_id(label: str) -> bool:
+    def _is_anonymous_id(self, label: str) -> bool:
         """Check if a label represents an anonymous class identifier."""
         if not label:
             return True

ontolearner/evaluation/metrics.py CHANGED Viewed

@@ -11,44 +11,84 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Dict, Tuple, Set
+from typing import List, Dict, Tuple, Set, Any, Union
 SYMMETRIC_RELATIONS = {"equivalentclass", "sameas", "disjointwith"}
-def text2onto_metrics(y_true: List[str], y_pred: List[str], similarity_threshold: float = 0.8) -> Dict[str, float | int]:
-    def jaccard_similarity(a: str, b: str) -> float:
-        set_a = set(a.lower().split())
-        set_b = set(b.lower().split())
-        if not set_a and not set_b:
+def text2onto_metrics(
+    y_true: Dict[str, Any],
+    y_pred: Dict[str, Any],
+    similarity_threshold: float = 0.8
+) -> Dict[str, Any]:
+    """
+    Expects:
+      y_true = {"terms": [{"doc_id": str, "term": str}, ...],
+               "types": [{"doc_id": str, "type": str}, ...]}
+      y_pred = same shape
+    Returns:
+      {"terms": {...}, "types": {...}}
+    """
+    def jaccard_similarity(text_a: str, text_b: str) -> float:
+        tokens_a = set(text_a.lower().split())
+        tokens_b = set(text_b.lower().split())
+        if not tokens_a and not tokens_b:
             return 1.0
-        return len(set_a & set_b) / len(set_a | set_b)
-    matched_gt_indices = set()
-    matched_pred_indices = set()
-    for i, pred_label in enumerate(y_pred):
-        for j, gt_label in enumerate(y_true):
-            if j in matched_gt_indices:
-                continue
-            sim = jaccard_similarity(pred_label, gt_label)
-            if sim >= similarity_threshold:
-                matched_pred_indices.add(i)
-                matched_gt_indices.add(j)
-                break  # each gt matched once
-    total_correct = len(matched_pred_indices)
-    total_predicted = len(y_pred)
-    total_ground_truth = len(y_true)
+        return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
+    def pairs_to_strings(rows: List[Dict[str, str]], value_key: str) -> List[str]:
+        paired_strings: List[str] = []
+        for row in rows or []:
+            doc_id = (row.get("doc_id") or "").strip()
+            value = (row.get(value_key) or "").strip()
+            if doc_id and value:
+                # keep doc association + allow token Jaccard
+                paired_strings.append(f"{doc_id} {value}")
+        return paired_strings
+    def score_list(ground_truth_items: List[str], predicted_items: List[str]) -> Dict[str, Union[float, int]]:
+        matched_ground_truth_indices: Set[int] = set()
+        matched_predicted_indices: Set[int] = set()
+        for predicted_index, predicted_item in enumerate(predicted_items):
+            for ground_truth_index, ground_truth_item in enumerate(ground_truth_items):
+                if ground_truth_index in matched_ground_truth_indices:
+                    continue
+                if jaccard_similarity(predicted_item, ground_truth_item) >= similarity_threshold:
+                    matched_predicted_indices.add(predicted_index)
+                    matched_ground_truth_indices.add(ground_truth_index)
+                    break
+        total_correct = len(matched_predicted_indices)
+        total_predicted = len(predicted_items)
+        total_ground_truth = len(ground_truth_items)
+        precision = total_correct / total_predicted if total_predicted else 0.0
+        recall = total_correct / total_ground_truth if total_ground_truth else 0.0
+        f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) else 0.0
+        return {
+            "f1_score": f1,
+            "precision": precision,
+            "recall": recall,
+            "total_correct": total_correct,
+            "total_predicted": total_predicted,
+            "total_ground_truth": total_ground_truth,
+        }
+    ground_truth_terms = pairs_to_strings(y_true.get("terms", []), "term")
+    predicted_terms = pairs_to_strings(y_pred.get("terms", []), "term")
+    ground_truth_types = pairs_to_strings(y_true.get("types", []), "type")
+    predicted_types = pairs_to_strings(y_pred.get("types", []), "type")
+    terms_metrics = score_list(ground_truth_terms, predicted_terms)
+    types_metrics = score_list(ground_truth_types, predicted_types)
-    precision = total_correct / total_predicted if total_predicted > 0 else 0
-    recall = total_correct / total_ground_truth if total_ground_truth > 0 else 0
-    f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
     return {
-        "f1_score": f1_score,
-        "precision": precision,
-        "recall": recall,
-        "total_correct": total_correct,
-        "total_predicted": total_predicted,
-        "total_ground_truth": total_ground_truth
+        "terms": terms_metrics,
+        "types": types_metrics,
     }
 def term_typing_metrics(y_true: List[Dict[str, List[str]]], y_pred: List[Dict[str, List[str]]]) -> Dict[str, float | int]:

ontolearner/learner/__init__.py CHANGED Viewed

@@ -14,6 +14,6 @@
 from .llm import AutoLLMLearner, FalconLLM, MistralLLM
 from .retriever import AutoRetrieverLearner, LLMAugmentedRetrieverLearner
-from .rag import AutoRAGLearner
+from .rag import AutoRAGLearner, LLMAugmentedRAGLearner
 from .prompt import StandardizedPrompting
 from .label_mapper import LabelMapper

ontolearner/learner/label_mapper.py CHANGED Viewed

@@ -31,7 +31,7 @@ class LabelMapper:
                  ngram_range: Tuple=(1, 1),
                  label_dict: Dict[str, List[str]]=None,
                  analyzer: str = 'word',
-                 iterator_no: int = 100):
+                 iterator_no: int = 1000):
         """
         Initializes the TFIDFLabelMapper with a specified classifier and TF-IDF configuration.

ontolearner/learner/prompt.py CHANGED Viewed

@@ -17,15 +17,50 @@ from ..base import AutoPrompt
 class StandardizedPrompting(AutoPrompt):
     def __init__(self, task: str = None):
         if task == "term-typing":
-            prompt_template = """Determine whether the given term can be categorized as an instance of the specified high-level type. Answer with `yes` if it is otherwise answer with `no`. Do not explain.
+            prompt_template = """You are performing term typing.
+Determine whether the given term is a clear and unambiguous instance of the specified high-level type.
+Rules:
+- Answer "yes" only if the term commonly and directly belongs to the type.
+- Answer "no" if the term does not belong to the type, is ambiguous, or only weakly related.
+- Use the most common meaning of the term.
+- Do not explain your answer.
 Term: {term}
 Type: {type}
-Answer: """
+Answer (yes or no):"""
         elif task == "taxonomy-discovery":
-            prompt_template = """Is {parent} a direct or indirect superclass (or parent concept) of {child} in a conceptual hierarchy? Answer with yes or no.
-Answer: """
+            prompt_template =  """You are identifying taxonomic (is-a) relationships.
+Question:
+Is "{parent}" a superclass (direct or indirect) of "{child}" in a standard conceptual or ontological hierarchy?
+Rules:
+- A superclass means: "{child}" is a type or instance of "{parent}".
+- Answer "yes" only if the relationship is a true is-a relationship.
+- Answer "no" for part-of, related-to, or associative relationships.
+- Use general world knowledge.
+- Do not explain.
+Parent: {parent}
+Child: {child}
+Answer (yes or no):"""
         elif task == "non-taxonomic-re":
-            prompt_template = """Given the conceptual types `{head}` and `{tail}`, does a `{relation}` relation exist between them? Respond with "yes" if it does, otherwise respond with "no"."""
+            prompt_template = """You are identifying non-taxonomic conceptual relationships.
+Given two conceptual types, determine whether the specified relation typically holds between them.
+Rules:
+- Answer "yes" only if the relation commonly and meaningfully applies.
+- Answer "no" if the relation is rare, indirect, or context-dependent.
+- Do not infer relations that require specific situations.
+- Do not explain.
+Head type: {head}
+Tail type: {tail}
+Relation: {relation}
+Answer (yes or no):"""
         else:
             raise ValueError("Unknown task! Current tasks are: 'term-typing', 'taxonomy-discovery', 'non-taxonomic-re'")
         super().__init__(prompt_template)

ontolearner/learner/rag/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+# Copyright (c) 2025 SciKnowOrg
+#
+# Licensed under the MIT License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://opensource.org/licenses/MIT
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .rag import AutoRAGLearner, LLMAugmentedRAGLearner

ontolearner/learner/{rag.py → rag/rag.py} RENAMED Viewed

@@ -14,8 +14,7 @@
 import warnings
 from typing import Any
-from ..base import AutoLearner
+from ...base import AutoLearner
 class AutoRAGLearner(AutoLearner):
     def __init__(self,
@@ -87,3 +86,9 @@ class AutoRAGLearner(AutoLearner):
             return self.llm._non_taxonomic_re_predict(dataset=dataset)
         else:
             warnings.warn("No requirement for fiting the non-taxonomic-re model, the predict module will use the input data to do the fit as well.")
+class LLMAugmentedRAGLearner(AutoRAGLearner):
+    def set_augmenter(self, augmenter):
+        self.retriever.set_augmenter(augmenter=augmenter)

ontolearner/learner/retriever/__init__.py CHANGED Viewed

@@ -16,4 +16,4 @@ from .crossencoder import CrossEncoderRetriever
 from .embedding import GloveRetriever, Word2VecRetriever
 from .ngram import NgramRetriever
 from .learner import AutoRetrieverLearner, LLMAugmentedRetrieverLearner
-from .llm_retriever import LLMAugmenterGenerator, LLMAugmenter, LLMAugmentedRetriever
+from .augmented_retriever import LLMAugmenterGenerator, LLMAugmenter, LLMAugmentedRetriever

ontolearner/learner/retriever/{llm_retriever.py → augmented_retriever.py} RENAMED Viewed

@@ -17,6 +17,8 @@ from typing import Any, List, Dict
 from openai import OpenAI
 import time
 from tqdm import tqdm
+import torch
+import torch.nn.functional as F
 from ...base import AutoRetriever
 from ...utils import load_json
@@ -125,7 +127,6 @@ class LLMAugmenterGenerator(ABC):
             except Exception:
                 print("sleep for 5 seconds")
                 time.sleep(5)
         return inference
     def tasks_data_former(self, data: Any, task: str) -> List[str] | Dict[str, List[str]]:
@@ -298,21 +299,12 @@ class LLMAugmentedRetriever(AutoRetriever):
     Attributes:
         augmenter: An augmenter instance that provides transform() and top_n_candidate.
     """
-    def __init__(self) -> None:
-        """
-        Initialize the augmented retriever with no augmenter attached.
-        """
+    def __init__(self, threshold: float = 0.0, cutoff_rate: float = 100.0) -> None:
         super().__init__()
-        self.augmenter = None
+        self.threshold = threshold
+        self.cutoff_rate = cutoff_rate
     def set_augmenter(self, augmenter):
-        """
-        Attach an augmenter instance.
-        Args:
-            augmenter: An object providing `transform(query, task)` and `top_n_candidate`.
-        """
         self.augmenter = augmenter
     def retrieve(self, query: List[str], top_k: int = 5, batch_size: int = -1, task: str = None) -> List[List[str]]:
@@ -328,29 +320,46 @@ class LLMAugmentedRetriever(AutoRetriever):
         Returns:
             list[list[str]]: A list of document lists, one per input query.
         """
-        parent_retrieve = super(LLMAugmentedRetriever, self).retrieve
-        if task == 'taxonomy-discovery':
-            query_sets = []
-            for idx in range(self.augmenter.top_n_candidate):
-                query_set = []
-                for qu in query:
-                    query_set.append(self.augmenter.transform(qu, task=task)[idx])
-                query_sets.append(query_set)
-            retrieves = [
-                parent_retrieve(query=query_set, top_k=top_k, batch_size=batch_size)
-                for query_set in query_sets
-            ]
-            results = []
-            for qu_idx, qu in enumerate(query):
-                qu_result = []
-                for top_idx in range(self.augmenter.top_n_candidate):
-                    qu_result += retrieves[top_idx][qu_idx]
-                results.append(list(set(qu_result)))
-            return results
-        else:
-            return parent_retrieve(query=query, top_k=top_k, batch_size=batch_size)
+        if task != 'taxonomy-discovery':
+            return super().retrieve(query=query, top_k=top_k, batch_size=batch_size)
+        return self.augmented_retrieve(query, top_k=top_k, batch_size=batch_size, task=task)
+    def augmented_retrieve(self, query: List[str], top_k: int = 5, batch_size: int = -1, task: str = None):
+        if self.embeddings is None:
+            raise RuntimeError("Retriever model must index documents before prediction.")
+        augmented_queries, index_map = [], []
+        for qu_idx, qu in enumerate(query):
+            augmented = self.augmenter.transform(qu, task=task)
+            for aug in augmented:
+                augmented_queries.append(aug)
+                index_map.append(qu_idx)
+        doc_norm = F.normalize(self.embeddings, p=2, dim=1)
+        results = [dict() for _ in range(len(query))]
+        if batch_size == -1:
+            batch_size = len(augmented_queries)
+        for start in range(0, len(augmented_queries), batch_size):
+            batch_aug = augmented_queries[start:start + batch_size]
+            batch_embeddings = self.embedding_model.encode(batch_aug, convert_to_tensor=True)
+            batch_norm = F.normalize(batch_embeddings, p=2, dim=1)
+            similarity_matrix = torch.matmul(batch_norm, doc_norm.T)
+            current_top_k = min(top_k, len(self.documents))
+            topk_similarities, topk_indices = torch.topk(similarity_matrix, k=current_top_k, dim=1)
+            for i, (doc_indices, sim_scores) in enumerate(zip(topk_indices, topk_similarities)):
+                original_query_idx = index_map[start + i]
+                for doc_idx, score in zip(doc_indices.tolist(), sim_scores.tolist()):
+                    if score >= self.threshold:
+                        doc = self.documents[doc_idx]
+                        prev = results[original_query_idx].get(doc, 0.0)
+                        results[original_query_idx][doc] = prev + score
+        final_results = []
+        for doc_score_map in results:
+            sorted_docs = sorted(doc_score_map.items(), key=lambda x: x[1], reverse=True)
+            final_results.append([doc for doc, _ in sorted_docs])
+        return final_results

ontolearner/learner/retriever/learner.py CHANGED Viewed

@@ -122,7 +122,6 @@ class AutoRetrieverLearner(AutoLearner):
             warnings.warn("No requirement for fiting the non-taxonomic RE model, the predict module will use the input data to do the fit as well..")
 class LLMAugmentedRetrieverLearner(AutoRetrieverLearner):
     def set_augmenter(self, augmenter):
@@ -160,9 +159,9 @@ class LLMAugmentedRetrieverLearner(AutoRetrieverLearner):
             taxonomic_pairs = [{"parent": candidate, "child": query}
                                for query, candidates in zip(data, candidates_lst)
                                for candidate in candidates if candidate.lower() != query.lower()]
-            taxonomic_pairs += [{"parent": query, "child": candidate}
-                                for query, candidates in zip(data, candidates_lst)
-                                for candidate in candidates if candidate.lower() != query.lower()]
+            # taxonomic_pairs += [{"parent": query, "child": candidate}
+            #                    for query, candidates in zip(data, candidates_lst)
+            #                    for candidate in candidates if candidate.lower() != query.lower()]
             unique_taxonomic_pairs, seen = [], set()
             for pair in taxonomic_pairs:
                 key = (pair["parent"].lower(), pair["child"].lower())  # Directional key (parent, child)

ontolearner/learner/text2onto/__init__.py CHANGED Viewed

@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .alexbek import AlexbekFewShotLearner
+from .alexbek import AlexbekRAGFewShotLearner
 from .sbunlp import SBUNLPFewShotLearner

OntoLearner 1.4.9__py3-none-any.whl → 1.4.11__py3-none-any.whl

OntoLearner 1.4.9__py3-none-any.whl → 1.4.11__py3-none-any.whl