OntoLearner 1.4.4__tar.gz → 1.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {ontolearner-1.4.4 → ontolearner-1.4.5}/PKG-INFO +3 -2
  2. ontolearner-1.4.5/ontolearner/VERSION +1 -0
  3. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/base/learner.py +19 -15
  4. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/learner/retriever.py +3 -2
  5. {ontolearner-1.4.4 → ontolearner-1.4.5}/pyproject.toml +1 -1
  6. ontolearner-1.4.4/ontolearner/VERSION +0 -1
  7. {ontolearner-1.4.4 → ontolearner-1.4.5}/LICENSE +0 -0
  8. {ontolearner-1.4.4 → ontolearner-1.4.5}/README.md +0 -0
  9. {ontolearner-1.4.4 → ontolearner-1.4.5}/images/logo.png +0 -0
  10. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/__init__.py +0 -0
  11. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/_learner.py +0 -0
  12. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/_ontology.py +0 -0
  13. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/base/__init__.py +0 -0
  14. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/base/ontology.py +0 -0
  15. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/base/text2onto.py +0 -0
  16. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/data_structure/__init__.py +0 -0
  17. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/data_structure/data.py +0 -0
  18. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/data_structure/metric.py +0 -0
  19. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/evaluation/__init__.py +0 -0
  20. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/evaluation/evaluate.py +0 -0
  21. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/evaluation/metrics.py +0 -0
  22. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/learner/__init__.py +0 -0
  23. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/learner/label_mapper.py +0 -0
  24. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/learner/llm.py +0 -0
  25. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/learner/prompt.py +0 -0
  26. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/learner/rag.py +0 -0
  27. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/__init__.py +0 -0
  28. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/agriculture.py +0 -0
  29. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/arts_humanities.py +0 -0
  30. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/biology.py +0 -0
  31. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/chemistry.py +0 -0
  32. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/ecology_environment.py +0 -0
  33. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/education.py +0 -0
  34. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/events.py +0 -0
  35. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/finance.py +0 -0
  36. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/food_beverage.py +0 -0
  37. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/general.py +0 -0
  38. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/geography.py +0 -0
  39. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/industry.py +0 -0
  40. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/law.py +0 -0
  41. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/library_cultural_heritage.py +0 -0
  42. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/material_science_engineering.py +0 -0
  43. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/medicine.py +0 -0
  44. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/news_media.py +0 -0
  45. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/scholarly_knowledge.py +0 -0
  46. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/social_sciences.py +0 -0
  47. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/units_measurements.py +0 -0
  48. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/upper_ontologies.py +0 -0
  49. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/ontology/web.py +0 -0
  50. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/processor.py +0 -0
  51. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/text2onto/__init__.py +0 -0
  52. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/text2onto/batchifier.py +0 -0
  53. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/text2onto/general.py +0 -0
  54. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/text2onto/splitter.py +0 -0
  55. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/text2onto/synthesizer.py +0 -0
  56. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/tools/__init__.py +0 -0
  57. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/tools/analyzer.py +0 -0
  58. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/tools/visualizer.py +0 -0
  59. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/utils/__init__.py +0 -0
  60. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/utils/io.py +0 -0
  61. {ontolearner-1.4.4 → ontolearner-1.4.5}/ontolearner/utils/train_test_split.py +0 -0
@@ -1,8 +1,9 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: OntoLearner
3
- Version: 1.4.4
3
+ Version: 1.4.5
4
4
  Summary: OntoLearner: A Modular Python Library for Ontology Learning with LLMs.
5
5
  License: MIT
6
+ License-File: LICENSE
6
7
  Author: Hamed Babaei Giglou
7
8
  Author-email: hamedbabaeigiglou@gmail.com
8
9
  Requires-Python: >=3.10,<3.14.0
@@ -0,0 +1 @@
1
+ 1.4.5
@@ -350,7 +350,7 @@ class AutoRetriever(ABC):
350
350
  self.documents = inputs
351
351
  self.embeddings = self.embedding_model.encode(inputs, convert_to_tensor=True)
352
352
 
353
- def retrieve(self, query: List[str], top_k: int = 5) -> List[List[str]]:
353
+ def retrieve(self, query: List[str], top_k: int = 5, batch_size: int = -1) -> List[List[str]]:
354
354
  """
355
355
  Retrieve the top-k most similar examples for each query in a list of queries.
356
356
 
@@ -363,33 +363,37 @@ class AutoRetriever(ABC):
363
363
  """
364
364
  if self.embeddings is None:
365
365
  raise RuntimeError("Retriever model must index documents before prediction.")
366
-
367
- # Encode all queries at once
368
366
  query_embeddings = self.embedding_model.encode(query, convert_to_tensor=True) # shape: [num_queries, dim]
369
-
370
367
  if query_embeddings.shape[-1] != self.embeddings.shape[-1]:
371
368
  raise ValueError(
372
369
  f"Embedding dimension mismatch: query embedding dim={query_embeddings.shape[-1]}, "
373
370
  f"document embedding dim={self.embeddings.shape[-1]}"
374
371
  )
375
-
376
- # Normalize embeddings for cosine similarity
377
- query_norm = F.normalize(query_embeddings, p=2, dim=1)
378
372
  doc_norm = F.normalize(self.embeddings, p=2, dim=1)
373
+ if batch_size == -1:
374
+ results = self._retrieve(query_embeddings=query_embeddings, doc_norm=doc_norm, top_k=top_k)
375
+ else:
376
+ results = self._batch_retrieve(query_embeddings=query_embeddings, doc_norm=doc_norm, top_k=top_k, batch_size=batch_size)
377
+ return results
379
378
 
380
- # Compute cosine similarity: [num_queries, num_docs]
381
- similarity_matrix = torch.matmul(query_norm, doc_norm.T)
382
-
383
- # Get top-k indices for each query
384
- top_k = min(top_k, len(self.documents))
385
- topk_similarities, topk_indices = torch.topk(similarity_matrix, k=top_k, dim=1)
386
379
 
387
- # Retrieve documents for each query
380
+ def _retrieve(self, query_embeddings, doc_norm, top_k: int = 5) -> List[List[str]]:
381
+ query_norm = F.normalize(query_embeddings, p=2, dim=1)
382
+ similarity_matrix = torch.matmul(query_norm, doc_norm.T)
383
+ current_top_k = min(top_k, len(self.documents))
384
+ topk_similarities, topk_indices = torch.topk(similarity_matrix, k=current_top_k, dim=1)
388
385
  results = [[self.documents[i] for i in indices] for indices in topk_indices]
389
-
390
386
  return results
391
387
 
392
388
 
389
+ def _batch_retrieve(self, query_embeddings, doc_norm, top_k: int = 5, batch_size: int = 1024) -> List[List[str]]:
390
+ results = []
391
+ for i in range(0, query_embeddings.size(0), batch_size):
392
+ batch_queries = query_embeddings[i:i + batch_size]
393
+ batch_results = self._retrieve(batch_queries, doc_norm, top_k=top_k)
394
+ results.extend(batch_results)
395
+ return results
396
+
393
397
  class AutoPrompt(ABC):
394
398
  """
395
399
  Abstract base class for prompt formatting components.
@@ -17,12 +17,13 @@ from typing import Any, Optional
17
17
  import warnings
18
18
 
19
19
  class AutoRetrieverLearner(AutoLearner):
20
- def __init__(self, base_retriever: Any = AutoRetriever(), top_k: int = 5):
20
+ def __init__(self, base_retriever: Any = AutoRetriever(), top_k: int = 5, batch_size: int = -1):
21
21
  super().__init__()
22
22
  self.retriever = base_retriever
23
23
  self.top_k = top_k
24
24
  self._is_term_typing_fit = False
25
25
  self._is_taxonomy_discovery_fit = False
26
+ self._batch_size = batch_size
26
27
 
27
28
  def load(self, model_id: str = "sentence-transformers/all-MiniLM-L6-v2"):
28
29
  self.retriever.load(model_id=model_id)
@@ -35,7 +36,7 @@ class AutoRetrieverLearner(AutoLearner):
35
36
 
36
37
  def _retriever_predict(self, data:Any, top_k: int) -> Any:
37
38
  if isinstance(data, list):
38
- return self.retriever.retrieve(query=data, top_k=top_k)
39
+ return self.retriever.retrieve(query=data, top_k=top_k, batch_size=self._batch_size)
39
40
  if isinstance(data, str):
40
41
  return self.retriever.retrieve(query=[data], top_k=top_k)
41
42
  raise TypeError(f"Unsupported data type {type(data)}. You should pass a List[str] or a str.")
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "OntoLearner"
3
- version = "1.4.4"
3
+ version = "1.4.5"
4
4
  description = "OntoLearner: A Modular Python Library for Ontology Learning with LLMs."
5
5
  authors = ["Hamed Babaei Giglou <hamedbabaeigiglou@gmail.com>", "Andrei C. Aioanei <andrei.c.aioanei@gmail.com>"]
6
6
  license = "MIT License"
@@ -1 +0,0 @@
1
- 1.4.4
File without changes
File without changes
File without changes