OntoLearner 1.4.6.tar.gz → 1.4.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. {ontolearner-1.4.6 → ontolearner-1.4.7}/PKG-INFO +2 -1
  2. ontolearner-1.4.7/ontolearner/VERSION +1 -0
  3. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/base/learner.py +5 -2
  4. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/learner/__init__.py +1 -1
  5. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/learner/llm.py +73 -3
  6. {ontolearner-1.4.6 → ontolearner-1.4.7}/pyproject.toml +2 -1
  7. ontolearner-1.4.6/ontolearner/VERSION +0 -1
  8. {ontolearner-1.4.6 → ontolearner-1.4.7}/LICENSE +0 -0
  9. {ontolearner-1.4.6 → ontolearner-1.4.7}/README.md +0 -0
  10. {ontolearner-1.4.6 → ontolearner-1.4.7}/images/logo.png +0 -0
  11. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/__init__.py +0 -0
  12. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/_learner.py +0 -0
  13. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/_ontology.py +0 -0
  14. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/base/__init__.py +0 -0
  15. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/base/ontology.py +0 -0
  16. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/base/text2onto.py +0 -0
  17. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/data_structure/__init__.py +0 -0
  18. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/data_structure/data.py +0 -0
  19. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/data_structure/metric.py +0 -0
  20. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/evaluation/__init__.py +0 -0
  21. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/evaluation/evaluate.py +0 -0
  22. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/evaluation/metrics.py +0 -0
  23. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/learner/label_mapper.py +0 -0
  24. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/learner/prompt.py +0 -0
  25. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/learner/rag.py +0 -0
  26. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/learner/retriever.py +0 -0
  27. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/__init__.py +0 -0
  28. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/agriculture.py +0 -0
  29. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/arts_humanities.py +0 -0
  30. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/biology.py +0 -0
  31. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/chemistry.py +0 -0
  32. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/ecology_environment.py +0 -0
  33. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/education.py +0 -0
  34. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/events.py +0 -0
  35. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/finance.py +0 -0
  36. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/food_beverage.py +0 -0
  37. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/general.py +0 -0
  38. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/geography.py +0 -0
  39. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/industry.py +0 -0
  40. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/law.py +0 -0
  41. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/library_cultural_heritage.py +0 -0
  42. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/material_science_engineering.py +0 -0
  43. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/medicine.py +0 -0
  44. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/news_media.py +0 -0
  45. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/scholarly_knowledge.py +0 -0
  46. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/social_sciences.py +0 -0
  47. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/units_measurements.py +0 -0
  48. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/upper_ontologies.py +0 -0
  49. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/ontology/web.py +0 -0
  50. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/processor.py +0 -0
  51. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/text2onto/__init__.py +0 -0
  52. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/text2onto/batchifier.py +0 -0
  53. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/text2onto/general.py +0 -0
  54. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/text2onto/splitter.py +0 -0
  55. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/text2onto/synthesizer.py +0 -0
  56. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/tools/__init__.py +0 -0
  57. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/tools/analyzer.py +0 -0
  58. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/tools/visualizer.py +0 -0
  59. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/utils/__init__.py +0 -0
  60. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/utils/io.py +0 -0
  61. {ontolearner-1.4.6 → ontolearner-1.4.7}/ontolearner/utils/train_test_split.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: OntoLearner
3
- Version: 1.4.6
3
+ Version: 1.4.7
4
4
  Summary: OntoLearner: A Modular Python Library for Ontology Learning with LLMs.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -17,6 +17,7 @@ Requires-Dist: bitsandbytes (>=0.45.1,<0.46.0)
17
17
  Requires-Dist: dspy (>=2.6.14,<3.0.0)
18
18
  Requires-Dist: huggingface-hub (>=0.34.4,<0.35.0)
19
19
  Requires-Dist: matplotlib
20
+ Requires-Dist: mistral-common[sentencepiece] (>=1.8.5,<2.0.0)
20
21
  Requires-Dist: networkx (==3.2.1)
21
22
  Requires-Dist: numpy
22
23
  Requires-Dist: openpyxl
@@ -0,0 +1 @@
1
+ 1.4.7
@@ -238,7 +238,7 @@ class AutoLLM(ABC):
238
238
  if self.device == "cpu":
239
239
  device_map = "cpu"
240
240
  else:
241
- device_map = "auto"
241
+ device_map = "balanced"
242
242
  self.model = AutoModelForCausalLM.from_pretrained(
243
243
  model_id,
244
244
  device_map=device_map,
@@ -271,7 +271,10 @@ class AutoLLM(ABC):
271
271
  Responses include the original input plus generated continuation.
272
272
  """
273
273
  # Tokenize inputs and move to device
274
- encoded_inputs = self.tokenizer(inputs, return_tensors="pt", padding=True).to(self.model.device)
274
+ encoded_inputs = self.tokenizer(inputs,
275
+ return_tensors="pt",
276
+ padding=True,
277
+ truncation=True).to(self.model.device)
275
278
  input_ids = encoded_inputs["input_ids"]
276
279
  input_length = input_ids.shape[1]
277
280
 
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from .llm import AutoLLMLearner
15
+ from .llm import AutoLLMLearner, FalconLLM, MistralLLM
16
16
  from .retriever import AutoRetrieverLearner
17
17
  from .rag import AutoRAGLearner
18
18
  from .prompt import StandardizedPrompting
@@ -13,23 +13,27 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from ..base import AutoLLM, AutoLearner
16
- from typing import Any
16
+ from typing import Any, List
17
17
  import warnings
18
18
  from tqdm import tqdm
19
19
  from torch.utils.data import DataLoader
20
-
20
+ import torch
21
+ from transformers import Mistral3ForConditionalGeneration
22
+ from mistral_common.protocol.instruct.request import ChatCompletionRequest
23
+ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
21
24
 
22
25
  class AutoLLMLearner(AutoLearner):
23
26
 
24
27
  def __init__(self,
25
28
  prompting,
26
29
  label_mapper,
30
+ llm: AutoLLM = AutoLLM,
27
31
  token: str = "",
28
32
  max_new_tokens: int = 5,
29
33
  batch_size: int = 10,
30
34
  device='cpu') -> None:
31
35
  super().__init__()
32
- self.llm = AutoLLM(token=token, label_mapper=label_mapper, device=device)
36
+ self.llm = llm(token=token, label_mapper=label_mapper, device=device)
33
37
  self.prompting = prompting
34
38
  self.batch_size = batch_size
35
39
  self.max_new_tokens = max_new_tokens
@@ -136,3 +140,69 @@ class AutoLLMLearner(AutoLearner):
136
140
  return self._non_taxonomic_re_predict(dataset=dataset)
137
141
  else:
138
142
  warnings.warn("No requirement for fiting the non-taxonomic-re model, the predict module will use the input data to do the task.")
143
+
144
+
145
+ class FalconLLM(AutoLLM):
146
+
147
+ def generate(self, inputs: List[str], max_new_tokens: int = 50) -> List[str]:
148
+ encoded_inputs = self.tokenizer(inputs,
149
+ return_tensors="pt",
150
+ padding=True,
151
+ truncation=True).to(self.model.device)
152
+ input_ids = encoded_inputs["input_ids"]
153
+ input_length = input_ids.shape[1]
154
+ outputs = self.model.generate(
155
+ input_ids,
156
+ max_new_tokens=max_new_tokens,
157
+ pad_token_id=self.tokenizer.eos_token_id
158
+ )
159
+ generated_tokens = outputs[:, input_length:]
160
+ decoded_outputs = [self.tokenizer.decode(g, skip_special_tokens=True).strip() for g in generated_tokens]
161
+ return self.label_mapper.predict(decoded_outputs)
162
+
163
+ class MistralLLM(AutoLLM):
164
+
165
+ def load(self, model_id: str) -> None:
166
+ self.tokenizer = MistralTokenizer.from_hf_hub(model_id)
167
+ if self.device == "cpu":
168
+ device_map = "cpu"
169
+ else:
170
+ device_map = "balanced"
171
+ self.model = Mistral3ForConditionalGeneration.from_pretrained(
172
+ model_id,
173
+ device_map=device_map,
174
+ torch_dtype=torch.bfloat16,
175
+ token=self.token
176
+ )
177
+ if not hasattr(self.tokenizer, "pad_token_id") or self.tokenizer.pad_token_id is None:
178
+ self.tokenizer.pad_token_id = self.model.generation_config.eos_token_id
179
+ self.label_mapper.fit()
180
+
181
+ def generate(self, inputs: List[str], max_new_tokens: int = 50) -> List[str]:
182
+ tokenized_list = []
183
+ for prompt in inputs:
184
+ messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
185
+ tokenized = self.tokenizer.encode_chat_completion(ChatCompletionRequest(messages=messages))
186
+ tokenized_list.append(tokenized.tokens)
187
+ max_len = max(len(tokens) for tokens in tokenized_list)
188
+ input_ids, attention_masks = [], []
189
+ for tokens in tokenized_list:
190
+ pad_length = max_len - len(tokens)
191
+ input_ids.append(tokens + [self.tokenizer.pad_token_id] * pad_length)
192
+ attention_masks.append([1] * len(tokens) + [0] * pad_length)
193
+
194
+ input_ids = torch.tensor(input_ids).to(self.model.device)
195
+ attention_masks = torch.tensor(attention_masks).to(self.model.device)
196
+
197
+ outputs =self.model.generate(
198
+ input_ids=input_ids,
199
+ attention_mask=attention_masks,
200
+ eos_token_id=self.model.generation_config.eos_token_id,
201
+ pad_token_id=self.tokenizer.pad_token_id,
202
+ max_new_tokens=max_new_tokens,
203
+ )
204
+ decoded_outputs = []
205
+ for i, tokens in enumerate(outputs):
206
+ output_text = self.tokenizer.decode(tokens[len(tokenized_list[i]):])
207
+ decoded_outputs.append(output_text)
208
+ return self.label_mapper.predict(decoded_outputs)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "OntoLearner"
3
- version = "1.4.6"
3
+ version = "1.4.7"
4
4
  description = "OntoLearner: A Modular Python Library for Ontology Learning with LLMs."
5
5
  authors = ["Hamed Babaei Giglou <hamedbabaeigiglou@gmail.com>", "Andrei C. Aioanei <andrei.c.aioanei@gmail.com>"]
6
6
  license = "MIT License"
@@ -29,6 +29,7 @@ transformers = "^4.56.0"
29
29
  sentence-transformers = "^5.1.0"
30
30
  dspy = "^2.6.14"
31
31
  bitsandbytes="^0.45.1"
32
+ mistral-common = { version = "^1.8.5", extras = ["sentencepiece"] }
32
33
 
33
34
  [tool.poetry.dev-dependencies]
34
35
  ruff = "*"
@@ -1 +0,0 @@
1
- 1.4.6
File without changes
File without changes
File without changes