OntoLearner 1.4.10.tar.gz → 1.5.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {ontolearner-1.4.10 → ontolearner-1.5.0}/PKG-INFO +2 -2
  2. ontolearner-1.5.0/ontolearner/VERSION +1 -0
  3. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/base/learner.py +41 -18
  4. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/evaluation/metrics.py +72 -32
  5. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/__init__.py +3 -2
  6. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/label_mapper.py +5 -4
  7. ontolearner-1.5.0/ontolearner/learner/llm.py +465 -0
  8. ontolearner-1.5.0/ontolearner/learner/prompt.py +66 -0
  9. ontolearner-1.5.0/ontolearner/learner/rag/__init__.py +14 -0
  10. {ontolearner-1.4.10/ontolearner/learner → ontolearner-1.5.0/ontolearner/learner/rag}/rag.py +7 -2
  11. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/retriever/__init__.py +1 -1
  12. ontolearner-1.4.10/ontolearner/learner/retriever/llm_retriever.py → ontolearner-1.5.0/ontolearner/learner/retriever/augmented_retriever.py +48 -39
  13. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/retriever/learner.py +3 -4
  14. ontolearner-1.5.0/ontolearner/learner/taxonomy_discovery/alexbek.py +822 -0
  15. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/taxonomy_discovery/skhnlp.py +216 -156
  16. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/text2onto/__init__.py +1 -1
  17. ontolearner-1.5.0/ontolearner/learner/text2onto/alexbek.py +598 -0
  18. ontolearner-1.5.0/ontolearner/learner/text2onto/sbunlp.py +603 -0
  19. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/biology.py +2 -3
  20. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/chemistry.py +16 -18
  21. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/ecology_environment.py +2 -3
  22. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/general.py +4 -6
  23. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/material_science_engineering.py +64 -45
  24. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/medicine.py +2 -3
  25. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/scholarly_knowledge.py +6 -9
  26. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/processor.py +3 -3
  27. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/text2onto/splitter.py +69 -6
  28. {ontolearner-1.4.10 → ontolearner-1.5.0}/pyproject.toml +2 -2
  29. ontolearner-1.4.10/ontolearner/VERSION +0 -1
  30. ontolearner-1.4.10/ontolearner/learner/llm.py +0 -208
  31. ontolearner-1.4.10/ontolearner/learner/prompt.py +0 -31
  32. ontolearner-1.4.10/ontolearner/learner/taxonomy_discovery/alexbek.py +0 -500
  33. ontolearner-1.4.10/ontolearner/learner/text2onto/alexbek.py +0 -1219
  34. ontolearner-1.4.10/ontolearner/learner/text2onto/sbunlp.py +0 -598
  35. {ontolearner-1.4.10 → ontolearner-1.5.0}/LICENSE +0 -0
  36. {ontolearner-1.4.10 → ontolearner-1.5.0}/README.md +0 -0
  37. {ontolearner-1.4.10 → ontolearner-1.5.0}/images/logo.png +0 -0
  38. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/__init__.py +0 -0
  39. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/_learner.py +0 -0
  40. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/_ontology.py +0 -0
  41. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/base/__init__.py +0 -0
  42. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/base/ontology.py +0 -0
  43. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/base/text2onto.py +0 -0
  44. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/data_structure/__init__.py +0 -0
  45. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/data_structure/data.py +0 -0
  46. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/data_structure/metric.py +0 -0
  47. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/evaluation/__init__.py +0 -0
  48. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/evaluation/evaluate.py +0 -0
  49. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/retriever/crossencoder.py +0 -0
  50. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/retriever/embedding.py +0 -0
  51. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/retriever/ngram.py +0 -0
  52. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/taxonomy_discovery/__init__.py +0 -0
  53. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/taxonomy_discovery/rwthdbis.py +0 -0
  54. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/taxonomy_discovery/sbunlp.py +0 -0
  55. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/term_typing/__init__.py +0 -0
  56. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/term_typing/alexbek.py +0 -0
  57. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/term_typing/rwthdbis.py +0 -0
  58. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/learner/term_typing/sbunlp.py +0 -0
  59. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/__init__.py +0 -0
  60. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/agriculture.py +0 -0
  61. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/arts_humanities.py +0 -0
  62. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/education.py +0 -0
  63. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/events.py +0 -0
  64. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/finance.py +0 -0
  65. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/food_beverage.py +0 -0
  66. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/geography.py +0 -0
  67. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/industry.py +0 -0
  68. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/law.py +0 -0
  69. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/library_cultural_heritage.py +0 -0
  70. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/news_media.py +0 -0
  71. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/social_sciences.py +0 -0
  72. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/units_measurements.py +0 -0
  73. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/upper_ontologies.py +0 -0
  74. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/ontology/web.py +0 -0
  75. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/text2onto/__init__.py +0 -0
  76. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/text2onto/batchifier.py +0 -0
  77. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/text2onto/general.py +0 -0
  78. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/text2onto/synthesizer.py +0 -0
  79. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/tools/__init__.py +0 -0
  80. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/tools/analyzer.py +0 -0
  81. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/tools/visualizer.py +0 -0
  82. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/utils/__init__.py +0 -0
  83. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/utils/io.py +0 -0
  84. {ontolearner-1.4.10 → ontolearner-1.5.0}/ontolearner/utils/train_test_split.py +0 -0
--- ontolearner-1.4.10/PKG-INFO
+++ ontolearner-1.5.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: OntoLearner
-Version: 1.4.10
+Version: 1.5.0
 Summary: OntoLearner: A Modular Python Library for Ontology Learning with LLMs.
 License: MIT
 License-File: LICENSE
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: Levenshtein
-Requires-Dist: bitsandbytes (>=0.45.1,<0.46.0)
+Requires-Dist: bitsandbytes (>=0.45.1,<1.0.0) ; platform_system == "Linux"
 Requires-Dist: dspy (>=2.6.14,<3.0.0)
 Requires-Dist: g4f
 Requires-Dist: gensim
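
The bitsandbytes pin is loosened (<1.0.0 instead of <0.46.0) and gains a PEP 508 environment marker, so the dependency is only installed on Linux. A minimal sketch of how such a marker evaluates, using the third-party packaging library (illustration only, not OntoLearner code):

    # Evaluate the environment marker attached to the bitsandbytes requirement.
    from packaging.markers import Marker

    marker = Marker('platform_system == "Linux"')
    print(marker.evaluate())                               # True only when installing on Linux
    print(marker.evaluate({"platform_system": "Darwin"}))  # False: the dependency is skipped on macOS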
--- /dev/null
+++ ontolearner-1.5.0/ontolearner/VERSION
@@ -0,0 +1 @@
+1.5.0
--- ontolearner-1.4.10/ontolearner/base/learner.py
+++ ontolearner-1.5.0/ontolearner/base/learner.py
@@ -18,6 +18,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import torch.nn.functional as F
 from sentence_transformers import SentenceTransformer
+from collections import defaultdict
 
 class AutoLearner(ABC):
     """
@@ -70,6 +71,7 @@ class AutoLearner(ABC):
             - "term-typing": Predict semantic types for terms
             - "taxonomy-discovery": Identify hierarchical relationships
             - "non-taxonomy-discovery": Identify non-hierarchical relationships
+            - "text2onto" : Extract ontology terms and their semantic types from documents
 
         Raises:
             NotImplementedError: If not implemented by concrete class.
@@ -81,6 +83,8 @@ class AutoLearner(ABC):
             self._taxonomy_discovery(train_data, test=False)
         elif task == 'non-taxonomic-re':
             self._non_taxonomic_re(train_data, test=False)
+        elif task == 'text2onto':
+            self._text2onto(train_data, test=False)
         else:
             raise ValueError(f"{task} is not a valid task.")
 
@@ -103,6 +107,7 @@ class AutoLearner(ABC):
             - term-typing: List of predicted types for each term
             - taxonomy-discovery: Boolean predictions for relationships
             - non-taxonomy-discovery: Predicted relation types
+            - text2onto : Extract ontology terms and their semantic types from documents
 
         Raises:
             NotImplementedError: If not implemented by concrete class.
@@ -115,6 +120,8 @@ class AutoLearner(ABC):
             return self._taxonomy_discovery(eval_data, test=True)
         elif task == 'non-taxonomic-re':
             return self._non_taxonomic_re(eval_data, test=True)
+        elif task == 'text2onto':
+            return self._text2onto(eval_data, test=True)
         else:
             raise ValueError(f"{task} is not a valid task.")
 
@@ -147,6 +154,9 @@ class AutoLearner(ABC):
     def _non_taxonomic_re(self, data: Any, test: bool = False) -> Optional[Any]:
         pass
 
+    def _text2onto(self, data: Any, test: bool = False) -> Optional[Any]:
+        pass
+
     def tasks_data_former(self, data: Any, task: str, test: bool = False) -> List[str | Dict[str, str]]:
         formatted_data = []
         if task == "term-typing":
@@ -171,6 +181,7 @@ class AutoLearner(ABC):
             non_taxonomic_types = list(set(non_taxonomic_types))
             non_taxonomic_res = list(set(non_taxonomic_res))
             formatted_data = {"types": non_taxonomic_types, "relations": non_taxonomic_res}
+
         return formatted_data
 
     def tasks_ground_truth_former(self, data: Any, task: str) -> List[Dict[str, str]]:
@@ -186,6 +197,26 @@ class AutoLearner(ABC):
             formatted_data.append({"head": non_taxonomic_triplets.head,
                                    "tail": non_taxonomic_triplets.tail,
                                    "relation": non_taxonomic_triplets.relation})
+        if task == "text2onto":
+            terms2docs = data.get("terms2docs", {}) or {}
+            terms2types = data.get("terms2types", {}) or {}
+
+            # gold doc→terms
+            gold_terms = []
+            for term, doc_ids in terms2docs.items():
+                for doc_id in doc_ids or []:
+                    gold_terms.append({"doc_id": doc_id, "term": term})
+
+            # gold doc→types derived via doc→terms + term→types
+            doc2types = defaultdict(set)
+            for term, doc_ids in terms2docs.items():
+                for doc_id in doc_ids or []:
+                    for ty in (terms2types.get(term, []) or []):
+                        if isinstance(ty, str) and ty.strip():
+                            doc2types[doc_id].add(ty.strip())
+            gold_types = [{"doc_id": doc_id, "type": ty} for doc_id, tys in doc2types.items() for ty in tys]
+            return {"terms": gold_terms, "types": gold_types}
+
         return formatted_data
 
 class AutoLLM(ABC):
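
For orientation, the new text2onto branch in tasks_ground_truth_former turns the term-centric mappings (terms2docs, terms2types) into document-level gold records. A standalone sketch of the same derivation on toy data (keys and values invented for illustration):

    # Toy re-run of the gold-record derivation added above; not package code.
    from collections import defaultdict

    terms2docs = {"perovskite": ["doc1", "doc2"], "band gap": ["doc2"]}
    terms2types = {"perovskite": ["Material"], "band gap": ["Property"]}

    gold_terms = [{"doc_id": d, "term": t} for t, docs in terms2docs.items() for d in docs]

    doc2types = defaultdict(set)
    for term, docs in terms2docs.items():
        for d in docs:
            doc2types[d].update(terms2types.get(term, []))
    gold_types = [{"doc_id": d, "type": ty} for d, tys in doc2types.items() for ty in tys]

    # gold_terms: doc1→perovskite, doc2→perovskite, doc2→band gap
    # gold_types: doc1→Material, doc2→Material, doc2→Property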
@@ -201,7 +232,7 @@
         tokenizer: The tokenizer associated with the model.
     """
 
-    def __init__(self, label_mapper: Any, device: str='cpu', token: str="") -> None:
+    def __init__(self, label_mapper: Any, device: str='cpu', token: str="", max_length: int = 512) -> None:
         """
         Initialize the LLM component.
 
@@ -213,6 +244,7 @@
         self.device=device
         self.model: Optional[Any] = None
         self.tokenizer: Optional[Any] = None
+        self.max_length = max_length
 
 
     def load(self, model_id: str) -> None:
@@ -236,10 +268,8 @@
         self.tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side='left', token=self.token)
         self.tokenizer.pad_token = self.tokenizer.eos_token
         if self.device == "cpu":
-            # device_map = "cpu"
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_id,
-                # device_map=device_map,
                 torch_dtype=torch.bfloat16,
                 token=self.token
             )
@@ -248,11 +278,12 @@
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_id,
                 device_map=device_map,
-                torch_dtype=torch.bfloat16,
-                token=self.token
+                token=self.token,
+                trust_remote_code=True,
             )
         self.label_mapper.fit()
 
+    @torch.no_grad()
     def generate(self, inputs: List[str], max_new_tokens: int = 50) -> List[str]:
         """
         Generate text responses for the given input prompts.
@@ -276,29 +307,21 @@
         List of generated text responses, one for each input prompt.
         Responses include the original input plus generated continuation.
         """
-        # Tokenize inputs and move to device
         encoded_inputs = self.tokenizer(inputs,
                                         return_tensors="pt",
-                                        padding=True,
-                                        truncation=True).to(self.model.device)
+                                        max_length=self.max_length,
+                                        truncation=True,
+                                        padding=True).to(self.model.device)
         input_ids = encoded_inputs["input_ids"]
         input_length = input_ids.shape[1]
-
-        # Generate output
         outputs = self.model.generate(
             **encoded_inputs,
             max_new_tokens=max_new_tokens,
-            pad_token_id=self.tokenizer.eos_token_id
+            pad_token_id=self.tokenizer.eos_token_id,
+            eos_token_id=self.tokenizer.eos_token_id
         )
-
-        # Extract only the newly generated tokens (excluding prompt)
         generated_tokens = outputs[:, input_length:]
-
-        # Decode only the generated part
         decoded_outputs = [self.tokenizer.decode(g, skip_special_tokens=True).strip() for g in generated_tokens]
-        print(decoded_outputs)
-        print(self.label_mapper.predict(decoded_outputs))
-        # Map the decoded text to labels
         return self.label_mapper.predict(decoded_outputs)
 
 class AutoRetriever(ABC):
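
The generate() changes above cap prompt length at the new max_length, run under torch.no_grad(), stop on the tokenizer's EOS token, and drop the debug print calls. The return value of model.generate() still contains the prompt, so only the continuation is decoded; a minimal tensor-only sketch of that prompt-stripping step (toy ids, no model involved):

    # Sketch of slicing off the prompt before decoding, as done in generate() above.
    import torch

    input_length = 4                                            # padded prompt length in tokens
    outputs = torch.tensor([[11, 12, 13, 14, 101, 102, 103]])   # prompt ids followed by generated ids
    generated_tokens = outputs[:, input_length:]                # keep only the continuation
    assert generated_tokens.tolist() == [[101, 102, 103]]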
--- ontolearner-1.4.10/ontolearner/evaluation/metrics.py
+++ ontolearner-1.5.0/ontolearner/evaluation/metrics.py
@@ -11,44 +11,84 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List, Dict, Tuple, Set
+from typing import List, Dict, Tuple, Set, Any, Union
 
 SYMMETRIC_RELATIONS = {"equivalentclass", "sameas", "disjointwith"}
 
-def text2onto_metrics(y_true: List[str], y_pred: List[str], similarity_threshold: float = 0.8) -> Dict[str, float | int]:
-    def jaccard_similarity(a: str, b: str) -> float:
-        set_a = set(a.lower().split())
-        set_b = set(b.lower().split())
-        if not set_a and not set_b:
+def text2onto_metrics(
+    y_true: Dict[str, Any],
+    y_pred: Dict[str, Any],
+    similarity_threshold: float = 0.8
+) -> Dict[str, Any]:
+    """
+    Expects:
+        y_true = {"terms": [{"doc_id": str, "term": str}, ...],
+                  "types": [{"doc_id": str, "type": str}, ...]}
+        y_pred = same shape
+
+    Returns:
+        {"terms": {...}, "types": {...}}
+    """
+
+    def jaccard_similarity(text_a: str, text_b: str) -> float:
+        tokens_a = set(text_a.lower().split())
+        tokens_b = set(text_b.lower().split())
+        if not tokens_a and not tokens_b:
             return 1.0
-        return len(set_a & set_b) / len(set_a | set_b)
-
-    matched_gt_indices = set()
-    matched_pred_indices = set()
-    for i, pred_label in enumerate(y_pred):
-        for j, gt_label in enumerate(y_true):
-            if j in matched_gt_indices:
-                continue
-            sim = jaccard_similarity(pred_label, gt_label)
-            if sim >= similarity_threshold:
-                matched_pred_indices.add(i)
-                matched_gt_indices.add(j)
-                break # each gt matched once
-
-    total_correct = len(matched_pred_indices)
-    total_predicted = len(y_pred)
-    total_ground_truth = len(y_true)
+        return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
+
+    def pairs_to_strings(rows: List[Dict[str, str]], value_key: str) -> List[str]:
+        paired_strings: List[str] = []
+        for row in rows or []:
+            doc_id = (row.get("doc_id") or "").strip()
+            value = (row.get(value_key) or "").strip()
+            if doc_id and value:
+                # keep doc association + allow token Jaccard
+                paired_strings.append(f"{doc_id} {value}")
+        return paired_strings
+
+    def score_list(ground_truth_items: List[str], predicted_items: List[str]) -> Dict[str, Union[float, int]]:
+        matched_ground_truth_indices: Set[int] = set()
+        matched_predicted_indices: Set[int] = set()
+
+        for predicted_index, predicted_item in enumerate(predicted_items):
+            for ground_truth_index, ground_truth_item in enumerate(ground_truth_items):
+                if ground_truth_index in matched_ground_truth_indices:
+                    continue
+
+                if jaccard_similarity(predicted_item, ground_truth_item) >= similarity_threshold:
+                    matched_predicted_indices.add(predicted_index)
+                    matched_ground_truth_indices.add(ground_truth_index)
+                    break
+
+        total_correct = len(matched_predicted_indices)
+        total_predicted = len(predicted_items)
+        total_ground_truth = len(ground_truth_items)
+
+        precision = total_correct / total_predicted if total_predicted else 0.0
+        recall = total_correct / total_ground_truth if total_ground_truth else 0.0
+        f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) else 0.0
+
+        return {
+            "f1_score": f1,
+            "precision": precision,
+            "recall": recall,
+            "total_correct": total_correct,
+            "total_predicted": total_predicted,
+            "total_ground_truth": total_ground_truth,
+        }
+
+    ground_truth_terms = pairs_to_strings(y_true.get("terms", []), "term")
+    predicted_terms = pairs_to_strings(y_pred.get("terms", []), "term")
+    ground_truth_types = pairs_to_strings(y_true.get("types", []), "type")
+    predicted_types = pairs_to_strings(y_pred.get("types", []), "type")
+
+    terms_metrics = score_list(ground_truth_terms, predicted_terms)
+    types_metrics = score_list(ground_truth_types, predicted_types)
 
-    precision = total_correct / total_predicted if total_predicted > 0 else 0
-    recall = total_correct / total_ground_truth if total_ground_truth > 0 else 0
-    f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
     return {
-        "f1_score": f1_score,
-        "precision": precision,
-        "recall": recall,
-        "total_correct": total_correct,
-        "total_predicted": total_predicted,
-        "total_ground_truth": total_ground_truth
+        "terms": terms_metrics,
+        "types": types_metrics,
     }
 
 def term_typing_metrics(y_true: List[Dict[str, List[str]]], y_pred: List[Dict[str, List[str]]]) -> Dict[str, float | int]:
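
With this change, text2onto_metrics scores doc/term and doc/type pairs separately instead of a flat label list. A hedged usage sketch (import path taken from the file list above; doc_ids, terms, and types are invented):

    from ontolearner.evaluation.metrics import text2onto_metrics

    y_true = {"terms": [{"doc_id": "doc1", "term": "perovskite solar cell"},
                        {"doc_id": "doc2", "term": "band gap"}],
              "types": [{"doc_id": "doc1", "type": "Material"}]}
    y_pred = {"terms": [{"doc_id": "doc1", "term": "perovskite solar cell"},  # exact match
                        {"doc_id": "doc1", "term": "thin film"}],             # no gold counterpart
              "types": [{"doc_id": "doc1", "type": "Material"}]}

    scores = text2onto_metrics(y_true, y_pred, similarity_threshold=0.8)
    # scores["terms"] -> precision = recall = f1_score = 0.5 (1 of 2 predictions matched, 1 of 2 gold found)
    # scores["types"] -> precision = recall = f1_score = 1.0
    print(scores)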
--- ontolearner-1.4.10/ontolearner/learner/__init__.py
+++ ontolearner-1.5.0/ontolearner/learner/__init__.py
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .llm import AutoLLMLearner, FalconLLM, MistralLLM
+from .llm import AutoLLMLearner, FalconLLM, MistralLLM, LogitMistralLLM, \
+    QwenInstructLLM, QwenThinkingLLM, LogitAutoLLM, LogitQuantAutoLLM
 from .retriever import AutoRetrieverLearner, LLMAugmentedRetrieverLearner
-from .rag import AutoRAGLearner
+from .rag import AutoRAGLearner, LLMAugmentedRAGLearner
 from .prompt import StandardizedPrompting
 from .label_mapper import LabelMapper
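
The rewritten llm.py and the new rag package are re-exported through ontolearner.learner, so downstream code can import the new learners directly (sketch; only names visible in the import lines above):

    from ontolearner.learner import (
        AutoLLMLearner, QwenInstructLLM, QwenThinkingLLM, LogitQuantAutoLLM,
        AutoRAGLearner, LLMAugmentedRAGLearner,
        StandardizedPrompting, LabelMapper,
    )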
--- ontolearner-1.4.10/ontolearner/learner/label_mapper.py
+++ ontolearner-1.5.0/ontolearner/learner/label_mapper.py
@@ -31,7 +31,7 @@ class LabelMapper:
                  ngram_range: Tuple=(1, 1),
                  label_dict: Dict[str, List[str]]=None,
                  analyzer: str = 'word',
-                 iterator_no: int = 100):
+                 iterator_no: int = 1000):
         """
         Initializes the TFIDFLabelMapper with a specified classifier and TF-IDF configuration.
 
@@ -45,11 +45,12 @@ class LabelMapper:
         if label_dict is None:
             label_dict = {
                 "yes": ["yes", "true"],
-                "no": ["no", "false", " "]
+                "no": ["no", "false"]
             }
-        self.labels = [label.lower() for label in list(label_dict.keys())]
+        self.label_dict = label_dict
+        self.labels = [label.lower() for label in list(self.label_dict.keys())]
         self.x_train, self.y_train = [], []
-        for label, candidates in label_dict.items():
+        for label, candidates in self.label_dict.items():
             self.x_train += [label] + candidates
             self.y_train += [label] * (len(candidates) + 1)
         self.x_train = iterator_no * self.x_train
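
The mapper changes drop the blank-string candidate for "no", keep the label dictionary on the instance, and replicate the tiny training set 1000x instead of 100x. A standalone sketch of how the default label_dict expands into training pairs (the hunk only shows the x_train replication; y_train is mirrored here to keep the pairs aligned):

    # Toy reproduction of the training-pair expansion in LabelMapper.__init__; not package code.
    label_dict = {"yes": ["yes", "true"], "no": ["no", "false"]}
    iterator_no = 1000

    x_train, y_train = [], []
    for label, candidates in label_dict.items():
        x_train += [label] + candidates              # surface forms: the label plus its variants
        y_train += [label] * (len(candidates) + 1)   # matching target label for each surface form
    x_train = iterator_no * x_train                  # replicate for the downstream TF-IDF classifier
    y_train = iterator_no * y_train

    assert len(x_train) == len(y_train) == 6 * iterator_no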