OntoLearner 1.4.10__py3-none-any.whl → 1.4.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -200,10 +200,73 @@ class SyntheticDataSplitter:
200
200
 
201
201
  return terms_splits, types_splits, docs_split, types2docs_splits
202
202
 
203
- def split(self, train: float = 0.8, val: float = 0.1, test: float = 0.1):
204
- split_targets, split_docs_targets = self.set_train_val_test_sizes(train_percentage=train,
205
- val_percentage=val,
206
- test_percentage=test)
203
+ def split_fine_grained(self, doc_ids):
204
+ """
205
+ Build a single split bundle containing only:
206
+ - documents
207
+ - terms
208
+ - types
209
+ - terms2docs
210
+ - terms2types
211
+ """
212
+ # normalize to string ids (constructor uses str(row.id))
213
+ doc_ids = {str(d) for d in (doc_ids or [])}
214
+
215
+ # docs + collect terms/types from docs
216
+ docs = []
217
+ terms_set = set()
218
+ types_set = set()
219
+
220
+ for doc_id in doc_ids:
221
+ doc = self.doc_id_to_doc[doc_id]
222
+ docs.append({"id": str(doc.id), "title": doc.title, "text": doc.text})
223
+
224
+ terms_set.update(self.doc_id_to_terms[doc_id])
225
+ types_set.update(self.doc_id_to_types[doc_id])
226
+
227
+ terms = sorted(terms_set)
228
+ types = sorted(types_set)
229
+
230
+ # terms2docs: use the constructor-built mapping and restrict to this split's doc_ids
231
+ terms2docs = {
232
+ term: sorted(list(self.term_to_doc_id.get(term, set()) & doc_ids))
233
+ for term in terms
234
+ }
235
+
236
+ # terms2types: ontology lookup (term -> parent types)
237
+ terms2types = {term: self.child_to_parent.get(term, []) for term in terms}
238
+
239
+ return {
240
+ "documents": docs,
241
+ "terms": terms,
242
+ "types": types,
243
+ "terms2docs": terms2docs,
244
+ "terms2types": terms2types,
245
+ }
246
+
247
+ def train_test_val_split(self, train: float = 0.8, val: float = 0.1, test: float = 0.1):
248
+ """
249
+ Returns:
250
+ train_split, val_split, test_split
251
+
252
+ Each split is a dict with keys:
253
+ - "documents"
254
+ - "terms"
255
+ - "types"
256
+ - "terms2docs"
257
+ - "terms2types"
258
+ """
259
+ # compute which docs go to which split
260
+ split_targets, split_docs_targets = self.set_train_val_test_sizes(
261
+ train_percentage=train,
262
+ val_percentage=val,
263
+ test_percentage=test,
264
+ )
207
265
  split_docs = self.create_train_val_test_splits(split_targets, split_docs_targets)
208
- terms, types, docs, types2docs = self.generate_split_artefacts(split_docs)
209
- return terms, types, docs, types2docs
266
+ # split_docs: {"train": set(doc_ids), "val": set(doc_ids), "test": set(doc_ids)}
267
+
268
+ train_split = self.split_fine_grained(split_docs.get("train", set()))
269
+ val_split = self.split_fine_grained(split_docs.get("val", set()))
270
+ test_split = self.split_fine_grained(split_docs.get("test", set()))
271
+
272
+ return train_split, val_split, test_split
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: OntoLearner
3
- Version: 1.4.10
3
+ Version: 1.4.11
4
4
  Summary: OntoLearner: A Modular Python Library for Ontology Learning with LLMs.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Requires-Dist: Levenshtein
17
- Requires-Dist: bitsandbytes (>=0.45.1,<0.46.0)
17
+ Requires-Dist: bitsandbytes (>=0.45.1,<1.0.0) ; platform_system == "Linux"
18
18
  Requires-Dist: dspy (>=2.6.14,<3.0.0)
19
19
  Requires-Dist: g4f
20
20
  Requires-Dist: gensim
@@ -1,9 +1,9 @@
1
- ontolearner/VERSION,sha256=yg62XB5UNHqKQzpcdrkSfe7_qhlmlTw-ksDKQaqtI30,7
1
+ ontolearner/VERSION,sha256=IUVii4YAC88nU9izX_pBt1ZZ3pyLpyv3xOe7B9Tzuzo,7
2
2
  ontolearner/__init__.py,sha256=E4yukFv2PV4uyztTPDWljCySY9AVDcDDzabuvxfabYE,1889
3
3
  ontolearner/_learner.py,sha256=2CRQvpsz8akIOdxTs2-KLJ-MssULrjpK-QDD3QXUJXI,5297
4
4
  ontolearner/_ontology.py,sha256=W1mp195SImqLKwaj4ueEaBWuLJg2jUdx1JT20Ds3fmQ,6950
5
5
  ontolearner/base/__init__.py,sha256=5pf-ltxzGp32xhEcPdbtm11wXJrYJMUeWG-mbcAYD8Q,705
6
- ontolearner/base/learner.py,sha256=latiGv8p3nyPrxMp7g5B2MSF-JEInRwIlbOn09uh7io,18899
6
+ ontolearner/base/learner.py,sha256=dWMiIBhdvxZLxIWrTq4d4LbyCqDuAmTwfmxwa7UkjfQ,20075
7
7
  ontolearner/base/ontology.py,sha256=t7n81Vk8Y5BCK88AYIyNKd7d1LjJnoTlXigyPvrLxR4,24784
8
8
  ontolearner/base/text2onto.py,sha256=iUXYZoqnwgebQuQzM-XSGTVRfHLlhjUK_z5XUvhRICc,5388
9
9
  ontolearner/data_structure/__init__.py,sha256=1HiKvk8FKjhYeI92RHnJXxyQbUJBi3JFytjQjthsY_s,599
@@ -11,17 +11,18 @@ ontolearner/data_structure/data.py,sha256=jUUDfqsOZcEqIR83SRboiKibPdA_JquI1uOEiQ
11
11
  ontolearner/data_structure/metric.py,sha256=4QKkZ5L1YK6hDTU-N5Z9I9Ha99DVHmGfYxK7N2qdhfc,7589
12
12
  ontolearner/evaluation/__init__.py,sha256=4BZr3BUXjQDTj4Aqlqy4THa80lZPsMuh1EBTCyi9Wig,842
13
13
  ontolearner/evaluation/evaluate.py,sha256=NYCVcmPqpyIxYZrMAim37gL-erdh698RD3t3eNTTgZc,1163
14
- ontolearner/evaluation/metrics.py,sha256=3Aw6ycJ3_Q6xfj4tMBJP6QcexUei0G16H0ZQWt87aRU,6286
15
- ontolearner/learner/__init__.py,sha256=RKREPrrjzQ5KYvcOwC_2l7yFKwFBd6HoCwhX2H6Spg8,798
16
- ontolearner/learner/label_mapper.py,sha256=YMPeFKzJxoCYNU5z7QRYPbB88sWdu1iT6iBDpPsjn-4,3792
14
+ ontolearner/evaluation/metrics.py,sha256=rgEHwkvtWIZ8BB8dNN5bjwptV70F4Y9RRLp9j2xeAuE,7914
15
+ ontolearner/learner/__init__.py,sha256=8NOPB8IaU04Ae5aWnIm6B0rcijSYN6z3xJElzzKD34I,822
16
+ ontolearner/learner/label_mapper.py,sha256=SiceknqOW2ORX7K4-ljLJYay8DQrKF6Dwv-wUg7uQ78,3793
17
17
  ontolearner/learner/llm.py,sha256=3kq_IrwEPTFgeNVKZH9Er_OydJuDpRBtM3YXNNa8_KA,10343
18
- ontolearner/learner/prompt.py,sha256=0ckH7xphIDKczPe7G-rwiOxFGZ7RsLnpPlNW92b-31U,1574
19
- ontolearner/learner/rag.py,sha256=eysB2RvcWkVo53s8-kSbZtJv904YVTmdtxplM4ukUKM,4283
20
- ontolearner/learner/retriever/__init__.py,sha256=G5XuJcTblqXVWboVW9StJ2Vo2xACp_kG5_w2nrueqlc,854
18
+ ontolearner/learner/prompt.py,sha256=1z8KRLrvRBS8QFoJYGlaajwHi4P4CZezhNQv3WcIfNA,2464
19
+ ontolearner/learner/rag/__init__.py,sha256=NpnBBw5wqZ1MNtpIJ7zT-LWm5IL9aDEzwlbKPo_kCis,612
20
+ ontolearner/learner/rag/rag.py,sha256=apnjK4KvVzFmzF6CmxtZoYoE9NAscRVULTt8Nj5wcWk,4430
21
+ ontolearner/learner/retriever/__init__.py,sha256=ECMEEtwacnugiJ-sADVGidS88pC57nVi299vKb5R16U,860
22
+ ontolearner/learner/retriever/augmented_retriever.py,sha256=tI4z7IbbaShOY-BxOreEGe7fhemz7l48diB2Erri3ek,14004
21
23
  ontolearner/learner/retriever/crossencoder.py,sha256=yurzGE4zydlBSwUefi1CugsWv34HEZ61qADG_-nILbo,4996
22
24
  ontolearner/learner/retriever/embedding.py,sha256=Lp9oA7LiOYaSWDvzG779KMv5keNl6Xv7hw0WpeaepDE,7875
23
- ontolearner/learner/retriever/learner.py,sha256=VcarTwwR8HNddJCh0loCQejDzZ_GO4NkdQUjEhLVy48,11181
24
- ontolearner/learner/retriever/llm_retriever.py,sha256=goInWYxrD9PSo_EsSKbNV8wEaSPvWY3LEC8XM7jlH64,12917
25
+ ontolearner/learner/retriever/learner.py,sha256=bMkXj_MnzBRQDjPloqnOYEj400fsO6CFBfUql7gHIxw,11184
25
26
  ontolearner/learner/retriever/ngram.py,sha256=XgS1OeheKEIi7wfJHZgS8mWxKv9MQrP0apOJD_XSOnM,4575
26
27
  ontolearner/learner/taxonomy_discovery/__init__.py,sha256=-Hb5Dl6_6c4l1uIT2zWtyBWMq5cjVD4PNjxt5qJePl4,747
27
28
  ontolearner/learner/taxonomy_discovery/alexbek.py,sha256=kFEDvoKxLf-sB7-d5REkcC0DqXZpcA6ZSJ2QHrNoC5E,19010
@@ -32,9 +33,9 @@ ontolearner/learner/term_typing/__init__.py,sha256=2rBbgp8683GNVgB58T4xe76l4m-NT
32
33
  ontolearner/learner/term_typing/alexbek.py,sha256=SzWQbndkhAjxETVbrJ4uyH7ykL_TMIwHozSS08zwjoM,46684
33
34
  ontolearner/learner/term_typing/rwthdbis.py,sha256=F6Jr1SrsbDOIe0Ee_FkDVGTG4wRWpM-R2YqrqEQiex0,14576
34
35
  ontolearner/learner/term_typing/sbunlp.py,sha256=Xd3UqMO3m_Skn_2geTN22MGQmSD6R8bYfPgubZre3IE,19820
35
- ontolearner/learner/text2onto/__init__.py,sha256=4-G6iel0Nxcj4nzPxUDqtFf9CMCzi8LghooOSAnbNfc,641
36
- ontolearner/learner/text2onto/alexbek.py,sha256=MySzxJUR0F3UyeS5rPIN988xxtPaoAxDFkBc-Q0vFTE,45494
37
- ontolearner/learner/text2onto/sbunlp.py,sha256=5p-s2Ixtntws5eO3gOUyYLpfZpCbOE0hG5gEcCwKHz4,24177
36
+ ontolearner/learner/text2onto/__init__.py,sha256=bLv25lJmgQymgMfhr6JTezMndpDMk9ihheY-VLE-nRI,644
37
+ ontolearner/learner/text2onto/alexbek.py,sha256=0CE5KHgB47tXASgscYH-W3X17XtI2QBtTuhDNpGGaUI,23347
38
+ ontolearner/learner/text2onto/sbunlp.py,sha256=-ULysm_iFUMqEsxNRwgZVcq-70nBzlXMR5BeMezUjjw,23786
38
39
  ontolearner/ontology/__init__.py,sha256=F9Ta1qCX9mOxIK5CPRypEoglQNkpJ6SJpqziz73xKQE,1328
39
40
  ontolearner/ontology/agriculture.py,sha256=ZaXHNEFjbtsMH8M7HQ8ypnfJS4TUQy_as16fwv-kOKA,5903
40
41
  ontolearner/ontology/arts_humanities.py,sha256=K4ceDJL6PfIfSJZ86uQUkUXOVoiERG6ItgvVE2lhLKk,3996
@@ -62,7 +63,7 @@ ontolearner/processor.py,sha256=LaPUr4BSmPZDINo5t55q9U0i9lLXa77u4pN38usQMBc,4817
62
63
  ontolearner/text2onto/__init__.py,sha256=YbbDYpHYSMA4dof-7y40PKYsiRO7wvoXZ2LbsRwpPJE,645
63
64
  ontolearner/text2onto/batchifier.py,sha256=2CljvcZo0EDW3sHHcG9d5w26RcRwbMsQdFB1j-vCam4,6646
64
65
  ontolearner/text2onto/general.py,sha256=2RUFMbWm7qLq3MJHsyNb3rgYkGcicnkbiH2wdPBsBps,1099
65
- ontolearner/text2onto/splitter.py,sha256=7SrFeUM5GZTTvbrve9RRKtBjELlkpnMkyPluO614PYM,10941
66
+ ontolearner/text2onto/splitter.py,sha256=PeiVbw5zuNlX3dhtcIJRVCghNizWE8ugIGV7OigR5Ac,12743
66
67
  ontolearner/text2onto/synthesizer.py,sha256=tSJgPTFWVKBQi2RqLQfMhX_noXeNLh2Wq2Ezbqyv-OA,5486
67
68
  ontolearner/tools/__init__.py,sha256=IB5ycAW5vUDKeq-NAMMbwjSFzwSzC-5j0UobIzO3ZmI,623
68
69
  ontolearner/tools/analyzer.py,sha256=1SooAT7qYqDIrHyvHXnrBRmuPwZhLK1uj26OiKRECc0,12989
@@ -70,7 +71,7 @@ ontolearner/tools/visualizer.py,sha256=cwijl4yYaS1SCLM5wbvRTEcbQj9Bjo4fHzZR6q6o8
70
71
  ontolearner/utils/__init__.py,sha256=pSEyU3dlPMADBqygqaaid44RdWf0Lo3Fvz-K_rQ7_Bw,733
71
72
  ontolearner/utils/io.py,sha256=3DqGK2p7c0onKi0Xxs16WB08uHfHUId3bW0dDKwyS0g,2110
72
73
  ontolearner/utils/train_test_split.py,sha256=Zlm42eT6QGWwlySyomCPIiTGmGqeN_h4z4xBY2EAOR8,11530
73
- ontolearner-1.4.10.dist-info/METADATA,sha256=NlTab0joIb4EJ_xHQg9mQ0oHuZjoA4P-unkUYLbEqyI,11445
74
- ontolearner-1.4.10.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
75
- ontolearner-1.4.10.dist-info/licenses/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
76
- ontolearner-1.4.10.dist-info/RECORD,,
74
+ ontolearner-1.4.11.dist-info/METADATA,sha256=YDJySz7VAXa80XACaj-WDyuHtFhticcNqEmQVaR8Jsg,11473
75
+ ontolearner-1.4.11.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
76
+ ontolearner-1.4.11.dist-info/licenses/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
77
+ ontolearner-1.4.11.dist-info/RECORD,,