OntoLearner 1.4.10__py3-none-any.whl → 1.4.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ontolearner/VERSION +1 -1
- ontolearner/base/learner.py +38 -17
- ontolearner/evaluation/metrics.py +72 -32
- ontolearner/learner/__init__.py +1 -1
- ontolearner/learner/label_mapper.py +1 -1
- ontolearner/learner/prompt.py +40 -5
- ontolearner/learner/rag/__init__.py +14 -0
- ontolearner/learner/{rag.py → rag/rag.py} +7 -2
- ontolearner/learner/retriever/__init__.py +1 -1
- ontolearner/learner/retriever/{llm_retriever.py → augmented_retriever.py} +48 -39
- ontolearner/learner/retriever/learner.py +3 -4
- ontolearner/learner/text2onto/__init__.py +1 -1
- ontolearner/learner/text2onto/alexbek.py +484 -1105
- ontolearner/learner/text2onto/sbunlp.py +498 -493
- ontolearner/text2onto/splitter.py +69 -6
- {ontolearner-1.4.10.dist-info → ontolearner-1.4.11.dist-info}/METADATA +2 -2
- {ontolearner-1.4.10.dist-info → ontolearner-1.4.11.dist-info}/RECORD +19 -18
- {ontolearner-1.4.10.dist-info → ontolearner-1.4.11.dist-info}/WHEEL +0 -0
- {ontolearner-1.4.10.dist-info → ontolearner-1.4.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -200,10 +200,73 @@ class SyntheticDataSplitter:
|
|
|
200
200
|
|
|
201
201
|
return terms_splits, types_splits, docs_split, types2docs_splits
|
|
202
202
|
|
|
203
|
-
def
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
203
|
+
def split_fine_grained(self, doc_ids):
|
|
204
|
+
"""
|
|
205
|
+
Build a single split bundle containing only:
|
|
206
|
+
- docs
|
|
207
|
+
- terms
|
|
208
|
+
- types
|
|
209
|
+
- terms2docs
|
|
210
|
+
- terms2types
|
|
211
|
+
"""
|
|
212
|
+
# normalize to string ids (constructor uses str(row.id))
|
|
213
|
+
doc_ids = {str(d) for d in (doc_ids or [])}
|
|
214
|
+
|
|
215
|
+
# docs + collect terms/types from docs
|
|
216
|
+
docs = []
|
|
217
|
+
terms_set = set()
|
|
218
|
+
types_set = set()
|
|
219
|
+
|
|
220
|
+
for doc_id in doc_ids:
|
|
221
|
+
doc = self.doc_id_to_doc[doc_id]
|
|
222
|
+
docs.append({"id": str(doc.id), "title": doc.title, "text": doc.text})
|
|
223
|
+
|
|
224
|
+
terms_set.update(self.doc_id_to_terms[doc_id])
|
|
225
|
+
types_set.update(self.doc_id_to_types[doc_id])
|
|
226
|
+
|
|
227
|
+
terms = sorted(terms_set)
|
|
228
|
+
types = sorted(types_set)
|
|
229
|
+
|
|
230
|
+
# terms2docs: use the constructor-built mapping and restrict to this split's doc_ids
|
|
231
|
+
terms2docs = {
|
|
232
|
+
term: sorted(list(self.term_to_doc_id.get(term, set()) & doc_ids))
|
|
233
|
+
for term in terms
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
# terms2types: ontology lookup (term -> parent types)
|
|
237
|
+
terms2types = {term: self.child_to_parent.get(term, []) for term in terms}
|
|
238
|
+
|
|
239
|
+
return {
|
|
240
|
+
"documents": docs,
|
|
241
|
+
"terms": terms,
|
|
242
|
+
"types": types,
|
|
243
|
+
"terms2docs": terms2docs,
|
|
244
|
+
"terms2types": terms2types,
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
def train_test_val_split(self, train: float = 0.8, val: float = 0.1, test: float = 0.1):
|
|
248
|
+
"""
|
|
249
|
+
Returns:
|
|
250
|
+
train_split, val_split, test_split
|
|
251
|
+
|
|
252
|
+
Each split is a dict with keys:
|
|
253
|
+
- "docs"
|
|
254
|
+
- "terms"
|
|
255
|
+
- "types"
|
|
256
|
+
- "terms2docs"
|
|
257
|
+
- "terms2types"
|
|
258
|
+
"""
|
|
259
|
+
# compute which docs go to which split
|
|
260
|
+
split_targets, split_docs_targets = self.set_train_val_test_sizes(
|
|
261
|
+
train_percentage=train,
|
|
262
|
+
val_percentage=val,
|
|
263
|
+
test_percentage=test,
|
|
264
|
+
)
|
|
207
265
|
split_docs = self.create_train_val_test_splits(split_targets, split_docs_targets)
|
|
208
|
-
|
|
209
|
-
|
|
266
|
+
# split_docs: {"train": set(doc_ids), "val": set(doc_ids), "test": set(doc_ids)}
|
|
267
|
+
|
|
268
|
+
train_split = self.split_fine_grained(split_docs.get("train", set()))
|
|
269
|
+
val_split = self.split_fine_grained(split_docs.get("val", set()))
|
|
270
|
+
test_split = self.split_fine_grained(split_docs.get("test", set()))
|
|
271
|
+
|
|
272
|
+
return train_split, val_split, test_split
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: OntoLearner
|
|
3
|
-
Version: 1.4.10
|
|
3
|
+
Version: 1.4.11
|
|
4
4
|
Summary: OntoLearner: A Modular Python Library for Ontology Learning with LLMs.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
16
|
Requires-Dist: Levenshtein
|
|
17
|
-
Requires-Dist: bitsandbytes (>=0.45.1,<0.
|
|
17
|
+
Requires-Dist: bitsandbytes (>=0.45.1,<1.0.0) ; platform_system == "Linux"
|
|
18
18
|
Requires-Dist: dspy (>=2.6.14,<3.0.0)
|
|
19
19
|
Requires-Dist: g4f
|
|
20
20
|
Requires-Dist: gensim
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
ontolearner/VERSION,sha256=
|
|
1
|
+
ontolearner/VERSION,sha256=IUVii4YAC88nU9izX_pBt1ZZ3pyLpyv3xOe7B9Tzuzo,7
|
|
2
2
|
ontolearner/__init__.py,sha256=E4yukFv2PV4uyztTPDWljCySY9AVDcDDzabuvxfabYE,1889
|
|
3
3
|
ontolearner/_learner.py,sha256=2CRQvpsz8akIOdxTs2-KLJ-MssULrjpK-QDD3QXUJXI,5297
|
|
4
4
|
ontolearner/_ontology.py,sha256=W1mp195SImqLKwaj4ueEaBWuLJg2jUdx1JT20Ds3fmQ,6950
|
|
5
5
|
ontolearner/base/__init__.py,sha256=5pf-ltxzGp32xhEcPdbtm11wXJrYJMUeWG-mbcAYD8Q,705
|
|
6
|
-
ontolearner/base/learner.py,sha256=
|
|
6
|
+
ontolearner/base/learner.py,sha256=dWMiIBhdvxZLxIWrTq4d4LbyCqDuAmTwfmxwa7UkjfQ,20075
|
|
7
7
|
ontolearner/base/ontology.py,sha256=t7n81Vk8Y5BCK88AYIyNKd7d1LjJnoTlXigyPvrLxR4,24784
|
|
8
8
|
ontolearner/base/text2onto.py,sha256=iUXYZoqnwgebQuQzM-XSGTVRfHLlhjUK_z5XUvhRICc,5388
|
|
9
9
|
ontolearner/data_structure/__init__.py,sha256=1HiKvk8FKjhYeI92RHnJXxyQbUJBi3JFytjQjthsY_s,599
|
|
@@ -11,17 +11,18 @@ ontolearner/data_structure/data.py,sha256=jUUDfqsOZcEqIR83SRboiKibPdA_JquI1uOEiQ
|
|
|
11
11
|
ontolearner/data_structure/metric.py,sha256=4QKkZ5L1YK6hDTU-N5Z9I9Ha99DVHmGfYxK7N2qdhfc,7589
|
|
12
12
|
ontolearner/evaluation/__init__.py,sha256=4BZr3BUXjQDTj4Aqlqy4THa80lZPsMuh1EBTCyi9Wig,842
|
|
13
13
|
ontolearner/evaluation/evaluate.py,sha256=NYCVcmPqpyIxYZrMAim37gL-erdh698RD3t3eNTTgZc,1163
|
|
14
|
-
ontolearner/evaluation/metrics.py,sha256=
|
|
15
|
-
ontolearner/learner/__init__.py,sha256=
|
|
16
|
-
ontolearner/learner/label_mapper.py,sha256=
|
|
14
|
+
ontolearner/evaluation/metrics.py,sha256=rgEHwkvtWIZ8BB8dNN5bjwptV70F4Y9RRLp9j2xeAuE,7914
|
|
15
|
+
ontolearner/learner/__init__.py,sha256=8NOPB8IaU04Ae5aWnIm6B0rcijSYN6z3xJElzzKD34I,822
|
|
16
|
+
ontolearner/learner/label_mapper.py,sha256=SiceknqOW2ORX7K4-ljLJYay8DQrKF6Dwv-wUg7uQ78,3793
|
|
17
17
|
ontolearner/learner/llm.py,sha256=3kq_IrwEPTFgeNVKZH9Er_OydJuDpRBtM3YXNNa8_KA,10343
|
|
18
|
-
ontolearner/learner/prompt.py,sha256=
|
|
19
|
-
ontolearner/learner/rag.py,sha256=
|
|
20
|
-
ontolearner/learner/
|
|
18
|
+
ontolearner/learner/prompt.py,sha256=1z8KRLrvRBS8QFoJYGlaajwHi4P4CZezhNQv3WcIfNA,2464
|
|
19
|
+
ontolearner/learner/rag/__init__.py,sha256=NpnBBw5wqZ1MNtpIJ7zT-LWm5IL9aDEzwlbKPo_kCis,612
|
|
20
|
+
ontolearner/learner/rag/rag.py,sha256=apnjK4KvVzFmzF6CmxtZoYoE9NAscRVULTt8Nj5wcWk,4430
|
|
21
|
+
ontolearner/learner/retriever/__init__.py,sha256=ECMEEtwacnugiJ-sADVGidS88pC57nVi299vKb5R16U,860
|
|
22
|
+
ontolearner/learner/retriever/augmented_retriever.py,sha256=tI4z7IbbaShOY-BxOreEGe7fhemz7l48diB2Erri3ek,14004
|
|
21
23
|
ontolearner/learner/retriever/crossencoder.py,sha256=yurzGE4zydlBSwUefi1CugsWv34HEZ61qADG_-nILbo,4996
|
|
22
24
|
ontolearner/learner/retriever/embedding.py,sha256=Lp9oA7LiOYaSWDvzG779KMv5keNl6Xv7hw0WpeaepDE,7875
|
|
23
|
-
ontolearner/learner/retriever/learner.py,sha256=
|
|
24
|
-
ontolearner/learner/retriever/llm_retriever.py,sha256=goInWYxrD9PSo_EsSKbNV8wEaSPvWY3LEC8XM7jlH64,12917
|
|
25
|
+
ontolearner/learner/retriever/learner.py,sha256=bMkXj_MnzBRQDjPloqnOYEj400fsO6CFBfUql7gHIxw,11184
|
|
25
26
|
ontolearner/learner/retriever/ngram.py,sha256=XgS1OeheKEIi7wfJHZgS8mWxKv9MQrP0apOJD_XSOnM,4575
|
|
26
27
|
ontolearner/learner/taxonomy_discovery/__init__.py,sha256=-Hb5Dl6_6c4l1uIT2zWtyBWMq5cjVD4PNjxt5qJePl4,747
|
|
27
28
|
ontolearner/learner/taxonomy_discovery/alexbek.py,sha256=kFEDvoKxLf-sB7-d5REkcC0DqXZpcA6ZSJ2QHrNoC5E,19010
|
|
@@ -32,9 +33,9 @@ ontolearner/learner/term_typing/__init__.py,sha256=2rBbgp8683GNVgB58T4xe76l4m-NT
|
|
|
32
33
|
ontolearner/learner/term_typing/alexbek.py,sha256=SzWQbndkhAjxETVbrJ4uyH7ykL_TMIwHozSS08zwjoM,46684
|
|
33
34
|
ontolearner/learner/term_typing/rwthdbis.py,sha256=F6Jr1SrsbDOIe0Ee_FkDVGTG4wRWpM-R2YqrqEQiex0,14576
|
|
34
35
|
ontolearner/learner/term_typing/sbunlp.py,sha256=Xd3UqMO3m_Skn_2geTN22MGQmSD6R8bYfPgubZre3IE,19820
|
|
35
|
-
ontolearner/learner/text2onto/__init__.py,sha256=
|
|
36
|
-
ontolearner/learner/text2onto/alexbek.py,sha256=
|
|
37
|
-
ontolearner/learner/text2onto/sbunlp.py,sha256
|
|
36
|
+
ontolearner/learner/text2onto/__init__.py,sha256=bLv25lJmgQymgMfhr6JTezMndpDMk9ihheY-VLE-nRI,644
|
|
37
|
+
ontolearner/learner/text2onto/alexbek.py,sha256=0CE5KHgB47tXASgscYH-W3X17XtI2QBtTuhDNpGGaUI,23347
|
|
38
|
+
ontolearner/learner/text2onto/sbunlp.py,sha256=-ULysm_iFUMqEsxNRwgZVcq-70nBzlXMR5BeMezUjjw,23786
|
|
38
39
|
ontolearner/ontology/__init__.py,sha256=F9Ta1qCX9mOxIK5CPRypEoglQNkpJ6SJpqziz73xKQE,1328
|
|
39
40
|
ontolearner/ontology/agriculture.py,sha256=ZaXHNEFjbtsMH8M7HQ8ypnfJS4TUQy_as16fwv-kOKA,5903
|
|
40
41
|
ontolearner/ontology/arts_humanities.py,sha256=K4ceDJL6PfIfSJZ86uQUkUXOVoiERG6ItgvVE2lhLKk,3996
|
|
@@ -62,7 +63,7 @@ ontolearner/processor.py,sha256=LaPUr4BSmPZDINo5t55q9U0i9lLXa77u4pN38usQMBc,4817
|
|
|
62
63
|
ontolearner/text2onto/__init__.py,sha256=YbbDYpHYSMA4dof-7y40PKYsiRO7wvoXZ2LbsRwpPJE,645
|
|
63
64
|
ontolearner/text2onto/batchifier.py,sha256=2CljvcZo0EDW3sHHcG9d5w26RcRwbMsQdFB1j-vCam4,6646
|
|
64
65
|
ontolearner/text2onto/general.py,sha256=2RUFMbWm7qLq3MJHsyNb3rgYkGcicnkbiH2wdPBsBps,1099
|
|
65
|
-
ontolearner/text2onto/splitter.py,sha256=
|
|
66
|
+
ontolearner/text2onto/splitter.py,sha256=PeiVbw5zuNlX3dhtcIJRVCghNizWE8ugIGV7OigR5Ac,12743
|
|
66
67
|
ontolearner/text2onto/synthesizer.py,sha256=tSJgPTFWVKBQi2RqLQfMhX_noXeNLh2Wq2Ezbqyv-OA,5486
|
|
67
68
|
ontolearner/tools/__init__.py,sha256=IB5ycAW5vUDKeq-NAMMbwjSFzwSzC-5j0UobIzO3ZmI,623
|
|
68
69
|
ontolearner/tools/analyzer.py,sha256=1SooAT7qYqDIrHyvHXnrBRmuPwZhLK1uj26OiKRECc0,12989
|
|
@@ -70,7 +71,7 @@ ontolearner/tools/visualizer.py,sha256=cwijl4yYaS1SCLM5wbvRTEcbQj9Bjo4fHzZR6q6o8
|
|
|
70
71
|
ontolearner/utils/__init__.py,sha256=pSEyU3dlPMADBqygqaaid44RdWf0Lo3Fvz-K_rQ7_Bw,733
|
|
71
72
|
ontolearner/utils/io.py,sha256=3DqGK2p7c0onKi0Xxs16WB08uHfHUId3bW0dDKwyS0g,2110
|
|
72
73
|
ontolearner/utils/train_test_split.py,sha256=Zlm42eT6QGWwlySyomCPIiTGmGqeN_h4z4xBY2EAOR8,11530
|
|
73
|
-
ontolearner-1.4.
|
|
74
|
-
ontolearner-1.4.10.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
75
|
-
ontolearner-1.4.10.dist-info/licenses/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
|
|
76
|
-
ontolearner-1.4.10.dist-info/RECORD,,
|
|
74
|
+
ontolearner-1.4.11.dist-info/METADATA,sha256=YDJySz7VAXa80XACaj-WDyuHtFhticcNqEmQVaR8Jsg,11473
|
|
75
|
+
ontolearner-1.4.11.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
76
|
+
ontolearner-1.4.11.dist-info/licenses/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
|
|
77
|
+
ontolearner-1.4.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|