OntoLearner 1.4.9__py3-none-any.whl → 1.4.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ontolearner/VERSION +1 -1
- ontolearner/base/learner.py +38 -17
- ontolearner/base/ontology.py +2 -2
- ontolearner/evaluation/metrics.py +72 -32
- ontolearner/learner/__init__.py +1 -1
- ontolearner/learner/label_mapper.py +1 -1
- ontolearner/learner/prompt.py +40 -5
- ontolearner/learner/rag/__init__.py +14 -0
- ontolearner/learner/{rag.py → rag/rag.py} +7 -2
- ontolearner/learner/retriever/__init__.py +1 -1
- ontolearner/learner/retriever/{llm_retriever.py → augmented_retriever.py} +48 -39
- ontolearner/learner/retriever/learner.py +3 -4
- ontolearner/learner/text2onto/__init__.py +1 -1
- ontolearner/learner/text2onto/alexbek.py +484 -1105
- ontolearner/learner/text2onto/sbunlp.py +498 -493
- ontolearner/text2onto/splitter.py +69 -6
- ontolearner/tools/analyzer.py +51 -0
- {ontolearner-1.4.9.dist-info → ontolearner-1.4.11.dist-info}/METADATA +2 -2
- {ontolearner-1.4.9.dist-info → ontolearner-1.4.11.dist-info}/RECORD +21 -20
- {ontolearner-1.4.9.dist-info → ontolearner-1.4.11.dist-info}/WHEEL +0 -0
- {ontolearner-1.4.9.dist-info → ontolearner-1.4.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -200,10 +200,73 @@ class SyntheticDataSplitter:
|
|
|
200
200
|
|
|
201
201
|
return terms_splits, types_splits, docs_split, types2docs_splits
|
|
202
202
|
|
|
203
|
-
def
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
203
|
+
def split_fine_grained(self, doc_ids):
    """
    Build a single split bundle restricted to the given documents.

    Args:
        doc_ids: Iterable of document ids. Every id is normalized with
            ``str`` before lookup (the original author notes that the
            constructor keys its lookups by ``str(row.id)``). ``None`` or an
            empty iterable produces an empty bundle.

    Returns:
        dict: A bundle with exactly these keys:
            - "documents": list of ``{"id", "title", "text"}`` dicts, ordered
              by id string so the output is reproducible across runs.
            - "terms": sorted list of terms attached to the selected documents.
            - "types": sorted list of types attached to the selected documents.
            - "terms2docs": term -> sorted list of the selected doc ids that
              contain the term.
            - "terms2types": term -> ``self.child_to_parent`` entry for the
              term (``[]`` when the term has no entry).

    Note:
        Lookups in ``doc_id_to_doc``, ``doc_id_to_terms`` and
        ``doc_id_to_types`` are plain subscripts, so an unknown id is not
        silently skipped (presumably a ``KeyError`` for plain dicts).
    """
    wanted_ids = {str(doc_id) for doc_id in (doc_ids or [])}

    documents = []
    terms_seen = set()
    types_seen = set()

    # Iterate in sorted order: plain set iteration over strings depends on
    # hash randomization and would reorder "documents" between runs.
    for doc_id in sorted(wanted_ids):
        doc = self.doc_id_to_doc[doc_id]
        documents.append({"id": str(doc.id), "title": doc.title, "text": doc.text})
        terms_seen.update(self.doc_id_to_terms[doc_id])
        types_seen.update(self.doc_id_to_types[doc_id])

    terms = sorted(terms_seen)
    type_names = sorted(types_seen)

    # Restrict the global term -> doc ids mapping to this split's documents.
    terms2docs = {
        term: sorted(self.term_to_doc_id.get(term, set()) & wanted_ids)
        for term in terms
    }

    # Ontology lookup (term -> parent types); terms without an entry map to [].
    terms2types = {term: self.child_to_parent.get(term, []) for term in terms}

    return {
        "documents": documents,
        "terms": terms,
        "types": type_names,
        "terms2docs": terms2docs,
        "terms2types": terms2types,
    }
|
|
246
|
+
|
|
247
|
+
def train_test_val_split(self, train: float = 0.8, val: float = 0.1, test: float = 0.1):
    """
    Split the documents and build one fine-grained bundle per split.

    Args:
        train: Value forwarded as ``train_percentage`` to
            ``set_train_val_test_sizes``.
        val: Value forwarded as ``val_percentage``.
        test: Value forwarded as ``test_percentage``.

    Returns:
        tuple: ``(train_split, val_split, test_split)`` -- note the order is
        train, val, test, which differs from the order in the method name.

        Each element is whatever ``split_fine_grained`` returns for that
        split's doc ids, i.e. a dict with keys:
            - "documents"
            - "terms"
            - "types"
            - "terms2docs"
            - "terms2types"
    """
    # Compute which docs go to which split.
    split_targets, split_docs_targets = self.set_train_val_test_sizes(
        train_percentage=train,
        val_percentage=val,
        test_percentage=test,
    )
    # split_docs: {"train": set(doc_ids), "val": set(doc_ids), "test": set(doc_ids)}
    split_docs = self.create_train_val_test_splits(split_targets, split_docs_targets)

    # A split missing from split_docs is treated as empty rather than an error.
    bundles = []
    for split_name in ("train", "val", "test"):
        bundles.append(self.split_fine_grained(split_docs.get(split_name, set())))

    train_split, val_split, test_split = bundles
    return train_split, val_split, test_split
|
ontolearner/tools/analyzer.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import logging
|
|
16
16
|
import time
|
|
17
|
+
import numpy as np
|
|
17
18
|
from abc import ABC
|
|
18
19
|
from rdflib import RDF, RDFS, OWL
|
|
19
20
|
from collections import defaultdict
|
|
@@ -186,6 +187,56 @@ class Analyzer(ABC):
|
|
|
186
187
|
|
|
187
188
|
return metrics
|
|
188
189
|
|
|
190
|
+
@staticmethod
|
|
191
|
+
def compute_complexity_score(
|
|
192
|
+
topology_metrics: TopologyMetrics,
|
|
193
|
+
dataset_metrics: DatasetMetrics,
|
|
194
|
+
a: float = 0.4,
|
|
195
|
+
b: float = 6.0,
|
|
196
|
+
eps: float = 1e-12
|
|
197
|
+
) -> float:
|
|
198
|
+
"""
|
|
199
|
+
Compute a single normalized complexity score for an ontology.
|
|
200
|
+
|
|
201
|
+
This function combines structural topology metrics and dataset quality metrics
|
|
202
|
+
into a weighted aggregate score, then applies a logistic transformation to
|
|
203
|
+
normalize it to the range [0, 1]. The score reflects overall ontology complexity,
|
|
204
|
+
considering graph structure, hierarchy, breadth, coverage, and dataset richness.
|
|
205
|
+
|
|
206
|
+
Args:
|
|
207
|
+
topology_metrics (TopologyMetrics): Precomputed structural metrics of the ontology graph.
|
|
208
|
+
dataset_metrics (DatasetMetrics): Precomputed metrics of extracted learning datasets.
|
|
209
|
+
a (float, optional): Steepness parameter for the logistic normalization function. Default is 0.4.
|
|
210
|
+
b (float, optional): Centering parameter for the logistic function, should be tuned to match the scale of aggregated metrics. Default is 6.0.
|
|
211
|
+
eps (float, optional): Small epsilon to prevent numerical issues in logistic computation. Default is 1e-12.
|
|
212
|
+
|
|
213
|
+
Returns:
|
|
214
|
+
float: Normalized complexity score in [0, 1], where higher values indicate more complex ontologies.
|
|
215
|
+
|
|
216
|
+
Notes:
|
|
217
|
+
- Weights are assigned to different metric categories: graph metrics, coverage metrics, hierarchy metrics,
|
|
218
|
+
breadth metrics, and dataset metrics (term-types, taxonomic, non-taxonomic relations).
|
|
219
|
+
- Metrics are log-normalized before weighting to reduce scale differences.
|
|
220
|
+
- The logistic transformation ensures the final score is bounded and interpretable.
|
|
221
|
+
"""
|
|
222
|
+
# Define metric categories with their weights
|
|
223
|
+
metric_categories = {
|
|
224
|
+
0.3: ["total_nodes", "total_edges", "num_root_nodes", "num_leaf_nodes"],
|
|
225
|
+
0.25: ["num_classes", "num_properties", "num_individuals"],
|
|
226
|
+
0.10: ["max_depth", "min_depth", "avg_depth", "depth_variance"],
|
|
227
|
+
0.20: ["max_breadth", "min_breadth", "avg_breadth", "breadth_variance"],
|
|
228
|
+
0.15: ["num_term_types", "num_taxonomic_relations", "num_non_taxonomic_relations", "avg_terms"]
|
|
229
|
+
}
|
|
230
|
+
weights = {metric: weight for weight, metrics in metric_categories.items() for metric in metrics}
|
|
231
|
+
metrics = [metric for _, metric_list in metric_categories.items() for metric in metric_list]
|
|
232
|
+
onto_metrics = {**topology_metrics.__dict__, **dataset_metrics.__dict__}
|
|
233
|
+
norm_weighted_values = [np.log1p(onto_metrics[m]) * weights[m] for m in metrics if m in onto_metrics]
|
|
234
|
+
total_weight = sum(weights[m] for m in metrics if m in onto_metrics)
|
|
235
|
+
weighted_sum = sum(norm_weighted_values) / total_weight if total_weight > 0 else 0.0
|
|
236
|
+
complexity_score = 1.0 / (1.0 + np.exp(-a * (weighted_sum - b) + eps))
|
|
237
|
+
return complexity_score
|
|
238
|
+
|
|
239
|
+
|
|
189
240
|
@staticmethod
|
|
190
241
|
def compute_dataset_metrics(ontology: BaseOntology) -> DatasetMetrics:
|
|
191
242
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: OntoLearner
|
|
3
|
-
Version: 1.4.9
|
|
3
|
+
Version: 1.4.11
|
|
4
4
|
Summary: OntoLearner: A Modular Python Library for Ontology Learning with LLMs.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
16
|
Requires-Dist: Levenshtein
|
|
17
|
-
Requires-Dist: bitsandbytes (>=0.45.1,<0.
|
|
17
|
+
Requires-Dist: bitsandbytes (>=0.45.1,<1.0.0) ; platform_system == "Linux"
|
|
18
18
|
Requires-Dist: dspy (>=2.6.14,<3.0.0)
|
|
19
19
|
Requires-Dist: g4f
|
|
20
20
|
Requires-Dist: gensim
|
|
@@ -1,27 +1,28 @@
|
|
|
1
|
-
ontolearner/VERSION,sha256=
|
|
1
|
+
ontolearner/VERSION,sha256=IUVii4YAC88nU9izX_pBt1ZZ3pyLpyv3xOe7B9Tzuzo,7
|
|
2
2
|
ontolearner/__init__.py,sha256=E4yukFv2PV4uyztTPDWljCySY9AVDcDDzabuvxfabYE,1889
|
|
3
3
|
ontolearner/_learner.py,sha256=2CRQvpsz8akIOdxTs2-KLJ-MssULrjpK-QDD3QXUJXI,5297
|
|
4
4
|
ontolearner/_ontology.py,sha256=W1mp195SImqLKwaj4ueEaBWuLJg2jUdx1JT20Ds3fmQ,6950
|
|
5
5
|
ontolearner/base/__init__.py,sha256=5pf-ltxzGp32xhEcPdbtm11wXJrYJMUeWG-mbcAYD8Q,705
|
|
6
|
-
ontolearner/base/learner.py,sha256=
|
|
7
|
-
ontolearner/base/ontology.py,sha256=
|
|
6
|
+
ontolearner/base/learner.py,sha256=dWMiIBhdvxZLxIWrTq4d4LbyCqDuAmTwfmxwa7UkjfQ,20075
|
|
7
|
+
ontolearner/base/ontology.py,sha256=t7n81Vk8Y5BCK88AYIyNKd7d1LjJnoTlXigyPvrLxR4,24784
|
|
8
8
|
ontolearner/base/text2onto.py,sha256=iUXYZoqnwgebQuQzM-XSGTVRfHLlhjUK_z5XUvhRICc,5388
|
|
9
9
|
ontolearner/data_structure/__init__.py,sha256=1HiKvk8FKjhYeI92RHnJXxyQbUJBi3JFytjQjthsY_s,599
|
|
10
10
|
ontolearner/data_structure/data.py,sha256=jUUDfqsOZcEqIR83SRboiKibPdA_JquI1uOEiQQ_lqY,11273
|
|
11
11
|
ontolearner/data_structure/metric.py,sha256=4QKkZ5L1YK6hDTU-N5Z9I9Ha99DVHmGfYxK7N2qdhfc,7589
|
|
12
12
|
ontolearner/evaluation/__init__.py,sha256=4BZr3BUXjQDTj4Aqlqy4THa80lZPsMuh1EBTCyi9Wig,842
|
|
13
13
|
ontolearner/evaluation/evaluate.py,sha256=NYCVcmPqpyIxYZrMAim37gL-erdh698RD3t3eNTTgZc,1163
|
|
14
|
-
ontolearner/evaluation/metrics.py,sha256=
|
|
15
|
-
ontolearner/learner/__init__.py,sha256=
|
|
16
|
-
ontolearner/learner/label_mapper.py,sha256=
|
|
14
|
+
ontolearner/evaluation/metrics.py,sha256=rgEHwkvtWIZ8BB8dNN5bjwptV70F4Y9RRLp9j2xeAuE,7914
|
|
15
|
+
ontolearner/learner/__init__.py,sha256=8NOPB8IaU04Ae5aWnIm6B0rcijSYN6z3xJElzzKD34I,822
|
|
16
|
+
ontolearner/learner/label_mapper.py,sha256=SiceknqOW2ORX7K4-ljLJYay8DQrKF6Dwv-wUg7uQ78,3793
|
|
17
17
|
ontolearner/learner/llm.py,sha256=3kq_IrwEPTFgeNVKZH9Er_OydJuDpRBtM3YXNNa8_KA,10343
|
|
18
|
-
ontolearner/learner/prompt.py,sha256=
|
|
19
|
-
ontolearner/learner/rag.py,sha256=
|
|
20
|
-
ontolearner/learner/
|
|
18
|
+
ontolearner/learner/prompt.py,sha256=1z8KRLrvRBS8QFoJYGlaajwHi4P4CZezhNQv3WcIfNA,2464
|
|
19
|
+
ontolearner/learner/rag/__init__.py,sha256=NpnBBw5wqZ1MNtpIJ7zT-LWm5IL9aDEzwlbKPo_kCis,612
|
|
20
|
+
ontolearner/learner/rag/rag.py,sha256=apnjK4KvVzFmzF6CmxtZoYoE9NAscRVULTt8Nj5wcWk,4430
|
|
21
|
+
ontolearner/learner/retriever/__init__.py,sha256=ECMEEtwacnugiJ-sADVGidS88pC57nVi299vKb5R16U,860
|
|
22
|
+
ontolearner/learner/retriever/augmented_retriever.py,sha256=tI4z7IbbaShOY-BxOreEGe7fhemz7l48diB2Erri3ek,14004
|
|
21
23
|
ontolearner/learner/retriever/crossencoder.py,sha256=yurzGE4zydlBSwUefi1CugsWv34HEZ61qADG_-nILbo,4996
|
|
22
24
|
ontolearner/learner/retriever/embedding.py,sha256=Lp9oA7LiOYaSWDvzG779KMv5keNl6Xv7hw0WpeaepDE,7875
|
|
23
|
-
ontolearner/learner/retriever/learner.py,sha256=
|
|
24
|
-
ontolearner/learner/retriever/llm_retriever.py,sha256=goInWYxrD9PSo_EsSKbNV8wEaSPvWY3LEC8XM7jlH64,12917
|
|
25
|
+
ontolearner/learner/retriever/learner.py,sha256=bMkXj_MnzBRQDjPloqnOYEj400fsO6CFBfUql7gHIxw,11184
|
|
25
26
|
ontolearner/learner/retriever/ngram.py,sha256=XgS1OeheKEIi7wfJHZgS8mWxKv9MQrP0apOJD_XSOnM,4575
|
|
26
27
|
ontolearner/learner/taxonomy_discovery/__init__.py,sha256=-Hb5Dl6_6c4l1uIT2zWtyBWMq5cjVD4PNjxt5qJePl4,747
|
|
27
28
|
ontolearner/learner/taxonomy_discovery/alexbek.py,sha256=kFEDvoKxLf-sB7-d5REkcC0DqXZpcA6ZSJ2QHrNoC5E,19010
|
|
@@ -32,9 +33,9 @@ ontolearner/learner/term_typing/__init__.py,sha256=2rBbgp8683GNVgB58T4xe76l4m-NT
|
|
|
32
33
|
ontolearner/learner/term_typing/alexbek.py,sha256=SzWQbndkhAjxETVbrJ4uyH7ykL_TMIwHozSS08zwjoM,46684
|
|
33
34
|
ontolearner/learner/term_typing/rwthdbis.py,sha256=F6Jr1SrsbDOIe0Ee_FkDVGTG4wRWpM-R2YqrqEQiex0,14576
|
|
34
35
|
ontolearner/learner/term_typing/sbunlp.py,sha256=Xd3UqMO3m_Skn_2geTN22MGQmSD6R8bYfPgubZre3IE,19820
|
|
35
|
-
ontolearner/learner/text2onto/__init__.py,sha256=
|
|
36
|
-
ontolearner/learner/text2onto/alexbek.py,sha256=
|
|
37
|
-
ontolearner/learner/text2onto/sbunlp.py,sha256
|
|
36
|
+
ontolearner/learner/text2onto/__init__.py,sha256=bLv25lJmgQymgMfhr6JTezMndpDMk9ihheY-VLE-nRI,644
|
|
37
|
+
ontolearner/learner/text2onto/alexbek.py,sha256=0CE5KHgB47tXASgscYH-W3X17XtI2QBtTuhDNpGGaUI,23347
|
|
38
|
+
ontolearner/learner/text2onto/sbunlp.py,sha256=-ULysm_iFUMqEsxNRwgZVcq-70nBzlXMR5BeMezUjjw,23786
|
|
38
39
|
ontolearner/ontology/__init__.py,sha256=F9Ta1qCX9mOxIK5CPRypEoglQNkpJ6SJpqziz73xKQE,1328
|
|
39
40
|
ontolearner/ontology/agriculture.py,sha256=ZaXHNEFjbtsMH8M7HQ8ypnfJS4TUQy_as16fwv-kOKA,5903
|
|
40
41
|
ontolearner/ontology/arts_humanities.py,sha256=K4ceDJL6PfIfSJZ86uQUkUXOVoiERG6ItgvVE2lhLKk,3996
|
|
@@ -62,15 +63,15 @@ ontolearner/processor.py,sha256=LaPUr4BSmPZDINo5t55q9U0i9lLXa77u4pN38usQMBc,4817
|
|
|
62
63
|
ontolearner/text2onto/__init__.py,sha256=YbbDYpHYSMA4dof-7y40PKYsiRO7wvoXZ2LbsRwpPJE,645
|
|
63
64
|
ontolearner/text2onto/batchifier.py,sha256=2CljvcZo0EDW3sHHcG9d5w26RcRwbMsQdFB1j-vCam4,6646
|
|
64
65
|
ontolearner/text2onto/general.py,sha256=2RUFMbWm7qLq3MJHsyNb3rgYkGcicnkbiH2wdPBsBps,1099
|
|
65
|
-
ontolearner/text2onto/splitter.py,sha256=
|
|
66
|
+
ontolearner/text2onto/splitter.py,sha256=PeiVbw5zuNlX3dhtcIJRVCghNizWE8ugIGV7OigR5Ac,12743
|
|
66
67
|
ontolearner/text2onto/synthesizer.py,sha256=tSJgPTFWVKBQi2RqLQfMhX_noXeNLh2Wq2Ezbqyv-OA,5486
|
|
67
68
|
ontolearner/tools/__init__.py,sha256=IB5ycAW5vUDKeq-NAMMbwjSFzwSzC-5j0UobIzO3ZmI,623
|
|
68
|
-
ontolearner/tools/analyzer.py,sha256=
|
|
69
|
+
ontolearner/tools/analyzer.py,sha256=1SooAT7qYqDIrHyvHXnrBRmuPwZhLK1uj26OiKRECc0,12989
|
|
69
70
|
ontolearner/tools/visualizer.py,sha256=cwijl4yYaS1SCLM5wbvRTEcbQj9Bjo4fHzZR6q6o8qo,6267
|
|
70
71
|
ontolearner/utils/__init__.py,sha256=pSEyU3dlPMADBqygqaaid44RdWf0Lo3Fvz-K_rQ7_Bw,733
|
|
71
72
|
ontolearner/utils/io.py,sha256=3DqGK2p7c0onKi0Xxs16WB08uHfHUId3bW0dDKwyS0g,2110
|
|
72
73
|
ontolearner/utils/train_test_split.py,sha256=Zlm42eT6QGWwlySyomCPIiTGmGqeN_h4z4xBY2EAOR8,11530
|
|
73
|
-
ontolearner-1.4.
|
|
74
|
-
ontolearner-1.4.
|
|
75
|
-
ontolearner-1.4.
|
|
76
|
-
ontolearner-1.4.
|
|
74
|
+
ontolearner-1.4.11.dist-info/METADATA,sha256=YDJySz7VAXa80XACaj-WDyuHtFhticcNqEmQVaR8Jsg,11473
|
|
75
|
+
ontolearner-1.4.11.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
76
|
+
ontolearner-1.4.11.dist-info/licenses/LICENSE,sha256=krXMLuMKgzX-UgaufgfJdm9ojIloZot7ZdvJUnNxl4I,1067
|
|
77
|
+
ontolearner-1.4.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|