rdf-construct 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +12 -0
- rdf_construct/__main__.py +0 -0
- rdf_construct/cli.py +1762 -0
- rdf_construct/core/__init__.py +33 -0
- rdf_construct/core/config.py +116 -0
- rdf_construct/core/ordering.py +219 -0
- rdf_construct/core/predicate_order.py +212 -0
- rdf_construct/core/profile.py +157 -0
- rdf_construct/core/selector.py +64 -0
- rdf_construct/core/serialiser.py +232 -0
- rdf_construct/core/utils.py +89 -0
- rdf_construct/cq/__init__.py +77 -0
- rdf_construct/cq/expectations.py +365 -0
- rdf_construct/cq/formatters/__init__.py +45 -0
- rdf_construct/cq/formatters/json.py +104 -0
- rdf_construct/cq/formatters/junit.py +104 -0
- rdf_construct/cq/formatters/text.py +146 -0
- rdf_construct/cq/loader.py +300 -0
- rdf_construct/cq/runner.py +321 -0
- rdf_construct/diff/__init__.py +59 -0
- rdf_construct/diff/change_types.py +214 -0
- rdf_construct/diff/comparator.py +338 -0
- rdf_construct/diff/filters.py +133 -0
- rdf_construct/diff/formatters/__init__.py +71 -0
- rdf_construct/diff/formatters/json.py +192 -0
- rdf_construct/diff/formatters/markdown.py +210 -0
- rdf_construct/diff/formatters/text.py +195 -0
- rdf_construct/docs/__init__.py +60 -0
- rdf_construct/docs/config.py +238 -0
- rdf_construct/docs/extractors.py +603 -0
- rdf_construct/docs/generator.py +360 -0
- rdf_construct/docs/renderers/__init__.py +7 -0
- rdf_construct/docs/renderers/html.py +803 -0
- rdf_construct/docs/renderers/json.py +390 -0
- rdf_construct/docs/renderers/markdown.py +628 -0
- rdf_construct/docs/search.py +278 -0
- rdf_construct/docs/templates/html/base.html.jinja +44 -0
- rdf_construct/docs/templates/html/class.html.jinja +152 -0
- rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
- rdf_construct/docs/templates/html/index.html.jinja +110 -0
- rdf_construct/docs/templates/html/instance.html.jinja +90 -0
- rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
- rdf_construct/docs/templates/html/property.html.jinja +124 -0
- rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
- rdf_construct/lint/__init__.py +75 -0
- rdf_construct/lint/config.py +214 -0
- rdf_construct/lint/engine.py +396 -0
- rdf_construct/lint/formatters.py +327 -0
- rdf_construct/lint/rules.py +692 -0
- rdf_construct/main.py +6 -0
- rdf_construct/puml2rdf/__init__.py +103 -0
- rdf_construct/puml2rdf/config.py +230 -0
- rdf_construct/puml2rdf/converter.py +420 -0
- rdf_construct/puml2rdf/merger.py +200 -0
- rdf_construct/puml2rdf/model.py +202 -0
- rdf_construct/puml2rdf/parser.py +565 -0
- rdf_construct/puml2rdf/validators.py +451 -0
- rdf_construct/shacl/__init__.py +56 -0
- rdf_construct/shacl/config.py +166 -0
- rdf_construct/shacl/converters.py +520 -0
- rdf_construct/shacl/generator.py +364 -0
- rdf_construct/shacl/namespaces.py +93 -0
- rdf_construct/stats/__init__.py +29 -0
- rdf_construct/stats/collector.py +178 -0
- rdf_construct/stats/comparator.py +298 -0
- rdf_construct/stats/formatters/__init__.py +83 -0
- rdf_construct/stats/formatters/json.py +38 -0
- rdf_construct/stats/formatters/markdown.py +153 -0
- rdf_construct/stats/formatters/text.py +186 -0
- rdf_construct/stats/metrics/__init__.py +26 -0
- rdf_construct/stats/metrics/basic.py +147 -0
- rdf_construct/stats/metrics/complexity.py +137 -0
- rdf_construct/stats/metrics/connectivity.py +130 -0
- rdf_construct/stats/metrics/documentation.py +128 -0
- rdf_construct/stats/metrics/hierarchy.py +207 -0
- rdf_construct/stats/metrics/properties.py +88 -0
- rdf_construct/uml/__init__.py +22 -0
- rdf_construct/uml/context.py +194 -0
- rdf_construct/uml/mapper.py +371 -0
- rdf_construct/uml/odm_renderer.py +789 -0
- rdf_construct/uml/renderer.py +684 -0
- rdf_construct/uml/uml_layout.py +393 -0
- rdf_construct/uml/uml_style.py +613 -0
- rdf_construct-0.2.0.dist-info/METADATA +431 -0
- rdf_construct-0.2.0.dist-info/RECORD +88 -0
- rdf_construct-0.2.0.dist-info/WHEEL +4 -0
- rdf_construct-0.2.0.dist-info/entry_points.txt +3 -0
- rdf_construct-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
"""Entity extraction from RDF graphs for documentation generation.
|
|
2
|
+
|
|
3
|
+
Extracts comprehensive information about classes, properties, and instances
|
|
4
|
+
from RDF ontologies for use in generating navigable documentation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
from rdflib import RDF, RDFS, Literal, URIRef
|
|
13
|
+
from rdflib.namespace import DCTERMS, OWL, SKOS
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from rdflib import Graph
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Predicates consulted, in this priority order, when looking up a
# human-readable label for an entity (see get_label).
LABEL_PREDICATES = [RDFS.label, SKOS.prefLabel, DCTERMS.title]

# Predicates consulted, in this priority order, when looking up a textual
# definition or description for an entity (see get_definition).
DEFINITION_PREDICATES = [RDFS.comment, SKOS.definition, DCTERMS.description]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class PropertyInfo:
    """Documentation record describing a single RDF property.

    Instances are built by ``extract_property_info``; every field other
    than ``uri`` and ``qname`` has an "empty" default.
    """

    # Identity
    uri: URIRef
    qname: str

    # Human-readable text
    label: str | None = None
    definition: str | None = None

    # One of "object", "datatype", "annotation" or "rdf"; stays "property"
    # when the property carries none of the recognised rdf:type values.
    property_type: str = "property"

    # Declared rdfs:domain / rdfs:range targets (URI references only).
    domain: list[URIRef] = field(default_factory=list)
    range: list[URIRef] = field(default_factory=list)

    # Direct rdfs:subPropertyOf neighbours in both directions.
    superproperties: list[URIRef] = field(default_factory=list)
    subproperties: list[URIRef] = field(default_factory=list)

    # Extra annotation values keyed by annotation name.
    annotations: dict[str, list[str]] = field(default_factory=dict)

    # OWL property characteristics.
    is_functional: bool = False
    is_inverse_functional: bool = False
    inverse_of: URIRef | None = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class ClassInfo:
    """Documentation record describing a single RDF class.

    Instances are built by ``extract_class_info``; every field other than
    ``uri`` and ``qname`` has an "empty" default.
    """

    # Identity
    uri: URIRef
    qname: str

    # Human-readable text
    label: str | None = None
    definition: str | None = None

    # Direct rdfs:subClassOf neighbours in both directions.
    superclasses: list[URIRef] = field(default_factory=list)
    subclasses: list[URIRef] = field(default_factory=list)

    # Properties that name this class as their rdfs:domain / rdfs:range.
    domain_of: list[PropertyInfo] = field(default_factory=list)
    range_of: list[PropertyInfo] = field(default_factory=list)

    # Not filled in by extract_class_info; left for callers to populate.
    inherited_properties: list[PropertyInfo] = field(default_factory=list)

    # Extra annotation values keyed by annotation name.
    annotations: dict[str, list[str]] = field(default_factory=dict)

    # Individuals typed with this class.
    instances: list[URIRef] = field(default_factory=list)

    # owl:disjointWith / owl:equivalentClass targets declared on this class.
    disjoint_with: list[URIRef] = field(default_factory=list)
    equivalent_to: list[URIRef] = field(default_factory=list)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class InstanceInfo:
    """Documentation record describing a single RDF individual.

    Instances are built by ``extract_instance_info``; every field other
    than ``uri`` and ``qname`` has an "empty" default.
    """

    # Identity
    uri: URIRef
    qname: str

    # Human-readable text
    label: str | None = None
    definition: str | None = None

    # rdf:type values asserted for the individual (URI references only).
    types: list[URIRef] = field(default_factory=list)

    # Remaining statements about the individual, keyed by predicate.
    # Literal values are stored as plain strings, URI values as URIRef.
    properties: dict[URIRef, list[str | URIRef]] = field(default_factory=dict)

    # Extra annotation values keyed by annotation name.
    annotations: dict[str, list[str]] = field(default_factory=dict)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class OntologyInfo:
    """Documentation record holding ontology-level metadata.

    Built by ``extract_ontology_info``. Every field is optional, so a
    graph without an ontology declaration still yields a valid record.
    """

    # URI of the owl:Ontology resource, when one is declared.
    uri: URIRef | None = None

    # Descriptive text
    title: str | None = None
    description: str | None = None
    version: str | None = None

    # People / agents, stored as strings (literal text or URI text).
    creators: list[str] = field(default_factory=list)
    contributors: list[str] = field(default_factory=list)

    # owl:imports targets.
    imports: list[URIRef] = field(default_factory=list)

    # Prefix -> namespace URI for namespaces used by the graph.
    namespaces: dict[str, str] = field(default_factory=dict)

    # Extra annotation values keyed by annotation name.
    annotations: dict[str, list[str]] = field(default_factory=dict)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_qname(graph: Graph, uri: URIRef) -> str:
    """Get a qualified name (CURIE) for a URI.

    Only prefixes that are already bound on the graph are considered. The
    lookup is done with ``generate=False`` so that an unknown namespace is
    reported as the full URI instead of being given an invented ``nsN``
    prefix that would also be bound onto the caller's graph.

    Args:
        graph: RDF graph with namespace bindings.
        uri: URI to convert to QName.

    Returns:
        QName string like 'ex:Building' or the full URI if no prefix matches
        (or if the URI cannot be split into namespace and local name).
    """
    try:
        prefix, _namespace, local_name = graph.namespace_manager.compute_qname(
            uri, generate=False
        )
    except (ValueError, KeyError):
        # ValueError: the URI cannot be split; KeyError: no bound prefix.
        return str(uri)

    # An empty prefix denotes the default namespace: emit the bare local name,
    # matching what NamespaceManager.qname() produces.
    if prefix == "":
        return str(local_name)
    return f"{prefix}:{local_name}"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def get_label(graph: Graph, uri: URIRef, lang: str | None = "en") -> str | None:
    """Return the preferred label of an entity, if it has one.

    The predicates in ``LABEL_PREDICATES`` are tried in order. The first
    literal that is either untagged or tagged with ``lang`` is returned.
    When no literal qualifies and a language was requested, the search is
    repeated accepting any language.

    Args:
        graph: RDF graph to query.
        uri: Entity URI to find a label for.
        lang: Preferred language tag (None accepts any language).

    Returns:
        Label text, or None when the entity has no label literal.
    """
    for predicate in LABEL_PREDICATES:
        for value in graph.objects(uri, predicate):
            if not isinstance(value, Literal):
                continue
            tag = value.language
            # Reject only literals explicitly tagged with a different language.
            if lang is not None and tag is not None and tag != lang:
                continue
            return str(value)

    if lang is None:
        return None
    # Nothing in the preferred language: accept a label in any language.
    return get_label(graph, uri, lang=None)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def get_definition(graph: Graph, uri: URIRef, lang: str | None = "en") -> str | None:
    """Return the preferred definition/comment of an entity, if it has one.

    The predicates in ``DEFINITION_PREDICATES`` are tried in order. The
    first literal that is either untagged or tagged with ``lang`` is
    returned. When no literal qualifies and a language was requested, the
    search is repeated accepting any language.

    Args:
        graph: RDF graph to query.
        uri: Entity URI to find a definition for.
        lang: Preferred language tag (None accepts any language).

    Returns:
        Definition text, or None when the entity has no definition literal.
    """
    # Lazy stream of acceptable texts; only the first one is ever consumed.
    candidates = (
        str(value)
        for predicate in DEFINITION_PREDICATES
        for value in graph.objects(uri, predicate)
        if isinstance(value, Literal)
        and (lang is None or value.language == lang or value.language is None)
    )
    text = next(candidates, None)

    if text is not None or lang is None:
        return text
    # Nothing in the preferred language: accept a definition in any language.
    return get_definition(graph, uri, lang=None)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def get_annotations(graph: Graph, uri: URIRef) -> dict[str, list[str]]:
    """Collect the values of a fixed set of annotation predicates.

    Literal and URI values are both recorded as strings; other node kinds
    are ignored. Predicates with no recorded value are left out of the
    result.

    Args:
        graph: RDF graph to query.
        uri: Entity URI to extract annotations from.

    Returns:
        Dictionary mapping annotation names (the predicate's local name) to
        lists of values.
    """
    # (predicate, key used in the result) in output order.
    tracked = (
        (RDFS.seeAlso, "seeAlso"),
        (RDFS.isDefinedBy, "isDefinedBy"),
        (OWL.versionInfo, "versionInfo"),
        (OWL.deprecated, "deprecated"),
        (SKOS.example, "example"),
        (SKOS.note, "note"),
        (SKOS.historyNote, "historyNote"),
        (SKOS.editorialNote, "editorialNote"),
        (SKOS.changeNote, "changeNote"),
        (SKOS.scopeNote, "scopeNote"),
        (DCTERMS.creator, "creator"),
        (DCTERMS.created, "created"),
        (DCTERMS.modified, "modified"),
        (DCTERMS.source, "source"),
    )

    collected: dict[str, list[str]] = {}
    for predicate, key in tracked:
        found = [
            str(node)
            for node in graph.objects(uri, predicate)
            if isinstance(node, (Literal, URIRef))
        ]
        if found:
            collected[key] = found
    return collected
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def extract_ontology_info(graph: Graph) -> OntologyInfo:
    """Gather ontology-level metadata from a graph.

    The first URI typed as ``owl:Ontology`` (in graph iteration order) is
    taken as the ontology. Title, description, version, creators,
    contributors, imports and annotations are read from it. Namespace
    bindings are collected whether or not an ontology resource exists.

    Args:
        graph: RDF graph to extract ontology info from.

    Returns:
        OntologyInfo with ontology-level metadata.
    """
    meta = OntologyInfo()

    # Ontology URI: first owl:Ontology subject that is a URI reference.
    meta.uri = next(
        (
            subject
            for subject in graph.subjects(RDF.type, OWL.Ontology)
            if isinstance(subject, URIRef)
        ),
        None,
    )

    if meta.uri:
        ontology = meta.uri

        # Title, with an explicit dcterms:title retry when the label is empty.
        meta.title = get_label(graph, ontology)
        if not meta.title:
            meta.title = next(
                (
                    str(value)
                    for value in graph.objects(ontology, DCTERMS.title)
                    if isinstance(value, Literal)
                ),
                meta.title,
            )

        meta.description = get_definition(graph, ontology)

        # Version: first owl:versionInfo literal, if any.
        meta.version = next(
            (
                str(value)
                for value in graph.objects(ontology, OWL.versionInfo)
                if isinstance(value, Literal)
            ),
            None,
        )

        # Creators and contributors: literal or URI values, kept as strings.
        meta.creators.extend(
            str(agent)
            for agent in graph.objects(ontology, DCTERMS.creator)
            if isinstance(agent, (Literal, URIRef))
        )
        meta.contributors.extend(
            str(agent)
            for agent in graph.objects(ontology, DCTERMS.contributor)
            if isinstance(agent, (Literal, URIRef))
        )

        meta.imports.extend(
            target
            for target in graph.objects(ontology, OWL.imports)
            if isinstance(target, URIRef)
        )

        meta.annotations = get_annotations(graph, ontology)

    # Namespaces: keep a binding only if some URI in a triple starts with it.
    referenced: set[str] = {
        str(term) for triple in graph for term in triple if isinstance(term, URIRef)
    }
    for prefix, namespace in graph.namespaces():
        base = str(namespace)
        if any(candidate.startswith(base) for candidate in referenced):
            meta.namespaces[prefix] = base

    return meta
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def extract_class_info(graph: Graph, uri: URIRef) -> ClassInfo:
    """Build the documentation record for one class.

    Only direct relationships are read: no transitive closure is computed
    and blank-node (anonymous) neighbours are ignored.

    Args:
        graph: RDF graph to query.
        uri: Class URI to extract info for.

    Returns:
        ClassInfo with all available metadata.
    """

    def only_uris(nodes):
        """Keep the URI references from an iterable of RDF nodes."""
        return [node for node in nodes if isinstance(node, URIRef)]

    def declared_as_class(node):
        """Tell whether the node is itself typed owl:Class or rdfs:Class."""
        return (node, RDF.type, OWL.Class) in graph or (
            node,
            RDF.type,
            RDFS.Class,
        ) in graph

    return ClassInfo(
        uri=uri,
        qname=get_qname(graph, uri),
        label=get_label(graph, uri),
        definition=get_definition(graph, uri),
        annotations=get_annotations(graph, uri),
        # Direct hierarchy neighbours.
        superclasses=only_uris(graph.objects(uri, RDFS.subClassOf)),
        subclasses=only_uris(graph.subjects(RDFS.subClassOf, uri)),
        # Properties naming this class as domain / range.
        domain_of=[
            extract_property_info(graph, prop)
            for prop in only_uris(graph.subjects(RDFS.domain, uri))
        ],
        range_of=[
            extract_property_info(graph, prop)
            for prop in only_uris(graph.subjects(RDFS.range, uri))
        ],
        # Members of the class, leaving out members that are classes themselves.
        instances=[
            member
            for member in only_uris(graph.subjects(RDF.type, uri))
            if not declared_as_class(member)
        ],
        disjoint_with=only_uris(graph.objects(uri, OWL.disjointWith)),
        equivalent_to=only_uris(graph.objects(uri, OWL.equivalentClass)),
    )
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def extract_property_info(graph: Graph, uri: URIRef) -> PropertyInfo:
    """Extract comprehensive information about a property.

    Only direct relationships are read, and blank-node (anonymous) values
    are ignored.

    Args:
        graph: RDF graph to query.
        uri: Property URI to extract info for.

    Returns:
        PropertyInfo with all available metadata.
    """
    info = PropertyInfo(
        uri=uri,
        qname=get_qname(graph, uri),
        label=get_label(graph, uri),
        definition=get_definition(graph, uri),
        annotations=get_annotations(graph, uri),
    )

    # Property type: the first matching rdf:type wins, in this priority order.
    # If none matches, the dataclass default is kept.
    type_names = (
        (OWL.ObjectProperty, "object"),
        (OWL.DatatypeProperty, "datatype"),
        (OWL.AnnotationProperty, "annotation"),
        (RDF.Property, "rdf"),
    )
    for rdf_class, type_name in type_names:
        if (uri, RDF.type, rdf_class) in graph:
            info.property_type = type_name
            break

    # Domain and range
    info.domain.extend(
        obj for obj in graph.objects(uri, RDFS.domain) if isinstance(obj, URIRef)
    )
    info.range.extend(
        obj for obj in graph.objects(uri, RDFS.range) if isinstance(obj, URIRef)
    )

    # Direct super- and subproperties
    info.superproperties.extend(
        obj for obj in graph.objects(uri, RDFS.subPropertyOf) if isinstance(obj, URIRef)
    )
    info.subproperties.extend(
        subj for subj in graph.subjects(RDFS.subPropertyOf, uri) if isinstance(subj, URIRef)
    )

    # Property characteristics
    info.is_functional = (uri, RDF.type, OWL.FunctionalProperty) in graph
    info.is_inverse_functional = (uri, RDF.type, OWL.InverseFunctionalProperty) in graph

    # Inverse of. owl:inverseOf is symmetric, so a property that only appears
    # as the *object* of the assertion still has an inverse. The direction
    # stated on this property is preferred; the reverse is a fallback.
    inverse = next(
        (obj for obj in graph.objects(uri, OWL.inverseOf) if isinstance(obj, URIRef)),
        None,
    )
    if inverse is None:
        inverse = next(
            (
                subj
                for subj in graph.subjects(OWL.inverseOf, uri)
                if isinstance(subj, URIRef)
            ),
            None,
        )
    info.inverse_of = inverse

    return info
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def extract_instance_info(graph: Graph, uri: URIRef) -> InstanceInfo:
    """Extract information about an instance/individual.

    Args:
        graph: RDF graph to query.
        uri: Instance URI to extract info for.

    Returns:
        InstanceInfo with all available metadata. ``properties`` maps each
        remaining predicate to its values: literals as plain strings, URIs
        as URIRef objects.
    """
    info = InstanceInfo(
        uri=uri,
        qname=get_qname(graph, uri),
        label=get_label(graph, uri),
        definition=get_definition(graph, uri),
        annotations=get_annotations(graph, uri),
    )

    # Types
    info.types.extend(
        obj for obj in graph.objects(uri, RDF.type) if isinstance(obj, URIRef)
    )

    # Predicates reported elsewhere on the record (types, label, definition).
    # Built once, outside the loop, instead of on every iteration.
    skipped = frozenset(
        (RDF.type, RDFS.label, RDFS.comment, SKOS.prefLabel, SKOS.definition)
    )

    # All other properties
    for pred, obj in graph.predicate_objects(uri):
        if pred in skipped:
            continue

        # The entry is created before the value is inspected, so a predicate
        # whose only values are blank nodes is still listed, with no values.
        values = info.properties.setdefault(pred, [])
        if isinstance(obj, Literal):
            values.append(str(obj))
        elif isinstance(obj, URIRef):
            values.append(obj)

    return info
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def extract_all_classes(graph: Graph) -> list[ClassInfo]:
    """Build documentation records for every named class in the graph.

    A class is any URI typed ``owl:Class`` or ``rdfs:Class``; a URI typed
    as both is reported once.

    Args:
        graph: RDF graph to query.

    Returns:
        List of ClassInfo objects for all classes, ordered by qname.
    """
    visited: set[URIRef] = set()
    collected = []

    for class_type in (OWL.Class, RDFS.Class):
        for subject in graph.subjects(RDF.type, class_type):
            if not isinstance(subject, URIRef) or subject in visited:
                continue
            visited.add(subject)
            collected.append(extract_class_info(graph, subject))

    # Deterministic output order regardless of graph iteration order.
    return sorted(collected, key=lambda entry: entry.qname)
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def extract_all_properties(graph: Graph) -> list[PropertyInfo]:
    """Build documentation records for every named property in the graph.

    A property is any URI typed ``owl:ObjectProperty``,
    ``owl:DatatypeProperty``, ``owl:AnnotationProperty`` or
    ``rdf:Property``; a URI carrying several of these types is reported
    once.

    Args:
        graph: RDF graph to query.

    Returns:
        List of PropertyInfo objects for all properties, ordered by qname.
    """
    # Insertion-ordered mapping doubles as the "already handled" set.
    by_uri: dict[URIRef, PropertyInfo] = {}

    for prop_type in (
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
        RDF.Property,
    ):
        for subject in graph.subjects(RDF.type, prop_type):
            if isinstance(subject, URIRef) and subject not in by_uri:
                by_uri[subject] = extract_property_info(graph, subject)

    # Deterministic output order regardless of graph iteration order.
    result = list(by_uri.values())
    result.sort(key=lambda entry: entry.qname)
    return result
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
def extract_all_instances(graph: Graph) -> list[InstanceInfo]:
    """Build documentation records for every named individual in the graph.

    An individual is any URI that has an ``rdf:type`` and is not itself
    declared as a class, a property or the ontology.

    Args:
        graph: RDF graph to query.

    Returns:
        List of InstanceInfo objects for all instances, ordered by qname.
    """

    def typed_as(rdf_class):
        """Return the URI subjects that are typed with the given class."""
        return {
            subject
            for subject in graph.subjects(RDF.type, rdf_class)
            if isinstance(subject, URIRef)
        }

    # Everything declared as a class, a property or the ontology is excluded.
    non_instance_types = (
        OWL.Class,
        RDFS.Class,
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
        RDF.Property,
        OWL.Ontology,
    )
    excluded: set[URIRef] = set()
    for rdf_class in non_instance_types:
        excluded |= typed_as(rdf_class)

    handled: set[URIRef] = set()
    found = []
    for subject, _, _ in graph.triples((None, RDF.type, None)):
        if not isinstance(subject, URIRef):
            continue
        if subject in handled or subject in excluded:
            continue
        handled.add(subject)
        found.append(extract_instance_info(graph, subject))

    # Deterministic output order regardless of graph iteration order.
    found.sort(key=lambda entry: entry.qname)
    return found
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
@dataclass
class ExtractedEntities:
    """Bundle of everything extracted from one ontology graph."""

    ontology: OntologyInfo
    classes: list[ClassInfo]
    properties: list[PropertyInfo]
    instances: list[InstanceInfo]

    def _properties_of_type(self, wanted: str) -> list[PropertyInfo]:
        """Return the properties whose ``property_type`` equals ``wanted``."""
        return [prop for prop in self.properties if prop.property_type == wanted]

    @property
    def object_properties(self) -> list[PropertyInfo]:
        """Properties classified as "object", in their stored order."""
        return self._properties_of_type("object")

    @property
    def datatype_properties(self) -> list[PropertyInfo]:
        """Properties classified as "datatype", in their stored order."""
        return self._properties_of_type("datatype")

    @property
    def annotation_properties(self) -> list[PropertyInfo]:
        """Properties classified as "annotation", in their stored order."""
        return self._properties_of_type("annotation")
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
def extract_all(graph: Graph) -> ExtractedEntities:
    """Run every extractor over a graph and bundle the results.

    Args:
        graph: RDF graph to extract from.

    Returns:
        ExtractedEntities containing the ontology metadata plus all classes,
        properties, and instances.
    """
    # Same order as the fields of ExtractedEntities.
    ontology = extract_ontology_info(graph)
    classes = extract_all_classes(graph)
    properties = extract_all_properties(graph)
    instances = extract_all_instances(graph)

    return ExtractedEntities(
        ontology=ontology,
        classes=classes,
        properties=properties,
        instances=instances,
    )
|