rdf-construct 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +12 -0
- rdf_construct/__main__.py +0 -0
- rdf_construct/cli.py +1762 -0
- rdf_construct/core/__init__.py +33 -0
- rdf_construct/core/config.py +116 -0
- rdf_construct/core/ordering.py +219 -0
- rdf_construct/core/predicate_order.py +212 -0
- rdf_construct/core/profile.py +157 -0
- rdf_construct/core/selector.py +64 -0
- rdf_construct/core/serialiser.py +232 -0
- rdf_construct/core/utils.py +89 -0
- rdf_construct/cq/__init__.py +77 -0
- rdf_construct/cq/expectations.py +365 -0
- rdf_construct/cq/formatters/__init__.py +45 -0
- rdf_construct/cq/formatters/json.py +104 -0
- rdf_construct/cq/formatters/junit.py +104 -0
- rdf_construct/cq/formatters/text.py +146 -0
- rdf_construct/cq/loader.py +300 -0
- rdf_construct/cq/runner.py +321 -0
- rdf_construct/diff/__init__.py +59 -0
- rdf_construct/diff/change_types.py +214 -0
- rdf_construct/diff/comparator.py +338 -0
- rdf_construct/diff/filters.py +133 -0
- rdf_construct/diff/formatters/__init__.py +71 -0
- rdf_construct/diff/formatters/json.py +192 -0
- rdf_construct/diff/formatters/markdown.py +210 -0
- rdf_construct/diff/formatters/text.py +195 -0
- rdf_construct/docs/__init__.py +60 -0
- rdf_construct/docs/config.py +238 -0
- rdf_construct/docs/extractors.py +603 -0
- rdf_construct/docs/generator.py +360 -0
- rdf_construct/docs/renderers/__init__.py +7 -0
- rdf_construct/docs/renderers/html.py +803 -0
- rdf_construct/docs/renderers/json.py +390 -0
- rdf_construct/docs/renderers/markdown.py +628 -0
- rdf_construct/docs/search.py +278 -0
- rdf_construct/docs/templates/html/base.html.jinja +44 -0
- rdf_construct/docs/templates/html/class.html.jinja +152 -0
- rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
- rdf_construct/docs/templates/html/index.html.jinja +110 -0
- rdf_construct/docs/templates/html/instance.html.jinja +90 -0
- rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
- rdf_construct/docs/templates/html/property.html.jinja +124 -0
- rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
- rdf_construct/lint/__init__.py +75 -0
- rdf_construct/lint/config.py +214 -0
- rdf_construct/lint/engine.py +396 -0
- rdf_construct/lint/formatters.py +327 -0
- rdf_construct/lint/rules.py +692 -0
- rdf_construct/main.py +6 -0
- rdf_construct/puml2rdf/__init__.py +103 -0
- rdf_construct/puml2rdf/config.py +230 -0
- rdf_construct/puml2rdf/converter.py +420 -0
- rdf_construct/puml2rdf/merger.py +200 -0
- rdf_construct/puml2rdf/model.py +202 -0
- rdf_construct/puml2rdf/parser.py +565 -0
- rdf_construct/puml2rdf/validators.py +451 -0
- rdf_construct/shacl/__init__.py +56 -0
- rdf_construct/shacl/config.py +166 -0
- rdf_construct/shacl/converters.py +520 -0
- rdf_construct/shacl/generator.py +364 -0
- rdf_construct/shacl/namespaces.py +93 -0
- rdf_construct/stats/__init__.py +29 -0
- rdf_construct/stats/collector.py +178 -0
- rdf_construct/stats/comparator.py +298 -0
- rdf_construct/stats/formatters/__init__.py +83 -0
- rdf_construct/stats/formatters/json.py +38 -0
- rdf_construct/stats/formatters/markdown.py +153 -0
- rdf_construct/stats/formatters/text.py +186 -0
- rdf_construct/stats/metrics/__init__.py +26 -0
- rdf_construct/stats/metrics/basic.py +147 -0
- rdf_construct/stats/metrics/complexity.py +137 -0
- rdf_construct/stats/metrics/connectivity.py +130 -0
- rdf_construct/stats/metrics/documentation.py +128 -0
- rdf_construct/stats/metrics/hierarchy.py +207 -0
- rdf_construct/stats/metrics/properties.py +88 -0
- rdf_construct/uml/__init__.py +22 -0
- rdf_construct/uml/context.py +194 -0
- rdf_construct/uml/mapper.py +371 -0
- rdf_construct/uml/odm_renderer.py +789 -0
- rdf_construct/uml/renderer.py +684 -0
- rdf_construct/uml/uml_layout.py +393 -0
- rdf_construct/uml/uml_style.py +613 -0
- rdf_construct-0.2.0.dist-info/METADATA +431 -0
- rdf_construct-0.2.0.dist-info/RECORD +88 -0
- rdf_construct-0.2.0.dist-info/WHEEL +4 -0
- rdf_construct-0.2.0.dist-info/entry_points.txt +3 -0
- rdf_construct-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
"""Convert parsed PlantUML models to RDF graphs.
|
|
2
|
+
|
|
3
|
+
This module transforms the intermediate PumlModel representation
|
|
4
|
+
into a proper RDF/OWL ontology using rdflib.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from rdflib import Graph, Literal, Namespace, URIRef, RDF, RDFS
|
|
12
|
+
from rdflib.namespace import OWL, XSD
|
|
13
|
+
|
|
14
|
+
from rdf_construct.puml2rdf.model import (
|
|
15
|
+
PumlAttribute,
|
|
16
|
+
PumlClass,
|
|
17
|
+
PumlModel,
|
|
18
|
+
PumlPackage,
|
|
19
|
+
PumlRelationship,
|
|
20
|
+
RelationshipType,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Standard XSD datatype mappings from common PlantUML/UML type names.
#
# The converter lower-cases a requested type name before looking it up here,
# so every mixed-case key needs an all-lowercase alias to be reachable.
XSD_TYPE_MAP: dict[str, URIRef] = {
    # String types
    "string": XSD.string,
    "str": XSD.string,
    "text": XSD.string,
    # Numeric types
    "integer": XSD.integer,
    "int": XSD.integer,
    "decimal": XSD.decimal,
    "float": XSD.float,
    "double": XSD.double,
    "number": XSD.decimal,
    # Boolean
    "boolean": XSD.boolean,
    "bool": XSD.boolean,
    # Date/time types
    "date": XSD.date,
    "datetime": XSD.dateTime,
    "time": XSD.time,
    "gYear": XSD.gYear,
    "gyear": XSD.gYear,
    "gYearMonth": XSD.gYearMonth,
    "gyearmonth": XSD.gYearMonth,  # lowercase alias (was missing)
    "duration": XSD.duration,
    # URI types
    "uri": XSD.anyURI,
    "anyURI": XSD.anyURI,
    "anyuri": XSD.anyURI,
    "url": XSD.anyURI,
    # Other common types
    "base64": XSD.base64Binary,
    "hexBinary": XSD.hexBinary,
    "hexbinary": XSD.hexBinary,  # lowercase alias (was missing)
    "language": XSD.language,
    "token": XSD.token,
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class ConversionConfig:
    """Settings that control how a PlantUML model is turned into RDF.

    Attributes:
        default_namespace: Namespace URI used for classes that have no
            (known) package; also the basis of the ontology URI.
        language: Language tag attached to generated labels and comments.
        generate_labels: Emit rdfs:label triples when True.
        generate_inverse_properties: Flag for creating inverse properties.
        camel_to_label: Derive labels by splitting camelCase/PascalCase
            names into lower-case words.
        use_owl_thing: Flag for making classes explicit subclasses of
            owl:Thing.
    """

    # Field order is part of the positional constructor signature.
    default_namespace: str = "http://example.org/ontology#"
    language: str = "en"
    generate_labels: bool = True
    generate_inverse_properties: bool = False
    camel_to_label: bool = True
    use_owl_thing: bool = False
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass
class ConversionResult:
    """Outcome of one PlantUML-to-RDF conversion.

    Attributes:
        graph: The RDF graph that was generated.
        class_uris: Class name -> URI of the generated class.
        property_uris: Property name -> URI of the generated property.
        warnings: Human-readable warnings collected while converting.
    """

    graph: Graph
    # Each instance gets its own fresh containers.
    class_uris: dict[str, URIRef] = field(default_factory=dict)
    property_uris: dict[str, URIRef] = field(default_factory=dict)
    warnings: list[str] = field(default_factory=list)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class PumlToRdfConverter:
    """Converts parsed PlantUML models to RDF/OWL ontologies.

    The generated graph follows these patterns:
    - Classes become owl:Class
    - Attributes become owl:DatatypeProperty (domain = owning class)
    - Inheritance becomes rdfs:subClassOf
    - Every other relationship becomes an owl:ObjectProperty
    - Notes become rdfs:comment

    A converter instance can be reused: each call to ``convert`` starts
    from a fresh graph and fresh lookup tables.

    Example:
        converter = PumlToRdfConverter()
        result = converter.convert(model)
        result.graph.serialize("ontology.ttl", format="turtle")
    """
|
|
114
|
+
|
|
115
|
+
def __init__(self, config: Optional[ConversionConfig] = None) -> None:
    """Create a converter with empty conversion state.

    Args:
        config: Conversion options; a default ``ConversionConfig`` is
            used when omitted (or falsy).
    """
    self.config = config if config else ConversionConfig()

    # Per-conversion state; ``convert`` resets all of it on every call.
    self._warnings: list[str] = []
    self._property_uris: dict[str, URIRef] = {}
    self._class_uris: dict[str, URIRef] = {}
    self._namespaces: dict[str, Namespace] = {}
    self._graph: Graph = Graph()
|
|
127
|
+
|
|
128
|
+
def convert(self, model: PumlModel) -> ConversionResult:
    """Build an RDF graph from a parsed PlantUML model.

    Args:
        model: The parsed model; its packages, classes, relationships
            and title are read.

    Returns:
        A ``ConversionResult`` holding the graph, the class/property URI
        lookup tables and any warnings collected along the way.
    """
    # Start from a clean slate so the converter instance can be reused.
    self._graph = Graph()
    self._namespaces, self._class_uris, self._property_uris = {}, {}, {}
    self._warnings = []

    # Namespaces come from package declarations AND class prefixes.
    self._setup_namespaces(model.packages, model.classes)
    self._create_ontology_header(model)

    # Classes first: relationships resolve their endpoints through the
    # class URI table filled in here.
    for puml_class in model.classes:
        self._convert_class(puml_class)
    for relationship in model.relationships:
        self._convert_relationship(relationship)

    return ConversionResult(
        graph=self._graph,
        class_uris=self._class_uris,
        property_uris=self._property_uris,
        warnings=self._warnings,
    )
|
|
156
|
+
|
|
157
|
+
def _setup_namespaces(self, packages: list[PumlPackage], classes: list[PumlClass]) -> None:
    """Register and bind all namespaces needed for the conversion.

    Args:
        packages: Package declarations; those carrying a namespace URI
            are bound under the package name.
        classes: Classes whose ``package`` values that are not covered by
            a declaration get a namespace derived from the default one.
    """
    graph = self._graph

    # Standard vocabularies
    for std_prefix, std_ns in (("owl", OWL), ("rdfs", RDFS), ("xsd", XSD)):
        graph.bind(std_prefix, std_ns)

    # Default namespace, stored under the None key for unpackaged classes
    fallback_ns = Namespace(self.config.default_namespace)
    self._namespaces[None] = fallback_ns
    graph.bind("", fallback_ns)

    # Package prefixes referenced by classes that still need a namespace
    pending = {cls.package for cls in classes if cls.package}

    for pkg in packages:
        declared_uri = pkg.namespace_uri
        if not declared_uri:
            continue
        # Make sure local names can be appended directly to the URI.
        if not declared_uri.endswith(("#", "/")):
            declared_uri += "#"
        declared_ns = Namespace(declared_uri)
        self._namespaces[pkg.name] = declared_ns
        graph.bind(pkg.name, declared_ns)
        pending.discard(pkg.name)  # explicitly declared, don't auto-generate

    # Everything left gets "<default namespace>/<prefix>#".
    root = self.config.default_namespace.rstrip("#/")
    for prefix in pending:
        generated_ns = Namespace(f"{root}/{prefix}#")
        self._namespaces[prefix] = generated_ns
        graph.bind(prefix, generated_ns)
|
|
190
|
+
|
|
191
|
+
def _generate_prefix(self, name: str) -> str:
|
|
192
|
+
"""Generate a namespace prefix from a package name.
|
|
193
|
+
|
|
194
|
+
Args:
|
|
195
|
+
name: Package name (may be a URI or display name)
|
|
196
|
+
|
|
197
|
+
Returns:
|
|
198
|
+
Short lowercase prefix string
|
|
199
|
+
"""
|
|
200
|
+
# If it looks like a URI, extract the last segment
|
|
201
|
+
if "://" in name or name.startswith("http"):
|
|
202
|
+
# Extract meaningful part from URI
|
|
203
|
+
name = name.rstrip("#/")
|
|
204
|
+
if "/" in name:
|
|
205
|
+
name = name.rsplit("/", 1)[-1]
|
|
206
|
+
if "#" in name:
|
|
207
|
+
name = name.rsplit("#", 1)[-1]
|
|
208
|
+
|
|
209
|
+
# Clean and shorten
|
|
210
|
+
prefix = re.sub(r"[^a-zA-Z0-9]", "", name).lower()
|
|
211
|
+
return prefix[:10] if len(prefix) > 10 else prefix or "ns"
|
|
212
|
+
|
|
213
|
+
def _create_ontology_header(self, model: PumlModel) -> None:
    """Declare the owl:Ontology resource, labelled with the model title.

    The ontology URI is the default namespace without its trailing
    ``#``/``/`` characters.

    Args:
        model: The model whose ``title`` (if any) becomes the rdfs:label.
    """
    ontology_uri = URIRef(self.config.default_namespace.rstrip("#/"))
    self._graph.add((ontology_uri, RDF.type, OWL.Ontology))

    if not model.title:
        return

    title = Literal(model.title, lang=self.config.language)
    self._graph.add((ontology_uri, RDFS.label, title))
|
|
223
|
+
|
|
224
|
+
def _convert_class(self, cls: PumlClass) -> None:
    """Emit an owl:Class for ``cls`` plus properties for its attributes.

    Args:
        cls: The PlantUML class to convert.
    """
    namespace = self._get_namespace_for_class(cls)
    class_uri = namespace[cls.name]  # built from the local name only

    # Register under both the local and the qualified name so that
    # relationships can look the class up either way.
    self._class_uris[cls.name] = class_uri
    self._class_uris[cls.qualified_name] = class_uri

    add_triple = self._graph.add
    add_triple((class_uri, RDF.type, OWL.Class))

    if self.config.generate_labels:
        # An explicit display name wins; otherwise fall back to the
        # (optionally humanised) local name.
        label_text = cls.display_name or (
            self._camel_to_label(cls.name) if self.config.camel_to_label else cls.name
        )
        add_triple((class_uri, RDFS.label, Literal(label_text, lang=self.config.language)))

    # A note attached to the class becomes its rdfs:comment.
    if cls.note:
        add_triple((class_uri, RDFS.comment, Literal(cls.note, lang=self.config.language)))

    if cls.is_abstract:
        # Intentionally a no-op: abstract classes get no extra annotation yet.
        pass

    # Attributes become datatype properties in the class's namespace.
    for attribute in cls.attributes:
        self._convert_attribute(attribute, class_uri, namespace)
|
|
261
|
+
|
|
262
|
+
def _convert_attribute(
    self, attr: PumlAttribute, domain_class: URIRef, ns: Namespace
) -> None:
    """Emit an owl:DatatypeProperty for one class attribute.

    Args:
        attr: The attribute; its name and optional datatype are used.
        domain_class: URI of the owning class, used as rdfs:domain.
        ns: Namespace in which the property URI is minted.
    """
    property_uri = ns[attr.name]
    self._property_uris[attr.name] = property_uri

    add_triple = self._graph.add
    add_triple((property_uri, RDF.type, OWL.DatatypeProperty))
    add_triple((property_uri, RDFS.domain, domain_class))

    # The range is only stated when the attribute declares a datatype.
    if attr.datatype:
        add_triple((property_uri, RDFS.range, self._map_datatype(attr.datatype)))

    if self.config.generate_labels:
        if self.config.camel_to_label:
            label_text = self._camel_to_label(attr.name)
        else:
            label_text = attr.name
        add_triple((property_uri, RDFS.label, Literal(label_text, lang=self.config.language)))
|
|
286
|
+
|
|
287
|
+
def _convert_relationship(self, rel: PumlRelationship) -> None:
    """Translate one PlantUML relationship into RDF.

    Inheritance becomes rdfs:subClassOf; any other relationship type is
    handed to ``_convert_association``. A relationship whose source or
    target class is unknown is skipped with a warning.

    Args:
        rel: The relationship to convert.
    """
    lookup = self._class_uris.get
    source_uri = lookup(rel.source)
    target_uri = lookup(rel.target)

    # Both endpoints must have been converted as classes beforehand.
    if not source_uri:
        self._warnings.append(f"Unknown source class in relationship: {rel.source}")
        return
    if not target_uri:
        self._warnings.append(f"Unknown target class in relationship: {rel.target}")
        return

    if rel.rel_type == RelationshipType.INHERITANCE:
        # The source is the subclass, the target the superclass.
        self._graph.add((source_uri, RDFS.subClassOf, target_uri))
        return

    # Everything else is modelled as an object property.
    self._convert_association(rel, source_uri, target_uri)
|
|
306
|
+
|
|
307
|
+
def _convert_association(
    self, rel: PumlRelationship, source_uri: URIRef, target_uri: URIRef
) -> None:
    """Convert an association to owl:ObjectProperty.

    The property is named after the relationship label when there is one,
    otherwise ``has<target>``. It is minted in the namespace of the source
    class, with the source class as rdfs:domain and the target class as
    rdfs:range.

    Args:
        rel: The relationship being converted.
        source_uri: URI of the (already converted) source class.
        target_uri: URI of the (already converted) target class.
    """
    # Generate property name from label or classes
    if rel.label:
        prop_name = self._label_to_property_name(rel.label)
    else:
        prop_name = f"has{rel.target}"

    # The property lives in the same namespace as the source class
    ns = self._get_namespace_for_class_uri(source_uri)
    prop_uri = ns[prop_name]
    self._property_uris[prop_name] = prop_uri

    # Property declaration with domain and range
    self._graph.add((prop_uri, RDF.type, OWL.ObjectProperty))
    self._graph.add((prop_uri, RDFS.domain, source_uri))
    self._graph.add((prop_uri, RDFS.range, target_uri))

    # Label: the original relationship label if present, else derived
    if self.config.generate_labels:
        label = rel.label or self._camel_to_label(prop_name)
        self._graph.add(
            (prop_uri, RDFS.label, Literal(label, lang=self.config.language))
        )

    # TODO: rel.source_cardinality / rel.target_cardinality are not emitted
    # yet; they could become annotations or OWL cardinality restrictions.
    # (A cardinality string used to be built here and then thrown away.)
|
|
344
|
+
|
|
345
|
+
def _get_namespace_for_class(self, cls: PumlClass) -> Namespace:
    """Return the namespace in which ``cls`` should be minted.

    Args:
        cls: The class whose ``package`` selects the namespace.

    Returns:
        The namespace registered for the class's package, or the default
        namespace when the class has no package or the package is unknown.
    """
    package = cls.package
    if not package or package not in self._namespaces:
        return self._namespaces[None]
    return self._namespaces[package]
|
|
350
|
+
|
|
351
|
+
def _get_namespace_for_class_uri(self, class_uri: URIRef) -> Namespace:
    """Get the namespace containing a class URI.

    Args:
        class_uri: URI of a class.

    Returns:
        The registered namespace that is a prefix of ``class_uri``. When
        several namespaces match (one namespace nested inside another),
        the longest - i.e. most specific - one is returned. Falls back to
        the default namespace when none matches.
    """
    uri_str = str(class_uri)
    matches = [
        ns for ns in self._namespaces.values() if uri_str.startswith(str(ns))
    ]
    if matches:
        # Longest match wins, so a nested namespace is preferred over the
        # enclosing one regardless of registration order.
        return max(matches, key=lambda ns: len(str(ns)))

    # The default namespace is registered under the None key; the former
    # lookup of "default" raised KeyError.
    return self._namespaces[None]
|
|
358
|
+
|
|
359
|
+
def _map_datatype(self, type_name: str) -> URIRef:
    """Map a PlantUML type name to XSD datatype.

    Lookup is case-insensitive against ``XSD_TYPE_MAP``. Names written as
    ``xsd:<local>`` are mapped straight into the XSD namespace. Anything
    else falls back to xsd:string and records a warning.

    Args:
        type_name: Type name as written in the diagram.

    Returns:
        The URI of the matching XSD datatype.
    """
    stripped = type_name.strip()
    normalised = stripped.lower()

    mapped = XSD_TYPE_MAP.get(normalised)
    if mapped is None:
        # Some map keys are mixed-case (e.g. "gYearMonth"); compare them
        # case-insensitively so they are reachable as well.
        mapped = next(
            (uri for key, uri in XSD_TYPE_MAP.items() if key.lower() == normalised),
            None,
        )
    if mapped is not None:
        return mapped

    # Check for qualified XSD types; an empty local name is not accepted
    # because it would resolve to the XSD namespace itself.
    prefix, sep, local = stripped.partition(":")
    if sep and prefix.lower() == "xsd" and local:
        return XSD[local]

    # Default to string
    self._warnings.append(f"Unknown datatype '{type_name}', defaulting to xsd:string")
    return XSD.string
|
|
374
|
+
|
|
375
|
+
def _camel_to_label(self, name: str) -> str:
|
|
376
|
+
"""Convert camelCase or PascalCase to readable label.
|
|
377
|
+
|
|
378
|
+
Examples:
|
|
379
|
+
'FloorArea' -> 'floor area'
|
|
380
|
+
'hasBuilding' -> 'has building'
|
|
381
|
+
'constructionYear' -> 'construction year'
|
|
382
|
+
"""
|
|
383
|
+
# Insert space before uppercase letters
|
|
384
|
+
result = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
|
|
385
|
+
# Insert space between consecutive uppercase and following lowercase
|
|
386
|
+
result = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", result)
|
|
387
|
+
return result.lower()
|
|
388
|
+
|
|
389
|
+
def _label_to_property_name(self, label: str) -> str:
|
|
390
|
+
"""Convert a relationship label to a valid property name.
|
|
391
|
+
|
|
392
|
+
If the label is already a valid identifier (no spaces), preserve it.
|
|
393
|
+
Otherwise, convert multi-word labels to camelCase.
|
|
394
|
+
|
|
395
|
+
Examples:
|
|
396
|
+
'hasFloor' -> 'hasFloor' (preserved)
|
|
397
|
+
'has floor' -> 'hasFloor' (converted)
|
|
398
|
+
'is located in' -> 'isLocatedIn'
|
|
399
|
+
"""
|
|
400
|
+
label = label.strip()
|
|
401
|
+
|
|
402
|
+
# If no spaces, assume it's already a valid property name - preserve case
|
|
403
|
+
if " " not in label:
|
|
404
|
+
# Just remove non-alphanumeric characters
|
|
405
|
+
return re.sub(r"[^a-zA-Z0-9]", "", label)
|
|
406
|
+
|
|
407
|
+
# Multi-word: convert to camelCase
|
|
408
|
+
words = label.split()
|
|
409
|
+
if not words:
|
|
410
|
+
return "property"
|
|
411
|
+
|
|
412
|
+
# First word lowercase, rest capitalised
|
|
413
|
+
result = words[0].lower()
|
|
414
|
+
for word in words[1:]:
|
|
415
|
+
result += word.capitalize()
|
|
416
|
+
|
|
417
|
+
# Remove non-alphanumeric characters
|
|
418
|
+
result = re.sub(r"[^a-zA-Z0-9]", "", result)
|
|
419
|
+
|
|
420
|
+
return result
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Merge generated RDF with existing ontologies.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to merge newly generated RDF
|
|
4
|
+
from PlantUML with existing ontology files, preserving manually
|
|
5
|
+
added content while updating what's defined in the diagram.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from rdflib import Graph, Namespace, URIRef, RDF, RDFS
|
|
13
|
+
from rdflib.namespace import OWL
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class MergeResult:
    """Result of merging two graphs.

    Attributes:
        graph: The merged graph
        added_count: Number of triples added from new graph
        updated_count: Number of triples updated (replaced)
        preserved_count: Number of triples preserved from existing
        conflicts: List of conflict descriptions; ``None`` may be passed
            and is replaced by a fresh empty list
    """

    graph: Graph
    added_count: int = 0
    updated_count: int = 0
    preserved_count: int = 0
    # Optional because the default really is None (the former annotation
    # ``list[str]`` contradicted it); normalised in __post_init__.
    conflicts: Optional[list[str]] = None

    def __post_init__(self) -> None:
        """Give every instance its own conflict list when none was passed."""
        if self.conflicts is None:
            self.conflicts = []
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class OntologyMerger:
    """Merges generated RDF with existing ontology content.

    Merge rules, as implemented by ``merge_graphs``:
    1. For the predicates in ``AUTHORITATIVE_PREDICATES`` the new
       (PlantUML-derived) graph takes over: when it has any value for a
       subject/predicate pair, the existing values for that pair are
       dropped and the new ones are used
    2. All other existing triples are kept, and the new graph's triples
       are added alongside them
    3. Entities only in the existing file are preserved
    4. Conflict descriptions are collected in ``MergeResult.conflicts``

    NOTE(review): ``preserve_existing`` is described as "existing content
    wins on conflict"; confirm how that is meant to interact with rule 1.

    Example:
        merger = OntologyMerger()
        result = merger.merge(new_graph, existing_path)
        result.graph.serialize("merged.ttl", format="turtle")
    """

    # Predicates that PlantUML defines authoritatively
    AUTHORITATIVE_PREDICATES = {
        RDF.type,
        RDFS.subClassOf,
        RDFS.domain,
        RDFS.range,
        RDFS.subPropertyOf,
    }

    # Predicates to merge (keep both if different)
    MERGEABLE_PREDICATES = {
        RDFS.label,
        RDFS.comment,
        RDFS.seeAlso,
    }
|
|
71
|
+
|
|
72
|
+
def __init__(self, preserve_existing: bool = True) -> None:
    """Create a merger.

    Args:
        preserve_existing: Conflict policy flag; True means existing
            content is meant to win on conflict.
    """
    # Stored as-is; it is consulted while merging.
    self.preserve_existing = preserve_existing
|
|
79
|
+
|
|
80
|
+
def merge(
    self,
    new_graph: Graph,
    existing_path: Path,
    output_format: str = "turtle",
) -> MergeResult:
    """Load an ontology file and merge the new graph into it.

    Args:
        new_graph: Newly generated RDF graph
        existing_path: Path to existing ontology file
        output_format: RDF format used to parse the existing file
            (despite the parameter name, nothing is written here)

    Returns:
        MergeResult with merged graph and statistics
    """
    existing_graph = Graph()
    # rdflib is handed the location as a plain string.
    existing_graph.parse(str(existing_path), format=output_format)
    return self.merge_graphs(new_graph, existing_graph)
|
|
101
|
+
|
|
102
|
+
def merge_graphs(
    self,
    new_graph: Graph,
    existing: Graph,
) -> MergeResult:
    """Merge two graphs.

    Existing triples are kept unless the new graph supplies its own
    values for the same subject and an authoritative predicate; in that
    case the existing values are replaced. Each existing value that is
    replaced by a different new value is reported as a conflict.

    Counters on the result:
        updated_count: existing triples dropped because the new graph has
            values for the same subject and authoritative predicate
        preserved_count: existing triples copied into the result
        added_count: new-graph triples added to the result

    NOTE(review): conflicts used to be detected against the partially
    built result graph, where replaced existing values were already gone.
    That only ever compared the new graph's own values with each other
    and, with ``preserve_existing=False``, deleted some of them.
    Conflicts are now computed against ``existing`` and no new value is
    removed. As a consequence ``preserve_existing`` does not influence
    this method; confirm the intended semantics of that flag.

    Args:
        new_graph: Newly generated RDF graph
        existing: Existing ontology graph

    Returns:
        MergeResult with merged graph and statistics
    """
    result = MergeResult(graph=Graph())
    merged = result.graph
    conflicts: list[str] = []

    # Copy all prefixes from both graphs (existing graph first), without
    # overriding a binding that is already present.
    for source in (existing, new_graph):
        for prefix, ns in source.namespace_manager.namespaces():
            merged.bind(prefix, ns, override=False)

    # Subjects defined in the new graph
    new_subjects = set(new_graph.subjects())

    # Pass 1: carry over existing triples, except where the new graph is
    # authoritative and has a value of its own.
    for s, p, o in existing:
        superseded = (
            s in new_subjects
            and p in self.AUTHORITATIVE_PREDICATES
            and any(True for _ in new_graph.objects(s, p))
        )
        if superseded:
            # The new graph's values are added in pass 2.
            result.updated_count += 1
            continue
        merged.add((s, p, o))
        result.preserved_count += 1

    # Pass 2: add the new graph's triples, reporting replaced values.
    for s, p, o in new_graph:
        if (s, p, o) in merged:
            continue
        if p in self.AUTHORITATIVE_PREDICATES:
            for ev in existing.objects(s, p):
                # A value present in both graphs is not a conflict.
                if (s, p, ev) not in new_graph:
                    conflicts.append(
                        f"Conflict on {s} {p}: existing={ev}, new={o}"
                    )
        merged.add((s, p, o))
        result.added_count += 1

    result.conflicts = conflicts
    return result
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def merge_with_existing(
    new_graph: Graph,
    existing_path: Path,
    output_path: Optional[Path] = None,
    output_format: str = "turtle",
) -> MergeResult:
    """Merge a new graph into an ontology file, optionally saving the result.

    Args:
        new_graph: Newly generated RDF graph
        existing_path: Path to existing ontology
        output_path: Where to write the merged graph; nothing is written
            when omitted
        output_format: RDF format used both to parse the existing file
            and to serialise the merged graph

    Returns:
        MergeResult with merged graph and statistics
    """
    result = OntologyMerger().merge(new_graph, existing_path, output_format)

    if not output_path:
        return result

    result.graph.serialize(str(output_path), format=output_format)
    return result
|