rdf-construct 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. rdf_construct/__init__.py +12 -0
  2. rdf_construct/__main__.py +0 -0
  3. rdf_construct/cli.py +3429 -0
  4. rdf_construct/core/__init__.py +33 -0
  5. rdf_construct/core/config.py +116 -0
  6. rdf_construct/core/ordering.py +219 -0
  7. rdf_construct/core/predicate_order.py +212 -0
  8. rdf_construct/core/profile.py +157 -0
  9. rdf_construct/core/selector.py +64 -0
  10. rdf_construct/core/serialiser.py +232 -0
  11. rdf_construct/core/utils.py +89 -0
  12. rdf_construct/cq/__init__.py +77 -0
  13. rdf_construct/cq/expectations.py +365 -0
  14. rdf_construct/cq/formatters/__init__.py +45 -0
  15. rdf_construct/cq/formatters/json.py +104 -0
  16. rdf_construct/cq/formatters/junit.py +104 -0
  17. rdf_construct/cq/formatters/text.py +146 -0
  18. rdf_construct/cq/loader.py +300 -0
  19. rdf_construct/cq/runner.py +321 -0
  20. rdf_construct/diff/__init__.py +59 -0
  21. rdf_construct/diff/change_types.py +214 -0
  22. rdf_construct/diff/comparator.py +338 -0
  23. rdf_construct/diff/filters.py +133 -0
  24. rdf_construct/diff/formatters/__init__.py +71 -0
  25. rdf_construct/diff/formatters/json.py +192 -0
  26. rdf_construct/diff/formatters/markdown.py +210 -0
  27. rdf_construct/diff/formatters/text.py +195 -0
  28. rdf_construct/docs/__init__.py +60 -0
  29. rdf_construct/docs/config.py +238 -0
  30. rdf_construct/docs/extractors.py +603 -0
  31. rdf_construct/docs/generator.py +360 -0
  32. rdf_construct/docs/renderers/__init__.py +7 -0
  33. rdf_construct/docs/renderers/html.py +803 -0
  34. rdf_construct/docs/renderers/json.py +390 -0
  35. rdf_construct/docs/renderers/markdown.py +628 -0
  36. rdf_construct/docs/search.py +278 -0
  37. rdf_construct/docs/templates/html/base.html.jinja +44 -0
  38. rdf_construct/docs/templates/html/class.html.jinja +152 -0
  39. rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
  40. rdf_construct/docs/templates/html/index.html.jinja +110 -0
  41. rdf_construct/docs/templates/html/instance.html.jinja +90 -0
  42. rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
  43. rdf_construct/docs/templates/html/property.html.jinja +124 -0
  44. rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
  45. rdf_construct/lint/__init__.py +75 -0
  46. rdf_construct/lint/config.py +214 -0
  47. rdf_construct/lint/engine.py +396 -0
  48. rdf_construct/lint/formatters.py +327 -0
  49. rdf_construct/lint/rules.py +692 -0
  50. rdf_construct/localise/__init__.py +114 -0
  51. rdf_construct/localise/config.py +508 -0
  52. rdf_construct/localise/extractor.py +427 -0
  53. rdf_construct/localise/formatters/__init__.py +36 -0
  54. rdf_construct/localise/formatters/markdown.py +229 -0
  55. rdf_construct/localise/formatters/text.py +224 -0
  56. rdf_construct/localise/merger.py +346 -0
  57. rdf_construct/localise/reporter.py +356 -0
  58. rdf_construct/main.py +6 -0
  59. rdf_construct/merge/__init__.py +165 -0
  60. rdf_construct/merge/config.py +354 -0
  61. rdf_construct/merge/conflicts.py +281 -0
  62. rdf_construct/merge/formatters.py +426 -0
  63. rdf_construct/merge/merger.py +425 -0
  64. rdf_construct/merge/migrator.py +339 -0
  65. rdf_construct/merge/rules.py +377 -0
  66. rdf_construct/merge/splitter.py +1102 -0
  67. rdf_construct/puml2rdf/__init__.py +103 -0
  68. rdf_construct/puml2rdf/config.py +230 -0
  69. rdf_construct/puml2rdf/converter.py +420 -0
  70. rdf_construct/puml2rdf/merger.py +200 -0
  71. rdf_construct/puml2rdf/model.py +202 -0
  72. rdf_construct/puml2rdf/parser.py +565 -0
  73. rdf_construct/puml2rdf/validators.py +451 -0
  74. rdf_construct/refactor/__init__.py +72 -0
  75. rdf_construct/refactor/config.py +362 -0
  76. rdf_construct/refactor/deprecator.py +328 -0
  77. rdf_construct/refactor/formatters/__init__.py +8 -0
  78. rdf_construct/refactor/formatters/text.py +311 -0
  79. rdf_construct/refactor/renamer.py +294 -0
  80. rdf_construct/shacl/__init__.py +56 -0
  81. rdf_construct/shacl/config.py +166 -0
  82. rdf_construct/shacl/converters.py +520 -0
  83. rdf_construct/shacl/generator.py +364 -0
  84. rdf_construct/shacl/namespaces.py +93 -0
  85. rdf_construct/stats/__init__.py +29 -0
  86. rdf_construct/stats/collector.py +178 -0
  87. rdf_construct/stats/comparator.py +298 -0
  88. rdf_construct/stats/formatters/__init__.py +83 -0
  89. rdf_construct/stats/formatters/json.py +38 -0
  90. rdf_construct/stats/formatters/markdown.py +153 -0
  91. rdf_construct/stats/formatters/text.py +186 -0
  92. rdf_construct/stats/metrics/__init__.py +26 -0
  93. rdf_construct/stats/metrics/basic.py +147 -0
  94. rdf_construct/stats/metrics/complexity.py +137 -0
  95. rdf_construct/stats/metrics/connectivity.py +130 -0
  96. rdf_construct/stats/metrics/documentation.py +128 -0
  97. rdf_construct/stats/metrics/hierarchy.py +207 -0
  98. rdf_construct/stats/metrics/properties.py +88 -0
  99. rdf_construct/uml/__init__.py +22 -0
  100. rdf_construct/uml/context.py +194 -0
  101. rdf_construct/uml/mapper.py +371 -0
  102. rdf_construct/uml/odm_renderer.py +789 -0
  103. rdf_construct/uml/renderer.py +684 -0
  104. rdf_construct/uml/uml_layout.py +393 -0
  105. rdf_construct/uml/uml_style.py +613 -0
  106. rdf_construct-0.3.0.dist-info/METADATA +496 -0
  107. rdf_construct-0.3.0.dist-info/RECORD +110 -0
  108. rdf_construct-0.3.0.dist-info/WHEEL +4 -0
  109. rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
  110. rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,420 @@
1
+ """Convert parsed PlantUML models to RDF graphs.
2
+
3
+ This module transforms the intermediate PumlModel representation
4
+ into a proper RDF/OWL ontology using rdflib.
5
+ """
6
+
7
+ import re
8
+ from dataclasses import dataclass, field
9
+ from typing import Optional
10
+
11
+ from rdflib import Graph, Literal, Namespace, URIRef, RDF, RDFS
12
+ from rdflib.namespace import OWL, XSD
13
+
14
+ from rdf_construct.puml2rdf.model import (
15
+ PumlAttribute,
16
+ PumlClass,
17
+ PumlModel,
18
+ PumlPackage,
19
+ PumlRelationship,
20
+ RelationshipType,
21
+ )
22
+
23
+
24
# Standard XSD datatype mappings from common PlantUML/UML type names.
#
# Lookups performed by the converter lowercase the type name first, so every
# datatype needs at least one all-lowercase key to be reachable that way. The
# camelCase spellings are kept for callers that index this map directly.
XSD_TYPE_MAP: dict[str, URIRef] = {
    # String types
    "string": XSD.string,
    "str": XSD.string,
    "text": XSD.string,
    # Numeric types
    "integer": XSD.integer,
    "int": XSD.integer,
    "decimal": XSD.decimal,
    "float": XSD.float,
    "double": XSD.double,
    "number": XSD.decimal,
    # Boolean
    "boolean": XSD.boolean,
    "bool": XSD.boolean,
    # Date/time types
    "date": XSD.date,
    "datetime": XSD.dateTime,
    "time": XSD.time,
    "gYear": XSD.gYear,
    "gyear": XSD.gYear,
    "gYearMonth": XSD.gYearMonth,
    "gyearmonth": XSD.gYearMonth,
    "duration": XSD.duration,
    # URI types
    "uri": XSD.anyURI,
    "anyURI": XSD.anyURI,
    "anyuri": XSD.anyURI,
    "url": XSD.anyURI,
    # Other common types
    "base64": XSD.base64Binary,
    "base64binary": XSD.base64Binary,
    "hexBinary": XSD.hexBinary,
    "hexbinary": XSD.hexBinary,
    "language": XSD.language,
    "token": XSD.token,
}
59
+
60
+
61
@dataclass
class ConversionConfig:
    """Options that steer the PlantUML to RDF conversion.

    All fields have defaults, so ``ConversionConfig()`` is a usable
    configuration on its own.
    """

    # Namespace URI used for entities that have no explicit package.
    default_namespace: str = "http://example.org/ontology#"
    # Language tag attached to generated labels and comments.
    language: str = "en"
    # Whether rdfs:label triples are generated from names.
    generate_labels: bool = True
    # Whether inverse properties should be created.
    generate_inverse_properties: bool = False
    # Whether camelCase names are turned into readable labels.
    camel_to_label: bool = True
    # Whether classes are explicitly made subclasses of owl:Thing.
    use_owl_thing: bool = False
80
+
81
+
82
@dataclass
class ConversionResult:
    """Outcome of converting a PlantUML model to RDF.

    Only ``graph`` is required; the mappings and the warning list each
    default to a fresh empty container per instance.
    """

    # The generated RDF graph.
    graph: Graph
    # Class name -> URI of the class.
    class_uris: dict[str, URIRef] = field(default_factory=dict)
    # Property name -> URI of the property.
    property_uris: dict[str, URIRef] = field(default_factory=dict)
    # Warnings collected during conversion.
    warnings: list[str] = field(default_factory=list)
97
+
98
+
99
class PumlToRdfConverter:
    """Converts parsed PlantUML models to RDF/OWL ontologies.

    This converter produces OWL 2 ontologies following common patterns:
    - Classes become owl:Class
    - Attributes become owl:DatatypeProperty
    - Associations become owl:ObjectProperty
    - Inheritance becomes rdfs:subClassOf
    - Notes become rdfs:comment

    Example:
        converter = PumlToRdfConverter()
        result = converter.convert(model)
        result.graph.serialize("ontology.ttl", format="turtle")
    """

    def __init__(self, config: Optional[ConversionConfig] = None) -> None:
        """Initialise the converter.

        Args:
            config: Conversion configuration options. A default
                ``ConversionConfig`` is used when omitted.
        """
        self.config = config or ConversionConfig()
        self._graph: Graph = Graph()
        # Keyed by package name; the ``None`` key holds the default namespace.
        self._namespaces: dict[Optional[str], Namespace] = {}
        self._class_uris: dict[str, URIRef] = {}
        self._property_uris: dict[str, URIRef] = {}
        self._warnings: list[str] = []

    def convert(self, model: PumlModel) -> ConversionResult:
        """Convert a PlantUML model to an RDF graph.

        Args:
            model: Parsed model providing ``packages``, ``classes``,
                ``relationships`` and ``title``.

        Returns:
            ConversionResult holding the graph, the class and property URI
            mappings and any warnings collected along the way.
        """
        # Reset all per-run state so the same converter can be reused.
        self._graph = Graph()
        self._namespaces = {}
        self._class_uris = {}
        self._property_uris = {}
        self._warnings = []

        # Set up namespaces from packages AND class prefixes
        self._setup_namespaces(model.packages, model.classes)

        # Create ontology header
        self._create_ontology_header(model)

        # Classes first: relationships look class URIs up by name.
        for cls in model.classes:
            self._convert_class(cls)

        for rel in model.relationships:
            self._convert_relationship(rel)

        return ConversionResult(
            graph=self._graph,
            class_uris=self._class_uris,
            property_uris=self._property_uris,
            warnings=self._warnings,
        )

    def _setup_namespaces(self, packages: list[PumlPackage], classes: list[PumlClass]) -> None:
        """Set up RDF namespaces from PlantUML packages and class prefixes.

        Args:
            packages: Declared packages; those with a ``namespace_uri`` get
                that URI (with ``#`` appended unless it already ends in
                ``#`` or ``/``).
            classes: Classes whose ``package`` values need a namespace; any
                not covered by a declared package URI get one derived from
                the default namespace.
        """
        # Standard namespaces
        self._graph.bind("owl", OWL)
        self._graph.bind("rdfs", RDFS)
        self._graph.bind("xsd", XSD)

        # Default namespace, stored under the None key for unpackaged classes
        default_ns = Namespace(self.config.default_namespace)
        self._namespaces[None] = default_ns
        self._graph.bind("", default_ns)

        # Collect all unique package prefixes from classes
        prefixes = {cls.package for cls in classes if cls.package}

        # Also add packages from PlantUML package declarations
        for pkg in packages:
            if pkg.namespace_uri:
                ns_uri = pkg.namespace_uri
                if not ns_uri.endswith(("#", "/")):
                    ns_uri += "#"
                ns = Namespace(ns_uri)
                self._namespaces[pkg.name] = ns
                self._graph.bind(pkg.name, ns)
                prefixes.discard(pkg.name)  # Don't auto-generate

        # Auto-generate namespaces for remaining prefixes. Sorted so that the
        # binding order does not depend on set iteration order.
        base = self.config.default_namespace.rstrip("#/")
        for prefix in sorted(prefixes):
            ns = Namespace(f"{base}/{prefix}#")
            self._namespaces[prefix] = ns
            self._graph.bind(prefix, ns)

    def _generate_prefix(self, name: str) -> str:
        """Generate a namespace prefix from a package name.

        Args:
            name: Package name (may be a URI or display name)

        Returns:
            Lowercase alphanumeric prefix of at most 10 characters, or
            ``"ns"`` when nothing alphanumeric remains.
        """
        # If it looks like a URI, extract the last segment
        if "://" in name or name.startswith("http"):
            name = name.rstrip("#/")
            if "/" in name:
                name = name.rsplit("/", 1)[-1]
            if "#" in name:
                name = name.rsplit("#", 1)[-1]

        # Clean and shorten
        prefix = re.sub(r"[^a-zA-Z0-9]", "", name).lower()
        if len(prefix) > 10:
            return prefix[:10]
        return prefix or "ns"

    def _create_ontology_header(self, model: PumlModel) -> None:
        """Create the owl:Ontology declaration.

        The ontology URI is the default namespace without its trailing
        ``#``/``/``. The model title, when set, becomes its rdfs:label.
        """
        ont_uri = URIRef(self.config.default_namespace.rstrip("#/"))

        self._graph.add((ont_uri, RDF.type, OWL.Ontology))

        if model.title:
            self._graph.add(
                (ont_uri, RDFS.label, Literal(model.title, lang=self.config.language))
            )

    def _convert_class(self, cls: PumlClass) -> None:
        """Convert a PlantUML class to owl:Class and properties.

        Registers the class URI under both ``cls.name`` and
        ``cls.qualified_name``, emits the class declaration plus optional
        label and comment, then converts each attribute.
        """
        ns = self._get_namespace_for_class(cls)
        class_uri = ns[cls.name]  # Use local name only

        # Store by local and qualified name for relationship lookups
        self._class_uris[cls.name] = class_uri
        self._class_uris[cls.qualified_name] = class_uri

        self._graph.add((class_uri, RDF.type, OWL.Class))

        # Use display_name for label if available, else convert local name
        if self.config.generate_labels:
            if cls.display_name:
                label = cls.display_name
            elif self.config.camel_to_label:
                label = self._camel_to_label(cls.name)
            else:
                label = cls.name
            self._graph.add(
                (class_uri, RDFS.label, Literal(label, lang=self.config.language))
            )

        # Add comment from note
        if cls.note:
            self._graph.add(
                (class_uri, RDFS.comment, Literal(cls.note, lang=self.config.language))
            )

        # Abstract classes currently get no extra triples; a custom
        # annotation could be added here.

        # Convert attributes to datatype properties
        for attr in cls.attributes:
            self._convert_attribute(attr, class_uri, ns)

    def _convert_attribute(
        self, attr: PumlAttribute, domain_class: URIRef, ns: Namespace
    ) -> None:
        """Convert a class attribute to owl:DatatypeProperty.

        Args:
            attr: Attribute providing ``name`` and optional ``datatype``.
            domain_class: URI of the owning class, used as rdfs:domain.
            ns: Namespace in which the property URI is minted.
        """
        prop_uri = ns[attr.name]
        self._property_uris[attr.name] = prop_uri

        # Add property declaration
        self._graph.add((prop_uri, RDF.type, OWL.DatatypeProperty))

        # Add domain
        self._graph.add((prop_uri, RDFS.domain, domain_class))

        # Add range if datatype specified
        if attr.datatype:
            xsd_type = self._map_datatype(attr.datatype)
            self._graph.add((prop_uri, RDFS.range, xsd_type))

        # Add label
        if self.config.generate_labels:
            label = self._camel_to_label(attr.name) if self.config.camel_to_label else attr.name
            self._graph.add(
                (prop_uri, RDFS.label, Literal(label, lang=self.config.language))
            )

    def _convert_relationship(self, rel: PumlRelationship) -> None:
        """Convert a PlantUML relationship to RDF.

        Inheritance becomes rdfs:subClassOf; every other relationship type
        is converted to an object property. A relationship whose source or
        target class is unknown is skipped with a warning.
        """
        source_uri = self._class_uris.get(rel.source)
        target_uri = self._class_uris.get(rel.target)

        if source_uri is None:
            self._warnings.append(f"Unknown source class in relationship: {rel.source}")
            return
        if target_uri is None:
            self._warnings.append(f"Unknown target class in relationship: {rel.target}")
            return

        if rel.rel_type == RelationshipType.INHERITANCE:
            # Source is subclass of target
            self._graph.add((source_uri, RDFS.subClassOf, target_uri))
        else:
            # Create object property for associations
            self._convert_association(rel, source_uri, target_uri)

    def _convert_association(
        self, rel: PumlRelationship, source_uri: URIRef, target_uri: URIRef
    ) -> None:
        """Convert an association to owl:ObjectProperty.

        The property name comes from the relationship label when present,
        otherwise it is ``has<target>``. The property is minted in the
        namespace of the source class, with the source as domain and the
        target as range.
        """
        if rel.label:
            prop_name = self._label_to_property_name(rel.label)
        else:
            prop_name = f"has{rel.target}"

        # Get namespace from source class
        ns = self._get_namespace_for_class_uri(source_uri)

        prop_uri = ns[prop_name]
        self._property_uris[prop_name] = prop_uri

        # Add property declaration
        self._graph.add((prop_uri, RDF.type, OWL.ObjectProperty))

        # Add domain and range
        self._graph.add((prop_uri, RDFS.domain, source_uri))
        self._graph.add((prop_uri, RDFS.range, target_uri))

        # Add label
        if self.config.generate_labels:
            label = rel.label or self._camel_to_label(prop_name)
            self._graph.add(
                (prop_uri, RDFS.label, Literal(label, lang=self.config.language))
            )

        # Cardinalities (rel.source_cardinality / rel.target_cardinality) are
        # not emitted yet; full OWL restrictions would be more complex.

    def _get_namespace_for_class(self, cls: PumlClass) -> Namespace:
        """Get the appropriate namespace for a class.

        Returns the namespace registered for ``cls.package`` when there is
        one, otherwise the default namespace.
        """
        if cls.package and cls.package in self._namespaces:
            return self._namespaces[cls.package]
        return self._namespaces[None]

    def _get_namespace_for_class_uri(self, class_uri: URIRef) -> Namespace:
        """Get the namespace containing a class URI.

        When several registered namespaces are a prefix of the URI, the
        longest one wins, so a default namespace that happens to be a
        prefix of a package namespace does not shadow it. Falls back to
        the default namespace when nothing matches.
        """
        uri_str = str(class_uri)
        matches = [
            ns for ns in self._namespaces.values() if uri_str.startswith(str(ns))
        ]
        if matches:
            return max(matches, key=lambda ns: len(str(ns)))
        # The default namespace is stored under the None key.
        return self._namespaces[None]

    def _map_datatype(self, type_name: str) -> URIRef:
        """Map a PlantUML type name to XSD datatype.

        Args:
            type_name: Type name as written in the diagram. Surrounding
                whitespace is ignored and matching against XSD_TYPE_MAP is
                case-insensitive.

        Returns:
            The mapped XSD datatype; for ``xsd:``-qualified names, the XSD
            term with that local name; otherwise ``xsd:string`` (with a
            warning recorded).
        """
        stripped = type_name.strip()
        normalised = stripped.lower()

        if normalised in XSD_TYPE_MAP:
            return XSD_TYPE_MAP[normalised]

        # Some map keys are camelCase (e.g. 'gYearMonth'), so fall back to a
        # case-insensitive scan when the lowercase key is not present.
        for key, uri in XSD_TYPE_MAP.items():
            if key.lower() == normalised:
                return uri

        # Check for qualified XSD types; the local name is used verbatim
        if normalised.startswith("xsd:"):
            local = stripped[4:]
            return XSD[local]

        # Default to string
        self._warnings.append(f"Unknown datatype '{type_name}', defaulting to xsd:string")
        return XSD.string

    def _camel_to_label(self, name: str) -> str:
        """Convert camelCase or PascalCase to readable label.

        Examples:
            'FloorArea' -> 'floor area'
            'hasBuilding' -> 'has building'
            'constructionYear' -> 'construction year'
        """
        # Insert space between a lowercase letter and a following uppercase
        result = re.sub(r"([a-z])([A-Z])", r"\1 \2", name)
        # Insert space between an uppercase run and a following capitalised word
        result = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", result)
        return result.lower()

    def _label_to_property_name(self, label: str) -> str:
        """Convert a relationship label to a valid property name.

        If the label contains no spaces it is kept as is apart from
        dropping non-alphanumeric characters. Otherwise the words are
        joined in camelCase. A label with no alphanumeric characters at
        all yields ``"property"`` rather than an empty name.

        Examples:
            'hasFloor' -> 'hasFloor' (preserved)
            'has floor' -> 'hasFloor' (converted)
            'is located in' -> 'isLocatedIn'
        """
        label = label.strip()

        if " " not in label:
            # Already a single token - preserve case
            candidate = label
        else:
            # Multi-word: first word lowercase, rest capitalised
            first, *rest = label.split()
            candidate = first.lower() + "".join(word.capitalize() for word in rest)

        # Remove non-alphanumeric characters
        cleaned = re.sub(r"[^a-zA-Z0-9]", "", candidate)
        return cleaned or "property"
@@ -0,0 +1,200 @@
1
+ """Merge generated RDF with existing ontologies.
2
+
3
+ This module provides functionality to merge newly generated RDF
4
+ from PlantUML with existing ontology files, preserving manually
5
+ added content while updating what's defined in the diagram.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ from rdflib import Graph, Namespace, URIRef, RDF, RDFS
13
+ from rdflib.namespace import OWL
14
+
15
+
16
@dataclass
class MergeResult:
    """Result of merging two graphs.

    Attributes:
        graph: The merged graph
        added_count: Number of triples added from new graph
        updated_count: Number of triples updated (replaced)
        preserved_count: Number of triples preserved from existing
        conflicts: List of conflict descriptions. May be passed as None,
            in which case it is replaced by a fresh empty list.
    """

    graph: Graph
    added_count: int = 0
    updated_count: int = 0
    preserved_count: int = 0
    # None is only a construction-time placeholder; see __post_init__.
    conflicts: Optional[list[str]] = None

    def __post_init__(self) -> None:
        """Give each instance its own conflict list when none was supplied."""
        if self.conflicts is None:
            self.conflicts = []
37
+
38
+
39
class OntologyMerger:
    """Merges generated RDF with existing ontology content.

    The merger follows these principles:
    1. Entities defined in PlantUML are authoritative - their
       rdfs:subClassOf, domain, range etc. are updated
    2. Additional annotations (comments, labels) in the existing
       file are preserved if not explicitly defined in PlantUML
    3. Entities only in the existing file are preserved
    4. Replaced authoritative values are reported as conflicts

    Example:
        merger = OntologyMerger()
        result = merger.merge(new_graph, existing_path)
        result.graph.serialize("merged.ttl", format="turtle")
    """

    # Predicates that PlantUML defines authoritatively
    AUTHORITATIVE_PREDICATES = {
        RDF.type,
        RDFS.subClassOf,
        RDFS.domain,
        RDFS.range,
        RDFS.subPropertyOf,
    }

    # Predicates to merge (keep both if different)
    MERGEABLE_PREDICATES = {
        RDFS.label,
        RDFS.comment,
        RDFS.seeAlso,
    }

    def __init__(self, preserve_existing: bool = True) -> None:
        """Initialise the merger.

        Args:
            preserve_existing: Stored on the instance as
                ``preserve_existing``. NOTE(review): merge_graphs does not
                consult this flag; values for authoritative predicates that
                the new graph redefines are always taken from the new graph.
                Decide whether the flag should instead retain the replaced
                values.
        """
        self.preserve_existing = preserve_existing

    def merge(
        self,
        new_graph: Graph,
        existing_path: Path,
        output_format: str = "turtle",
    ) -> MergeResult:
        """Merge new graph with existing ontology file.

        Args:
            new_graph: Newly generated RDF graph
            existing_path: Path to existing ontology file
            output_format: RDF format used to parse the existing file

        Returns:
            MergeResult with merged graph and statistics
        """
        existing = Graph()
        existing.parse(str(existing_path), format=output_format)

        return self.merge_graphs(new_graph, existing)

    def merge_graphs(
        self,
        new_graph: Graph,
        existing: Graph,
    ) -> MergeResult:
        """Merge two graphs.

        Existing triples are kept unless their subject appears in the new
        graph, their predicate is authoritative, the new graph supplies
        values for that subject/predicate, and the existing value is not
        among them. Such a triple is dropped, counted in ``updated_count``
        and reported as a conflict against each new value. All new-graph
        triples not already present are then added.

        Multiple values for one authoritative predicate inside the new
        graph (e.g. two superclasses) are not treated as conflicts and are
        all kept.

        Args:
            new_graph: Newly generated RDF graph
            existing: Existing ontology graph

        Returns:
            MergeResult with merged graph and statistics
        """
        result = MergeResult(graph=Graph())
        conflicts: list[str] = []

        # Copy all prefixes from both, existing first, without overriding
        for prefix, ns in existing.namespace_manager.namespaces():
            result.graph.bind(prefix, ns, override=False)
        for prefix, ns in new_graph.namespace_manager.namespaces():
            result.graph.bind(prefix, ns, override=False)

        # Get all subjects defined in new graph
        new_subjects = set(new_graph.subjects())

        # Process existing triples
        for s, p, o in existing:
            if s in new_subjects and p in self.AUTHORITATIVE_PREDICATES:
                new_values = set(new_graph.objects(s, p))
                if new_values and o not in new_values:
                    # The new graph redefines this predicate with different
                    # values: drop the existing value and report it.
                    result.updated_count += 1
                    conflicts.extend(
                        f"Conflict on {s} {p}: existing={o}, new={nv}"
                        # Sorted by string form for a stable report order.
                        for nv in sorted(new_values, key=str)
                    )
                    continue

            # Everything else from the existing graph is preserved
            result.graph.add((s, p, o))
            result.preserved_count += 1

        # Add triples from new graph that are not already present
        for triple in new_graph:
            if triple not in result.graph:
                result.graph.add(triple)
                result.added_count += 1

        result.conflicts = conflicts
        return result
175
+
176
+
177
def merge_with_existing(
    new_graph: Graph,
    existing_path: Path,
    output_path: Optional[Path] = None,
    output_format: str = "turtle",
) -> MergeResult:
    """Merge a generated graph into an existing ontology, optionally saving.

    A default ``OntologyMerger`` performs the merge. ``output_format`` is
    used both to parse the existing file and, when ``output_path`` is
    given, to serialise the merged graph.

    Args:
        new_graph: Newly generated RDF graph
        existing_path: Path to existing ontology
        output_path: Where to write the merged graph; nothing is written
            when omitted
        output_format: RDF format name passed to the parser and serialiser

    Returns:
        MergeResult with merged graph and statistics
    """
    merged = OntologyMerger().merge(new_graph, existing_path, output_format)

    # Nothing to write: hand back the in-memory result only.
    if not output_path:
        return merged

    merged.graph.serialize(str(output_path), format=output_format)
    return merged