rdf-construct 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +12 -0
- rdf_construct/__main__.py +0 -0
- rdf_construct/cli.py +3429 -0
- rdf_construct/core/__init__.py +33 -0
- rdf_construct/core/config.py +116 -0
- rdf_construct/core/ordering.py +219 -0
- rdf_construct/core/predicate_order.py +212 -0
- rdf_construct/core/profile.py +157 -0
- rdf_construct/core/selector.py +64 -0
- rdf_construct/core/serialiser.py +232 -0
- rdf_construct/core/utils.py +89 -0
- rdf_construct/cq/__init__.py +77 -0
- rdf_construct/cq/expectations.py +365 -0
- rdf_construct/cq/formatters/__init__.py +45 -0
- rdf_construct/cq/formatters/json.py +104 -0
- rdf_construct/cq/formatters/junit.py +104 -0
- rdf_construct/cq/formatters/text.py +146 -0
- rdf_construct/cq/loader.py +300 -0
- rdf_construct/cq/runner.py +321 -0
- rdf_construct/diff/__init__.py +59 -0
- rdf_construct/diff/change_types.py +214 -0
- rdf_construct/diff/comparator.py +338 -0
- rdf_construct/diff/filters.py +133 -0
- rdf_construct/diff/formatters/__init__.py +71 -0
- rdf_construct/diff/formatters/json.py +192 -0
- rdf_construct/diff/formatters/markdown.py +210 -0
- rdf_construct/diff/formatters/text.py +195 -0
- rdf_construct/docs/__init__.py +60 -0
- rdf_construct/docs/config.py +238 -0
- rdf_construct/docs/extractors.py +603 -0
- rdf_construct/docs/generator.py +360 -0
- rdf_construct/docs/renderers/__init__.py +7 -0
- rdf_construct/docs/renderers/html.py +803 -0
- rdf_construct/docs/renderers/json.py +390 -0
- rdf_construct/docs/renderers/markdown.py +628 -0
- rdf_construct/docs/search.py +278 -0
- rdf_construct/docs/templates/html/base.html.jinja +44 -0
- rdf_construct/docs/templates/html/class.html.jinja +152 -0
- rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
- rdf_construct/docs/templates/html/index.html.jinja +110 -0
- rdf_construct/docs/templates/html/instance.html.jinja +90 -0
- rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
- rdf_construct/docs/templates/html/property.html.jinja +124 -0
- rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
- rdf_construct/lint/__init__.py +75 -0
- rdf_construct/lint/config.py +214 -0
- rdf_construct/lint/engine.py +396 -0
- rdf_construct/lint/formatters.py +327 -0
- rdf_construct/lint/rules.py +692 -0
- rdf_construct/localise/__init__.py +114 -0
- rdf_construct/localise/config.py +508 -0
- rdf_construct/localise/extractor.py +427 -0
- rdf_construct/localise/formatters/__init__.py +36 -0
- rdf_construct/localise/formatters/markdown.py +229 -0
- rdf_construct/localise/formatters/text.py +224 -0
- rdf_construct/localise/merger.py +346 -0
- rdf_construct/localise/reporter.py +356 -0
- rdf_construct/main.py +6 -0
- rdf_construct/merge/__init__.py +165 -0
- rdf_construct/merge/config.py +354 -0
- rdf_construct/merge/conflicts.py +281 -0
- rdf_construct/merge/formatters.py +426 -0
- rdf_construct/merge/merger.py +425 -0
- rdf_construct/merge/migrator.py +339 -0
- rdf_construct/merge/rules.py +377 -0
- rdf_construct/merge/splitter.py +1102 -0
- rdf_construct/puml2rdf/__init__.py +103 -0
- rdf_construct/puml2rdf/config.py +230 -0
- rdf_construct/puml2rdf/converter.py +420 -0
- rdf_construct/puml2rdf/merger.py +200 -0
- rdf_construct/puml2rdf/model.py +202 -0
- rdf_construct/puml2rdf/parser.py +565 -0
- rdf_construct/puml2rdf/validators.py +451 -0
- rdf_construct/refactor/__init__.py +72 -0
- rdf_construct/refactor/config.py +362 -0
- rdf_construct/refactor/deprecator.py +328 -0
- rdf_construct/refactor/formatters/__init__.py +8 -0
- rdf_construct/refactor/formatters/text.py +311 -0
- rdf_construct/refactor/renamer.py +294 -0
- rdf_construct/shacl/__init__.py +56 -0
- rdf_construct/shacl/config.py +166 -0
- rdf_construct/shacl/converters.py +520 -0
- rdf_construct/shacl/generator.py +364 -0
- rdf_construct/shacl/namespaces.py +93 -0
- rdf_construct/stats/__init__.py +29 -0
- rdf_construct/stats/collector.py +178 -0
- rdf_construct/stats/comparator.py +298 -0
- rdf_construct/stats/formatters/__init__.py +83 -0
- rdf_construct/stats/formatters/json.py +38 -0
- rdf_construct/stats/formatters/markdown.py +153 -0
- rdf_construct/stats/formatters/text.py +186 -0
- rdf_construct/stats/metrics/__init__.py +26 -0
- rdf_construct/stats/metrics/basic.py +147 -0
- rdf_construct/stats/metrics/complexity.py +137 -0
- rdf_construct/stats/metrics/connectivity.py +130 -0
- rdf_construct/stats/metrics/documentation.py +128 -0
- rdf_construct/stats/metrics/hierarchy.py +207 -0
- rdf_construct/stats/metrics/properties.py +88 -0
- rdf_construct/uml/__init__.py +22 -0
- rdf_construct/uml/context.py +194 -0
- rdf_construct/uml/mapper.py +371 -0
- rdf_construct/uml/odm_renderer.py +789 -0
- rdf_construct/uml/renderer.py +684 -0
- rdf_construct/uml/uml_layout.py +393 -0
- rdf_construct/uml/uml_style.py +613 -0
- rdf_construct-0.3.0.dist-info/METADATA +496 -0
- rdf_construct-0.3.0.dist-info/RECORD +110 -0
- rdf_construct-0.3.0.dist-info/WHEEL +4 -0
- rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
- rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1102 @@
|
|
|
1
|
+
"""Core split logic for modularising RDF ontologies.
|
|
2
|
+
|
|
3
|
+
This module provides the OntologySplitter class that:
|
|
4
|
+
- Splits a monolithic ontology into multiple modules
|
|
5
|
+
- Supports namespace-based and explicit entity-based splitting
|
|
6
|
+
- Tracks cross-module dependencies
|
|
7
|
+
- Generates owl:imports declarations
|
|
8
|
+
- Produces a manifest documenting the split
|
|
9
|
+
- Supports data migration by instance type
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
import yaml
|
|
17
|
+
from rdflib import Graph, URIRef, Namespace
|
|
18
|
+
from rdflib.namespace import RDF, RDFS, OWL
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def select_classes(graph: Graph) -> set[URIRef]:
    """Collect every named class declared in a graph.

    Args:
        graph: RDF graph to scan.

    Returns:
        URIRefs of all subjects typed as owl:Class or rdfs:Class.
        Subjects that are not URIRefs (e.g. blank nodes) are skipped.
    """
    return {
        subject
        for class_type in (OWL.Class, RDFS.Class)
        for subject in graph.subjects(RDF.type, class_type)
        if isinstance(subject, URIRef)
    }
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def select_properties(graph: Graph) -> set[URIRef]:
    """Collect every named property declared in a graph.

    Args:
        graph: RDF graph to scan.

    Returns:
        URIRefs of all subjects typed as owl:ObjectProperty,
        owl:DatatypeProperty, owl:AnnotationProperty or rdf:Property.
        Subjects that are not URIRefs are skipped.
    """
    recognised_kinds = (
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
        RDF.Property,
    )
    found: set[URIRef] = set()
    for kind in recognised_kinds:
        found.update(
            subject
            for subject in graph.subjects(RDF.type, kind)
            if isinstance(subject, URIRef)
        )
    return found
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class ModuleDefinition:
    """Definition of a single module to extract.

    Attributes:
        name: Module identifier (used in manifest).
        output: Output filename.
        description: Human-readable description.
        classes: Explicit list of class URIs to include.
        properties: Explicit list of property URIs to include.
        namespaces: Namespace prefixes to include (for auto-detection).
        include_descendants: Whether to include rdfs:subClassOf/subPropertyOf descendants.
        imports: Explicit owl:imports to add.
        auto_imports: Whether to generate imports from detected dependencies.
    """

    name: str
    output: str
    description: str | None = None
    classes: list[str] = field(default_factory=list)
    properties: list[str] = field(default_factory=list)
    namespaces: list[str] = field(default_factory=list)
    include_descendants: bool = False
    imports: list[str] = field(default_factory=list)
    auto_imports: bool = True

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ModuleDefinition":
        """Create from dictionary.

        ``classes`` and ``properties`` are read from the nested ``include``
        section when present there, otherwise from the top level.
        ``namespaces`` is read from the top level first and falls back to
        ``include``. Keys that are present but empty (YAML ``key:`` with no
        value, which loads as ``None``) are treated as missing, and a single
        string is accepted where a list is expected.

        Args:
            data: Dictionary with module configuration.

        Returns:
            ModuleDefinition instance.

        Raises:
            KeyError: If ``name`` is missing.
        """
        # ``include:`` with no value loads as None; treat it as an empty section.
        include = data.get("include") or {}

        def as_list(value: Any) -> list[str]:
            # Normalise None / scalar / sequence into a fresh list.
            if not value:
                return []
            if isinstance(value, str):
                return [value]
            return list(value)

        name = data["name"]
        return cls(
            name=name,
            output=data.get("output") or f"{name}.ttl",
            description=data.get("description"),
            classes=as_list(include.get("classes", data.get("classes"))),
            properties=as_list(include.get("properties", data.get("properties"))),
            # Top level wins so configurations that already worked are unaffected.
            namespaces=as_list(data.get("namespaces", include.get("namespaces"))),
            include_descendants=data.get("include_descendants", False),
            imports=as_list(data.get("imports")),
            auto_imports=data.get("auto_imports", True),
        )
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class UnmatchedStrategy:
    """How to treat entities that no configured module claims.

    Attributes:
        strategy: 'common' collects the entities into a common module;
            'error' makes the split fail instead.
        common_module: Name given to the common module (used by 'common').
        common_output: Filename the common module is written to.
    """

    # Accepted values: "common" or "error".
    strategy: str = "common"
    common_module: str = "common"
    common_output: str = "common.ttl"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@dataclass
class SplitDataConfig:
    """Settings for splitting instance-data files by instance type.

    Attributes:
        sources: Data files to be split.
        output_dir: Directory receiving the split data files, or None
            when no directory was configured.
        prefix: Text prepended to output filenames (e.g., "data_").
    """

    sources: list[Path] = field(default_factory=list)
    output_dir: Path | None = None
    prefix: str = "data_"
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dataclass
class SplitConfig:
    """Complete configuration for a split operation.

    Attributes:
        source: Path to the source ontology file.
        output_dir: Directory for output module files.
        modules: List of module definitions.
        unmatched: Strategy for unmatched entities.
        split_data: Optional data splitting configuration.
        generate_manifest: Whether to generate manifest.yml.
        dry_run: If True, report what would happen without writing.
    """

    source: Path
    output_dir: Path
    modules: list[ModuleDefinition] = field(default_factory=list)
    unmatched: UnmatchedStrategy = field(default_factory=UnmatchedStrategy)
    split_data: SplitDataConfig | None = None
    generate_manifest: bool = True
    dry_run: bool = False

    @classmethod
    def from_yaml(cls, path: Path) -> "SplitConfig":
        """Load configuration from a YAML file.

        Args:
            path: Path to YAML configuration file.

        Returns:
            SplitConfig instance.

        Raises:
            FileNotFoundError: If config file doesn't exist.
            ValueError: If the file does not contain a YAML mapping
                (for example, it is empty).
        """
        if not path.exists():
            raise FileNotFoundError(f"Config file not found: {path}")

        # Explicit encoding: the platform default is locale-dependent.
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)

        # safe_load returns None for an empty document and may return a list
        # or scalar; only a mapping can be interpreted as configuration.
        if not isinstance(data, dict):
            raise ValueError(f"Config file must contain a YAML mapping: {path}")

        return cls.from_dict(data, config_dir=path.parent)

    @classmethod
    def from_dict(cls, data: dict[str, Any], config_dir: Path | None = None) -> "SplitConfig":
        """Create from dictionary.

        Settings are read from the ``split`` key when present, otherwise
        from the dictionary itself. Relative paths are resolved against
        ``config_dir``. Sections that are present but empty (``None``) are
        treated as missing.

        Args:
            data: Dictionary with configuration.
            config_dir: Directory containing config file (for relative paths).
                Defaults to the current directory.

        Returns:
            SplitConfig instance.
        """
        base_dir = config_dir or Path(".")
        settings = data.get("split", data)

        def anchored(raw: Any) -> Path:
            # Absolute paths are kept; relative ones hang off the config directory.
            candidate = Path(raw)
            return candidate if candidate.is_absolute() else base_dir / candidate

        modules = [ModuleDefinition.from_dict(m) for m in settings.get("modules") or []]

        unmatched_data = settings.get("unmatched") or {}
        unmatched = UnmatchedStrategy(
            strategy=unmatched_data.get("strategy", "common"),
            common_module=unmatched_data.get("module", "common"),
            common_output=unmatched_data.get("output", "common.ttl"),
        )

        # Data splitting is enabled by the mere presence of the key.
        split_data_config = None
        if "split_data" in settings:
            sd = settings["split_data"] or {}
            data_output = sd.get("output_dir")
            split_data_config = SplitDataConfig(
                sources=[anchored(p) for p in sd.get("sources") or []],
                output_dir=anchored(data_output) if data_output else None,
                prefix=sd.get("prefix", "data_"),
            )

        return cls(
            # A missing "source" resolves to the config directory itself.
            source=anchored(settings.get("source", "")),
            output_dir=anchored(settings.get("output_dir", "modules")),
            modules=modules,
            unmatched=unmatched,
            split_data=split_data_config,
            generate_manifest=settings.get("generate_manifest", True),
            dry_run=settings.get("dry_run", False),
        )
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
@dataclass
class ModuleStats:
    """Summary figures describing one generated module.

    Attributes:
        name: Module name.
        file: Output filename.
        classes: Count of classes in the module.
        properties: Count of properties in the module.
        triples: Count of all triples in the module.
        imports: owl:imports targets of the module.
        dependencies: Names of modules this module depends on.
    """

    name: str
    file: str
    classes: int = 0
    properties: int = 0
    triples: int = 0
    imports: list[str] = field(default_factory=list)
    dependencies: list[str] = field(default_factory=list)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
@dataclass
class SplitResult:
    """Outcome of a split operation.

    Attributes:
        modules: Module name -> module graph.
        module_stats: One statistics record per module.
        entity_assignments: Entity URI -> name of the module holding it.
        unmatched_entities: URIs of entities that no module claimed.
        dependencies: Module name -> names of modules it depends on.
        success: False when the split ended with an error.
        error: Description of the failure when success is False.
        data_modules: Split data graphs by module (if data splitting enabled).
    """

    modules: dict[str, Graph] = field(default_factory=dict)
    module_stats: list[ModuleStats] = field(default_factory=list)
    entity_assignments: dict[str, str] = field(default_factory=dict)
    unmatched_entities: set[str] = field(default_factory=set)
    dependencies: dict[str, set[str]] = field(default_factory=dict)
    success: bool = True
    error: str | None = None
    data_modules: dict[str, Graph] = field(default_factory=dict)

    @property
    def total_modules(self) -> int:
        """Number of module graphs held in this result."""
        module_count = len(self.modules)
        return module_count

    @property
    def total_triples(self) -> int:
        """Sum of the triple counts of all module graphs."""
        triple_count = 0
        for module_graph in self.modules.values():
            triple_count += len(module_graph)
        return triple_count
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class OntologySplitter:
|
|
310
|
+
"""Splits a monolithic ontology into multiple modules.
|
|
311
|
+
|
|
312
|
+
The splitter:
|
|
313
|
+
1. Loads the source ontology
|
|
314
|
+
2. Assigns entities to modules based on configuration
|
|
315
|
+
3. Handles unmatched entities per strategy
|
|
316
|
+
4. Detects cross-module dependencies
|
|
317
|
+
5. Generates owl:imports declarations
|
|
318
|
+
6. Writes module files
|
|
319
|
+
7. Produces a manifest documenting the split
|
|
320
|
+
|
|
321
|
+
Example:
|
|
322
|
+
config = SplitConfig.from_yaml(Path("split.yml"))
|
|
323
|
+
splitter = OntologySplitter(config)
|
|
324
|
+
result = splitter.split()
|
|
325
|
+
|
|
326
|
+
if result.success:
|
|
327
|
+
splitter.write_modules(result)
|
|
328
|
+
"""
|
|
329
|
+
|
|
330
|
+
def __init__(self, config: SplitConfig):
|
|
331
|
+
"""Initialize the splitter.
|
|
332
|
+
|
|
333
|
+
Args:
|
|
334
|
+
config: Split configuration.
|
|
335
|
+
"""
|
|
336
|
+
self.config = config
|
|
337
|
+
self.source_graph: Graph | None = None
|
|
338
|
+
self.namespace_map: dict[str, str] = {} # namespace -> module name
|
|
339
|
+
|
|
340
|
+
def split(self) -> SplitResult:
    """Execute the split operation.

    Runs the pipeline in order: load the source, map namespaces, assign
    entities, build module graphs, handle unmatched entities, detect
    dependencies, add imports, compute statistics and (if configured)
    split data files. The pipeline stops at the first step that marks the
    result as failed.

    Returns:
        SplitResult with module graphs and statistics. On failure,
        ``success`` is False and ``error`` describes the problem.
    """
    result = SplitResult()

    # Load source ontology; any loading problem is reported, not raised.
    try:
        self.source_graph = self._load_source()
    except Exception as e:
        result.success = False
        result.error = f"Failed to load source: {e}"
        return result

    # Build namespace -> module mapping (for namespace-based splitting)
    self._build_namespace_map()

    # Assign entities to modules
    assignments = self._assign_entities(result)
    if not result.success:
        return result

    # Create module graphs
    self._create_module_graphs(assignments, result)

    # Handle unmatched entities
    if result.unmatched_entities:
        self._handle_unmatched(result)
        if not result.success:
            # The "error" strategy rejected the split: skip dependency
            # analysis, import generation, stats and data-file processing.
            return result

    # Detect dependencies and generate imports
    self._detect_dependencies(result)
    self._add_imports(result)

    # Calculate statistics
    self._calculate_stats(result)

    # Split data if configured
    data_config = self.config.split_data
    if data_config and data_config.sources:
        self._split_data(result)

    return result
|
|
384
|
+
|
|
385
|
+
def _load_source(self) -> Graph:
|
|
386
|
+
"""Load the source ontology file.
|
|
387
|
+
|
|
388
|
+
Returns:
|
|
389
|
+
Loaded RDF graph.
|
|
390
|
+
|
|
391
|
+
Raises:
|
|
392
|
+
FileNotFoundError: If source doesn't exist.
|
|
393
|
+
ValueError: If source can't be parsed.
|
|
394
|
+
"""
|
|
395
|
+
if not self.config.source.exists():
|
|
396
|
+
raise FileNotFoundError(f"Source not found: {self.config.source}")
|
|
397
|
+
|
|
398
|
+
graph = Graph()
|
|
399
|
+
|
|
400
|
+
# Determine format from extension
|
|
401
|
+
ext = self.config.source.suffix.lower()
|
|
402
|
+
format_map = {
|
|
403
|
+
".ttl": "turtle",
|
|
404
|
+
".turtle": "turtle",
|
|
405
|
+
".rdf": "xml",
|
|
406
|
+
".xml": "xml",
|
|
407
|
+
".owl": "xml",
|
|
408
|
+
".n3": "n3",
|
|
409
|
+
".nt": "nt",
|
|
410
|
+
".jsonld": "json-ld",
|
|
411
|
+
}
|
|
412
|
+
rdf_format = format_map.get(ext, "turtle")
|
|
413
|
+
|
|
414
|
+
graph.parse(self.config.source.as_posix(), format=rdf_format)
|
|
415
|
+
|
|
416
|
+
return graph
|
|
417
|
+
|
|
418
|
+
def _build_namespace_map(self) -> None:
|
|
419
|
+
"""Build mapping from namespaces to module names."""
|
|
420
|
+
self.namespace_map = {}
|
|
421
|
+
|
|
422
|
+
for module in self.config.modules:
|
|
423
|
+
for ns in module.namespaces:
|
|
424
|
+
self.namespace_map[ns] = module.name
|
|
425
|
+
|
|
426
|
+
def _assign_entities(self, result: SplitResult) -> dict[str, set[URIRef]]:
|
|
427
|
+
"""Assign entities to modules.
|
|
428
|
+
|
|
429
|
+
Args:
|
|
430
|
+
result: SplitResult to populate with assignments.
|
|
431
|
+
|
|
432
|
+
Returns:
|
|
433
|
+
Dictionary of module name -> set of entity URIs.
|
|
434
|
+
"""
|
|
435
|
+
if self.source_graph is None:
|
|
436
|
+
result.success = False
|
|
437
|
+
result.error = "Source graph not loaded"
|
|
438
|
+
return {}
|
|
439
|
+
|
|
440
|
+
assignments: dict[str, set[URIRef]] = {m.name: set() for m in self.config.modules}
|
|
441
|
+
|
|
442
|
+
# Get all classes and properties from source
|
|
443
|
+
all_classes = select_classes(self.source_graph)
|
|
444
|
+
all_properties = select_properties(self.source_graph)
|
|
445
|
+
all_entities = all_classes | all_properties
|
|
446
|
+
|
|
447
|
+
# Assign entities to modules
|
|
448
|
+
for module in self.config.modules:
|
|
449
|
+
# By explicit class list
|
|
450
|
+
for cls_uri in module.classes:
|
|
451
|
+
uri = self._expand_curie(cls_uri)
|
|
452
|
+
if uri in all_entities:
|
|
453
|
+
assignments[module.name].add(uri)
|
|
454
|
+
result.entity_assignments[str(uri)] = module.name
|
|
455
|
+
|
|
456
|
+
# Include descendants if requested
|
|
457
|
+
if module.include_descendants:
|
|
458
|
+
descendants = self._get_descendants(uri, all_classes)
|
|
459
|
+
for desc in descendants:
|
|
460
|
+
if str(desc) not in result.entity_assignments:
|
|
461
|
+
assignments[module.name].add(desc)
|
|
462
|
+
result.entity_assignments[str(desc)] = module.name
|
|
463
|
+
|
|
464
|
+
# By explicit property list
|
|
465
|
+
for prop_uri in module.properties:
|
|
466
|
+
uri = self._expand_curie(prop_uri)
|
|
467
|
+
if uri in all_entities:
|
|
468
|
+
assignments[module.name].add(uri)
|
|
469
|
+
result.entity_assignments[str(uri)] = module.name
|
|
470
|
+
|
|
471
|
+
# Include descendants if requested
|
|
472
|
+
if module.include_descendants:
|
|
473
|
+
descendants = self._get_descendants(uri, all_properties, is_property=True)
|
|
474
|
+
for desc in descendants:
|
|
475
|
+
if str(desc) not in result.entity_assignments:
|
|
476
|
+
assignments[module.name].add(desc)
|
|
477
|
+
result.entity_assignments[str(desc)] = module.name
|
|
478
|
+
|
|
479
|
+
# By namespace
|
|
480
|
+
for ns in module.namespaces:
|
|
481
|
+
for entity in all_entities:
|
|
482
|
+
if str(entity).startswith(ns):
|
|
483
|
+
if str(entity) not in result.entity_assignments:
|
|
484
|
+
assignments[module.name].add(entity)
|
|
485
|
+
result.entity_assignments[str(entity)] = module.name
|
|
486
|
+
|
|
487
|
+
# Find unmatched entities
|
|
488
|
+
for entity in all_entities:
|
|
489
|
+
if str(entity) not in result.entity_assignments:
|
|
490
|
+
result.unmatched_entities.add(str(entity))
|
|
491
|
+
|
|
492
|
+
return assignments
|
|
493
|
+
|
|
494
|
+
def _expand_curie(self, curie: str) -> URIRef:
    """Turn a CURIE into a full URI via the source graph's prefix bindings.

    Strings starting with ``http://`` or ``https://`` are taken as full
    URIs. A string whose prefix is not bound in the source graph, or that
    contains no colon, is wrapped unchanged.

    Args:
        curie: CURIE or full URI string.

    Returns:
        URIRef of the expanded URI.
    """
    graph = self.source_graph
    if graph is None or curie.startswith(("http://", "https://")):
        return URIRef(curie)

    wanted_prefix, colon, local_name = curie.partition(":")
    if colon:
        for bound_prefix, namespace in graph.namespace_manager.namespaces():
            if bound_prefix == wanted_prefix:
                return URIRef(str(namespace) + local_name)

    return URIRef(curie)
|
|
518
|
+
|
|
519
|
+
def _get_descendants(
    self,
    uri: URIRef,
    entity_set: set[URIRef],
    is_property: bool = False,
) -> set[URIRef]:
    """Find every direct and indirect subclass/subproperty of an entity.

    Only URIRef subjects that are members of ``entity_set`` are collected
    and followed further.

    Args:
        uri: Parent entity URI.
        entity_set: Set of all entities to consider.
        is_property: Follow rdfs:subPropertyOf instead of rdfs:subClassOf.

    Returns:
        Set of descendant URIs; empty when no source graph is loaded.
    """
    graph = self.source_graph
    if graph is None:
        return set()

    if is_property:
        link = RDFS.subPropertyOf
    else:
        link = RDFS.subClassOf

    found: set[URIRef] = set()
    pending = [uri]
    while pending:
        current = pending.pop()
        for child, _, _ in graph.triples((None, link, current)):
            if not isinstance(child, URIRef):
                continue
            if child not in entity_set or child in found:
                continue
            # Each child is queued at most once, so cycles terminate.
            found.add(child)
            pending.append(child)

    return found
|
|
551
|
+
|
|
552
|
+
def _create_module_graphs(
|
|
553
|
+
self,
|
|
554
|
+
assignments: dict[str, set[URIRef]],
|
|
555
|
+
result: SplitResult,
|
|
556
|
+
) -> None:
|
|
557
|
+
"""Create RDF graphs for each module.
|
|
558
|
+
|
|
559
|
+
Args:
|
|
560
|
+
assignments: Entity assignments per module.
|
|
561
|
+
result: SplitResult to populate with graphs.
|
|
562
|
+
"""
|
|
563
|
+
if self.source_graph is None:
|
|
564
|
+
return
|
|
565
|
+
|
|
566
|
+
for module in self.config.modules:
|
|
567
|
+
module_graph = Graph()
|
|
568
|
+
|
|
569
|
+
# Copy namespace bindings
|
|
570
|
+
for prefix, ns in self.source_graph.namespace_manager.namespaces():
|
|
571
|
+
module_graph.bind(prefix, ns)
|
|
572
|
+
|
|
573
|
+
# Add triples for assigned entities
|
|
574
|
+
entities = assignments.get(module.name, set())
|
|
575
|
+
for entity in entities:
|
|
576
|
+
# All triples where entity is subject
|
|
577
|
+
for s, p, o in self.source_graph.triples((entity, None, None)):
|
|
578
|
+
module_graph.add((s, p, o))
|
|
579
|
+
|
|
580
|
+
result.modules[module.name] = module_graph
|
|
581
|
+
|
|
582
|
+
def _handle_unmatched(self, result: SplitResult) -> None:
|
|
583
|
+
"""Handle entities that weren't assigned to any module.
|
|
584
|
+
|
|
585
|
+
Args:
|
|
586
|
+
result: SplitResult with unmatched entities.
|
|
587
|
+
"""
|
|
588
|
+
if not result.unmatched_entities:
|
|
589
|
+
return
|
|
590
|
+
|
|
591
|
+
if self.config.unmatched.strategy == "error":
|
|
592
|
+
result.success = False
|
|
593
|
+
result.error = (
|
|
594
|
+
f"Unmatched entities ({len(result.unmatched_entities)}): "
|
|
595
|
+
+ ", ".join(list(result.unmatched_entities)[:5])
|
|
596
|
+
+ ("..." if len(result.unmatched_entities) > 5 else "")
|
|
597
|
+
)
|
|
598
|
+
return
|
|
599
|
+
|
|
600
|
+
# Create common module
|
|
601
|
+
common_graph = Graph()
|
|
602
|
+
|
|
603
|
+
if self.source_graph is not None:
|
|
604
|
+
# Copy namespace bindings
|
|
605
|
+
for prefix, ns in self.source_graph.namespace_manager.namespaces():
|
|
606
|
+
common_graph.bind(prefix, ns)
|
|
607
|
+
|
|
608
|
+
# Add triples for unmatched entities
|
|
609
|
+
for entity_str in result.unmatched_entities:
|
|
610
|
+
entity = URIRef(entity_str)
|
|
611
|
+
for s, p, o in self.source_graph.triples((entity, None, None)):
|
|
612
|
+
common_graph.add((s, p, o))
|
|
613
|
+
|
|
614
|
+
# Record assignment
|
|
615
|
+
result.entity_assignments[entity_str] = self.config.unmatched.common_module
|
|
616
|
+
|
|
617
|
+
result.modules[self.config.unmatched.common_module] = common_graph
|
|
618
|
+
|
|
619
|
+
def _detect_dependencies(self, result: SplitResult) -> None:
|
|
620
|
+
"""Detect cross-module dependencies.
|
|
621
|
+
|
|
622
|
+
A module depends on another if it references entities from that module.
|
|
623
|
+
|
|
624
|
+
Args:
|
|
625
|
+
result: SplitResult to populate with dependencies.
|
|
626
|
+
"""
|
|
627
|
+
if self.source_graph is None:
|
|
628
|
+
return
|
|
629
|
+
|
|
630
|
+
for module_name, graph in result.modules.items():
|
|
631
|
+
deps: set[str] = set()
|
|
632
|
+
|
|
633
|
+
for s, p, o in graph:
|
|
634
|
+
# Check if object references an entity in another module
|
|
635
|
+
if isinstance(o, URIRef):
|
|
636
|
+
o_str = str(o)
|
|
637
|
+
if o_str in result.entity_assignments:
|
|
638
|
+
other_module = result.entity_assignments[o_str]
|
|
639
|
+
if other_module != module_name:
|
|
640
|
+
deps.add(other_module)
|
|
641
|
+
|
|
642
|
+
result.dependencies[module_name] = deps
|
|
643
|
+
|
|
644
|
+
def _add_imports(self, result: SplitResult) -> None:
|
|
645
|
+
"""Add owl:imports declarations to module graphs.
|
|
646
|
+
|
|
647
|
+
Args:
|
|
648
|
+
result: SplitResult with module graphs.
|
|
649
|
+
"""
|
|
650
|
+
for module in self.config.modules:
|
|
651
|
+
if module.name not in result.modules:
|
|
652
|
+
continue
|
|
653
|
+
|
|
654
|
+
graph = result.modules[module.name]
|
|
655
|
+
|
|
656
|
+
# Find or create ontology declaration
|
|
657
|
+
ontology_uri = self._get_or_create_ontology_uri(graph, module)
|
|
658
|
+
|
|
659
|
+
# Add explicit imports
|
|
660
|
+
for imp in module.imports:
|
|
661
|
+
graph.add((ontology_uri, OWL.imports, URIRef(imp)))
|
|
662
|
+
|
|
663
|
+
# Add auto-generated imports from dependencies
|
|
664
|
+
if module.auto_imports:
|
|
665
|
+
deps = result.dependencies.get(module.name, set())
|
|
666
|
+
for dep in deps:
|
|
667
|
+
# Find the module definition to get its output filename
|
|
668
|
+
dep_file = self._get_module_file(dep, result)
|
|
669
|
+
if dep_file:
|
|
670
|
+
graph.add((ontology_uri, OWL.imports, URIRef(dep_file)))
|
|
671
|
+
|
|
672
|
+
def _get_or_create_ontology_uri(self, graph: Graph, module: ModuleDefinition) -> URIRef:
    """Return the module graph's ontology subject, declaring one if absent.

    If the graph already holds an owl:Ontology declaration, its first
    subject is returned. Otherwise a URI is derived from the namespace
    bound to the empty prefix (trailing '#' and '/' characters removed),
    or from the module name when no such namespace exists, and declared
    as an owl:Ontology in the graph.

    Args:
        graph: Module graph.
        module: Module definition.

    Returns:
        Ontology URI.
    """
    # Look for existing ontology declaration
    declared = next(iter(graph.subjects(RDF.type, OWL.Ontology)), None)
    if declared is not None:
        return declared

    # Namespace bound to the empty prefix, if there is one.
    default_ns = next(
        (str(ns) for prefix, ns in graph.namespace_manager.namespaces() if prefix == ""),
        None,
    )

    if default_ns:
        ont_uri = URIRef(default_ns.rstrip("#/"))
    else:
        ont_uri = URIRef(f"http://example.org/{module.name}")

    graph.add((ont_uri, RDF.type, OWL.Ontology))
    return ont_uri
|
|
700
|
+
|
|
701
|
+
def _get_module_file(self, module_name: str, result: SplitResult) -> str | None:
|
|
702
|
+
"""Get the output filename for a module.
|
|
703
|
+
|
|
704
|
+
Args:
|
|
705
|
+
module_name: Name of the module.
|
|
706
|
+
result: SplitResult.
|
|
707
|
+
|
|
708
|
+
Returns:
|
|
709
|
+
Output filename or None.
|
|
710
|
+
"""
|
|
711
|
+
# Check defined modules
|
|
712
|
+
for module in self.config.modules:
|
|
713
|
+
if module.name == module_name:
|
|
714
|
+
return module.output
|
|
715
|
+
|
|
716
|
+
# Check common module
|
|
717
|
+
if module_name == self.config.unmatched.common_module:
|
|
718
|
+
return self.config.unmatched.common_output
|
|
719
|
+
|
|
720
|
+
return None
|
|
721
|
+
|
|
722
|
+
def _calculate_stats(self, result: SplitResult) -> None:
    """Append per-module statistics to ``result.module_stats``.

    Configured modules are processed in configuration order, followed by
    the common module; modules without a graph in ``result.modules`` are
    skipped.

    Args:
        result: SplitResult to populate with stats.
    """
    graphs = result.modules

    for definition in self.config.modules:
        if definition.name in graphs:
            result.module_stats.append(
                self._calculate_module_stats(
                    definition.name, definition.output, graphs[definition.name], result
                )
            )

    # The common module is not part of config.modules, so handle it separately.
    unmatched = self.config.unmatched
    common_name = unmatched.common_module
    if common_name in graphs:
        result.module_stats.append(
            self._calculate_module_stats(
                common_name, unmatched.common_output, graphs[common_name], result
            )
        )
|
|
746
|
+
|
|
747
|
+
def _calculate_module_stats(
    self,
    name: str,
    output: str,
    graph: Graph,
    result: SplitResult,
) -> ModuleStats:
    """Calculate statistics for a single module.

    Args:
        name: Module name.
        output: Output filename.
        graph: Module graph.
        result: SplitResult; its ``dependencies`` mapping is read for ``name``.

    Returns:
        ModuleStats instance with class/property/triple counts plus the
        module's imports and dependencies, both in sorted order.
    """
    class_types = (OWL.Class, RDFS.Class)
    property_types = (
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
        RDF.Property,
    )

    # Sets de-duplicate subjects that carry more than one of the listed types.
    classes = {s for t in class_types for s in graph.subjects(RDF.type, t)}
    properties = {s for t in property_types for s in graph.subjects(RDF.type, t)}

    # Graph and set iteration order is not stable between runs; sort so the
    # stats (and the manifest built from them) are reproducible.
    imports = sorted(str(o) for _, _, o in graph.triples((None, OWL.imports, None)))
    deps = sorted(result.dependencies.get(name, set()))

    return ModuleStats(
        name=name,
        file=output,
        classes=len(classes),
        properties=len(properties),
        triples=len(graph),
        imports=imports,
        dependencies=deps,
    )
|
|
791
|
+
|
|
792
|
+
def _split_data(self, result: SplitResult) -> None:
    """Partition instance data across modules according to rdf:type.

    Every subject whose rdf:type is an IRI assigned to a module has all of
    its triples copied into that module's data graph. The resulting graphs
    are stored on ``result.data_modules``.

    Args:
        result: SplitResult to populate with data modules.
    """
    split_cfg = self.config.split_data
    if split_cfg is None:
        return

    # Merge every existing data source into one graph; missing paths are skipped.
    merged = Graph()
    for src in split_cfg.sources:
        if src.exists():
            merged.parse(src.as_posix())

    # One (initially empty) data graph per configured module, plus the common
    # module when the ontology split produced one.
    buckets: dict[str, Graph] = {m.name: Graph() for m in self.config.modules}
    common_name = self.config.unmatched.common_module
    if common_name in result.modules:
        buckets[common_name] = Graph()

    # Carry the source prefix bindings over to every data graph.
    for bucket in buckets.values():
        for pfx, ns in merged.namespace_manager.namespaces():
            bucket.bind(pfx, ns)

    assignments = result.entity_assignments
    for subject, _, type_node in merged.triples((None, RDF.type, None)):
        if not isinstance(type_node, URIRef):
            continue
        type_iri = str(type_node)
        if type_iri not in assignments:
            continue
        owner = assignments[type_iri]
        if owner not in buckets:
            continue
        # Copy the full description of the instance, not just its type triple.
        for triple in merged.triples((subject, None, None)):
            buckets[owner].add(triple)

    result.data_modules = buckets
|
|
831
|
+
|
|
832
|
+
def write_modules(self, result: SplitResult) -> None:
    """Serialise module graphs (and data graphs, if any) as Turtle files.

    Does nothing when the configuration is in dry-run mode.

    Args:
        result: SplitResult with module graphs.
    """
    cfg = self.config
    if cfg.dry_run:
        return

    out_dir = cfg.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    # Configured modules first, then the common module, each only if the
    # split actually produced a graph under that name.
    targets = [(m.name, m.output) for m in cfg.modules]
    targets.append((cfg.unmatched.common_module, cfg.unmatched.common_output))
    for name, filename in targets:
        if name not in result.modules:
            continue
        destination = out_dir / filename
        result.modules[name].serialize(
            destination=destination.as_posix(), format="turtle"
        )

    # Data graphs are optional and may live in their own directory.
    if not (result.data_modules and cfg.split_data):
        return

    data_dir = cfg.split_data.output_dir or out_dir
    data_dir.mkdir(parents=True, exist_ok=True)

    for name, data_graph in result.data_modules.items():
        if len(data_graph) == 0:
            # Skip modules that received no instance data.
            continue
        file_prefix = cfg.split_data.prefix
        destination = data_dir / f"{file_prefix}{name}.ttl"
        data_graph.serialize(destination=destination.as_posix(), format="turtle")
|
|
869
|
+
|
|
870
|
+
def write_manifest(self, result: SplitResult) -> None:
    """Write ``manifest.yml`` documenting the split.

    Nothing is written in dry-run mode or when manifest generation is
    disabled in the configuration.

    Args:
        result: SplitResult with statistics.
    """
    if self.config.dry_run or not self.config.generate_manifest:
        return

    manifest = {
        "source": str(self.config.source),
        "output_dir": str(self.config.output_dir),
        "modules": [
            {
                "name": stats.name,
                "file": stats.file,
                "classes": stats.classes,
                "properties": stats.properties,
                "triples": stats.triples,
                "imports": stats.imports,
                "dependencies": stats.dependencies,
            }
            for stats in result.module_stats
        ],
        "summary": {
            "total_modules": result.total_modules,
            "total_triples": result.total_triples,
            "unmatched_entities": len(result.unmatched_entities),
        },
    }

    # Only include the dependency tree when there is something to show.
    dep_lines = self._format_dependency_graph(result)
    if dep_lines:
        manifest["dependency_graph"] = dep_lines

    # The manifest may be written without write_modules() having run first,
    # so make sure the target directory exists.
    self.config.output_dir.mkdir(parents=True, exist_ok=True)
    manifest_path = self.config.output_dir / "manifest.yml"

    # Explicit UTF-8 plus allow_unicode keeps the tree's box-drawing characters
    # readable instead of being dumped as \uXXXX escapes, and makes the file
    # independent of the platform's default encoding.
    with open(manifest_path, "w", encoding="utf-8") as f:
        yaml.safe_dump(
            manifest,
            f,
            default_flow_style=False,
            sort_keys=False,
            allow_unicode=True,
        )
|
|
909
|
+
|
|
910
|
+
def _format_dependency_graph(self, result: SplitResult) -> str:
    """Format the module dependency graph as a text tree.

    Trees are rooted at modules that depend on no other module of the
    split; ``_format_tree`` then lists the modules depending on each node
    beneath it. Rooting at modules that nothing depends on would always
    yield childless roots, because children are dependents.

    Args:
        result: SplitResult with dependencies.

    Returns:
        Newline-joined tree lines, or an empty string when no dependencies
        were recorded.
    """
    dependencies = result.dependencies
    if not dependencies:
        return ""

    def has_internal_dependency(name: str) -> bool:
        # Only dependencies on other modules of this split count; a module
        # that depends solely on itself or on unknown names is still a root.
        return any(
            dep != name and dep in result.modules
            for dep in dependencies.get(name, ())
        )

    roots = [name for name in result.modules if not has_internal_dependency(name)]

    if not roots:
        # Every module depends on another (a cycle): start from the first one.
        roots = list(result.modules)[:1]

    lines: list[str] = []
    for root in roots:
        self._format_tree(root, dependencies, lines, "")

    return "\n".join(lines)
|
|
937
|
+
|
|
938
|
+
def _format_tree(
    self,
    node: str,
    deps: dict[str, set[str]],
    lines: list[str],
    prefix: str,
    visited: set[str] | None = None,
    child_indent: str | None = None,
) -> None:
    """Recursively format a dependency tree.

    Args:
        node: Current node.
        deps: Dependency graph (module name -> names it depends on).
        lines: Output lines; one line is appended per visited node.
        prefix: Text placed before this node's label (indent + connector).
        visited: Already visited nodes on this branch (to detect cycles).
        child_indent: Indentation that lines below this node start with.
            Defaults to ``prefix``, which is correct for a root node.
    """
    if visited is None:
        visited = set()
    if child_indent is None:
        child_indent = prefix

    # Label the node with its output file when one is configured. The lookup
    # requires a SplitResult argument, so a minimal placeholder is supplied.
    file_name = self._get_module_file(node, SplitResult(modules={node: Graph()})) or node
    lines.append(f"{prefix}{file_name}")

    # A repeated node is still printed once more, but not expanded again.
    if node in visited:
        return

    visited.add(node)

    # Children are the modules that depend on this one.
    dependents = [m for m, d in deps.items() if node in d]

    for i, dep in enumerate(dependents):
        is_last = i == len(dependents) - 1
        connector = "└── " if is_last else "├── "
        continuation = "    " if is_last else "│   "
        # The connector belongs on the child's own line only; deeper levels
        # continue from the plain indentation so connectors do not stack up.
        self._format_tree(
            dep,
            deps,
            lines,
            child_indent + connector,
            visited.copy(),
            child_indent + continuation,
        )
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
def split_by_namespace(
    source: Path,
    output_dir: Path,
    dry_run: bool = False,
) -> SplitResult:
    """Split an ontology into one module per bound namespace.

    Module definitions are derived from the namespace bindings of the
    parsed source, ignoring namespaces whose IRI contains ``w3.org``,
    ``purl.org`` or ``xmlns.com``.

    Args:
        source: Path to source ontology.
        output_dir: Directory for output modules.
        dry_run: If True, don't write files.

    Returns:
        SplitResult with split information.
    """
    # Parse the source once here purely to discover its namespace bindings.
    probe = Graph()
    probe.parse(source.as_posix())

    well_known = ["w3.org", "purl.org", "xmlns.com"]
    detected: dict[str, str] = {}  # namespace IRI -> module name
    for pfx, ns in probe.namespace_manager.namespaces():
        iri = str(ns)
        if not any(marker in iri for marker in well_known):
            # The empty prefix becomes the module named "default".
            detected[iri] = pfx or "default"

    module_defs = [
        ModuleDefinition(name=label, output=f"{label}.ttl", namespaces=[iri])
        for iri, label in detected.items()
    ]

    splitter = OntologySplitter(
        SplitConfig(
            source=source,
            output_dir=output_dir,
            modules=module_defs,
            dry_run=dry_run,
        )
    )
    result = splitter.split()

    if result.success and not dry_run:
        splitter.write_modules(result)
        splitter.write_manifest(result)

    return result
|
|
1036
|
+
|
|
1037
|
+
|
|
1038
|
+
def create_default_split_config() -> str:
    """Generate default split configuration as YAML string.

    The template shows three example modules (an explicit class/property
    list, a namespace-based module, and a module with explicit imports),
    the handling of unmatched entities, manifest generation, and a
    commented-out ``split_data`` section.

    Returns:
        YAML configuration template.
    """
    # NOTE(review): confirm the key nesting of this template against the
    # split-config loader before relying on it.
    return '''# rdf-construct split configuration
# See MERGE_GUIDE.md for full documentation

split:
  # Source ontology to split
  source: ontology/split_monolith.ttl

  # Output directory for modules
  output_dir: modules/

  # Module definitions
  modules:
    # Split by explicit class list
    - name: core
      description: "Core upper ontology concepts"
      output: core.ttl
      include:
        classes:
          - ex:Entity
          - ex:Event
          - ex:State
        properties:
          - ex:identifier
          - ex:name
      include_descendants: true

    # Split by namespace
    - name: organisation
      description: "Organisation domain module"
      output: organisation.ttl
      namespaces:
        - "http://example.org/ontology/org#"

    # Module with explicit imports
    - name: building
      description: "Building domain module"
      output: building.ttl
      namespaces:
        - "http://example.org/ontology/building#"
      imports:
        - core.ttl
      auto_imports: true

  # Handling for entities that don't match any module
  unmatched:
    strategy: common # "common" or "error"
    module: common
    output: common.ttl

  # Generate manifest file
  generate_manifest: true

  # Optional: Split data files by instance type
  # split_data:
  #   sources:
  #     - data/split_instances.ttl
  #   output_dir: data/
  #   prefix: data_
'''
|