rdf-construct: rdf_construct-0.2.1-py3-none-any.whl → rdf_construct-0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +1 -1
- rdf_construct/cli.py +1794 -0
- rdf_construct/describe/__init__.py +93 -0
- rdf_construct/describe/analyzer.py +176 -0
- rdf_construct/describe/documentation.py +146 -0
- rdf_construct/describe/formatters/__init__.py +47 -0
- rdf_construct/describe/formatters/json.py +65 -0
- rdf_construct/describe/formatters/markdown.py +275 -0
- rdf_construct/describe/formatters/text.py +315 -0
- rdf_construct/describe/hierarchy.py +232 -0
- rdf_construct/describe/imports.py +213 -0
- rdf_construct/describe/metadata.py +187 -0
- rdf_construct/describe/metrics.py +145 -0
- rdf_construct/describe/models.py +552 -0
- rdf_construct/describe/namespaces.py +180 -0
- rdf_construct/describe/profiles.py +415 -0
- rdf_construct/localise/__init__.py +114 -0
- rdf_construct/localise/config.py +508 -0
- rdf_construct/localise/extractor.py +427 -0
- rdf_construct/localise/formatters/__init__.py +36 -0
- rdf_construct/localise/formatters/markdown.py +229 -0
- rdf_construct/localise/formatters/text.py +224 -0
- rdf_construct/localise/merger.py +346 -0
- rdf_construct/localise/reporter.py +356 -0
- rdf_construct/merge/__init__.py +165 -0
- rdf_construct/merge/config.py +354 -0
- rdf_construct/merge/conflicts.py +281 -0
- rdf_construct/merge/formatters.py +426 -0
- rdf_construct/merge/merger.py +425 -0
- rdf_construct/merge/migrator.py +339 -0
- rdf_construct/merge/rules.py +377 -0
- rdf_construct/merge/splitter.py +1102 -0
- rdf_construct/refactor/__init__.py +72 -0
- rdf_construct/refactor/config.py +362 -0
- rdf_construct/refactor/deprecator.py +328 -0
- rdf_construct/refactor/formatters/__init__.py +8 -0
- rdf_construct/refactor/formatters/text.py +311 -0
- rdf_construct/refactor/renamer.py +294 -0
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/METADATA +91 -6
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/RECORD +43 -7
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/WHEEL +0 -0
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/entry_points.txt +0 -0
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/licenses/LICENSE +0 -0
rdf_construct/merge/config.py
@@ -0,0 +1,354 @@
"""Configuration dataclasses for the merge command.

Defines configuration structures for:
- Source files with priority ordering
- Namespace remapping rules
- Conflict resolution strategies
- Data migration settings
"""

from dataclasses import dataclass, field
from enum import Enum, auto
from pathlib import Path
from typing import Any

import yaml
from rdflib import URIRef


class ConflictStrategy(Enum):
    """Strategy for resolving conflicting values."""

    PRIORITY = auto()  # Higher priority source wins
    FIRST = auto()  # First source encountered wins
    LAST = auto()  # Last source encountered wins
    MARK_ALL = auto()  # Mark all conflicts for manual resolution


class ImportsStrategy(Enum):
    """Strategy for handling owl:imports statements."""

    PRESERVE = auto()  # Keep all imports from all sources
    REMOVE = auto()  # Remove all imports
    UPDATE = auto()  # Update imports to point to merged output
    MERGE = auto()  # Merge and deduplicate imports


@dataclass
class SourceConfig:
    """Configuration for a single source file.

    Attributes:
        path: Path to the source RDF file.
        priority: Priority for conflict resolution (higher wins).
        namespace_remap: Optional namespace remapping rules.
    """

    path: Path
    priority: int = 1
    namespace_remap: dict[str, str] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | str) -> "SourceConfig":
        """Create from dictionary or simple path string.

        Args:
            data: Either a path string or dict with path, priority, remap

        Returns:
            SourceConfig instance
        """
        if isinstance(data, str):
            return cls(path=Path(data))

        return cls(
            path=Path(data["path"]),
            priority=data.get("priority", 1),
            namespace_remap=data.get("namespace_remap", {}),
        )


@dataclass
class NamespaceConfig:
    """Configuration for namespace handling.

    Attributes:
        base: Base namespace for the merged output.
        remappings: Global namespace remapping rules.
        preferred_prefixes: Preferred prefix bindings for output.
    """

    base: str | None = None
    remappings: dict[str, str] = field(default_factory=dict)
    preferred_prefixes: dict[str, str] = field(default_factory=dict)


@dataclass
class ConflictConfig:
    """Configuration for conflict handling.

    Attributes:
        strategy: How to resolve conflicts.
        report_path: Optional path to write conflict report.
        ignore_predicates: Predicates to ignore in conflict detection.
    """

    strategy: ConflictStrategy = ConflictStrategy.PRIORITY
    report_path: Path | None = None
    ignore_predicates: set[str] = field(default_factory=set)


@dataclass
class MigrationRule:
    """A single data migration rule.

    Supports two types:
    - rename: Simple URI substitution
    - transform: SPARQL CONSTRUCT-style transformation

    Attributes:
        type: Either "rename" or "transform"
        description: Human-readable description of the rule
        from_uri: For rename: source URI to match
        to_uri: For rename: target URI to replace with
        match: For transform: SPARQL pattern to match
        construct: For transform: list of patterns to construct
        delete_matched: Whether to delete matched triples
    """

    type: str  # "rename" or "transform"
    description: str = ""
    # For rename type
    from_uri: str | None = None
    to_uri: str | None = None
    # For transform type
    match: str | None = None
    construct: list[dict[str, str]] = field(default_factory=list)
    delete_matched: bool = True

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "MigrationRule":
        """Create from dictionary.

        Args:
            data: Dictionary with rule configuration

        Returns:
            MigrationRule instance
        """
        return cls(
            type=data.get("type", "rename"),
            description=data.get("description", ""),
            from_uri=data.get("from"),
            to_uri=data.get("to"),
            match=data.get("match"),
            construct=data.get("construct", []),
            delete_matched=data.get("delete_matched", True),
        )


@dataclass
class DataMigrationConfig:
    """Configuration for data graph migration.

    Attributes:
        data_sources: Paths to data files to migrate.
        output_path: Path for migrated data output.
        rules: List of migration rules to apply.
        rules_file: Optional path to YAML file with rules.
    """

    data_sources: list[Path] = field(default_factory=list)
    output_path: Path | None = None
    rules: list[MigrationRule] = field(default_factory=list)
    rules_file: Path | None = None


@dataclass
class OutputConfig:
    """Configuration for output generation.

    Attributes:
        path: Output file path.
        format: RDF serialization format.
        preserve_prefixes: Whether to preserve source prefix bindings.
    """

    path: Path
    format: str = "turtle"
    preserve_prefixes: bool = True


@dataclass
class MergeConfig:
    """Complete configuration for a merge operation.

    Attributes:
        sources: List of source file configurations.
        output: Output configuration.
        namespaces: Namespace handling configuration.
        conflicts: Conflict resolution configuration.
        imports: owl:imports handling strategy.
        migrate_data: Optional data migration configuration.
        dry_run: If True, report what would happen without writing.
    """

    sources: list[SourceConfig] = field(default_factory=list)
    output: OutputConfig | None = None
    namespaces: NamespaceConfig = field(default_factory=NamespaceConfig)
    conflicts: ConflictConfig = field(default_factory=ConflictConfig)
    imports: ImportsStrategy = ImportsStrategy.PRESERVE
    migrate_data: DataMigrationConfig | None = None
    dry_run: bool = False

    @classmethod
    def from_yaml(cls, path: Path) -> "MergeConfig":
        """Load configuration from a YAML file.

        Args:
            path: Path to YAML configuration file

        Returns:
            MergeConfig instance

        Raises:
            FileNotFoundError: If config file doesn't exist
            ValueError: If config is invalid
        """
        if not path.exists():
            raise FileNotFoundError(f"Config file not found: {path}")

        with open(path) as f:
            data = yaml.safe_load(f)

        return cls.from_dict(data)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "MergeConfig":
        """Create from dictionary.

        Args:
            data: Dictionary with configuration

        Returns:
            MergeConfig instance
        """
        # Parse sources
        sources = []
        for src in data.get("sources", []):
            sources.append(SourceConfig.from_dict(src))

        # Parse output
        output = None
        if "output" in data:
            out_data = data["output"]
            if isinstance(out_data, str):
                output = OutputConfig(path=Path(out_data))
            else:
                output = OutputConfig(
                    path=Path(out_data["path"]),
                    format=out_data.get("format", "turtle"),
                    preserve_prefixes=out_data.get("preserve_prefixes", True),
                )

        # Parse namespaces
        ns_data = data.get("namespaces", {})
        namespaces = NamespaceConfig(
            base=ns_data.get("base"),
            remappings=ns_data.get("remappings", {}),
            preferred_prefixes=ns_data.get("preferred_prefixes", {}),
        )

        # Parse conflicts
        conf_data = data.get("conflicts", {})
        strategy_str = conf_data.get("strategy", "priority").upper()
        conflicts = ConflictConfig(
            strategy=ConflictStrategy[strategy_str],
            report_path=Path(conf_data["report"]) if conf_data.get("report") else None,
            ignore_predicates=set(conf_data.get("ignore_predicates", [])),
        )

        # Parse imports strategy
        imports_str = data.get("imports", "preserve").upper()
        imports = ImportsStrategy[imports_str]

        # Parse data migration
        migrate_data = None
        if "migrate_data" in data:
            mig_data = data["migrate_data"]
            rules = [MigrationRule.from_dict(r) for r in mig_data.get("rules", [])]
            migrate_data = DataMigrationConfig(
                data_sources=[Path(p) for p in mig_data.get("sources", [])],
                output_path=Path(mig_data["output"]) if mig_data.get("output") else None,
                rules=rules,
                rules_file=Path(mig_data["rules_file"]) if mig_data.get("rules_file") else None,
            )

        return cls(
            sources=sources,
            output=output,
            namespaces=namespaces,
            conflicts=conflicts,
            imports=imports,
            migrate_data=migrate_data,
            dry_run=data.get("dry_run", False),
        )


def load_merge_config(path: Path) -> MergeConfig:
    """Load merge configuration from a YAML file.

    Args:
        path: Path to configuration file

    Returns:
        MergeConfig instance
    """
    return MergeConfig.from_yaml(path)


def create_default_config() -> str:
    """Generate default merge configuration as YAML string.

    Returns:
        YAML configuration template
    """
    return '''# rdf-construct merge configuration
# See MERGE_GUIDE.md for full documentation

# Source files to merge (in order of priority, lowest to highest)
sources:
  - path: core.ttl
    priority: 1
  - path: extension.ttl
    priority: 2

# Output configuration
output:
  path: merged.ttl
  format: turtle

# Namespace handling
namespaces:
  # base: "http://example.org/merged#"
  remappings: {}
  preferred_prefixes: {}

# Conflict resolution
conflicts:
  strategy: priority  # priority, first, last, or mark_all
  # report: conflicts.md  # Optional conflict report

# owl:imports handling
imports: preserve  # preserve, remove, update, or merge

# Optional data migration
# migrate_data:
#   sources:
#     - split_instances.ttl
#   output: migrated.ttl
#   rules:
#     - type: rename
#       from: "http://old.example.org/ont#OldClass"
#       to: "http://example.org/ont#NewClass"
'''
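
The config module above is driven entirely by plain dictionaries (typically loaded from YAML). A minimal usage sketch, assuming rdf-construct 0.4.0 is installed and the module is importable as rdf_construct.merge.config (import path inferred from the file listing; the file names are illustrative):

# Minimal sketch: build a MergeConfig from an in-memory dict rather than a YAML file.
from rdf_construct.merge.config import ConflictStrategy, MergeConfig

config = MergeConfig.from_dict({
    "sources": [
        "core.ttl",                               # bare string: priority defaults to 1
        {"path": "extension.ttl", "priority": 2},
    ],
    "output": {"path": "merged.ttl", "format": "turtle"},
    "conflicts": {"strategy": "priority"},
    "imports": "preserve",
})

assert config.sources[1].priority == 2
assert config.conflicts.strategy is ConflictStrategy.PRIORITY

MergeConfig.from_yaml(path) follows the same shape, so the create_default_config() template shown above loads directly through this code path.
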
rdf_construct/merge/conflicts.py
@@ -0,0 +1,281 @@
"""Conflict detection and resolution for ontology merging.

This module handles:
- Detecting conflicting values for the same subject+predicate
- Resolving conflicts based on configured strategy
- Marking unresolved conflicts in output
- Generating conflict reports
"""

from dataclasses import dataclass, field
from enum import Enum, auto
from typing import Iterator

from rdflib import Graph, URIRef, Literal, BNode
from rdflib.namespace import RDF, RDFS, OWL
from rdflib.term import Node


class ConflictType(Enum):
    """Classification of conflict types."""

    VALUE_DIFFERENCE = auto()  # Same predicate, different literal values
    TYPE_DIFFERENCE = auto()  # Different rdf:type declarations
    HIERARCHY_DIFFERENCE = auto()  # Different subClassOf/subPropertyOf
    SEMANTIC_CONTRADICTION = auto()  # Logically incompatible (e.g., disjoint + subclass)


@dataclass
class ConflictValue:
    """A single value in a conflict.

    Attributes:
        value: The RDF value (Literal, URIRef, or BNode)
        source_path: Path to the source file
        priority: Priority of the source
    """

    value: Node
    source_path: str
    priority: int

    def __str__(self) -> str:
        """Return string representation of the value."""
        if isinstance(self.value, Literal):
            lang = f"@{self.value.language}" if self.value.language else ""
            dtype = f"^^{self.value.datatype}" if self.value.datatype else ""
            return f'"{self.value}"{lang}{dtype}'
        return str(self.value)


@dataclass
class Conflict:
    """Represents a conflict between source files.

    Attributes:
        subject: The subject URI where conflict occurs
        predicate: The predicate where conflict occurs
        values: List of conflicting values from different sources
        conflict_type: Classification of the conflict
        resolution: The resolved value, if any
        is_resolved: Whether the conflict was automatically resolved
    """

    subject: URIRef | BNode
    predicate: URIRef
    values: list[ConflictValue]
    conflict_type: ConflictType = ConflictType.VALUE_DIFFERENCE
    resolution: ConflictValue | None = None
    is_resolved: bool = False

    @property
    def requires_attention(self) -> bool:
        """Check if this conflict requires manual attention."""
        return not self.is_resolved

    def resolve_by_priority(self) -> None:
        """Resolve conflict by choosing highest priority value."""
        if self.values:
            sorted_vals = sorted(self.values, key=lambda v: v.priority, reverse=True)
            self.resolution = sorted_vals[0]
            self.is_resolved = True

    def resolve_by_first(self) -> None:
        """Resolve conflict by choosing first value."""
        if self.values:
            self.resolution = self.values[0]
            self.is_resolved = True

    def resolve_by_last(self) -> None:
        """Resolve conflict by choosing last value."""
        if self.values:
            self.resolution = self.values[-1]
            self.is_resolved = True


@dataclass
class SourceGraph:
    """A loaded source graph with metadata.

    Attributes:
        graph: The RDF graph
        path: Path to the source file
        priority: Priority for conflict resolution
        triple_count: Number of triples in the graph
    """

    graph: Graph
    path: str
    priority: int
    triple_count: int = 0

    def __post_init__(self):
        """Calculate triple count."""
        self.triple_count = len(self.graph)


class ConflictDetector:
    """Detects conflicts between multiple source graphs.

    A conflict occurs when the same subject has different values for
    the same predicate across different sources. This is particularly
    important for functional properties or single-valued predicates.
    """

    # Predicates that typically should have single values
    SINGLE_VALUE_PREDICATES: set[URIRef] = {
        RDFS.label,
        RDFS.comment,
        RDFS.domain,
        RDFS.range,
        OWL.inverseOf,
    }

    def __init__(self, ignore_predicates: set[str] | None = None):
        """Initialize the conflict detector.

        Args:
            ignore_predicates: Predicates to ignore in conflict detection
        """
        self.ignore_predicates: set[URIRef] = set()
        if ignore_predicates:
            self.ignore_predicates = {URIRef(p) for p in ignore_predicates}

    def detect_conflicts(self, sources: list[SourceGraph]) -> list[Conflict]:
        """Detect conflicts across multiple source graphs.

        Args:
            sources: List of source graphs to compare

        Returns:
            List of detected conflicts
        """
        conflicts: list[Conflict] = []

        # Build index: subject -> predicate -> [(value, source, priority)]
        index: dict[Node, dict[URIRef, list[ConflictValue]]] = {}

        for source in sources:
            for s, p, o in source.graph:
                # Skip ignored predicates
                if p in self.ignore_predicates:
                    continue

                # Skip blank node subjects for now (complex to handle)
                if isinstance(s, BNode):
                    continue

                if s not in index:
                    index[s] = {}
                if p not in index[s]:
                    index[s][p] = []

                # Check if this exact value already exists
                existing_values = [cv.value for cv in index[s][p]]
                if o not in existing_values:
                    index[s][p].append(
                        ConflictValue(value=o, source_path=source.path, priority=source.priority)
                    )

        # Find predicates with multiple different values
        for subject, predicates in index.items():
            for predicate, values in predicates.items():
                if len(values) > 1:
                    conflict_type = self._classify_conflict(predicate)
                    conflicts.append(
                        Conflict(
                            subject=subject,
                            predicate=predicate,
                            values=values,
                            conflict_type=conflict_type,
                        )
                    )

        return conflicts

    def _classify_conflict(self, predicate: URIRef) -> ConflictType:
        """Classify the type of conflict based on the predicate.

        Args:
            predicate: The conflicting predicate

        Returns:
            ConflictType classification
        """
        pred_str = str(predicate)

        if predicate == RDF.type:
            return ConflictType.TYPE_DIFFERENCE

        if any(
            x in pred_str for x in ["subClassOf", "subPropertyOf", "equivalentClass"]
        ):
            return ConflictType.HIERARCHY_DIFFERENCE

        if "disjoint" in pred_str.lower():
            return ConflictType.SEMANTIC_CONTRADICTION

        return ConflictType.VALUE_DIFFERENCE


def generate_conflict_marker(conflict: Conflict, graph: Graph) -> str:
    """Generate a conflict marker comment for Turtle output.

    Args:
        conflict: The conflict to mark
        graph: Graph for namespace resolution

    Returns:
        Multi-line comment string marking the conflict
    """
    lines = []

    # Try to get a readable name for the subject
    try:
        subject_name = graph.namespace_manager.normalizeUri(conflict.subject)
    except Exception:
        subject_name = str(conflict.subject)

    try:
        pred_name = graph.namespace_manager.normalizeUri(conflict.predicate)
    except Exception:
        pred_name = str(conflict.predicate)

    lines.append(f"# === CONFLICT: {subject_name} {pred_name} ===")

    for cv in conflict.values:
        lines.append(f"# Source: {cv.source_path} (priority {cv.priority}): {cv}")

    if conflict.is_resolved and conflict.resolution:
        lines.append(f"# Resolution: Used {conflict.resolution} (highest priority)")
    else:
        lines.append("# Resolution: UNRESOLVED - values differ, manual review required")
        lines.append(
            "# To resolve: keep one value below, delete the other and this comment block"
        )

    return "\n".join(lines)


def generate_conflict_end_marker() -> str:
    """Generate the end marker for a conflict block.

    Returns:
        End marker comment string
    """
    return "# === END CONFLICT ==="


def filter_semantic_conflicts(conflicts: list[Conflict]) -> list[Conflict]:
    """Filter to only semantic contradictions that require attention.

    Semantic contradictions are logically incompatible assertions,
    such as declaring two classes both disjoint and related by subclass.

    Args:
        conflicts: All detected conflicts

    Returns:
        Only conflicts classified as semantic contradictions
    """
    return [c for c in conflicts if c.conflict_type == ConflictType.SEMANTIC_CONTRADICTION]