rdf-construct 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. rdf_construct/__init__.py +12 -0
  2. rdf_construct/__main__.py +0 -0
  3. rdf_construct/cli.py +3429 -0
  4. rdf_construct/core/__init__.py +33 -0
  5. rdf_construct/core/config.py +116 -0
  6. rdf_construct/core/ordering.py +219 -0
  7. rdf_construct/core/predicate_order.py +212 -0
  8. rdf_construct/core/profile.py +157 -0
  9. rdf_construct/core/selector.py +64 -0
  10. rdf_construct/core/serialiser.py +232 -0
  11. rdf_construct/core/utils.py +89 -0
  12. rdf_construct/cq/__init__.py +77 -0
  13. rdf_construct/cq/expectations.py +365 -0
  14. rdf_construct/cq/formatters/__init__.py +45 -0
  15. rdf_construct/cq/formatters/json.py +104 -0
  16. rdf_construct/cq/formatters/junit.py +104 -0
  17. rdf_construct/cq/formatters/text.py +146 -0
  18. rdf_construct/cq/loader.py +300 -0
  19. rdf_construct/cq/runner.py +321 -0
  20. rdf_construct/diff/__init__.py +59 -0
  21. rdf_construct/diff/change_types.py +214 -0
  22. rdf_construct/diff/comparator.py +338 -0
  23. rdf_construct/diff/filters.py +133 -0
  24. rdf_construct/diff/formatters/__init__.py +71 -0
  25. rdf_construct/diff/formatters/json.py +192 -0
  26. rdf_construct/diff/formatters/markdown.py +210 -0
  27. rdf_construct/diff/formatters/text.py +195 -0
  28. rdf_construct/docs/__init__.py +60 -0
  29. rdf_construct/docs/config.py +238 -0
  30. rdf_construct/docs/extractors.py +603 -0
  31. rdf_construct/docs/generator.py +360 -0
  32. rdf_construct/docs/renderers/__init__.py +7 -0
  33. rdf_construct/docs/renderers/html.py +803 -0
  34. rdf_construct/docs/renderers/json.py +390 -0
  35. rdf_construct/docs/renderers/markdown.py +628 -0
  36. rdf_construct/docs/search.py +278 -0
  37. rdf_construct/docs/templates/html/base.html.jinja +44 -0
  38. rdf_construct/docs/templates/html/class.html.jinja +152 -0
  39. rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
  40. rdf_construct/docs/templates/html/index.html.jinja +110 -0
  41. rdf_construct/docs/templates/html/instance.html.jinja +90 -0
  42. rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
  43. rdf_construct/docs/templates/html/property.html.jinja +124 -0
  44. rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
  45. rdf_construct/lint/__init__.py +75 -0
  46. rdf_construct/lint/config.py +214 -0
  47. rdf_construct/lint/engine.py +396 -0
  48. rdf_construct/lint/formatters.py +327 -0
  49. rdf_construct/lint/rules.py +692 -0
  50. rdf_construct/localise/__init__.py +114 -0
  51. rdf_construct/localise/config.py +508 -0
  52. rdf_construct/localise/extractor.py +427 -0
  53. rdf_construct/localise/formatters/__init__.py +36 -0
  54. rdf_construct/localise/formatters/markdown.py +229 -0
  55. rdf_construct/localise/formatters/text.py +224 -0
  56. rdf_construct/localise/merger.py +346 -0
  57. rdf_construct/localise/reporter.py +356 -0
  58. rdf_construct/main.py +6 -0
  59. rdf_construct/merge/__init__.py +165 -0
  60. rdf_construct/merge/config.py +354 -0
  61. rdf_construct/merge/conflicts.py +281 -0
  62. rdf_construct/merge/formatters.py +426 -0
  63. rdf_construct/merge/merger.py +425 -0
  64. rdf_construct/merge/migrator.py +339 -0
  65. rdf_construct/merge/rules.py +377 -0
  66. rdf_construct/merge/splitter.py +1102 -0
  67. rdf_construct/puml2rdf/__init__.py +103 -0
  68. rdf_construct/puml2rdf/config.py +230 -0
  69. rdf_construct/puml2rdf/converter.py +420 -0
  70. rdf_construct/puml2rdf/merger.py +200 -0
  71. rdf_construct/puml2rdf/model.py +202 -0
  72. rdf_construct/puml2rdf/parser.py +565 -0
  73. rdf_construct/puml2rdf/validators.py +451 -0
  74. rdf_construct/refactor/__init__.py +72 -0
  75. rdf_construct/refactor/config.py +362 -0
  76. rdf_construct/refactor/deprecator.py +328 -0
  77. rdf_construct/refactor/formatters/__init__.py +8 -0
  78. rdf_construct/refactor/formatters/text.py +311 -0
  79. rdf_construct/refactor/renamer.py +294 -0
  80. rdf_construct/shacl/__init__.py +56 -0
  81. rdf_construct/shacl/config.py +166 -0
  82. rdf_construct/shacl/converters.py +520 -0
  83. rdf_construct/shacl/generator.py +364 -0
  84. rdf_construct/shacl/namespaces.py +93 -0
  85. rdf_construct/stats/__init__.py +29 -0
  86. rdf_construct/stats/collector.py +178 -0
  87. rdf_construct/stats/comparator.py +298 -0
  88. rdf_construct/stats/formatters/__init__.py +83 -0
  89. rdf_construct/stats/formatters/json.py +38 -0
  90. rdf_construct/stats/formatters/markdown.py +153 -0
  91. rdf_construct/stats/formatters/text.py +186 -0
  92. rdf_construct/stats/metrics/__init__.py +26 -0
  93. rdf_construct/stats/metrics/basic.py +147 -0
  94. rdf_construct/stats/metrics/complexity.py +137 -0
  95. rdf_construct/stats/metrics/connectivity.py +130 -0
  96. rdf_construct/stats/metrics/documentation.py +128 -0
  97. rdf_construct/stats/metrics/hierarchy.py +207 -0
  98. rdf_construct/stats/metrics/properties.py +88 -0
  99. rdf_construct/uml/__init__.py +22 -0
  100. rdf_construct/uml/context.py +194 -0
  101. rdf_construct/uml/mapper.py +371 -0
  102. rdf_construct/uml/odm_renderer.py +789 -0
  103. rdf_construct/uml/renderer.py +684 -0
  104. rdf_construct/uml/uml_layout.py +393 -0
  105. rdf_construct/uml/uml_style.py +613 -0
  106. rdf_construct-0.3.0.dist-info/METADATA +496 -0
  107. rdf_construct-0.3.0.dist-info/RECORD +110 -0
  108. rdf_construct-0.3.0.dist-info/WHEEL +4 -0
  109. rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
  110. rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,339 @@
1
+ """Data graph migration for ontology changes.
2
+
3
+ This module handles migrating instance data when ontologies change:
4
+ - Simple URI substitution (renames, namespace changes)
5
+ - Complex CONSTRUCT-style transformations (property splits, type migrations)
6
+
7
+ The migrator is reusable by merge, split, and refactor commands.
8
+ """
9
+
10
+ from dataclasses import dataclass, field
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from rdflib import Graph, URIRef, Literal, BNode
15
+ from rdflib.namespace import RDF
16
+
17
+ from rdf_construct.merge.config import MigrationRule, DataMigrationConfig
18
+ from rdf_construct.merge.rules import RuleEngine
19
+
20
+
21
@dataclass
class MigrationStats:
    """Statistics from a migration operation.

    Attributes:
        subjects_updated: Number of subject URIs substituted
        objects_updated: Number of object URIs substituted
        triples_added: Number of triples added by transformations
        triples_removed: Number of triples removed by transformations
        rules_applied: Count of each rule applied, keyed by rule description
        predicates_updated: Number of predicate URIs substituted
    """

    subjects_updated: int = 0
    objects_updated: int = 0
    triples_added: int = 0
    triples_removed: int = 0
    rules_applied: dict[str, int] = field(default_factory=dict)
    # Declared after the original fields so that existing positional
    # construction keeps its meaning.
    predicates_updated: int = 0

    @property
    def total_changes(self) -> int:
        """Total number of changes made (rule application counts excluded)."""
        return (
            self.subjects_updated
            + self.predicates_updated
            + self.objects_updated
            + self.triples_added
            + self.triples_removed
        )


@dataclass
class MigrationResult:
    """Result of a data migration operation.

    Attributes:
        migrated_graph: The migrated RDF graph
        stats: Migration statistics
        success: Whether migration completed without errors
        error: Error message if success is False
        source_triples: Original triple count
        result_triples: Final triple count
    """

    migrated_graph: Graph | None = None
    stats: MigrationStats = field(default_factory=MigrationStats)
    success: bool = True
    error: str | None = None
    source_triples: int = 0
    result_triples: int = 0


class DataMigrator:
    """Migrates instance data graphs when ontology structure changes.

    Supports two types of migration:

    1. **Simple URI substitution**: Replaces URIs in subject, predicate and
       object position. Used for renames, namespace changes, and
       class/property moves.

    2. **Complex transformations**: SPARQL CONSTRUCT-style rules that can:
       - Split properties (fullName → givenName + familyName)
       - Merge properties
       - Change types (Company → Organisation)
       - Transform values (Fahrenheit → Celsius)

    Example usage:
        migrator = DataMigrator()
        uri_map = {
            URIRef("http://old.org/Class"): URIRef("http://new.org/Class")
        }
        result = migrator.migrate(data_graph, uri_map=uri_map)
    """

    def __init__(self) -> None:
        """Initialize the data migrator."""
        self.rule_engine = RuleEngine()

    def migrate(
        self,
        data: Graph,
        uri_map: dict[URIRef, URIRef] | None = None,
        rules: list[MigrationRule] | None = None,
    ) -> MigrationResult:
        """Migrate a data graph.

        The source graph is never modified; all changes are made on a copy.

        Args:
            data: Source data graph to migrate
            uri_map: Simple URI substitution map (old -> new)
            rules: Complex migration rules to apply

        Returns:
            MigrationResult with migrated graph and statistics
        """
        result = MigrationResult()
        result.source_triples = len(data)

        # Phase 1: copy the data into a fresh graph, applying the simple URI
        # substitutions on the way. An empty map makes this a plain copy.
        migrated = self._apply_uri_substitution(
            data, uri_map or {}, result.stats
        )

        # Phase 2: apply the rules in the order given.
        for rule in rules or []:
            if rule.type == "rename":
                # Handle rename rules that weren't in uri_map
                if rule.from_uri and rule.to_uri:
                    single_map = {URIRef(rule.from_uri): URIRef(rule.to_uri)}
                    migrated = self._apply_uri_substitution(
                        migrated, single_map, result.stats
                    )
                    self._count_rule(
                        result.stats, rule.description or "rename", 1
                    )

            elif rule.type == "transform":
                # The rule engine mutates ``migrated`` in place.
                changes = self.rule_engine.apply_rule(migrated, rule)
                result.stats.triples_added += changes.get("added", 0)
                result.stats.triples_removed += changes.get("removed", 0)
                self._count_rule(
                    result.stats,
                    rule.description or "transform",
                    changes.get("instances", 0),
                )

        result.migrated_graph = migrated
        result.result_triples = len(migrated)
        result.success = True

        return result

    @staticmethod
    def _count_rule(stats: MigrationStats, name: str, amount: int) -> None:
        """Add ``amount`` to the application count recorded under ``name``."""
        stats.rules_applied[name] = stats.rules_applied.get(name, 0) + amount

    def _substitute_uri(
        self,
        term: Any,
        uri_map: dict[URIRef, URIRef],
        stats: MigrationStats,
        is_subject: bool,
    ) -> Any:
        """Substitute a subject or object URI if it's in the mapping.

        Args:
            term: RDF term to potentially substitute
            uri_map: URI substitution map
            stats: Statistics to update
            is_subject: Whether this is a subject position (otherwise the
                substitution is counted as an object update)

        Returns:
            Substituted term or original
        """
        if isinstance(term, URIRef) and term in uri_map:
            if is_subject:
                stats.subjects_updated += 1
            else:
                stats.objects_updated += 1
            return uri_map[term]
        return term

    def _substitute_predicate(
        self,
        term: Any,
        uri_map: dict[URIRef, URIRef],
        stats: MigrationStats,
    ) -> Any:
        """Substitute a predicate URI if it's in the mapping.

        Args:
            term: Predicate term to potentially substitute
            uri_map: URI substitution map
            stats: Statistics to update

        Returns:
            Substituted term or original
        """
        if isinstance(term, URIRef) and term in uri_map:
            stats.predicates_updated += 1
            return uri_map[term]
        return term

    def _apply_uri_substitution(
        self,
        graph: Graph,
        uri_map: dict[URIRef, URIRef],
        stats: MigrationStats,
    ) -> Graph:
        """Apply URI substitution to an entire graph.

        Subjects, predicates and objects are all substituted, so a renamed
        property is rewritten wherever the data uses it as a predicate.

        Args:
            graph: Graph to transform (left unmodified)
            uri_map: URI substitution map
            stats: Statistics to update

        Returns:
            New graph with substitutions applied
        """
        new_graph = Graph()

        # Prefix bindings are copied unchanged.
        for prefix, ns in graph.namespace_manager.namespaces():
            new_graph.bind(prefix, ns)

        for s, p, o in graph:
            new_s = self._substitute_uri(s, uri_map, stats, is_subject=True)
            new_p = self._substitute_predicate(p, uri_map, stats)
            new_o = self._substitute_uri(o, uri_map, stats, is_subject=False)
            new_graph.add((new_s, new_p, new_o))

        return new_graph

    def build_uri_map_from_namespaces(
        self,
        graph: Graph,
        namespace_remaps: dict[str, str],
    ) -> dict[URIRef, URIRef]:
        """Build a URI map from namespace remappings.

        Scans the graph for all URIs and creates substitution entries
        for those that fall within remapped namespaces. When several old
        namespaces are prefixes of the same URI, the longest one wins.

        Args:
            graph: Graph to scan for URIs
            namespace_remaps: Old namespace -> new namespace mapping

        Returns:
            URI substitution map
        """
        # Longest namespace first so that e.g. "http://ex.org/sub/" takes
        # precedence over "http://ex.org/" regardless of dict order. An empty
        # namespace would prefix-match every URI, so it is ignored.
        ordered_remaps = sorted(
            ((old, new) for old, new in namespace_remaps.items() if old),
            key=lambda remap: len(remap[0]),
            reverse=True,
        )

        # Collect all URIs from the graph, in any triple position.
        all_uris: set[URIRef] = {
            term
            for triple in graph
            for term in triple
            if isinstance(term, URIRef)
        }

        uri_map: dict[URIRef, URIRef] = {}
        for uri in all_uris:
            uri_str = str(uri)
            for old_ns, new_ns in ordered_remaps:
                if uri_str.startswith(old_ns):
                    uri_map[uri] = URIRef(new_ns + uri_str[len(old_ns):])
                    break

        return uri_map
259
+
260
+
261
def migrate_data_files(
    data_paths: list[Path],
    uri_map: dict[URIRef, URIRef] | None = None,
    rules: list[MigrationRule] | None = None,
    output_path: Path | None = None,
) -> MigrationResult:
    """Convenience function to migrate multiple data files.

    Each file is parsed and migrated independently; the migrated triples and
    statistics are then merged into one combined result. Processing stops at
    the first missing, unparsable or unsuccessfully migrated file.

    Args:
        data_paths: Paths to data files to migrate
        uri_map: URI substitution map
        rules: Migration rules to apply
        output_path: Path to write combined migrated output (Turtle)

    Returns:
        MigrationResult with combined statistics. On failure ``success`` is
        False and ``error`` describes the problem.
    """
    # Local import: the module-level imports only bring in dataclass/field.
    from dataclasses import fields

    migrator = DataMigrator()
    combined_result = MigrationResult()
    combined_graph = Graph()
    combined_stats = MigrationStats()

    for data_path in data_paths:
        if not data_path.exists():
            combined_result.success = False
            combined_result.error = f"Data file not found: {data_path}"
            return combined_result

        # Load the data file
        data = Graph()
        try:
            data.parse(data_path.as_posix())
        except Exception as e:
            combined_result.success = False
            combined_result.error = f"Failed to parse {data_path}: {e}"
            return combined_result

        # Migrate this file
        result = migrator.migrate(data, uri_map=uri_map, rules=rules)

        if not result.success:
            return result

        # Combine results
        combined_result.source_triples += result.source_triples

        # Compare against None explicitly: a Graph with zero triples is
        # falsy because Graph defines __len__.
        if result.migrated_graph is not None:
            for triple in result.migrated_graph:
                combined_graph.add(triple)

            # Copy namespace bindings without overriding earlier files'.
            for prefix, ns in result.migrated_graph.namespace_manager.namespaces():
                try:
                    combined_graph.bind(prefix, ns, override=False)
                except Exception:
                    # Best effort: a prefix that cannot be bound only affects
                    # how the output is abbreviated, not its triples.
                    pass

        # Combine stats: sum every integer counter the stats class declares,
        # then merge the per-rule counts.
        for stat_field in fields(combined_stats):
            value = getattr(result.stats, stat_field.name)
            if isinstance(value, int):
                setattr(
                    combined_stats,
                    stat_field.name,
                    getattr(combined_stats, stat_field.name) + value,
                )
        for rule_name, count in result.stats.rules_applied.items():
            combined_stats.rules_applied[rule_name] = (
                combined_stats.rules_applied.get(rule_name, 0) + count
            )

    combined_result.migrated_graph = combined_graph
    combined_result.stats = combined_stats
    combined_result.result_triples = len(combined_graph)
    combined_result.success = True

    # Write output if a path was provided, even when the combined graph is
    # empty, so the caller always gets the file it asked for.
    if output_path is not None:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        combined_graph.serialize(
            destination=output_path.as_posix(), format="turtle"
        )

    return combined_result
@@ -0,0 +1,377 @@
1
+ """Migration rule parsing and execution.
2
+
3
+ This module handles complex transformation rules using a SPARQL-like
4
+ pattern matching approach:
5
+
6
+ - Match patterns: Find triples matching a pattern
7
+ - Construct patterns: Create new triples from matches
8
+ - Delete patterns: Remove matched triples
9
+
10
+ This enables structural transformations like:
11
+ - Property splits (fullName → givenName + familyName)
12
+ - Type migrations (Company → Organisation)
13
+ - Value transformations
14
+ """
15
+
16
+ import re
17
+ from dataclasses import dataclass, field
18
+ from typing import Any
19
+
20
+ from rdflib import Graph, URIRef, Literal, BNode, Namespace
21
+ from rdflib.namespace import RDF, RDFS, XSD
22
+ from rdflib.term import Node
23
+
24
+ from rdf_construct.merge.config import MigrationRule
25
+
26
+
27
@dataclass
class Binding:
    """Pairs one pattern variable with the RDF term it matched.

    Attributes:
        variable: Name of the variable, stored without the leading "?"
        value: RDF term bound to the variable
    """

    variable: str
    value: Node
38
+
39
+
40
@dataclass
class Match:
    """Outcome of matching a pattern against one part of a graph.

    Attributes:
        bindings: Maps each variable name to the term it was bound to
        matched_triples: The graph triples this match was built from
    """

    bindings: dict[str, Node] = field(default_factory=dict)
    matched_triples: list[tuple] = field(default_factory=list)
51
+
52
+
53
class PatternParser:
    """Parses simple SPARQL-like triple patterns.

    Supports patterns like:
    - "?s ex:fullName ?name"
    - "?s a ex:Company"

    Variables start with ?
    URIs can be full or prefixed (requires namespace context)
    """

    # Pattern for variables like ?s, ?name
    VARIABLE_PATTERN = re.compile(r"\?(\w+)")

    def __init__(self, namespaces: dict[str, Namespace] | None = None):
        """Initialize the parser.

        Args:
            namespaces: Prefix -> Namespace mapping for expanding CURIEs
        """
        self.namespaces = namespaces or {}

    def parse_pattern(self, pattern: str) -> tuple[Any, Any, Any]:
        """Parse a triple pattern into (subject, predicate, object).

        Args:
            pattern: Triple pattern string like "?s ex:hasName ?name"

        Returns:
            Tuple of (subject, predicate, object) where variables are
            represented as plain strings starting with ?

        Raises:
            ValueError: If the pattern does not have three parts
        """
        # Split at most twice so an object literal may contain whitespace.
        parts = pattern.strip().split(None, 2)
        if len(parts) != 3:
            raise ValueError(f"Invalid pattern (expected 3 parts): {pattern}")

        subject, predicate, obj = parts

        # "a" is shorthand for rdf:type only as the predicate token; the
        # word "a" inside an object literal must be left untouched.
        if predicate == "a":
            parsed_predicate: Any = RDF.type
        else:
            parsed_predicate = self._parse_term(predicate)

        return (
            self._parse_term(subject),
            parsed_predicate,
            self._parse_term(obj),
        )

    def _parse_term(self, term: str) -> Any:
        """Parse a single term from a pattern.

        Args:
            term: Term string (variable, URI, or literal)

        Returns:
            Parsed term: the unchanged string for variables, URIRef for
            URIs and prefixed names, Literal for double-quoted strings, and
            the unchanged string for anything unrecognised.
        """
        term = term.strip()

        # Variable: keep as a plain string marker
        if term.startswith("?"):
            return term

        # Full URI in angle brackets
        if term.startswith("<") and term.endswith(">"):
            return URIRef(term[1:-1])

        # Literal (quoted string). Checked before prefixed names because a
        # literal may itself contain ":" (e.g. "10:30").
        if len(term) >= 2 and term.startswith('"') and term.endswith('"'):
            return Literal(term[1:-1])

        # Bare absolute URI
        if term.startswith(("http://", "https://")):
            return URIRef(term)

        # Prefixed name
        if ":" in term:
            prefix, local = term.split(":", 1)
            if prefix in self.namespaces:
                return URIRef(str(self.namespaces[prefix]) + local)
            # If prefix not found, return as-is (may be handled later)
            return URIRef(term)

        # Default: treat as local name (would need base URI)
        return term
137
+
138
+
139
class RuleEngine:
    """Executes transformation rules on RDF graphs.

    Supports:
    - Pattern matching with variable bindings
    - Triple construction from bindings
    - Simple value transformations (STRBEFORE, STRAFTER, basic arithmetic)
    """

    # STRBEFORE(?var, 'delim') AS ?out  /  STRAFTER(?var, 'delim') AS ?out
    _STR_FUNCTION_RE = re.compile(
        r"(STRBEFORE|STRAFTER)\(\?(\w+),\s*['\"](.+)['\"]\)\s*AS\s*\?(\w+)",
        re.IGNORECASE,
    )

    # ((?var - n) * m / d) AS ?out, with integer or decimal constants
    _ARITHMETIC_RE = re.compile(
        r"\(\(\?(\w+)\s*-\s*(\d+(?:\.\d+)?)\)\s*\*\s*(\d+(?:\.\d+)?)"
        r"\s*/\s*(\d+(?:\.\d+)?)\)\s*AS\s*\?(\w+)",
        re.IGNORECASE,
    )

    def __init__(self) -> None:
        """Initialize the rule engine."""
        self.parser = PatternParser()

    def set_namespaces(self, graph: Graph) -> None:
        """Update parser namespaces from a graph.

        Args:
            graph: Graph to extract namespaces from
        """
        self.parser.namespaces = {
            prefix: Namespace(str(ns))
            for prefix, ns in graph.namespace_manager.namespaces()
        }

    def apply_rule(self, graph: Graph, rule: MigrationRule) -> dict[str, int]:
        """Apply a transformation rule to a graph.

        Modifies the graph in place.

        Args:
            graph: Graph to transform
            rule: Transformation rule to apply

        Returns:
            Statistics: {"added": n, "removed": n, "instances": n}, where
            "added"/"removed" count triples that actually changed the graph
            and "instances" counts pattern matches.
        """
        self.set_namespaces(graph)

        stats = {"added": 0, "removed": 0, "instances": 0}
        if rule.type != "transform" or not rule.match:
            return stats

        # All matches are collected before the graph is mutated.
        matches = self._find_matches(graph, rule.match)

        for match in matches:
            stats["instances"] += 1

            # Construct new triples
            for construct_spec in rule.construct or []:
                for triple in self._construct_triples(
                    match, construct_spec, graph
                ):
                    # Adding an existing triple is a no-op; don't count it.
                    if triple not in graph:
                        graph.add(triple)
                        stats["added"] += 1

            # Delete matched triples if configured
            if rule.delete_matched:
                for triple in match.matched_triples:
                    if triple in graph:
                        graph.remove(triple)
                        stats["removed"] += 1

        return stats

    @staticmethod
    def _is_variable(term: Any) -> bool:
        """Return True if a parsed pattern term is a ?variable marker.

        rdflib terms (URIRef, Literal, BNode) subclass str, so a bare
        ``isinstance(term, str)`` cannot tell a variable from a constant.
        """
        return (
            isinstance(term, str)
            and not isinstance(term, Node)
            and term.startswith("?")
        )

    def _find_matches(self, graph: Graph, pattern_str: str) -> list[Match]:
        """Find all matches for a pattern in the graph.

        Args:
            graph: Graph to search
            pattern_str: Pattern string to match

        Returns:
            List of Match objects with bindings; empty if the pattern cannot
            be parsed or contains a term that cannot be resolved.
        """
        try:
            pattern = self.parser.parse_pattern(pattern_str)
        except ValueError:
            return []

        # A term that is neither a variable nor an RDF node is a bare name
        # the parser could not resolve; it can never equal a graph term.
        if any(
            not self._is_variable(term) and not isinstance(term, Node)
            for term in pattern
        ):
            return []

        # Variables become wildcards; constants are filtered by the store.
        query = tuple(
            None if self._is_variable(term) else term for term in pattern
        )

        matches: list[Match] = []
        for triple in graph.triples(query):
            bindings: dict[str, Node] = {}
            consistent = True

            for term, value in zip(pattern, triple):
                if not self._is_variable(term):
                    continue
                name = term[1:]  # Remove ? prefix
                # A variable used twice must bind to the same term.
                if name in bindings and bindings[name] != value:
                    consistent = False
                    break
                bindings[name] = value

            if consistent:
                matches.append(
                    Match(bindings=bindings, matched_triples=[triple])
                )

        return matches

    def _construct_triples(
        self,
        match: Match,
        construct_spec: dict[str, str],
        graph: Graph,
    ) -> list[tuple]:
        """Construct new triples from a match and specification.

        Args:
            match: Match with variable bindings
            construct_spec: Construction specification with "pattern" and
                optional "bind"; when "bind" is present its value replaces
                the object of the pattern
            graph: Graph for namespace resolution (currently unused)

        Returns:
            List of new triples to add; empty if the pattern is invalid or
            any position cannot be resolved
        """
        pattern_str = construct_spec.get("pattern", "")
        bind_expr = construct_spec.get("bind")

        try:
            pattern = self.parser.parse_pattern(pattern_str)
        except ValueError:
            return []

        # Substitute variables in pattern
        result_s = self._substitute_variable(pattern[0], match.bindings)
        result_p = self._substitute_variable(pattern[1], match.bindings)

        if bind_expr:
            # Evaluate bind expression
            result_o = self._evaluate_bind(bind_expr, match.bindings)
        else:
            result_o = self._substitute_variable(pattern[2], match.bindings)

        if result_s is None or result_p is None or result_o is None:
            return []

        return [(result_s, result_p, result_o)]

    def _substitute_variable(
        self, term: Any, bindings: dict[str, Node]
    ) -> Node | None:
        """Substitute a variable with its bound value.

        Args:
            term: Term to substitute (may be a variable string)
            bindings: Variable bindings

        Returns:
            The bound value for a variable, the term itself for an RDF node,
            or None if the variable is unbound or the term is an unresolved
            bare name
        """
        if self._is_variable(term):
            return bindings.get(term[1:])
        if isinstance(term, Node):
            return term
        # Unresolved bare name: not a valid RDF term, so nothing to build.
        return None

    def _evaluate_bind(
        self, expression: str, bindings: dict[str, Node]
    ) -> Node | None:
        """Evaluate a simple bind expression.

        Supports:
        - STRBEFORE(?var, 'delimiter') AS ?out - substring before delimiter
        - STRAFTER(?var, 'delimiter') AS ?out - substring after delimiter
        - ((?var - n) * m / d) AS ?out - fixed-shape arithmetic

        The name after AS is required by the syntax but not used; the
        evaluated value is returned directly.

        Args:
            expression: Bind expression string
            bindings: Variable bindings

        Returns:
            Evaluated value, or None if the expression is not recognised,
            the variable is unbound, the delimiter is absent, or the
            arithmetic fails
        """
        expression = expression.strip()

        # Handle STRBEFORE / STRAFTER
        str_match = self._STR_FUNCTION_RE.match(expression)
        if str_match:
            function = str_match.group(1).upper()
            var_name = str_match.group(2)
            delimiter = str_match.group(3)
            if var_name not in bindings:
                return None
            value = str(bindings[var_name])
            idx = value.find(delimiter)
            if idx < 0:
                return None
            if function == "STRBEFORE":
                return Literal(value[:idx])
            return Literal(value[idx + len(delimiter):])

        # Handle simple arithmetic: ((?var - n) * m / d) AS ?result
        arith_match = self._ARITHMETIC_RE.match(expression)
        if arith_match:
            var_name = arith_match.group(1)
            if var_name not in bindings:
                return None
            try:
                sub = float(arith_match.group(2))
                mult = float(arith_match.group(3))
                div = float(arith_match.group(4))
                value = float(str(bindings[var_name]))
                result = ((value - sub) * mult) / div
            except (ValueError, ZeroDivisionError):
                return None
            return Literal(result, datatype=XSD.decimal)

        return None