rdf-construct 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. rdf_construct/__init__.py +12 -0
  2. rdf_construct/__main__.py +0 -0
  3. rdf_construct/cli.py +3429 -0
  4. rdf_construct/core/__init__.py +33 -0
  5. rdf_construct/core/config.py +116 -0
  6. rdf_construct/core/ordering.py +219 -0
  7. rdf_construct/core/predicate_order.py +212 -0
  8. rdf_construct/core/profile.py +157 -0
  9. rdf_construct/core/selector.py +64 -0
  10. rdf_construct/core/serialiser.py +232 -0
  11. rdf_construct/core/utils.py +89 -0
  12. rdf_construct/cq/__init__.py +77 -0
  13. rdf_construct/cq/expectations.py +365 -0
  14. rdf_construct/cq/formatters/__init__.py +45 -0
  15. rdf_construct/cq/formatters/json.py +104 -0
  16. rdf_construct/cq/formatters/junit.py +104 -0
  17. rdf_construct/cq/formatters/text.py +146 -0
  18. rdf_construct/cq/loader.py +300 -0
  19. rdf_construct/cq/runner.py +321 -0
  20. rdf_construct/diff/__init__.py +59 -0
  21. rdf_construct/diff/change_types.py +214 -0
  22. rdf_construct/diff/comparator.py +338 -0
  23. rdf_construct/diff/filters.py +133 -0
  24. rdf_construct/diff/formatters/__init__.py +71 -0
  25. rdf_construct/diff/formatters/json.py +192 -0
  26. rdf_construct/diff/formatters/markdown.py +210 -0
  27. rdf_construct/diff/formatters/text.py +195 -0
  28. rdf_construct/docs/__init__.py +60 -0
  29. rdf_construct/docs/config.py +238 -0
  30. rdf_construct/docs/extractors.py +603 -0
  31. rdf_construct/docs/generator.py +360 -0
  32. rdf_construct/docs/renderers/__init__.py +7 -0
  33. rdf_construct/docs/renderers/html.py +803 -0
  34. rdf_construct/docs/renderers/json.py +390 -0
  35. rdf_construct/docs/renderers/markdown.py +628 -0
  36. rdf_construct/docs/search.py +278 -0
  37. rdf_construct/docs/templates/html/base.html.jinja +44 -0
  38. rdf_construct/docs/templates/html/class.html.jinja +152 -0
  39. rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
  40. rdf_construct/docs/templates/html/index.html.jinja +110 -0
  41. rdf_construct/docs/templates/html/instance.html.jinja +90 -0
  42. rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
  43. rdf_construct/docs/templates/html/property.html.jinja +124 -0
  44. rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
  45. rdf_construct/lint/__init__.py +75 -0
  46. rdf_construct/lint/config.py +214 -0
  47. rdf_construct/lint/engine.py +396 -0
  48. rdf_construct/lint/formatters.py +327 -0
  49. rdf_construct/lint/rules.py +692 -0
  50. rdf_construct/localise/__init__.py +114 -0
  51. rdf_construct/localise/config.py +508 -0
  52. rdf_construct/localise/extractor.py +427 -0
  53. rdf_construct/localise/formatters/__init__.py +36 -0
  54. rdf_construct/localise/formatters/markdown.py +229 -0
  55. rdf_construct/localise/formatters/text.py +224 -0
  56. rdf_construct/localise/merger.py +346 -0
  57. rdf_construct/localise/reporter.py +356 -0
  58. rdf_construct/main.py +6 -0
  59. rdf_construct/merge/__init__.py +165 -0
  60. rdf_construct/merge/config.py +354 -0
  61. rdf_construct/merge/conflicts.py +281 -0
  62. rdf_construct/merge/formatters.py +426 -0
  63. rdf_construct/merge/merger.py +425 -0
  64. rdf_construct/merge/migrator.py +339 -0
  65. rdf_construct/merge/rules.py +377 -0
  66. rdf_construct/merge/splitter.py +1102 -0
  67. rdf_construct/puml2rdf/__init__.py +103 -0
  68. rdf_construct/puml2rdf/config.py +230 -0
  69. rdf_construct/puml2rdf/converter.py +420 -0
  70. rdf_construct/puml2rdf/merger.py +200 -0
  71. rdf_construct/puml2rdf/model.py +202 -0
  72. rdf_construct/puml2rdf/parser.py +565 -0
  73. rdf_construct/puml2rdf/validators.py +451 -0
  74. rdf_construct/refactor/__init__.py +72 -0
  75. rdf_construct/refactor/config.py +362 -0
  76. rdf_construct/refactor/deprecator.py +328 -0
  77. rdf_construct/refactor/formatters/__init__.py +8 -0
  78. rdf_construct/refactor/formatters/text.py +311 -0
  79. rdf_construct/refactor/renamer.py +294 -0
  80. rdf_construct/shacl/__init__.py +56 -0
  81. rdf_construct/shacl/config.py +166 -0
  82. rdf_construct/shacl/converters.py +520 -0
  83. rdf_construct/shacl/generator.py +364 -0
  84. rdf_construct/shacl/namespaces.py +93 -0
  85. rdf_construct/stats/__init__.py +29 -0
  86. rdf_construct/stats/collector.py +178 -0
  87. rdf_construct/stats/comparator.py +298 -0
  88. rdf_construct/stats/formatters/__init__.py +83 -0
  89. rdf_construct/stats/formatters/json.py +38 -0
  90. rdf_construct/stats/formatters/markdown.py +153 -0
  91. rdf_construct/stats/formatters/text.py +186 -0
  92. rdf_construct/stats/metrics/__init__.py +26 -0
  93. rdf_construct/stats/metrics/basic.py +147 -0
  94. rdf_construct/stats/metrics/complexity.py +137 -0
  95. rdf_construct/stats/metrics/connectivity.py +130 -0
  96. rdf_construct/stats/metrics/documentation.py +128 -0
  97. rdf_construct/stats/metrics/hierarchy.py +207 -0
  98. rdf_construct/stats/metrics/properties.py +88 -0
  99. rdf_construct/uml/__init__.py +22 -0
  100. rdf_construct/uml/context.py +194 -0
  101. rdf_construct/uml/mapper.py +371 -0
  102. rdf_construct/uml/odm_renderer.py +789 -0
  103. rdf_construct/uml/renderer.py +684 -0
  104. rdf_construct/uml/uml_layout.py +393 -0
  105. rdf_construct/uml/uml_style.py +613 -0
  106. rdf_construct-0.3.0.dist-info/METADATA +496 -0
  107. rdf_construct-0.3.0.dist-info/RECORD +110 -0
  108. rdf_construct-0.3.0.dist-info/WHEEL +4 -0
  109. rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
  110. rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,356 @@
1
+ """Translation coverage reporting.
2
+
3
+ Analyses ontologies and reports translation coverage across languages,
4
+ identifying missing translations and tracking progress.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+
10
+ from rdflib import Graph, Literal, URIRef
11
+ from rdflib.namespace import OWL, RDF, RDFS
12
+
13
+
14
@dataclass
class PropertyCoverage:
    """Translation tally for one annotation property.

    Attributes:
        property: Shortened identifier of the property.
        total: How many strings exist in the source language.
        translated: How many of them have been translated.
    """

    # ``property`` is annotation-only (no default), so it does not rebind the
    # name in the class namespace and the builtin ``@property`` decorator used
    # below still resolves correctly.
    property: str
    total: int = 0
    translated: int = 0

    @property
    def coverage(self) -> float:
        """Translated share of ``total`` as a percentage; 0.0 when ``total`` is 0."""
        if self.total != 0:
            return (self.translated / self.total) * 100
        return 0.0
34
+
35
+
36
@dataclass
class LanguageCoverage:
    """Translation tally for one language across all analysed properties.

    Attributes:
        language: Language code the figures refer to.
        is_source: True when this is the source language.
        total_strings: Count of translatable strings.
        translated: Count of strings that have been translated.
        by_property: Per-property breakdown of the same figures.
        missing_entities: URIs of entities that lack translations.
    """

    language: str
    is_source: bool = False
    total_strings: int = 0
    translated: int = 0
    by_property: dict[str, PropertyCoverage] = field(default_factory=dict)
    missing_entities: list[str] = field(default_factory=list)

    @property
    def coverage(self) -> float:
        """Translated share of ``total_strings`` as a percentage; 0.0 when empty."""
        if self.total_strings != 0:
            return (self.translated / self.total_strings) * 100
        return 0.0

    @property
    def pending(self) -> int:
        """How many strings are still waiting for a translation."""
        outstanding = self.total_strings - self.translated
        return outstanding
67
+
68
+
69
@dataclass
class CoverageReport:
    """Top-level container for a translation coverage run.

    Attributes:
        source_file: The ontology file the report was produced from.
        source_language: The base language translations are measured against.
        total_entities: Number of entities found in the ontology.
        properties: The properties that were analysed.
        languages: Per-language coverage, keyed by language code.
    """

    source_file: str
    source_language: str
    total_entities: int = 0
    properties: list[str] = field(default_factory=list)
    languages: dict[str, LanguageCoverage] = field(default_factory=dict)
86
+
87
+
88
class CoverageReporter:
    """Analyses and reports translation coverage.

    The reporter examines an ontology and determines what percentage
    of translatable content has been translated into each target language.

    For a target language, the number of translated strings credited to an
    entity/property pair is capped at the number of source-language strings
    for that pair, so coverage never exceeds 100% and ``pending`` never goes
    negative when a target language carries more literals than the source.
    Language tags are compared case-insensitively.
    """

    # Properties analysed when the caller does not supply any.
    _DEFAULT_PROPERTIES = (
        "http://www.w3.org/2000/01/rdf-schema#label",
        "http://www.w3.org/2000/01/rdf-schema#comment",
    )

    # Namespace -> CURIE prefix table used by ``_shorten_property``; built once
    # for the class rather than on every call.
    _NAMESPACE_PREFIXES = {
        "http://www.w3.org/2000/01/rdf-schema#": "rdfs:",
        "http://www.w3.org/2004/02/skos/core#": "skos:",
        "http://www.w3.org/2002/07/owl#": "owl:",
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf:",
        "http://purl.org/dc/elements/1.1/": "dc:",
        "http://purl.org/dc/terms/": "dcterms:",
    }

    def __init__(
        self,
        source_language: str = "en",
        properties: list[str] | None = None,
    ):
        """Initialise the reporter.

        Args:
            source_language: Base language code.
            properties: Full URIs of the properties to check. The defaults
                (``rdfs:label`` and ``rdfs:comment``) are used when this is
                ``None`` or empty.
        """
        self.source_language = source_language
        self.properties = properties or list(self._DEFAULT_PROPERTIES)

    def report(
        self,
        graph: Graph,
        languages: list[str],
        source_file: str | Path = "",
    ) -> CoverageReport:
        """Generate coverage report for specified languages.

        The source language is always analysed and included in the result,
        whether or not it appears in ``languages``.

        Args:
            graph: RDF graph to analyse.
            languages: List of language codes to check.
            source_file: Source file path for metadata.

        Returns:
            CoverageReport with detailed statistics.
        """
        entities = self._collect_entities(graph)

        report = CoverageReport(
            source_file=str(source_file),
            source_language=self.source_language,
            total_entities=len(entities),
            properties=[self._shorten_property(p) for p in self.properties],
        )

        # The source language goes first: targets are measured against it.
        source_coverage = self._analyse_language(
            graph, entities, self.source_language, is_source=True
        )
        report.languages[self.source_language] = source_coverage

        for lang in languages:
            # Skips the source language and any code listed more than once;
            # re-analysing a duplicate would only reproduce the same entry.
            if lang in report.languages:
                continue

            report.languages[lang] = self._analyse_language(
                graph,
                entities,
                lang,
                is_source=False,
                source_coverage=source_coverage,
            )

        return report

    def _collect_entities(self, graph: Graph) -> list[URIRef]:
        """Collect all entities from the graph.

        An entity is any URI subject typed as a class, a property or a named
        individual; non-URI subjects are ignored.

        Args:
            graph: RDF graph.

        Returns:
            List of entity URIs, de-duplicated and sorted by their string form.
        """
        entity_types = (
            # Classes
            OWL.Class,
            RDFS.Class,
            # Properties
            OWL.ObjectProperty,
            OWL.DatatypeProperty,
            OWL.AnnotationProperty,
            RDF.Property,
            # Named individuals
            OWL.NamedIndividual,
        )

        entities: set[URIRef] = set()
        for rdf_type in entity_types:
            entities.update(
                s for s in graph.subjects(RDF.type, rdf_type) if isinstance(s, URIRef)
            )

        return sorted(entities, key=str)

    def _analyse_language(
        self,
        graph: Graph,
        entities: list[URIRef],
        language: str,
        is_source: bool = False,
        source_coverage: LanguageCoverage | None = None,
    ) -> LanguageCoverage:
        """Analyse coverage for a single language.

        Args:
            graph: RDF graph.
            entities: List of entity URIs.
            language: Language code to analyse.
            is_source: Whether this is the source language.
            source_coverage: Source language coverage. For a target language
                nothing is tallied when this is ``None``.

        Returns:
            LanguageCoverage statistics.
        """
        coverage = LanguageCoverage(
            language=language,
            is_source=is_source,
        )

        # Resolve every property once up front instead of once per entity.
        resolved = [
            (URIRef(prop_uri_str), self._shorten_property(prop_uri_str))
            for prop_uri_str in self.properties
        ]
        for _, short_prop in resolved:
            coverage.by_property[short_prop] = PropertyCoverage(property=short_prop)

        for entity in entities:
            entity_missing_any = False

            for prop_uri, short_prop in resolved:
                prop_coverage = coverage.by_property[short_prop]
                count = self._count_language_literals(graph, entity, prop_uri, language)

                if is_source:
                    # For the source language every string found is both the
                    # total and the "translated" figure.
                    prop_coverage.total += count
                    prop_coverage.translated += count
                    coverage.total_strings += count
                    coverage.translated += count
                    continue

                if source_coverage is None:
                    continue

                # For target languages, compare against the source strings of
                # the same entity/property pair.
                source_count = self._count_language_literals(
                    graph, entity, prop_uri, self.source_language
                )
                if self._record_target(coverage, prop_coverage, source_count, count):
                    entity_missing_any = True

            # Track missing entities (have source but no target).
            if entity_missing_any:
                coverage.missing_entities.append(str(entity))

        return coverage

    @staticmethod
    def _record_target(
        coverage: LanguageCoverage,
        prop_coverage: PropertyCoverage,
        source_count: int,
        target_count: int,
    ) -> bool:
        """Add one entity/property pair to a target language's tallies.

        Args:
            coverage: Language-level tally to update in place.
            prop_coverage: Property-level tally to update in place.
            source_count: Number of source-language strings for the pair.
            target_count: Number of target-language strings for the pair.

        Returns:
            True when source strings exist but the target has none at all.
        """
        prop_coverage.total += source_count
        coverage.total_strings += source_count

        # Cap at the source count: surplus target literals must not push
        # ``translated`` above ``total``.
        matched = min(source_count, target_count)
        prop_coverage.translated += matched
        coverage.translated += matched

        return source_count > 0 and target_count == 0

    def _count_language_literals(
        self,
        graph: Graph,
        subject: URIRef,
        predicate: URIRef,
        language: str,
    ) -> int:
        """Count literals with a specific language tag.

        Tags are matched case-insensitively. Literals without a language tag
        are counted as belonging to the source language.

        Args:
            graph: RDF graph.
            subject: Subject URI.
            predicate: Predicate URI.
            language: Language code.

        Returns:
            Count of matching literals.
        """
        wanted = language.lower()
        # Untagged literals are treated as source-language strings.
        untagged_counts = wanted == self.source_language.lower()

        count = 0
        for obj in graph.objects(subject, predicate):
            if not isinstance(obj, Literal):
                continue
            tag = obj.language
            if tag is None:
                if untagged_counts:
                    count += 1
            elif tag.lower() == wanted:
                count += 1

        return count

    def _shorten_property(self, prop: str) -> str:
        """Shorten a full URI to CURIE if possible.

        Args:
            prop: Full property URI.

        Returns:
            CURIE when the URI starts with a known namespace, otherwise the
            original URI unchanged.
        """
        for namespace, prefix in self._NAMESPACE_PREFIXES.items():
            if prop.startswith(namespace):
                return prefix + prop[len(namespace):]

        return prop
325
+
326
+
327
def generate_coverage_report(
    source: Path,
    languages: list[str],
    source_language: str = "en",
    properties: list[str] | None = None,
) -> CoverageReport:
    """Build a translation coverage report straight from an ontology file.

    One-call shortcut: parses the file into a fresh graph, then hands it to a
    ``CoverageReporter`` configured with the given options.

    Args:
        source: Ontology file to parse and analyse.
        languages: Language codes whose coverage should be reported.
        source_language: Base language code.
        properties: Properties to analyse; passed through to the reporter.

    Returns:
        CoverageReport with detailed statistics.
    """
    ontology = Graph()
    ontology.parse(source)

    return CoverageReporter(
        source_language=source_language,
        properties=properties,
    ).report(ontology, languages, source)
rdf_construct/main.py ADDED
@@ -0,0 +1,6 @@
1
+ """Entry point for python -m rdf_construct."""
2
+
3
+ from .cli import cli
4
+
5
# Invoke the CLI only when this file is executed as a script, not on import.
# NOTE(review): `python -m rdf_construct` executes the package's `__main__.py`,
# while this module is named `main.py` -- confirm `__main__.py` delegates here.
if __name__ == "__main__":
    cli()
@@ -0,0 +1,165 @@
1
+ """Ontology merge and modularisation tools.
2
+
3
+ This module provides tools for combining and splitting RDF ontology files
4
+ with intelligent conflict detection, namespace management, and optional
5
+ data migration support.
6
+
7
+ Usage:
8
+ # Merge multiple ontologies
9
+ from rdf_construct.merge import merge_files, OntologyMerger
10
+
11
+ result = merge_files(
12
+ sources=[Path("core.ttl"), Path("ext.ttl")],
13
+ output=Path("merged.ttl"),
14
+ )
15
+
16
+ # With configuration
17
+ from rdf_construct.merge import MergeConfig, load_merge_config
18
+
19
+ config = load_merge_config(Path("merge.yml"))
20
+ merger = OntologyMerger(config)
21
+ result = merger.merge()
22
+
23
+ # Split a monolithic ontology
24
+ from rdf_construct.merge import OntologySplitter, SplitConfig
25
+
26
+ config = SplitConfig.from_yaml(Path("split.yml"))
27
+ splitter = OntologySplitter(config)
28
+ result = splitter.split()
29
+ splitter.write_modules(result)
30
+
31
+ # Split by namespace (auto-detect)
32
+ from rdf_construct.merge import split_by_namespace
33
+
34
+ result = split_by_namespace(Path("large.ttl"), Path("modules/"))
35
+
36
+ CLI:
37
+ # Basic merge
38
+ rdf-construct merge core.ttl ext.ttl -o merged.ttl
39
+
40
+ # With conflict report
41
+ rdf-construct merge core.ttl ext.ttl -o merged.ttl --report conflicts.md
42
+
43
+ # Split by namespace
44
+ rdf-construct split large.ttl -o modules/ --by-namespace
45
+
46
+ # Split with config
47
+ rdf-construct split large.ttl -o modules/ -c split.yml
48
+ """
49
+
50
+ from rdf_construct.merge.config import (
51
+ MergeConfig,
52
+ SourceConfig,
53
+ NamespaceConfig,
54
+ ConflictConfig,
55
+ OutputConfig,
56
+ DataMigrationConfig,
57
+ MigrationRule,
58
+ ConflictStrategy,
59
+ ImportsStrategy,
60
+ load_merge_config,
61
+ create_default_config,
62
+ )
63
+
64
+ from rdf_construct.merge.conflicts import (
65
+ Conflict,
66
+ ConflictType,
67
+ ConflictValue,
68
+ ConflictDetector,
69
+ SourceGraph,
70
+ generate_conflict_marker,
71
+ generate_conflict_end_marker,
72
+ filter_semantic_conflicts,
73
+ )
74
+
75
+ from rdf_construct.merge.merger import (
76
+ OntologyMerger,
77
+ MergeResult,
78
+ merge_files,
79
+ )
80
+
81
+ from rdf_construct.merge.migrator import (
82
+ DataMigrator,
83
+ MigrationResult,
84
+ MigrationStats,
85
+ migrate_data_files,
86
+ )
87
+
88
+ from rdf_construct.merge.rules import (
89
+ RuleEngine,
90
+ PatternParser,
91
+ Match,
92
+ Binding,
93
+ )
94
+
95
+ from rdf_construct.merge.formatters import (
96
+ TextFormatter,
97
+ MarkdownFormatter,
98
+ get_formatter,
99
+ FORMATTERS,
100
+ )
101
+
102
+ from rdf_construct.merge.splitter import (
103
+ OntologySplitter,
104
+ SplitConfig,
105
+ SplitResult,
106
+ ModuleDefinition,
107
+ UnmatchedStrategy,
108
+ SplitDataConfig,
109
+ ModuleStats,
110
+ split_by_namespace,
111
+ create_default_split_config,
112
+ )
113
+
114
# Public API of the package: every name imported above is re-exported here,
# grouped by the submodule it is imported from.
__all__ = [
    # Configuration (rdf_construct.merge.config)
    "MergeConfig",
    "SourceConfig",
    "NamespaceConfig",
    "ConflictConfig",
    "OutputConfig",
    "DataMigrationConfig",
    "MigrationRule",
    "ConflictStrategy",
    "ImportsStrategy",
    "load_merge_config",
    "create_default_config",
    # Conflicts (rdf_construct.merge.conflicts)
    "Conflict",
    "ConflictType",
    "ConflictValue",
    "ConflictDetector",
    "SourceGraph",
    "generate_conflict_marker",
    "generate_conflict_end_marker",
    "filter_semantic_conflicts",
    # Merger (rdf_construct.merge.merger)
    "OntologyMerger",
    "MergeResult",
    "merge_files",
    # Migrator (rdf_construct.merge.migrator)
    "DataMigrator",
    "MigrationResult",
    "MigrationStats",
    "migrate_data_files",
    # Rules (rdf_construct.merge.rules)
    "RuleEngine",
    "PatternParser",
    "Match",
    "Binding",
    # Formatters (rdf_construct.merge.formatters)
    "TextFormatter",
    "MarkdownFormatter",
    "get_formatter",
    "FORMATTERS",
    # Splitter (rdf_construct.merge.splitter)
    "OntologySplitter",
    "SplitConfig",
    "SplitResult",
    "ModuleDefinition",
    "UnmatchedStrategy",
    "SplitDataConfig",
    "ModuleStats",
    "split_by_namespace",
    "create_default_split_config",
]