rdf-construct 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. rdf_construct/__init__.py +1 -1
  2. rdf_construct/cli.py +1794 -0
  3. rdf_construct/describe/__init__.py +93 -0
  4. rdf_construct/describe/analyzer.py +176 -0
  5. rdf_construct/describe/documentation.py +146 -0
  6. rdf_construct/describe/formatters/__init__.py +47 -0
  7. rdf_construct/describe/formatters/json.py +65 -0
  8. rdf_construct/describe/formatters/markdown.py +275 -0
  9. rdf_construct/describe/formatters/text.py +315 -0
  10. rdf_construct/describe/hierarchy.py +232 -0
  11. rdf_construct/describe/imports.py +213 -0
  12. rdf_construct/describe/metadata.py +187 -0
  13. rdf_construct/describe/metrics.py +145 -0
  14. rdf_construct/describe/models.py +552 -0
  15. rdf_construct/describe/namespaces.py +180 -0
  16. rdf_construct/describe/profiles.py +415 -0
  17. rdf_construct/localise/__init__.py +114 -0
  18. rdf_construct/localise/config.py +508 -0
  19. rdf_construct/localise/extractor.py +427 -0
  20. rdf_construct/localise/formatters/__init__.py +36 -0
  21. rdf_construct/localise/formatters/markdown.py +229 -0
  22. rdf_construct/localise/formatters/text.py +224 -0
  23. rdf_construct/localise/merger.py +346 -0
  24. rdf_construct/localise/reporter.py +356 -0
  25. rdf_construct/merge/__init__.py +165 -0
  26. rdf_construct/merge/config.py +354 -0
  27. rdf_construct/merge/conflicts.py +281 -0
  28. rdf_construct/merge/formatters.py +426 -0
  29. rdf_construct/merge/merger.py +425 -0
  30. rdf_construct/merge/migrator.py +339 -0
  31. rdf_construct/merge/rules.py +377 -0
  32. rdf_construct/merge/splitter.py +1102 -0
  33. rdf_construct/refactor/__init__.py +72 -0
  34. rdf_construct/refactor/config.py +362 -0
  35. rdf_construct/refactor/deprecator.py +328 -0
  36. rdf_construct/refactor/formatters/__init__.py +8 -0
  37. rdf_construct/refactor/formatters/text.py +311 -0
  38. rdf_construct/refactor/renamer.py +294 -0
  39. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/METADATA +91 -6
  40. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/RECORD +43 -7
  41. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/WHEEL +0 -0
  42. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/entry_points.txt +0 -0
  43. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,508 @@
1
+ """Configuration dataclasses for the localise command.
2
+
3
+ Defines configuration structures for:
4
+ - Translation entries and files
5
+ - Extraction settings
6
+ - Merge settings
7
+ - Coverage reporting
8
+ """
9
+
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime
12
+ from enum import Enum
13
+ from pathlib import Path
14
+ from typing import Any, Literal
15
+
16
+ import yaml
17
+ from rdflib import URIRef
18
+
19
+
20
class TranslationStatus(str, Enum):
    """Workflow state of a single translation entry.

    The ``str`` mixin means every member compares equal to its raw string
    value (e.g. ``TranslationStatus.PENDING == "pending"``).
    """

    PENDING = "pending"
    TRANSLATED = "translated"
    NEEDS_REVIEW = "needs_review"
    APPROVED = "approved"

    def __str__(self) -> str:
        """Return the raw status value instead of the qualified member name."""
        raw_value: str = self.value
        return raw_value
30
+
31
+
32
@dataclass
class TranslationEntry:
    """A single translation entry for a property.

    Attributes:
        property: URI of the property (e.g., rdfs:label).
        source_text: Original text in source language.
        translation: Translated text (empty if pending).
        status: Translation status.
        notes: Optional notes for translators.
    """

    property: str
    source_text: str
    translation: str = ""
    status: TranslationStatus = TranslationStatus.PENDING
    notes: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for YAML serialisation.

        Returns:
            Mapping with ``property``, ``source``, ``translation`` and
            ``status`` keys; ``notes`` is only included when non-empty.
        """
        result: dict[str, Any] = {
            "property": self.property,
            "source": self.source_text,
            "translation": self.translation,
            "status": str(self.status),
        }
        if self.notes:
            result["notes"] = self.notes
        return result

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TranslationEntry":
        """Create from dictionary.

        Args:
            data: Mapping as produced by :meth:`to_dict`. ``property`` and
                ``source`` are required; the other keys are optional.

        Returns:
            TranslationEntry instance.

        Raises:
            KeyError: If ``property`` or ``source`` is missing.
            ValueError: If ``status`` is not a known status value.
        """
        # A key left blank in hand-edited YAML (``translation:``) loads as
        # None rather than being absent, so the ``.get`` default alone is not
        # enough; normalise None back to the documented defaults.
        translation = data.get("translation")
        if translation is None:
            translation = ""

        status = data.get("status")
        if status is None:
            status = "pending"

        return cls(
            property=data["property"],
            source_text=data["source"],
            translation=translation,
            status=TranslationStatus(status),
            notes=data.get("notes"),
        )
72
+
73
+
74
@dataclass
class EntityTranslations:
    """Translation entries for a single entity.

    Attributes:
        uri: URI of the entity.
        entity_type: Type of entity (owl:Class, owl:ObjectProperty, etc.).
        labels: List of translation entries for this entity.
    """

    uri: str
    entity_type: str
    labels: list[TranslationEntry] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for YAML serialisation.

        Returns:
            Mapping with ``uri``, ``type`` and ``labels`` keys, where
            ``labels`` holds the serialised form of each entry.
        """
        return {
            "uri": self.uri,
            "type": self.entity_type,
            "labels": [entry.to_dict() for entry in self.labels],
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "EntityTranslations":
        """Create from dictionary.

        Args:
            data: Mapping as produced by :meth:`to_dict`. ``uri`` is
                required; ``type`` defaults to ``"unknown"``.

        Returns:
            EntityTranslations instance.

        Raises:
            KeyError: If ``uri`` is missing.
        """
        # ``labels:`` with no items loads from YAML as None, not as a missing
        # key; treat that the same as an empty list instead of crashing.
        raw_labels = data.get("labels") or []
        return cls(
            uri=data["uri"],
            entity_type=data.get("type", "unknown"),
            labels=[TranslationEntry.from_dict(entry) for entry in raw_labels],
        )
104
+
105
+
106
@dataclass
class TranslationFileMetadata:
    """Metadata for a translation file.

    Attributes:
        source_file: Original ontology file path.
        source_language: Source language code (e.g., "en").
        target_language: Target language code (e.g., "de").
        generated: Timestamp when file was generated.
        properties: List of properties extracted.
        tool_version: Version of rdf-construct that generated this file.
    """

    source_file: str
    source_language: str
    target_language: str
    generated: datetime
    properties: list[str] = field(default_factory=list)
    tool_version: str = "rdf-construct"

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for YAML serialisation.

        Returns:
            Mapping of the metadata fields; ``generated`` is written as an
            ISO 8601 string and ``tool_version`` under the ``tool`` key.
        """
        return {
            "source_file": self.source_file,
            "source_language": self.source_language,
            "target_language": self.target_language,
            "generated": self.generated.isoformat(),
            "tool": self.tool_version,
            "properties": self.properties,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TranslationFileMetadata":
        """Create from dictionary.

        Args:
            data: Mapping as produced by :meth:`to_dict`. All keys are
                optional.

        Returns:
            TranslationFileMetadata instance. A missing ``generated`` value
            falls back to the current local time.

        Raises:
            ValueError: If ``generated`` is a string that is not a valid
                ISO 8601 timestamp.
        """
        from datetime import date

        generated = data.get("generated")
        if isinstance(generated, str):
            generated = datetime.fromisoformat(generated)
        elif generated is None:
            generated = datetime.now()
        elif isinstance(generated, date) and not isinstance(generated, datetime):
            # An unquoted date-only YAML scalar (``generated: 2024-01-15``)
            # is loaded as ``datetime.date``; promote it to midnight so the
            # attribute always holds the declared ``datetime`` type.
            generated = datetime.combine(generated, datetime.min.time())

        # ``properties:`` left empty in YAML loads as None; keep it a list.
        properties = data.get("properties")
        if properties is None:
            properties = []

        return cls(
            source_file=data.get("source_file", ""),
            source_language=data.get("source_language", "en"),
            target_language=data.get("target_language", ""),
            generated=generated,
            properties=properties,
            tool_version=data.get("tool", "rdf-construct"),
        )
154
+
155
+
156
@dataclass
class TranslationSummary:
    """Aggregate counts describing the state of a translation file.

    Attributes:
        total_entities: Total number of entities.
        total_strings: Total number of translatable strings.
        by_status: Count of strings by status.
    """

    total_entities: int = 0
    total_strings: int = 0
    by_status: dict[str, int] = field(default_factory=dict)

    @property
    def translated(self) -> int:
        """Number of strings whose status is ``translated`` or ``approved``."""
        counts = self.by_status
        done = counts.get("translated", 0)
        signed_off = counts.get("approved", 0)
        return done + signed_off

    @property
    def coverage(self) -> float:
        """Share of translated strings as a percentage (0.0 when empty)."""
        total = self.total_strings
        # Guard the division: a file without strings has no coverage.
        return (self.translated / total) * 100 if total != 0 else 0.0

    def to_dict(self) -> dict[str, Any]:
        """Build the plain-dict form used for YAML serialisation.

        Returns:
            Mapping of the counts plus ``coverage`` rendered as a
            one-decimal percentage string.
        """
        payload: dict[str, Any] = {}
        payload["total_entities"] = self.total_entities
        payload["total_strings"] = self.total_strings
        payload["by_status"] = self.by_status
        payload["coverage"] = "{:.1f}%".format(self.coverage)
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TranslationSummary":
        """Rebuild a summary from its dictionary form.

        Args:
            data: Mapping with optional ``total_entities``,
                ``total_strings`` and ``by_status`` keys.

        Returns:
            TranslationSummary instance; missing keys use zero/empty values.
        """
        entity_count = data.get("total_entities", 0)
        string_count = data.get("total_strings", 0)
        status_counts = data.get("by_status", {})
        return cls(
            total_entities=entity_count,
            total_strings=string_count,
            by_status=status_counts,
        )
199
+
200
+
201
@dataclass
class TranslationFile:
    """In-memory representation of one translation YAML file.

    Bundles the file metadata, the per-entity translation entries and the
    summary statistics derived from them.

    Attributes:
        metadata: File metadata.
        entities: List of entity translations.
        summary: Summary statistics.
    """

    metadata: TranslationFileMetadata
    entities: list[EntityTranslations] = field(default_factory=list)
    summary: TranslationSummary | None = None

    def calculate_summary(self) -> TranslationSummary:
        """Count entities, strings and per-status totals from ``entities``.

        Returns:
            A freshly computed TranslationSummary; ``self.summary`` is not
            modified by this method.
        """
        # Flatten every entry's status into one list, in entity order.
        statuses = [
            str(entry.status)
            for entity in self.entities
            for entry in entity.labels
        ]

        tally: dict[str, int] = {}
        for status in statuses:
            tally[status] = tally.get(status, 0) + 1

        return TranslationSummary(
            total_entities=len(self.entities),
            total_strings=len(statuses),
            by_status=tally,
        )

    def to_yaml(self) -> str:
        """Render the file as a YAML document with a comment header.

        The summary is recalculated and stored on ``self.summary`` before
        the document is built.

        Returns:
            YAML text: an instruction header followed by the ``metadata``,
            ``entities`` and ``summary`` sections.
        """
        # Refresh the summary so the dumped statistics match the entities.
        self.summary = self.calculate_summary()
        meta = self.metadata

        header = f"""# =============================================================================
# Translation File
# =============================================================================
# Source: {meta.source_file}
# Source language: {meta.source_language}
# Target language: {meta.target_language}
# Generated: {meta.generated.isoformat()}
#
# Instructions:
# 1. Fill in the 'translation' field for each entry
# 2. Set 'status' to 'translated' when complete
# 3. Use 'needs_review' if uncertain about translation
# 4. Leave 'status' as 'pending' for incomplete entries
# =============================================================================

"""

        document = {
            "metadata": meta.to_dict(),
            "entities": [entity.to_dict() for entity in self.entities],
            "summary": self.summary.to_dict(),
        }
        body = yaml.dump(
            document,
            default_flow_style=False,
            allow_unicode=True,
            sort_keys=False,
        )
        return header + body

    @classmethod
    def from_yaml(cls, path: Path) -> "TranslationFile":
        """Load a translation file from disk.

        Args:
            path: Path to YAML file.

        Returns:
            TranslationFile instance.

        Raises:
            FileNotFoundError: If file doesn't exist.
            ValueError: If file format is invalid.
        """
        if not path.exists():
            raise FileNotFoundError(f"Translation file not found: {path}")

        with open(path, encoding="utf-8") as handle:
            raw = yaml.safe_load(handle)

        # The top level must be a mapping; anything else is not our format.
        if not isinstance(raw, dict):
            raise ValueError(f"Invalid translation file format: {path}")

        file_metadata = TranslationFileMetadata.from_dict(raw.get("metadata", {}))
        loaded_entities = list(map(EntityTranslations.from_dict, raw.get("entities", [])))
        stored_summary = (
            TranslationSummary.from_dict(raw["summary"]) if "summary" in raw else None
        )

        return cls(
            metadata=file_metadata,
            entities=loaded_entities,
            summary=stored_summary,
        )

    def save(self, path: Path) -> None:
        """Write the YAML rendering to ``path``, creating parent directories.

        Args:
            path: Output file path.
        """
        target_dir = path.parent
        target_dir.mkdir(parents=True, exist_ok=True)
        rendered = self.to_yaml()
        path.write_text(rendered, encoding="utf-8")
305
+
306
+
307
class ExistingStrategy(str, Enum):
    """How to handle existing translations in ontology.

    The ``str`` mixin means members compare equal to their raw string
    values (``"preserve"`` / ``"overwrite"``).
    """

    PRESERVE = "preserve"
    OVERWRITE = "overwrite"

    def __str__(self) -> str:
        """Return the raw value, matching ``TranslationStatus.__str__``.

        Without this override ``str()`` yields the qualified member name,
        and the result of ``format()``/f-strings for mixed-in enums differs
        between Python versions; defining it keeps the output stable.
        """
        return self.value
312
+
313
+
314
@dataclass
class ExtractConfig:
    """Settings that control a single extraction run.

    Attributes:
        source_language: Source language code (default: "en").
        target_language: Target language code.
        properties: List of properties to extract.
        include_deprecated: Whether to include deprecated entities.
        include_unlabelled: Whether to include entities without source labels.
        missing_only: Only extract strings missing in target language.
    """

    source_language: str = "en"
    target_language: str = ""
    # Defaults to rdfs:label and rdfs:comment; a new list is built for each
    # instance so instances never share the default.
    properties: list[str] = field(
        default_factory=lambda: [
            "http://www.w3.org/2000/01/rdf-schema#" + local_name
            for local_name in ("label", "comment")
        ]
    )
    include_deprecated: bool = False
    include_unlabelled: bool = False
    missing_only: bool = False
338
+
339
+
340
@dataclass
class MergeConfig:
    """Settings that control a merge run.

    Attributes:
        min_status: Minimum status required to include translation.
        existing: How to handle existing translations.
    """

    # Defaults: require at least "translated" and preserve existing values.
    min_status: TranslationStatus = field(default=TranslationStatus.TRANSLATED)
    existing: ExistingStrategy = field(default=ExistingStrategy.PRESERVE)
351
+
352
+
353
@dataclass
class LocaliseConfig:
    """Complete configuration for localise operations.

    Attributes:
        properties: Properties to extract/check.
        source_language: Base language for translations.
        target_languages: List of target language codes.
        extract: Extraction settings.
        merge: Merge settings.
        output_dir: Output directory for translation files.
        output_naming: Naming pattern for output files.
    """

    properties: list[str] = field(
        default_factory=lambda: [
            "http://www.w3.org/2000/01/rdf-schema#label",
            "http://www.w3.org/2000/01/rdf-schema#comment",
            "http://www.w3.org/2004/02/skos/core#prefLabel",
            "http://www.w3.org/2004/02/skos/core#definition",
        ]
    )
    source_language: str = "en"
    target_languages: list[str] = field(default_factory=list)
    extract: ExtractConfig = field(default_factory=ExtractConfig)
    merge: MergeConfig = field(default_factory=MergeConfig)
    output_dir: Path = field(default_factory=lambda: Path("translations"))
    output_naming: str = "{language}.yml"

    @classmethod
    def from_yaml(cls, path: Path) -> "LocaliseConfig":
        """Load from YAML configuration file.

        The settings may sit under a top-level ``localise`` key or directly
        at the top level of the document.

        Args:
            path: Path to configuration file.

        Returns:
            LocaliseConfig instance.

        Raises:
            FileNotFoundError: If the file doesn't exist.
            ValueError: If the document (or its ``localise`` section) is
                not a mapping, e.g. the file is empty.
        """
        if not path.exists():
            raise FileNotFoundError(f"Config file not found: {path}")

        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)

        # An empty file loads as None and a bare scalar/list is not a config;
        # fail with a clear error, as TranslationFile.from_yaml does, rather
        # than an AttributeError from ``.get``.
        if not isinstance(data, dict):
            raise ValueError(f"Invalid config file format: {path}")

        section = data.get("localise", data)
        if not isinstance(section, dict):
            raise ValueError(f"Invalid 'localise' section in config file: {path}")

        return cls.from_dict(section)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "LocaliseConfig":
        """Create from dictionary.

        Args:
            data: Configuration dictionary.

        Returns:
            LocaliseConfig instance.

        Raises:
            ValueError: If ``merge.min_status`` or ``merge.existing`` is
                not a recognised value.
        """
        # A section header whose children are all commented out (e.g. a bare
        # ``extract:``) loads as None; ``or {}`` treats it as an empty section.

        # Parse languages section
        languages = data.get("languages") or {}
        source_language = languages.get("source", data.get("source_language", "en"))
        target_languages = languages.get("targets", data.get("target_languages", []))

        # Parse extract settings
        extract_data = data.get("extract") or {}
        extract = ExtractConfig(
            source_language=source_language,
            target_language=extract_data.get("target_language", ""),
            properties=data.get("properties", ExtractConfig().properties),
            include_deprecated=extract_data.get("include_deprecated", False),
            include_unlabelled=extract_data.get("include_unlabelled", False),
            missing_only=extract_data.get("missing_only", False),
        )

        # Parse merge settings
        merge_data = data.get("merge") or {}
        min_status_str = merge_data.get("min_status", "translated")
        existing_str = merge_data.get("existing", "preserve")
        merge = MergeConfig(
            min_status=TranslationStatus(min_status_str),
            existing=ExistingStrategy(existing_str),
        )

        # Parse output settings
        output_data = data.get("output") or {}
        output_dir = Path(output_data.get("directory", "translations"))
        output_naming = output_data.get("naming", "{language}.yml")

        return cls(
            properties=data.get("properties", cls().properties),
            source_language=source_language,
            target_languages=target_languages,
            extract=extract,
            merge=merge,
            output_dir=output_dir,
            output_naming=output_naming,
        )
449
+
450
+
451
def create_default_config() -> str:
    """Return the starter localise configuration as YAML text.

    Returns:
        YAML configuration template.
    """
    template = '''# rdf-construct localise configuration
# See LOCALISE_GUIDE.md for full documentation

localise:
  # Properties to extract/check (in display order)
  properties:
    - rdfs:label
    - skos:prefLabel
    - rdfs:comment
    - skos:definition
    - skos:altLabel
    - skos:example

  # Language configuration
  languages:
    source: en # Base language for translations
    targets:
      - de
      - fr
      - es

  # Output settings
  output:
    directory: translations/
    naming: "{language}.yml" # e.g., de.yml, fr.yml

  # Extraction options
  extract:
    # Include entities without source language labels?
    include_unlabelled: false
    # Include deprecated entities?
    include_deprecated: false

  # Merge options
  merge:
    # What to do with existing translations?
    existing: preserve # preserve | overwrite
    # Minimum status to include in merge
    min_status: translated # pending | translated | needs_review | approved
'''
    return template
497
+
498
+
499
def load_localise_config(path: Path) -> LocaliseConfig:
    """Read a localise configuration from a YAML file.

    Convenience wrapper that delegates to ``LocaliseConfig.from_yaml``.

    Args:
        path: Path to configuration file.

    Returns:
        LocaliseConfig instance.
    """
    config = LocaliseConfig.from_yaml(path)
    return config