lumen-resources 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,270 @@
1
+ """
2
+ Configuration validator for Lumen services.
3
+
4
+ Provides validation utilities for YAML configuration files against
5
+ the Lumen configuration schema.
6
+ """
7
+
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import yaml
12
+ from jsonschema import Draft7Validator
13
+ from pydantic import ValidationError
14
+
15
+ from .lumen_config import LumenConfig
16
+ from .exceptions import ConfigError
17
+
18
+
19
class ConfigValidator:
    """Validator for Lumen configuration files.

    Validates YAML configuration files using either JSON Schema
    (flexible) or Pydantic models (strict, with custom validators).

    Attributes:
        schema: Loaded JSON Schema used for validation.
        validator: Draft7Validator instance built from ``schema``.

    Example:
        >>> validator = ConfigValidator()
        >>> is_valid, errors = validator.validate_file("config.yaml")
        >>> if not is_valid:
        ...     for error in errors:
        ...         print(f"Validation error: {error}")
    """

    def __init__(self, schema_path: Path | None = None):
        """Initialize validator with optional custom schema.

        Args:
            schema_path: Optional path to a JSON Schema file. If None, uses
                the bundled ``schemas/config-schema.yaml`` next to this module.

        Raises:
            FileNotFoundError: If the schema file is not found.
            yaml.YAMLError: If the schema file is invalid YAML.

        Example:
            >>> validator = ConfigValidator()  # Uses bundled schema
            >>> validator = ConfigValidator(Path("custom-schema.yaml"))
        """
        if schema_path is None:
            # Fall back to the schema shipped alongside this module.
            schema_path = Path(__file__).parent / "schemas" / "config-schema.yaml"

        if not schema_path.exists():
            raise FileNotFoundError(f"Schema file not found: {schema_path}")

        with open(schema_path, "r", encoding="utf-8") as f:
            self.schema = yaml.safe_load(f)

        self.validator = Draft7Validator(self.schema)

    def _load_yaml(self, config_path: Path) -> tuple[Any, list[str]]:
        """Parse a YAML file, returning ``(data, errors)``.

        On success ``errors`` is empty; on failure ``data`` is None and
        ``errors`` holds a single human-readable message. Centralizing the
        load avoids parsing the same file twice in ``validate_and_load``.
        """
        if not config_path.exists():
            return None, [f"Configuration file not found: {config_path}"]

        try:
            with open(config_path, "r", encoding="utf-8") as f:
                return yaml.safe_load(f), []
        except yaml.YAMLError as e:
            return None, [f"Invalid YAML syntax: {e}"]
        except Exception as e:
            return None, [f"Failed to load file: {e}"]

    def validate_file(
        self, config_path: Path | str, strict: bool = True
    ) -> tuple[bool, list[str]]:
        """Validate a configuration file against the schema.

        Args:
            config_path: Path to the configuration YAML file.
            strict: If True, use Pydantic validation with custom validators.
                If False, use JSON Schema validation only.

        Returns:
            Tuple of ``(is_valid, error_messages)``. ``is_valid`` is True if
            the configuration passes validation; ``error_messages`` contains
            detailed validation error messages otherwise.

        Example:
            >>> validator = ConfigValidator()
            >>> is_valid, errors = validator.validate_file("config.yaml", strict=True)
            >>> if not is_valid:
            ...     for error in errors:
            ...         print(f"Error: {error}")
        """
        config_data, load_errors = self._load_yaml(Path(config_path))
        if load_errors:
            return False, load_errors

        if strict:
            # Pydantic validation is stricter and runs custom validators.
            return self._validate_with_pydantic(config_data)
        return self._validate_with_jsonschema(config_data)

    def _validate_with_jsonschema(
        self, config_data: dict[str, Any]
    ) -> tuple[bool, list[str]]:
        """Validate configuration data using JSON Schema.

        Less strict than Pydantic validation but provides good basic
        structural validation.

        Args:
            config_data: Parsed configuration data dictionary.

        Returns:
            Tuple of ``(is_valid, error_messages)``.
        """
        errors = sorted(
            self.validator.iter_errors(config_data),
            # Stringify path elements before sorting: raw error paths mix
            # str keys and int array indices, which are not mutually
            # orderable and would raise TypeError under direct comparison.
            key=lambda err: [str(part) for part in err.path],
        )

        if not errors:
            return True, []

        error_messages = []
        for error in errors:
            path = ".".join(str(p) for p in error.path) if error.path else "root"
            error_messages.append(f"{error.message} (at: {path})")

        return False, error_messages

    def _validate_with_pydantic(
        self, config_data: dict[str, Any]
    ) -> tuple[bool, list[str]]:
        """Validate configuration data using Pydantic models.

        The most comprehensive validation: type checking, pattern matching,
        and business-logic validators defined on LumenConfig.

        Args:
            config_data: Parsed configuration data dictionary.

        Returns:
            Tuple of ``(is_valid, error_messages)``.
        """
        try:
            LumenConfig(**config_data)
            return True, []
        except ValidationError as e:
            # Flatten pydantic's structured errors into readable strings.
            error_messages = []
            for error in e.errors():
                loc = ".".join(str(loc_part) for loc_part in error["loc"])
                msg = error["msg"]
                error_messages.append(f"{msg} (at: {loc})")
            return False, error_messages
        except Exception as e:
            return False, [f"Validation error: {e}"]

    def validate_and_load(self, config_path: Path | str) -> LumenConfig:
        """Validate and load a configuration file.

        Performs strict Pydantic validation and returns a validated
        LumenConfig instance. Recommended entry point for production code.
        The file is parsed exactly once (the previous implementation read
        and parsed it a second time after validating).

        Args:
            config_path: Path to the configuration YAML file.

        Returns:
            Validated LumenConfig instance.

        Raises:
            ConfigError: If validation fails or the file cannot be loaded.

        Example:
            >>> validator = ConfigValidator()
            >>> config = validator.validate_and_load("config.yaml")
            >>> print(config.metadata.version)
            '1.0.0'
        """
        config_data, errors = self._load_yaml(Path(config_path))
        if not errors:
            is_valid, errors = self._validate_with_pydantic(config_data)
        else:
            is_valid = False

        if not is_valid:
            error_msg = "Configuration validation failed:\n" + "\n".join(
                f"  - {err}" for err in errors
            )
            raise ConfigError(error_msg)

        return LumenConfig(**config_data)
214
+
215
+
216
def validate_config_file(
    config_path: Path | str, schema_path: Path | str | None = None
) -> tuple[bool, list[str]]:
    """Validate a configuration file in a single call.

    Convenience wrapper around ConfigValidator using strict (Pydantic)
    validation, with either the default bundled schema or a custom one.

    Args:
        config_path: Path to the configuration YAML file.
        schema_path: Optional path to a custom JSON Schema file.

    Returns:
        Tuple of (is_valid, error_messages): is_valid is True when the
        configuration passes validation; otherwise error_messages lists
        the detailed validation errors.

    Example:
        >>> is_valid, errors = validate_config_file("config.yaml")
        >>> if not is_valid:
        ...     for error in errors:
        ...         print(f"Error: {error}")
    """
    resolved_schema: Path | None = None
    if schema_path:
        resolved_schema = Path(schema_path)
    return ConfigValidator(resolved_schema).validate_file(config_path, strict=True)
242
+
243
+
244
def load_and_validate_config(config_path: Path | str) -> LumenConfig:
    """Load and validate a configuration file.

    The recommended way to load configuration in production: validation
    and loading happen in one operation, so only configurations that pass
    strict validation are ever returned.

    Args:
        config_path: Path to the configuration YAML file.

    Returns:
        Validated LumenConfig instance, fully typed and checked against
        the bundled schema.

    Raises:
        ConfigError: If validation fails or the file cannot be loaded.
        FileNotFoundError: If the configuration file does not exist.
        yaml.YAMLError: If the configuration file contains invalid YAML.

    Example:
        >>> from lumen_resources.lumen_config_validator import load_and_validate_config
        >>> config = load_and_validate_config("config.yaml")
        >>> print(config.metadata.cache_dir)
        '/models'
    """
    default_validator = ConfigValidator()
    return default_validator.validate_and_load(config_path)
@@ -0,0 +1,233 @@
1
+ # generated by datamodel-codegen:
2
+ # filename: model_info-schema.json
3
+ # timestamp: 2025-10-19T06:58:43+00:00
4
+
5
+ from __future__ import annotations
6
+
7
+ from datetime import date
8
+ from enum import Enum
9
+
10
+ from pydantic import AwareDatetime, BaseModel, ConfigDict, Field
11
+
12
+
13
class Format(Enum):
    """Supported model source platforms.

    Each member identifies the platform a model is fetched from; the
    platform determines the loading mechanism and repository layout.

    Attributes:
        huggingface: Hugging Face Hub model format.
        openclip: OpenCLIP model format.
        modelscope: ModelScope model format.
        custom: Custom model format.

    Example:
        >>> source = Source(format=Format.huggingface, repo_id="openai/clip-vit-base-patch32")
        >>> print(source.format.value)
        'huggingface'
    """

    huggingface = "huggingface"
    openclip = "openclip"
    modelscope = "modelscope"
    custom = "custom"
35
+
36
+
37
class Source(BaseModel):
    """Where and how to obtain a model.

    Pairs the source platform/format with the repository identifier
    needed to fetch the model. Unknown keys are rejected.

    Attributes:
        format: Source platform (huggingface, openclip, modelscope, custom).
        repo_id: Repository identifier on that platform (non-empty).

    Example:
        >>> source = Source(
        ...     format=Format.huggingface,
        ...     repo_id="openai/clip-vit-base-patch32"
        ... )
        >>> print(source.repo_id)
        'openai/clip-vit-base-patch32'
    """

    model_config = ConfigDict(extra="forbid")

    format: Format
    repo_id: str = Field(
        ..., description="Repository identifier for model source", min_length=1
    )
63
+
64
+
65
class Requirements(BaseModel):
    """Python environment requirements for a model runtime.

    Captures the Python version constraint and package dependencies a
    runtime needs. Both fields are optional.

    Attributes:
        python: Python version requirement string (e.g. ">=3.8").
        dependencies: Required Python package names.

    Example:
        >>> req = Requirements(
        ...     python=">=3.8",
        ...     dependencies=["torch", "transformers", "pillow"]
        ... )
        >>> print(req.python)
        '>=3.8'
    """

    # NOTE(review): unlike the sibling models, no extra="forbid" is set here,
    # so unknown keys are accepted — presumably intentional in the generating
    # schema (model_info-schema.json); confirm before tightening.

    python: str | None = None
    dependencies: list[str] | None = None
86
+
87
+
88
class Runtimes(BaseModel):
    """Configuration of one model execution environment (e.g. torch, onnx, rknn).

    Describes whether the runtime is usable, which files it needs, which
    devices it supports, and its Python environment requirements. Unknown
    keys are rejected.

    Attributes:
        available: Whether this runtime is available for the model.
        files: Required files — either a flat list, or a mapping from a
            runtime variant to its file list.
        devices: Devices this runtime is compatible with.
        requirements: Python environment required by this runtime.

    Example:
        >>> runtime = Runtimes(
        ...     available=True,
        ...     files=["model.pt", "config.json"],
        ...     devices=["cuda", "cpu"],
        ...     requirements=Requirements(python=">=3.8", dependencies=["torch"])
        ... )
        >>> print(runtime.available)
        True
    """

    model_config = ConfigDict(extra="forbid")

    available: bool
    files: list[str] | dict[str, list[str]] | None = None
    devices: list[str] | None = None
    requirements: Requirements | None = None
118
+
119
+
120
class Datasets(BaseModel):
    """Label/embedding dataset pair for model evaluation and inference.

    Binds a model to the datasets used for zero-shot classification or
    other dataset-specific operations. Both fields are required; unknown
    keys are rejected.

    Attributes:
        labels: Identifier of the class-label dataset.
        embeddings: Identifier of the embeddings dataset.

    Example:
        >>> dataset = Datasets(
        ...     labels="imagenet1k_labels",
        ...     embeddings="imagenet1k_embeddings"
        ... )
        >>> print(dataset.labels)
        'imagenet1k_labels'
    """

    model_config = ConfigDict(extra="forbid")

    labels: str
    embeddings: str
144
+
145
+
146
class Metadata(BaseModel):
    """Descriptive metadata about a model.

    Licensing, authorship, dates, and categorization tags. All fields are
    optional; unknown keys are rejected.

    Attributes:
        license: License identifier for the model.
        author: Author or publishing organization.
        created_at: Model creation date.
        updated_at: Timezone-aware timestamp of the last update.
        tags: Descriptive tags for categorization.

    Example:
        >>> metadata = Metadata(
        ...     license="MIT",
        ...     author="OpenAI",
        ...     tags=["computer-vision", "multimodal", "clip"]
        ... )
        >>> print(metadata.license)
        'MIT'
    """

    model_config = ConfigDict(extra="forbid")

    license: str | None = None
    author: str | None = None
    created_at: date | None = None
    updated_at: AwareDatetime | None = None
    tags: list[str] | None = None
177
+
178
+
179
class ModelInfo(BaseModel):
    """Top-level schema for Lumen model_info.json files.

    Bundles a model's identity, source, per-runtime configuration,
    optional dataset bindings, and optional descriptive metadata.
    Unknown keys are rejected.

    Attributes:
        name: Model name identifier; also the OpenCLIP identifier when the
            source format is openclip.
        version: Semantic version string (X.Y.Z).
        description: Human-readable description of the model's purpose.
        model_type: Type/category of the model.
        embedding_dim: Dimension of the model's embedding space.
        source: Source platform and repository information.
        runtimes: Mapping of runtime name to its configuration.
        datasets: Optional mapping of dataset name to its configuration.
        metadata: Optional licensing/authorship metadata.

    Example:
        >>> model_info = ModelInfo(
        ...     name="ViT-B-32",
        ...     version="1.0.0",
        ...     description="Vision Transformer for CLIP",
        ...     model_type="vision-transformer",
        ...     embedding_dim=512,
        ...     source=Source(format=Format.huggingface, repo_id="openai/clip-vit-base-patch32"),
        ...     runtimes={"torch": Runtimes(available=True)}
        ... )
        >>> print(model_info.name)
        'ViT-B-32'
    """

    model_config = ConfigDict(extra="forbid")

    name: str = Field(
        ...,
        description="Model name identifier, this is also openclip model identifier if openclip is set as source format",
        min_length=1,
        max_length=100,
    )
    version: str = Field(
        ..., description="Model version", pattern="^[0-9]+\\.[0-9]+\\.[0-9]+$"
    )
    description: str = Field(
        ..., description="Model description and purpose", min_length=1, max_length=500
    )
    model_type: str = Field(..., description="Type of the model")
    embedding_dim: int = Field(
        ..., description="Dimension of the embedding space", ge=1, le=100000
    )
    source: Source
    runtimes: dict[str, Runtimes]
    datasets: dict[str, Datasets] | None = None
    metadata: Metadata | None = None