scalable-pypeline 2.0.10__py2.py3-none-any.whl → 2.1.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,541 @@
1
+ from marshmallow import Schema, fields, validate, ValidationError, validates_schema
2
+
3
+
4
class MissingSettingsException(Exception):
    """Exception type for missing settings; it adds nothing to ``Exception``."""
6
+
7
+
8
def create_pipeline_settings_schema(pipeline_settings_schema_data):
    """
    Dynamically build a schema that validates user data against a settings spec.

    Each entry of ``pipeline_settings_schema_data["properties"]`` is turned into
    one Marshmallow field, chosen from its ``dataType``:

    * scalar types (``string``, ``int``, ``float``, ``boolean``, ``datetime``)
      map directly to the matching Marshmallow field;
    * ``array`` becomes a ``fields.List`` of the scalar given by ``elementType``;
    * ``object`` becomes a ``fields.Nested`` wrapping a schema built recursively
      from the entry's own ``properties``.

    Args:
        pipeline_settings_schema_data (dict): The settings schema data. Must
            contain a ``properties`` mapping of field name to field
            configuration; may contain a ``required`` list of field names.

    Returns:
        Schema: An *instance* of a dynamically created schema class, ready to
        validate user data.

    Raises:
        ValidationError: If a ``dataType`` or ``elementType`` is unsupported,
            an array has no ``elementType``, or an object has no ``properties``.
        KeyError: If ``properties`` is missing from the input, or an option
            used for a dropdown/radio field has no ``value`` key.
    """
    # Built once per call rather than once per property.
    scalar_field_types = {
        "string": fields.String,
        "int": fields.Integer,
        "float": fields.Float,
        "boolean": fields.Boolean,
        "datetime": fields.DateTime,
    }
    required_keys = set(pipeline_settings_schema_data.get("required") or ())

    # Dictionary to store dynamically generated fields
    schema_fields = {}

    for key, config in pipeline_settings_schema_data["properties"].items():
        data_type = config.get("dataType")
        input_type = config.get("inputType")

        # Resolve the field class plus any positional argument it needs.
        if data_type == "array":
            element_type = config.get("elementType")
            if not element_type:
                raise ValidationError(f"`elementType` is required for array `{key}`.")
            element_field = scalar_field_types.get(element_type)
            if element_field is None:
                raise ValidationError(
                    f"Unsupported elementType `{element_type}` for array `{key}`."
                )
            field_cls, positional = fields.List, (element_field,)
        elif data_type == "object":
            properties = config.get("properties")
            if not properties:
                raise ValidationError(f"`properties` is required for object `{key}`.")
            # Recursively create a schema for the nested object
            nested_schema = create_pipeline_settings_schema({"properties": properties})
            field_cls, positional = fields.Nested, (nested_schema,)
        else:
            field_cls, positional = scalar_field_types.get(data_type), ()
            if field_cls is None:
                raise ValidationError(
                    f"Unsupported dataType `{data_type}` for `{key}`."
                )

        # Collect validators instead of assigning `validate` twice, so a field
        # that declares both bounds and options keeps both checks.
        validators = []
        if data_type in ("int", "float") and (
            "minimum" in config or "maximum" in config
        ):
            validators.append(
                validate.Range(min=config.get("minimum"), max=config.get("maximum"))
            )
        if input_type in ("dropdown", "radio") and "options" in config:
            validators.append(
                validate.OneOf([option["value"] for option in config["options"]])
            )

        field_args = {}
        if validators:
            field_args["validate"] = (
                validators[0] if len(validators) == 1 else validators
            )
        # Applies to every data type, including nested objects.
        if key in required_keys:
            field_args["required"] = True

        schema_fields[key] = field_cls(*positional, **field_args)

    # Dynamically create a schema class with the generated fields
    DynamicPipelineSettingsSchema = type(
        "DynamicPipelineSettingsSchema", (Schema,), schema_fields
    )

    return DynamicPipelineSettingsSchema()
184
+
185
+
186
class OptionSchema(Schema):
    """Schema for one selectable option: a display label and its value."""

    # Label text for the option; mandatory.
    label = fields.String(
        metadata={"description": "The display label for the option"},
        required=True,
    )
    # Declared as Raw, so this schema itself does not constrain the value's type.
    value = fields.Raw(
        metadata={"description": "The value corresponding to the option"},
        required=True,
    )
195
+
196
+
197
def validate_min_max(data):
    """Custom validator to ensure min/max match the dataType.

    Args:
        data (dict): A setting configuration; ``dataType``, ``minimum`` and
            ``maximum`` are read with ``.get`` so all three are optional.

    Raises:
        ValidationError: If a bound is given for a non-numeric ``dataType``,
            if a bound has the wrong type for a numeric ``dataType``, or if
            ``minimum`` is greater than ``maximum``.
    """
    data_type = data.get("dataType")
    minimum = data.get("minimum")
    maximum = data.get("maximum")

    if data_type not in ("int", "float"):
        if minimum is not None or maximum is not None:
            raise ValidationError(
                "`minimum` and `maximum` are only valid for numeric types (`int`, `float`)."
            )
        return

    # A whole-number bound (e.g. `minimum: 0`) is a legitimate float bound, so
    # floats accept ints as well; int settings still require true ints.
    accepted_types = int if data_type == "int" else (int, float)
    for bound_name, bound in (("minimum", minimum), ("maximum", maximum)):
        if bound is None:
            continue
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(bound, bool) or not isinstance(bound, accepted_types):
            raise ValidationError(f"`{bound_name}` must be of type {data_type}.")

    if minimum is not None and maximum is not None and minimum > maximum:
        raise ValidationError("`minimum` must be less than or equal to `maximum`.")
220
+
221
+
222
def _is_valid_option_value(data_type, value):
    """Return True when ``value`` is an acceptable option value for ``data_type``."""
    if data_type == "int":
        # bool is a subclass of int and must not pass as an integer option.
        return isinstance(value, int) and not isinstance(value, bool)
    if data_type == "float":
        # Whole-number options (e.g. 1) are valid choices for a float setting.
        return isinstance(value, (int, float)) and not isinstance(value, bool)
    if data_type == "boolean":
        return isinstance(value, bool)
    if data_type in ("string", "datetime"):
        # datetime options are expected as strings (assumed ISO 8601).
        return isinstance(value, str)
    # Other data types (array, object, unknown) are not type-checked here.
    return True


class SettingSchema(Schema):
    """Schema for the configuration of a single setting.

    Field-level rules are declared on the fields themselves; the
    ``@validates_schema`` hooks below enforce the rules that depend on more
    than one field (e.g. ``options`` is required when ``inputType`` is
    ``dropdown`` or ``radio``).
    """

    dataType = fields.String(
        required=True,
        validate=validate.OneOf(
            ["string", "int", "float", "boolean", "datetime", "array", "object"]
        ),
        metadata={"description": "The underlying data type of the setting"},
    )
    inputType = fields.String(
        required=True,
        validate=validate.OneOf(
            ["text", "dropdown", "radio", "checkbox", "searchable", "custom"]
        ),
        metadata={"description": "The type of input UI element"},
    )
    label = fields.String(
        required=True,
        metadata={"description": "The display label for the field"},
    )
    placeholder = fields.String(
        metadata={"description": "Placeholder text for text input fields"}
    )
    # Raw because the expected type depends on dataType; see validate_min_max.
    minimum = fields.Raw(
        metadata={"description": "Minimum value for numeric data types"}
    )
    maximum = fields.Raw(
        metadata={"description": "Maximum value for numeric data types"}
    )
    options = fields.List(
        fields.Nested(OptionSchema),
        metadata={"description": "Options for dropdown or radio input types"},
    )
    searchEndpoint = fields.String(
        metadata={"description": "Endpoint for searchable fields"}
    )
    component = fields.String(
        metadata={"description": "React component for custom input types"}
    )
    elementType = fields.String(
        metadata={"description": "Element type for array data types"}
    )
    # The lambda defers the self-reference until the class exists.
    properties = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(lambda: SettingSchema),
        metadata={"description": "Properties for object data types"},
    )

    class Meta:
        ordered = True

    @validates_schema
    def validate_min_max(self, data, **kwargs):
        """Delegate to the module-level ``validate_min_max`` function."""
        # Inside a method body this name resolves to the module-level
        # function, not to this method.
        validate_min_max(data)

    @validates_schema
    def validate_options(self, data, **kwargs):
        """Ensure options are provided for dropdown or radio input types and validate value types."""
        if data.get("inputType") not in ("dropdown", "radio"):
            return

        options = data.get("options")
        if not options:
            raise ValidationError(
                "`options` are required for dropdown and radio input types.",
                field_name="options",
            )

        data_type = data.get("dataType")
        for option in options:
            value = option.get("value")
            if _is_valid_option_value(data_type, value):
                continue
            if data_type == "datetime":
                raise ValidationError(
                    f"Option value `{value}` must be an ISO 8601 string for `datetime`."
                )
            raise ValidationError(
                f"Option value `{value}` must be of type `{data_type}`."
            )

    @validates_schema
    def validate_search_endpoint(self, data, **kwargs):
        """Ensure searchEndpoint is provided only for 'searchable' input types."""
        input_type = data.get("inputType")
        search_endpoint = data.get("searchEndpoint")

        if input_type == "searchable" and not search_endpoint:
            raise ValidationError(
                "`searchEndpoint` is required for `searchable` input types.",
                field_name="searchEndpoint",
            )
        elif input_type != "searchable" and search_endpoint:
            raise ValidationError(
                "`searchEndpoint` is not allowed for non-searchable input types.",
                field_name="searchEndpoint",
            )

    @validates_schema
    def validate_custom_component(self, data, **kwargs):
        """Ensure component is provided only for 'custom' input types."""
        input_type = data.get("inputType")
        component = data.get("component")

        if input_type == "custom" and not component:
            raise ValidationError(
                "`component` is required for `custom` input types.",
                field_name="component",
            )
        elif input_type != "custom" and component:
            raise ValidationError(
                "`component` is not allowed for non-custom input types.",
                field_name="component",
            )

    @validates_schema
    def validate_array_element_type(self, data, **kwargs):
        """Ensure elementType is provided only for 'array' data types."""
        data_type = data.get("dataType")
        element_type = data.get("elementType")

        if data_type == "array" and not element_type:
            raise ValidationError(
                "`elementType` is required for `array` data types.",
                field_name="elementType",
            )
        elif data_type != "array" and element_type:
            raise ValidationError(
                "`elementType` is not allowed for non-array data types.",
                field_name="elementType",
            )

    @validates_schema
    def validate_object_properties(self, data, **kwargs):
        """Ensure properties are provided only for 'object' data types."""
        data_type = data.get("dataType")
        properties = data.get("properties")

        if data_type == "object" and not properties:
            raise ValidationError(
                "`properties` is required for `object` data types.",
                field_name="properties",
            )
        elif data_type != "object" and properties:
            raise ValidationError(
                "`properties` is not allowed for non-object data types.",
                field_name="properties",
            )
382
+
383
+
384
class PipelineSettingsSchema(Schema):
    """Schema for a pipeline's settings definition.

    ``properties`` maps each setting name to its ``SettingSchema``
    configuration, ``required`` lists setting names, and the optional
    ``scenarioSettings`` lists setting names that must all be keys of
    ``properties``.
    """

    properties = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(SettingSchema),
        required=True,
        metadata={"description": "A dictionary of settings with their configurations"},
    )
    # Descriptions go through `metadata=` like every other field in this file;
    # passing them as bare keyword arguments is deprecated by marshmallow.
    required = fields.List(
        fields.String(),
        required=True,
        metadata={"description": "List of required settings"},
    )
    scenarioSettings = fields.List(
        fields.String(),
        required=False,
        metadata={
            "description": "List of settings that can be overriding for different pipeline scenarios."
        },
    )

    @validates_schema
    def validate_scenario_settings(self, data, **kwargs):
        """Ensure scenarioSettings only contains keys defined in properties.

        Raises:
            ValidationError: Listing every entry of ``scenarioSettings`` that
                is not a key of ``properties``.
        """
        properties = data.get("properties", {})
        scenario_settings = data.get("scenarioSettings", [])

        invalid_settings = [
            setting for setting in scenario_settings if setting not in properties
        ]
        if invalid_settings:
            # NOTE(review): the error key is `scenario_settings` while the field
            # is `scenarioSettings`; kept as-is so existing consumers of the
            # error payload are unaffected — confirm whether it should match.
            raise ValidationError(
                {
                    "scenario_settings": (
                        f"The following settings in scenarioSettings are not defined "
                        f"in properties: {', '.join(invalid_settings)}"
                    )
                }
            )
418
+
419
+
420
class PipelineScenarioSchema(Schema):
    """Schema for one scenario: its settings plus optional task overrides."""

    # Free-form dict; this schema does not validate its keys or values.
    settings = fields.Dict(
        metadata={
            "description": "Settings to be used for a given scenario. Should match the pypeline.yaml settings schema"
        },
        required=True,
    )

    # Optional mapping of string key -> integer value.
    taskReplacements = fields.Dict(
        metadata={
            "description": "Tasks that should be replaced in a given scenario. "
            "The key corresponds to the task definition in the pypeline.yaml and the value corresponds "
            "to the index of the task handlers where 0 is the default and first task. Eg: {'a': 1}. In this case "
            "if we have a task definition 'a' with 3 handlers fn_1, fn_2, fn_3 respectively then the handler to run "
            "for 'a' is fn_2."
        },
        keys=fields.String(),
        values=fields.Integer(),
        required=False,
    )

    # Optional list of strings.
    taskReruns = fields.List(
        fields.String(),
        metadata={
            "description": "List of task definitions that need to be run again for a given scenario. Here "
            "the scenario's pipeline settings will be injected in the task being run again which could be used to "
            "produce alternative calculations and or results."
        },
        required=False,
    )
449
+
450
+
451
class PipelineScenariosSchema(Schema):
    """Schema wrapping a list of ``PipelineScenarioSchema`` entries."""

    # NOTE(review): the field is named `required` although its description
    # says it holds the scenarios to run — confirm the name is intentional.
    required = fields.List(
        fields.Nested(PipelineScenarioSchema),
        metadata={
            "description": "List of scenarios to run for a given pipeline",
        },
    )
456
+
457
+
458
+ # Example usage
459
if __name__ == "__main__":
    # Sample settings definition covering a range of dataType / inputType
    # combinations, validated against PipelineSettingsSchema below.
    yaml_data = {
        "properties": {
            "param1": {
                "dataType": "string",
                "inputType": "text",
                "label": "Parameter 1",
                "placeholder": "Enter a string",
            },
            "param2": {
                "dataType": "int",
                "inputType": "text",
                "label": "Parameter 2",
                "minimum": -1,
                "maximum": 1,
            },
            "param3": {
                "dataType": "boolean",
                "inputType": "checkbox",
                "label": "Enable Feature",
            },
            "param4": {
                "dataType": "float",
                "inputType": "dropdown",
                "label": "Choose an Option",
                "minimum": 0.5,
                "maximum": 2.5,
                "options": [
                    {"label": "Option 1", "value": 0.5},
                    {"label": "Option 2", "value": 1.5},
                ],
            },
            "param5": {
                "dataType": "int",
                "inputType": "radio",
                "label": "Select a Mode",
                "options": [
                    {"label": "Mode A", "value": 1},
                    {"label": "Mode B", "value": 2},
                ],
            },
            "param6": {
                "dataType": "string",
                "inputType": "searchable",
                "label": "Select Pipeline",
                "searchEndpoint": "/api/pipelines",
            },
            "param7": {
                "dataType": "array",
                "inputType": "text",
                "label": "Array of Integers",
                "elementType": "int",
            },
            "param8": {
                "dataType": "object",
                "inputType": "custom",
                "label": "Custom Object",
                "component": "CustomObjectComponent",
                "properties": {
                    "subParam1": {
                        "dataType": "string",
                        "inputType": "text",
                        "label": "Sub Parameter 1",
                    },
                    "subParam2": {
                        "dataType": "int",
                        "inputType": "text",
                        "label": "Sub Parameter 2",
                    },
                },
            },
        },
        "required": ["param1", "param2", "param4"],
    }

    # validate() returns a dict of errors; an empty dict means the data is valid.
    schema = PipelineSettingsSchema()
    errors = schema.validate(yaml_data)
    if errors:
        print("Validation errors:", errors)
    else:
        print("Validation successful!")
File without changes
File without changes