scalable-pypeline 2.1.31__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. pypeline/__init__.py +1 -0
  2. pypeline/barrier.py +63 -0
  3. pypeline/constants.py +94 -0
  4. pypeline/dramatiq.py +455 -0
  5. pypeline/executable_job_config_schema.py +35 -0
  6. pypeline/extensions.py +17 -0
  7. pypeline/flask/__init__.py +16 -0
  8. pypeline/flask/api/__init__.py +0 -0
  9. pypeline/flask/api/pipelines.py +275 -0
  10. pypeline/flask/api/schedules.py +40 -0
  11. pypeline/flask/decorators.py +41 -0
  12. pypeline/flask/flask_pypeline.py +156 -0
  13. pypeline/job_runner.py +205 -0
  14. pypeline/pipeline_config_schema.py +352 -0
  15. pypeline/pipeline_settings_schema.py +561 -0
  16. pypeline/pipelines/__init__.py +0 -0
  17. pypeline/pipelines/composition/__init__.py +0 -0
  18. pypeline/pipelines/composition/parallel_pipeline_composition.py +375 -0
  19. pypeline/pipelines/composition/pypeline_composition.py +215 -0
  20. pypeline/pipelines/factory.py +86 -0
  21. pypeline/pipelines/middleware/__init__.py +0 -0
  22. pypeline/pipelines/middleware/get_active_worker_id_middleware.py +22 -0
  23. pypeline/pipelines/middleware/graceful_shutdown_middleware.py +50 -0
  24. pypeline/pipelines/middleware/parallel_pipeline_middleware.py +60 -0
  25. pypeline/pipelines/middleware/pypeline_middleware.py +202 -0
  26. pypeline/pypeline_yaml.py +468 -0
  27. pypeline/schedule_config_schema.py +125 -0
  28. pypeline/utils/__init__.py +0 -0
  29. pypeline/utils/config_utils.py +81 -0
  30. pypeline/utils/dramatiq_utils.py +134 -0
  31. pypeline/utils/executable_job_util.py +35 -0
  32. pypeline/utils/graceful_shutdown_util.py +39 -0
  33. pypeline/utils/module_utils.py +108 -0
  34. pypeline/utils/pipeline_utils.py +144 -0
  35. pypeline/utils/schema_utils.py +24 -0
  36. scalable_pypeline-2.1.31.dist-info/LICENSE +177 -0
  37. scalable_pypeline-2.1.31.dist-info/METADATA +212 -0
  38. scalable_pypeline-2.1.31.dist-info/RECORD +42 -0
  39. scalable_pypeline-2.1.31.dist-info/WHEEL +6 -0
  40. scalable_pypeline-2.1.31.dist-info/entry_points.txt +6 -0
  41. scalable_pypeline-2.1.31.dist-info/top_level.txt +2 -0
  42. tests/fixtures/__init__.py +0 -0
@@ -0,0 +1,561 @@
1
+ from datetime import date
2
+
3
+ from marshmallow import Schema, fields, validate, ValidationError, validates_schema, INCLUDE
4
+
5
+
6
+ class MissingSettingsException(Exception):
7
+ pass
8
+
9
+
10
def create_pipeline_settings_schema(pipeline_settings_schema_data):
    """
    Dynamically create a schema to validate user data based on settings.

    Args:
        pipeline_settings_schema_data (dict): The settings schema data containing
            field configurations under a "properties" key, plus an optional
            "required" list of field names.

    Returns:
        Schema: An instance of a dynamically created schema class for
            validating user data.

    Raises:
        ValidationError: If a `dataType`/`elementType` is unsupported, or a
            required sub-configuration (`elementType` for arrays,
            `properties` for objects) is missing.

    NOTE(review): this definition is immediately shadowed by a second
    `create_pipeline_settings_schema` later in this module; one of the two
    copies should be deleted.
    """

    # Dictionary to store dynamically generated fields
    schema_fields = {}

    for key, config in pipeline_settings_schema_data["properties"].items():
        data_type = config.get("dataType")
        input_type = config.get("inputType")
        field_args = {}

        # Map dataType to Marshmallow field type
        field_type = {
            "string": fields.String,
            "int": fields.Integer,
            "float": fields.Float,
            "boolean": fields.Boolean,
            "datetime": fields.DateTime,
            "array": fields.List,
            "object": fields.Nested,
            "date": fields.Date,
        }.get(data_type)

        if not field_type:
            raise ValidationError(f"Unsupported dataType `{data_type}` for `{key}`.")

        # Handle array type: fields.List requires the element field class.
        if data_type == "array":
            element_type = config.get("elementType")
            if not element_type:
                raise ValidationError(f"`elementType` is required for array `{key}`.")
            field_args["cls_or_instance"] = {
                "string": fields.String,
                "int": fields.Integer,
                "float": fields.Float,
                "boolean": fields.Boolean,
                "datetime": fields.DateTime,
                "date": fields.Date,
            }.get(element_type)
            if not field_args["cls_or_instance"]:
                raise ValidationError(
                    f"Unsupported elementType `{element_type}` for array `{key}`."
                )

        # Handle object type by recursively building a schema for its properties.
        if data_type == "object":
            properties = config.get("properties")
            if not properties:
                raise ValidationError(f"`properties` is required for object `{key}`.")
            nested_schema = create_pipeline_settings_schema({"properties": properties})
            # BUG FIX: fields.Nested's parameter is named `nested`, not
            # `schema`; passing `schema=` raised a TypeError at field creation.
            field_args["nested"] = nested_schema

        # Handle range validation for numeric fields
        if data_type in ["int", "float"]:
            if "minimum" in config or "maximum" in config:
                field_args["validate"] = validate.Range(
                    min=config.get("minimum"), max=config.get("maximum")
                )

        # Handle dropdown or radio input options
        if input_type in ["dropdown", "radio"] and "options" in config:
            allowed_values = [option["value"] for option in config["options"]]
            field_args["validate"] = validate.OneOf(allowed_values)

        # Mark the field as required if specified
        if key in pipeline_settings_schema_data.get("required", []):
            field_args["required"] = True

        # Create the field and add to the schema fields dictionary
        schema_fields[key] = field_type(**field_args)

    # Dynamically create a schema class with the generated fields
    DynamicPipelineSettingsSchema = type(
        "DynamicPipelineSettingsSchema", (Schema,), schema_fields
    )

    return DynamicPipelineSettingsSchema()
97
+
98
+
99
def create_pipeline_settings_schema(pipeline_settings_schema_data):
    """
    Dynamically create a schema to validate user data based on settings.

    Unlike a plain schema, the generated class accepts unknown keys
    (``unknown = INCLUDE``) and marks non-required fields as
    ``allow_none=True``.

    Args:
        pipeline_settings_schema_data (dict): The settings schema data containing
            field configurations under a "properties" key, plus an optional
            "required" list of field names.

    Returns:
        Schema: An instance of a dynamically created schema class for
            validating user data.

    Raises:
        ValidationError: If a `dataType`/`elementType` is unsupported, or a
            required sub-configuration (`elementType` for arrays,
            `properties` for objects) is missing.
    """

    # Dictionary to store dynamically generated fields
    schema_fields = {}

    for key, config in pipeline_settings_schema_data["properties"].items():
        data_type = config.get("dataType")
        input_type = config.get("inputType")
        field_args = {}
        # Collect validators in a list so multiple constraints can coexist.
        validators = []

        # Map dataType to Marshmallow field type
        field_type = {
            "string": fields.String,
            "int": fields.Integer,
            "float": fields.Float,
            "boolean": fields.Boolean,
            "datetime": fields.DateTime,
            "date": fields.Date,
            "array": fields.List,
            "object": fields.Nested,
        }.get(data_type)

        if not field_type:
            raise ValidationError(f"Unsupported dataType `{data_type}` for `{key}`.")

        # Handle array type: fields.List requires the element field class.
        if data_type == "array":
            element_type = config.get("elementType")
            if not element_type:
                raise ValidationError(f"`elementType` is required for array `{key}`.")
            field_args["cls_or_instance"] = {
                "string": fields.String,
                "int": fields.Integer,
                "float": fields.Float,
                "boolean": fields.Boolean,
                "datetime": fields.DateTime,
                "date": fields.Date,
            }.get(element_type)
            if not field_args["cls_or_instance"]:
                raise ValidationError(
                    f"Unsupported elementType `{element_type}` for array `{key}`."
                )

        # Handle object type by recursively building a schema for its properties.
        if data_type == "object":
            properties = config.get("properties")
            if not properties:
                raise ValidationError(f"`properties` is required for object `{key}`.")
            nested_schema = create_pipeline_settings_schema({"properties": properties})
            # fields.Nested takes the nested schema via its `nested` parameter;
            # keeping it in field_args lets required/allow_none apply too.
            field_args["nested"] = nested_schema

        # Handle range validation for numeric fields
        if data_type in ["int", "float"]:
            if "minimum" in config or "maximum" in config:
                validators.append(
                    validate.Range(min=config.get("minimum"), max=config.get("maximum"))
                )

        # Handle dropdown or radio input options
        if input_type in ["dropdown", "radio"] and "options" in config:
            allowed_values = [option["value"] for option in config["options"]]
            validators.append(validate.OneOf(allowed_values))

        if validators:
            # BUG FIX: previously a numeric dropdown/radio field's OneOf
            # validator silently overwrote its Range validator; marshmallow
            # accepts a list of validators and applies them all.
            field_args["validate"] = validators

        # Mark the field as required if specified; otherwise allow it to be
        # omitted or explicitly null.
        if key in pipeline_settings_schema_data.get("required", []):
            field_args["required"] = True
        else:
            field_args["required"] = False
            field_args["allow_none"] = True

        # Create the field and add to the schema fields dictionary.
        # BUG FIX: object fields previously bypassed field_args entirely, so a
        # required object was never actually enforced.
        schema_fields[key] = field_type(**field_args)

    # Accept unknown keys so extra settings pass through without errors.
    schema_fields["Meta"] = type(
        "Meta", (), {"unknown": INCLUDE}
    )
    # Dynamically create a schema class with the generated fields
    DynamicPipelineSettingsSchema = type(
        "DynamicPipelineSettingsSchema", (Schema,), schema_fields
    )

    return DynamicPipelineSettingsSchema()
196
+
197
+
198
class OptionSchema(Schema):
    """A single selectable option: a display label paired with its value."""

    # Human-readable text rendered in the UI for this option.
    label = fields.String(
        required=True,
        metadata={"description": "The display label for the option"},
    )
    # Underlying value (any type) submitted when the option is chosen.
    value = fields.Raw(
        required=True,
        metadata={"description": "The value corresponding to the option"},
    )
207
+
208
+
209
def validate_min_max(data):
    """Custom validator to ensure min/max match the dataType.

    Args:
        data (dict): A setting configuration containing ``dataType`` and
            optional ``minimum``/``maximum`` bounds.

    Raises:
        ValidationError: If a bound has the wrong type, if ``minimum`` exceeds
            ``maximum``, or if bounds are given for a non-numeric type.
    """
    data_type = data.get("dataType")
    minimum = data.get("minimum")
    maximum = data.get("maximum")

    if data_type in ["int", "float"]:
        # BUG FIX: `bool` is a subclass of `int`, so it must be excluded
        # explicitly; conversely an `int` bound (e.g. 0) is a valid bound for
        # a `float` field and should not be rejected.
        allowed_types = (int,) if data_type == "int" else (int, float)
        for bound_name, bound in (("minimum", minimum), ("maximum", maximum)):
            if bound is None:
                continue
            if isinstance(bound, bool) or not isinstance(bound, allowed_types):
                raise ValidationError(f"`{bound_name}` must be of type {data_type}.")
        if minimum is not None and maximum is not None and minimum > maximum:
            raise ValidationError("`minimum` must be less than or equal to `maximum`.")
    elif minimum is not None or maximum is not None:
        # Bounds are meaningless for non-numeric types.
        raise ValidationError(
            "`minimum` and `maximum` are only valid for numeric types (`int`, `float`)."
        )
232
+
233
+
234
class SettingSchema(Schema):
    """Configuration schema for a single pipeline setting.

    Describes both the underlying value type (``dataType``) and how the value
    is collected in a UI (``inputType``), plus type-specific extras: numeric
    bounds, dropdown/radio options, array element type, and nested object
    properties. Cross-field consistency rules are enforced by the
    ``@validates_schema`` hooks below.
    """

    # Underlying value type; drives which of the optional keys are legal.
    dataType = fields.String(
        required=True,
        validate=validate.OneOf(
            ["string", "int", "float", "boolean", "datetime", "array", "object", "date"]
        ),
        metadata={"description": "The underlying data type of the setting"},
    )
    # UI widget used to collect the value.
    inputType = fields.String(
        required=True,
        validate=validate.OneOf(
            ["text", "dropdown", "radio", "checkbox", "searchable", "custom"]
        ),
        metadata={"description": "The type of input UI element"},
    )
    label = fields.String(
        required=True,
        metadata={"description": "The display label for the field"},
    )
    placeholder = fields.String(
        metadata={"description": "Placeholder text for text input fields"}
    )
    # Raw (untyped) here; type-checked against dataType in validate_min_max.
    minimum = fields.Raw(
        metadata={"description": "Minimum value for numeric data types"}
    )
    maximum = fields.Raw(
        metadata={"description": "Maximum value for numeric data types"}
    )
    # Only meaningful (and then required) for dropdown/radio input types.
    options = fields.List(
        fields.Nested(OptionSchema),
        metadata={"description": "Options for dropdown or radio input types"},
    )
    searchEndpoint = fields.String(
        metadata={"description": "Endpoint for searchable fields"}
    )
    component = fields.String(
        metadata={"description": "React component for custom input types"}
    )
    elementType = fields.String(
        metadata={"description": "Element type for array data types"}
    )
    # Recursive: an object setting's properties are themselves SettingSchemas
    # (the lambda defers resolution so the class can reference itself).
    properties = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(lambda: SettingSchema),
        metadata={"description": "Properties for object data types"},
    )

    class Meta:
        # Preserve field declaration order in dumped output.
        ordered = True

    @validates_schema
    def validate_min_max(self, data, **kwargs):
        """Delegate bound checking to the module-level ``validate_min_max``.

        The method intentionally shadows the module-level function's name; the
        call inside the body still resolves to the global at call time.
        """
        validate_min_max(data)

    @validates_schema
    def validate_options(self, data, **kwargs):
        """Ensure options are provided for dropdown or radio input types and validate value types."""
        input_type = data.get("inputType")
        options = data.get("options")
        data_type = data.get("dataType")

        if input_type in ["dropdown", "radio"]:
            if not options:
                raise ValidationError(
                    "`options` are required for dropdown and radio input types.",
                    field_name="options",
                )

            # Each option's `value` must match the declared dataType.
            # NOTE(review): since bool is a subclass of int, boolean option
            # values pass the `int` check; and int values (e.g. 1) fail the
            # `float` check — confirm whether that strictness is intended.
            for option in options:
                value = option.get("value")
                if data_type == "int" and not isinstance(value, int):
                    raise ValidationError(
                        f"Option value `{value}` must be of type `int`."
                    )
                elif data_type == "float" and not isinstance(value, float):
                    raise ValidationError(
                        f"Option value `{value}` must be of type `float`."
                    )
                elif data_type == "boolean" and not isinstance(value, bool):
                    raise ValidationError(
                        f"Option value `{value}` must be of type `boolean`."
                    )
                elif data_type == "string" and not isinstance(value, str):
                    raise ValidationError(
                        f"Option value `{value}` must be of type `string`."
                    )
                elif data_type == "datetime" and not isinstance(
                    value, str
                ):  # Assuming ISO 8601 strings
                    raise ValidationError(
                        f"Option value `{value}` must be an ISO 8601 string for `datetime`."
                    )
                elif data_type == "date":
                    # Any failure (wrong type or bad format) is treated the same.
                    try:
                        date.fromisoformat(value)
                    except Exception:
                        raise ValidationError(
                            f"Option value `{value}` must be an ISO 8601 string for `date`."
                        )

    @validates_schema
    def validate_search_endpoint(self, data, **kwargs):
        """Ensure searchEndpoint is provided only for 'searchable' input types."""
        input_type = data.get("inputType")
        search_endpoint = data.get("searchEndpoint")

        if input_type == "searchable" and not search_endpoint:
            raise ValidationError(
                "`searchEndpoint` is required for `searchable` input types.",
                field_name="searchEndpoint",
            )
        elif input_type != "searchable" and search_endpoint:
            raise ValidationError(
                "`searchEndpoint` is not allowed for non-searchable input types.",
                field_name="searchEndpoint",
            )

    @validates_schema
    def validate_custom_component(self, data, **kwargs):
        """Ensure component is provided for custom input types."""
        input_type = data.get("inputType")
        component = data.get("component")

        if input_type == "custom" and not component:
            raise ValidationError(
                "`component` is required for `custom` input types.",
                field_name="component",
            )
        elif input_type != "custom" and component:
            raise ValidationError(
                "`component` is not allowed for non-custom input types.",
                field_name="component",
            )

    @validates_schema
    def validate_array_element_type(self, data, **kwargs):
        """Ensure elementType is provided for array data types."""
        data_type = data.get("dataType")
        element_type = data.get("elementType")

        if data_type == "array" and not element_type:
            raise ValidationError(
                "`elementType` is required for `array` data types.",
                field_name="elementType",
            )
        elif data_type != "array" and element_type:
            raise ValidationError(
                "`elementType` is not allowed for non-array data types.",
                field_name="elementType",
            )

    @validates_schema
    def validate_object_properties(self, data, **kwargs):
        """Ensure properties are provided for object data types."""
        data_type = data.get("dataType")
        properties = data.get("properties")

        if data_type == "object" and not properties:
            raise ValidationError(
                "`properties` is required for `object` data types.",
                field_name="properties",
            )
        elif data_type != "object" and properties:
            raise ValidationError(
                "`properties` is not allowed for non-object data types.",
                field_name="properties",
            )
401
+
402
+
403
class PipelineSettingsSchema(Schema):
    """Schema for a pipeline's settings *definition* block.

    Validates the declaration of available settings (``properties``), which of
    them are mandatory (``required``), and which may be overridden per scenario
    (``scenarioSettings``).
    """

    properties = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(SettingSchema),
        required=True,
        metadata={"description": "A dictionary of settings with their configurations"},
    )
    # CONSISTENCY FIX: `description` must live inside `metadata=` like the
    # other fields in this module — passing it as a bare keyword is deprecated
    # in marshmallow 3 and removed in marshmallow 4.
    required = fields.List(
        fields.String(),
        required=True,
        metadata={"description": "List of required settings"},
    )
    scenarioSettings = fields.List(
        fields.String(),
        required=False,
        metadata={
            "description": "List of settings that can be overridden for "
            "different pipeline scenarios."
        },
    )

    @validates_schema
    def validate_scenario_settings(self, data, **kwargs):
        """Ensure scenarioSettings only contains keys defined in properties."""
        properties = data.get("properties", {})
        scenario_settings = data.get("scenarioSettings", [])

        invalid_settings = [
            setting for setting in scenario_settings if setting not in properties
        ]
        if invalid_settings:
            # NOTE(review): the error key is snake_case while the field is
            # camelCase (`scenarioSettings`); kept as-is since consumers may
            # depend on the existing key.
            raise ValidationError(
                {
                    "scenario_settings": (
                        f"The following settings in scenarioSettings are not defined "
                        f"in properties: {', '.join(invalid_settings)}"
                    )
                }
            )
437
+
438
+
439
class PipelineScenarioSchema(Schema):
    """One scenario: setting overrides plus optional task-level adjustments."""

    # Settings values for this scenario; shape must match the pypeline.yaml
    # settings schema for the pipeline.
    settings = fields.Dict(
        required=True,
        metadata={
            "description": "Settings to be used for a given scenario. Should match the pypeline.yaml settings schema"
        },
    )
    # Maps a task-definition name to the index of the handler to run instead
    # of the default (index 0).
    taskReplacements = fields.Dict(
        keys=fields.String(),
        values=fields.Integer(),
        required=False,
        metadata={
            "description": "Tasks that should be replaced in a given scenario. "
            "The key corresponds to the task definition in the pypeline.yaml and the value corresponds "
            "to the index of the task handlers where 0 is the default and first task. Eg: {'a': 1}. In this case "
            "if we have a task definition 'a' with 3 handlers fn_1, fn_2, fn_3 respectively then the handler to run "
            "for 'a' is fn_2."
        },
    )
    # Task definitions to re-run with this scenario's settings injected.
    taskReruns = fields.List(
        fields.String(),
        required=False,
        metadata={
            "description": "List of task definitions that need to be run again for a given scenario. Here "
            "the scenario's pipeline settings will be injected in the task being run again which could be used to "
            "produce alternative calculations and or results."
        },
    )
    # NOTE(review): snake_case name diverges from the camelCase fields above;
    # kept unchanged since it is part of the wire format.
    execution_id = fields.String(
        required=False,
        metadata={"description": "Execution id for a known scenario"},
    )
469
+
470
+
471
class PipelineScenariosSchema(Schema):
    """Wrapper holding the list of scenarios to run for a pipeline."""

    # NOTE(review): the field is named `required` (shadowing the common
    # marshmallow kwarg name) but is itself optional; kept for compatibility.
    required = fields.List(
        fields.Nested(PipelineScenarioSchema),
        metadata={"description": "List of scenarios to run for a given pipeline"},
    )
476
+
477
+
478
# Example usage: validate a settings definition, then validate user-supplied
# values against the schema dynamically built from that definition.
if __name__ == "__main__":
    # User-supplied values to check against the dynamic schema.
    # (param4 is in the "required" list below, so it is included here.)
    pipeline_settings = {"param1": "test", "param2": 1, "param4": 1.5}

    yaml_data = {
        "properties": {
            "param1": {
                "dataType": "string",
                "inputType": "text",
                "label": "Parameter 1",
                "placeholder": "Enter a string",
            },
            "param2": {
                "dataType": "int",
                "inputType": "text",
                "label": "Parameter 2",
                "minimum": -1,
                "maximum": 1,
            },
            "param3": {
                "dataType": "boolean",
                "inputType": "checkbox",
                "label": "Enable Feature",
            },
            "param4": {
                "dataType": "float",
                "inputType": "dropdown",
                "label": "Choose an Option",
                "minimum": 0.5,
                "maximum": 2.5,
                "options": [
                    {"label": "Option 1", "value": 0.5},
                    {"label": "Option 2", "value": 1.5},
                ],
            },
            "param5": {
                "dataType": "int",
                "inputType": "radio",
                "label": "Select a Mode",
                "options": [
                    {"label": "Mode A", "value": 1},
                    {"label": "Mode B", "value": 2},
                ],
            },
            "param6": {
                "dataType": "string",
                "inputType": "searchable",
                "label": "Select Pipeline",
                "searchEndpoint": "/api/pipelines",
            },
            "param7": {
                "dataType": "array",
                "inputType": "text",
                "label": "Array of Integers",
                "elementType": "int",
            },
            "param8": {
                "dataType": "object",
                "inputType": "custom",
                "label": "Custom Object",
                "component": "CustomObjectComponent",
                "properties": {
                    "subParam1": {
                        "dataType": "string",
                        "inputType": "text",
                        "label": "Sub Parameter 1",
                    },
                    "subParam2": {
                        "dataType": "int",
                        "inputType": "text",
                        "label": "Sub Parameter 2",
                    },
                },
            },
        },
        "required": ["param1", "param2", "param4"],
    }

    # Step 1: validate the settings *definition* itself.
    schema = PipelineSettingsSchema()
    errors = schema.validate(yaml_data)
    if errors:
        print("Validation errors:", errors)
    else:
        print("Validation successful!")
        # FIX: `pipeline_settings` was previously defined but never used;
        # exercise the dynamically generated schema against it as well.
        dynamic_schema = create_pipeline_settings_schema(yaml_data)
        settings_errors = dynamic_schema.validate(pipeline_settings)
        if settings_errors:
            print("Settings validation errors:", settings_errors)
        else:
            print("Settings validation successful!")
File without changes
File without changes