scalable-pypeline 2.1.31__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. pypeline/__init__.py +1 -0
  2. pypeline/barrier.py +63 -0
  3. pypeline/constants.py +94 -0
  4. pypeline/dramatiq.py +455 -0
  5. pypeline/executable_job_config_schema.py +35 -0
  6. pypeline/extensions.py +17 -0
  7. pypeline/flask/__init__.py +16 -0
  8. pypeline/flask/api/__init__.py +0 -0
  9. pypeline/flask/api/pipelines.py +275 -0
  10. pypeline/flask/api/schedules.py +40 -0
  11. pypeline/flask/decorators.py +41 -0
  12. pypeline/flask/flask_pypeline.py +156 -0
  13. pypeline/job_runner.py +205 -0
  14. pypeline/pipeline_config_schema.py +352 -0
  15. pypeline/pipeline_settings_schema.py +561 -0
  16. pypeline/pipelines/__init__.py +0 -0
  17. pypeline/pipelines/composition/__init__.py +0 -0
  18. pypeline/pipelines/composition/parallel_pipeline_composition.py +375 -0
  19. pypeline/pipelines/composition/pypeline_composition.py +215 -0
  20. pypeline/pipelines/factory.py +86 -0
  21. pypeline/pipelines/middleware/__init__.py +0 -0
  22. pypeline/pipelines/middleware/get_active_worker_id_middleware.py +22 -0
  23. pypeline/pipelines/middleware/graceful_shutdown_middleware.py +50 -0
  24. pypeline/pipelines/middleware/parallel_pipeline_middleware.py +60 -0
  25. pypeline/pipelines/middleware/pypeline_middleware.py +202 -0
  26. pypeline/pypeline_yaml.py +468 -0
  27. pypeline/schedule_config_schema.py +125 -0
  28. pypeline/utils/__init__.py +0 -0
  29. pypeline/utils/config_utils.py +81 -0
  30. pypeline/utils/dramatiq_utils.py +134 -0
  31. pypeline/utils/executable_job_util.py +35 -0
  32. pypeline/utils/graceful_shutdown_util.py +39 -0
  33. pypeline/utils/module_utils.py +108 -0
  34. pypeline/utils/pipeline_utils.py +144 -0
  35. pypeline/utils/schema_utils.py +24 -0
  36. scalable_pypeline-2.1.31.dist-info/LICENSE +177 -0
  37. scalable_pypeline-2.1.31.dist-info/METADATA +212 -0
  38. scalable_pypeline-2.1.31.dist-info/RECORD +42 -0
  39. scalable_pypeline-2.1.31.dist-info/WHEEL +6 -0
  40. scalable_pypeline-2.1.31.dist-info/entry_points.txt +6 -0
  41. scalable_pypeline-2.1.31.dist-info/top_level.txt +2 -0
  42. tests/fixtures/__init__.py +0 -0
@@ -0,0 +1,468 @@
1
+ """ Definition of the `pypeline.yaml` file.
2
+
3
+ If using, a basic file may look like::
4
+ serviceConfig:
5
+ - name: pypeline-worker
6
+ registeredTasks:
7
+ - handler: pypeline_demo_client.workers.demo_worker.demo_worker_task
8
+ - handler: pypeline_demo_client.workers.demo_worker.demo_model_task
9
+
10
+ pipelines:
11
+ demo-pipeline:
12
+ name: demo-pipeline
13
+ description: Demo Pipeline.
14
+ schemaVersion: 1
15
+ config:
16
+ dagAdjacency:
17
+ node_a:
18
+ - node_b
19
+ - node_c
20
+ metadata:
21
+ maxRetry: 3
22
+ maxTtl: 60
23
+ queue: default-task-queue
24
+ taskDefinitions:
25
+ node_a:
26
+ handler: pypeline_demo_client.workers.demo_pipeline.demo_pipeline_node_a
27
+ node_b:
28
+ handler: pypeline_demo_client.workers.demo_pipeline.demo_pipeline_node_b
29
+ queue: node-b-queue
30
+ node_c:
31
+ handler: pypeline_demo_client.workers.demo_pipeline.demo_pipeline_node_c
32
+
33
+ scheduledTasks:
34
+ demo-model-task:
35
+ name: Demo Model Task
36
+ enabled: true
37
+ config:
38
+ task: pypeline_demo_client.workers.demo_worker.demo_model_task
39
+ queue: default-task-queue
40
+ schedule:
41
+ minute: '*'
42
+ hour: '*'
43
+ dayOfWeek: '*'
44
+ dayOfMonth: '*'
45
+ monthOfYear: '*'
46
+ schemaVersion: 1
47
+
48
+ """
49
+ import re
50
+ import os
51
+ import logging
52
+ import pkg_resources
53
+ import yaml
54
+ from yaml.loader import SafeLoader
55
+ from marshmallow import Schema, fields, pre_load, EXCLUDE, INCLUDE, validates_schema
56
+ from marshmallow.exceptions import ValidationError
57
+
58
+ from pypeline.executable_job_config_schema import ExecutableJobSchema
59
+ from pypeline.utils.module_utils import PypelineModuleLoader, normalized_pkg_name
60
+ from pypeline.constants import PYPELINE_YAML_PATH, PYPELINE_CLIENT_PKG_NAME
61
+ from pypeline.pipeline_config_schema import BasePipelineSchema
62
+ from pypeline.schedule_config_schema import BaseScheduleSchema
63
+
64
+ logger = logging.getLogger(__name__)
65
+
66
+
67
class InvalidPackagePath(Exception):
    """Raised when a package/config file path cannot be resolved."""
69
+
70
+
71
class InvalidPypelineConfig(Exception):
    """Raised when pypeline.yaml fails YAML parsing or schema validation."""
73
+
74
+
75
class MissingPypelineConfig(Exception):
    """Raised when no pypeline.yaml file can be found for the package."""
77
+
78
+
79
class ExcludeUnknownSchema(Schema):
    """Schema mixin that silently drops unknown keys during load."""

    class Meta:
        # EXCLUDE: unrecognized input keys are discarded instead of
        # raising a ValidationError for them.
        unknown = EXCLUDE
82
+
83
+
84
class NameSchema(Schema):
    """Validated name string field."""

    name = fields.String(
        required=True,
        description="Name for service or image. Must include "
        "only alphanumeric characters along with `_` and `-`.",
        example="my-service-name",
    )

    @pre_load
    def validate_characters(self, item, **kwargs):
        """Ensure the `name` field conforms to allowed characters.

        Raises:
            ValidationError: If `name` contains disallowed characters.
                Raised (instead of a bare ``ValueError``) so the failure
                flows through marshmallow's normal error reporting and is
                surfaced by ``Schema.load``.
        """
        # `name` may be absent at pre-load time; let the field's
        # `required=True` report that case rather than KeyError-ing here.
        name = item.get("name")
        if name is not None and not re.match(r"^[\w\d\-\_]+$", name):
            raise ValidationError(
                f"Invalid name: {name}. Only alphanumeric characters "
                "allowed along with `-` and `_`."
            )
        return item
104
+
105
+
106
class PypelineRegisteredTaskDetailConfigSchema(Schema):
    """Declarative shape of a single registered-task entry from
    `pypeline.yaml`: a dotted-path handler plus optional user data.
    """

    handler = fields.String(
        required=True,
        description="Full path to the Method handles work / pipeline tasks.",
        example="pypeline_customer_client.workers.worker_group.useful_worker",
    )

    # Free-form payload; `unknown=INCLUDE` keeps unrecognized keys so the
    # user's data reaches the task untouched.
    event = fields.Raw(
        required=False,
        unknown=INCLUDE,
        description="Arbitrary user data, passed through `event` arg in task.",
    )
118
+
119
+
120
class PypelineCeleryWorkerConfigSchema(Schema):
    """Attributes for a celery worker. This worker will run all of the
    pipelines and scheduled tasks.
    """

    # Declared `required=False` so this class can act as a mixin for a
    # generic service object, but marked `_required=True` in field metadata;
    # PypelineYamlSchema.validate_schema enforces `_required` at load time.
    registeredTasks = fields.List(
        fields.Nested(PypelineRegisteredTaskDetailConfigSchema, required=True),
        required=False,
        _required=True,
        description="List of task handlers to register for to your Pypeline app.",
    )
131
+
132
+
133
class PypelineServiceConfigSchema(
    ExcludeUnknownSchema, PypelineCeleryWorkerConfigSchema, NameSchema
):
    """Base service config object definition for workers.

    Pure composition: worker fields (`registeredTasks`), the validated
    `name` field, and unknown-key exclusion. Adds no fields of its own.
    """

    pass
139
+
140
+
141
class PypelineYamlSchema(ExcludeUnknownSchema):
    """The primary `pypeline.yaml` file schema. This defines all available
    properties in a valid Pypeline configuration file.
    """

    serviceConfig = fields.List(
        fields.Nested(
            PypelineServiceConfigSchema,
            required=True,
            description="Core service configuration.",
        ),
        description="List of workers for Pypeline to manage.",
        required=True,
    )

    pipelines = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(BasePipelineSchema),
        description="List of pipelines",
        required=False,
    )

    scheduledTasks = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(BaseScheduleSchema),
        description="List of scheduled tasks",
        required=False,
    )

    executableJobs = fields.Dict(
        keys=fields.String(),
        values=fields.Nested(ExecutableJobSchema),
        description="List of executable jobs",
        required=False,
        allow_none=True,
    )

    def validate_errors(self, schema: Schema, value: dict):
        """Run Marshmallow validate() and raise if any errors.

        Arguments:
            schema: A Schema *class* (instantiated here, not an instance).
            value: The mapping to validate.

        Raises:
            ValidationError: If `value` fails validation against `schema`.
        """
        errors = schema().validate(value)
        if errors:
            raise ValidationError(errors)

    @validates_schema
    def validate_schema(self, data, **kwargs):
        """Additional validation.

        Nested fields that are not required are not validated by Marshmallow
        by default. Do a single level down of validation for now.

        imageConfig can provide *either* an install command for Pypeline
        to use to build the image for customer *or* a Docker repository
        for Pypeline to pull.
        """
        # Validate nested schemas one level down.
        key_schema_pairs = (("serviceConfig", PypelineServiceConfigSchema),)
        for key, nested_schema in key_schema_pairs:
            val = data.get(key, None)
            if val is None:
                continue
            if isinstance(val, list):
                for item in val:
                    self.validate_errors(nested_schema, item)
            else:
                self.validate_errors(nested_schema, val)

        # Validate the services. We list every service schema field as not
        # required in order to use them as mixins for a generic service
        # object; however, they ARE required, so validate here using the
        # custom metadata property `_required` (default: the field's own
        # `required` flag). Schema fields are built once, outside the loop.
        worker_fields = PypelineCeleryWorkerConfigSchema().fields
        for service in data.get("serviceConfig"):
            for field_name, field_obj in worker_fields.items():
                is_required = field_obj.metadata.get(
                    "_required", field_obj.required
                )
                # Explicit check instead of `assert` so validation still
                # runs under `python -O` (asserts are stripped with -O).
                if is_required and field_name not in service:
                    raise ValidationError(
                        f"`{field_name}` missing in worker definition."
                    )

        # Validate each pipeline against the schema version it declares.
        # (Pipeline ids come from dict keys and are unique by construction,
        # so no separate duplicate-id check is needed.)
        if "pipelines" in data:
            for pipeline_id, pipeline_data in data["pipelines"].items():
                schema_version = pipeline_data["schemaVersion"]
                PipelineSchema = BasePipelineSchema.get_by_version(schema_version)
                self.validate_errors(PipelineSchema, pipeline_data)

        # Validate each scheduled task against its declared schema version.
        if "scheduledTasks" in data:
            for task_id, task_data in data["scheduledTasks"].items():
                schema_version = task_data["schemaVersion"]
                TaskSchema = BaseScheduleSchema.get_by_version(schema_version)
                self.validate_errors(TaskSchema, task_data)
247
+
248
+
249
class YamlPatternConstructor:
    """Adds a pattern resolver + constructor to PyYaml.

    Typical/default usage is for parsing environment variables
    in a yaml file, but this can be used for any pattern you provide.

    See: https://pyyaml.org/wiki/PyYAMLDocumentation
    """

    def __init__(self, env_var_pattern: str = None, add_constructor: bool = True):
        # Default pattern matches the whole scalar: ${VAR} or ${VAR:default}
        self.env_var_pattern = (
            env_var_pattern if env_var_pattern is not None else r"^\$\{(.*)\}$"
        )
        self.path_matcher = re.compile(self.env_var_pattern)

        if add_constructor:
            self.add_constructor()

    def _path_constructor(self, loader, node):
        """Resolve a `${VAR}` / `${VAR:default}` scalar against the environment.

        TODO: Would need to update this (specifically the parsing) if any
        pattern other than our default (or a highly compatible variation)
        is provided.
        """
        # A node whose value does not match the pattern is not parsed for
        # ENV variables; returning None leaves it as-is.
        match = re.match(self.env_var_pattern, node.value)
        if match is None:
            return None

        # Text inside ${...}. The first `:` separates the variable name
        # from its default; partition keeps any later colons (URLs etc.)
        # intact inside the default.
        var_name, sep, default = match.group(1).partition(":")

        env_value = os.environ.get(var_name)
        if env_value is not None:
            return env_value
        if sep:  # a default was supplied (e.g. VAR:default)
            return default
        return "unset"  # neither in environ nor defaulted

    def add_constructor(self):
        """Initialize PyYaml with ability to resolve/load environment
        variables defined in a yaml template when they exist in
        the environment.

        Registered on SafeLoader in addition to the standard Loader.
        """
        # Implicitly tag any scalar matching self.path_matcher with
        # `!env_var`, so templates don't need explicit tags per value.
        yaml.add_implicit_resolver("!env_var", self.path_matcher)
        yaml.add_implicit_resolver("!env_var", self.path_matcher, Loader=SafeLoader)

        # Constructor turns a tagged YAML node into its resolved value.
        yaml.add_constructor("!env_var", self._path_constructor)
        yaml.add_constructor("!env_var", self._path_constructor, Loader=SafeLoader)
322
+
323
+
324
def parse_config_file(pypeline_yaml: str):
    """Parse the `pypeline.yaml` file when it's been loaded.

    Arguments:
        pypeline_yaml (required): String of loaded pypeline.yaml file.

    Returns:
        The validated configuration mapping.

    Raises:
        InvalidPypelineConfig: If the YAML is malformed or fails schema
            validation. The original exception is chained as ``__cause__``
            so tracebacks show the root cause.
    """
    YamlPatternConstructor()  # Add our env variable parser
    try:
        pypeline_yaml_schema = PypelineYamlSchema()
        # First suss out yaml issues
        pypeline_config = yaml.safe_load(pypeline_yaml)
        # Then schema issues
        pypeline_config = pypeline_yaml_schema.load(pypeline_config)
    except ValidationError as e:
        msg = "Invalid Pypeline configuration due to {}".format(e.messages)
        logger.error(msg)
        raise InvalidPypelineConfig(msg) from e
    except Exception as e:
        msg = (
            "Invalid Pypeline configuration, likely due to invalid "
            "YAML formatting ..."
        )
        logger.exception("{} {}".format(msg, e))
        raise InvalidPypelineConfig(msg) from e
    return pypeline_config
349
+
350
+
351
def _get_pkg_name(pkg_name: str) -> str:
    """Retrieve the normalized package name.

    Falls back to the environment-derived PYPELINE_CLIENT_PKG_NAME when no
    explicit name is given; returns None when neither is available.
    """
    resolved = pkg_name if pkg_name is not None else PYPELINE_CLIENT_PKG_NAME
    if resolved is None:
        return None
    return normalized_pkg_name(resolved)
358
+
359
+
360
def load_pypeline_config(
    pkg_name: str = None, pypeline_yaml_filename: str = None, as_dict: bool = True
):
    """Load and parse the `pypeline.yaml` file. Issue usable exceptions for
    known error modes so bootstrapping can handle appropriately.

    Arguments:
        pkg_name (required): Directory name for your Python
            package. e.g. my_package_name . If none provided, will check
            environment for `PYPELINE_CLIENT_PKG_NAME`. If not found,
            will exit.
        pypeline_yaml_filename (optional): Relative path to find your
            `pypeline.yaml` configuration file. Defaults to `pypeline.yaml`
            which should be found inside your `pkg_name`
        as_dict (optional): If true (default), return the loaded pypeline
            configuration as a dictionary. If false, return the loaded
            string value of the yaml file.

    Raises:
        InvalidPackagePath: If the package/file path cannot be resolved.
        MissingPypelineConfig: If the config file does not exist.
        InvalidPypelineConfig: If the file fails parsing/validation
            (propagated from parse_config_file).
    """
    if pypeline_yaml_filename is None:
        pypeline_yaml_filename = PYPELINE_YAML_PATH

    logger.info(
        f"Loading `pypeline.yaml` from package `{pkg_name}` "
        f"and file location `{pypeline_yaml_filename}` ..."
    )

    pkg_name = _get_pkg_name(pkg_name)
    if pkg_name is None:  # Nothing to retrieve at this point
        logger.warning("Unable to retrieve pypeline.yaml configuration ...")
        return None

    try:
        pypeline_config_path = pkg_resources.resource_filename(
            pkg_name, pypeline_yaml_filename
        )
    except Exception as e:
        msg = (
            "Either pkg_name ({}) or pypeline_yaml_filename ({}) is "
            "invalid ...".format(pkg_name, pypeline_yaml_filename)
        )
        logger.error("{} ... {}".format(msg, e))
        raise InvalidPackagePath(e) from e

    # InvalidPypelineConfig from parse_config_file propagates unchanged;
    # the previous re-raise handlers (`except X: raise`, `raise e`) were
    # redundant and have been removed.
    try:
        with open(pypeline_config_path, "r") as f:
            pypeline_config = parse_config_file(f.read())
    except FileNotFoundError as e:
        msg = "Pypeline config file could not be found at path {} ...".format(
            pypeline_config_path
        )
        raise MissingPypelineConfig(msg) from e

    if as_dict:
        return pypeline_config
    return yaml.safe_dump(pypeline_config)
421
+
422
+
423
def load_client_config_and_version(
    pkg_name: str = None, pypeline_yaml_filename: str = None
):
    """Load and parse the `pypeline.yaml` file and a client package's version.

    Arguments:
        pkg_name (required): Directory name for your Python
            package. e.g. my_package_name . If none provided, will check
            environment for `PYPELINE_CLIENT_PKG_NAME`. If not found,
            will exit.
        pypeline_yaml_filename (optional): Relative path to find your
            `pypeline.yaml` configuration file. Defaults to `pypeline.yaml`
            which should be found inside your `pkg_name`

    Returns:
        Tuple of (pypeline_config, client_version); either element may be
        None when loading fails (failures are logged, not raised).

    For this to work properly, the provided package must be installed in the
    same environment as this Pypeline package and it must have a `__version__`
    variable inside its `__init__.py` file, e.g. `__version__ = '0.0.0'`
    """
    pypeline_config = None
    client_version = None

    pkg_name = _get_pkg_name(pkg_name)

    try:
        loader = PypelineModuleLoader()
        pkg = loader.get_module(pkg_name + ".__init__")
        client_version = getattr(pkg, "__version__", "0.0.0")
        pypeline_config = load_pypeline_config(pkg_name, pypeline_yaml_filename)
    except (MissingPypelineConfig, InvalidPypelineConfig, InvalidPackagePath) as e:
        # Known failure modes share identical handling: log and fall
        # through, returning whatever partial results were gathered.
        logger.error(e)
    except Exception as e:
        logger.error(
            "Unable to load client's pkg __version__ or "
            "{} config file for package: {} ... {}".format(
                pypeline_yaml_filename, pkg_name, e
            )
        )

    return pypeline_config, client_version
@@ -0,0 +1,125 @@
1
+ """ Schemas for Schedule Configuration
2
+ """
3
+ import re
4
+
5
+ # from celery.schedules import crontab_parser
6
+ from croniter import croniter
7
+ from marshmallow.exceptions import ValidationError
8
+ from marshmallow import Schema, fields, EXCLUDE, pre_load, validates_schema
9
+
10
+
11
class ExcludeUnknownSchema(Schema):
    """Remove unknown keys from loaded dictionary"""

    class Meta:
        # EXCLUDE: unrecognized input keys are silently discarded rather
        # than raising a ValidationError.
        unknown = EXCLUDE
16
+
17
+
18
class CrontabScheduleSchema(Schema):
    """Crontab-style schedule: one string field per cron position."""

    minute = fields.String(required=True)
    hour = fields.String(required=True)
    dayOfWeek = fields.String(required=True)
    dayOfMonth = fields.String(required=True)
    monthOfYear = fields.String(required=True)

    @validates_schema
    def validate_values(self, data, **kwargs):
        """Reject None values and cron expressions croniter can't parse.

        Raises:
            ValidationError: If any crontab value is None, or the assembled
                five-field expression is not a valid cron expression.
        """
        if (
            data["minute"] is None
            or data["hour"] is None
            or data["dayOfWeek"] is None
            or data["dayOfMonth"] is None
            or data["monthOfYear"] is None
        ):
            raise ValidationError("Empty crontab value")

        # croniter field order: minute hour day-of-month month day-of-week
        test_cron_expression = (
            f"{data['minute']} {data['hour']} {data['dayOfMonth']} "
            f"{data['monthOfYear']} {data['dayOfWeek']}"
        )

        if not croniter.is_valid(test_cron_expression):
            # BUG FIX: this previously *returned* the ValidationError
            # instead of raising it, so invalid cron expressions were
            # silently accepted.
            raise ValidationError("Invalid crontab value")
+
44
+
45
class Schedule(fields.Dict):
    """Dict field whose value is validated as a crontab schedule on load."""

    def _serialize(self, value, attr, obj, **kwargs):
        # Pass-through on dump: emit the stored mapping unchanged.
        return value

    def _deserialize(self, value, attr, data, **kwargs):
        # On load, validate the incoming mapping via CrontabScheduleSchema
        # so malformed crontab entries fail schema loading.
        schema = CrontabScheduleSchema()
        return schema.load(value)
52
+
53
+
54
class ScheduleConfigSchemaV1(ExcludeUnknownSchema):
    """Definition of a single schedule entry"""

    queue = fields.String(
        required=True,
        description="Name of queue on which to place task.",
        example="my-default-queue",
    )
    task = fields.String(
        required=True,
        description="Path to task to invoke.",
        example="my_app.module.method",
    )

    # Crontab timing; validated through CrontabScheduleSchema by the
    # custom Schedule field's _deserialize.
    schedule = Schedule(required=True)
69
+
70
+
71
class BaseScheduleSchema(ExcludeUnknownSchema):
    """Versioned base schema for a scheduled-task entry.

    Subclasses declare `__schema_version__`; incoming data carries
    `schemaVersion` and is dispatched to the matching subclass for full
    validation.
    """

    __schema_version__ = 0

    name = fields.String(
        required=True,
        description="Name of schedule entry.",
        example="My Scheduled Task",
    )
    schemaVersion = fields.Integer(required=True)
    config = fields.Dict(required=True)
    enabled = fields.Boolean(
        required=True, description="Whether entry is enabled.", example=True
    )

    @classmethod
    def get_by_version(cls, version):
        """Return the subclass whose __schema_version__ matches, else None."""
        for subclass in cls.__subclasses__():
            if subclass.__schema_version__ == version:
                return subclass

        return None

    @classmethod
    def get_latest(cls):
        """Return the subclass with the highest __schema_version__.

        Returns None when no subclasses exist.
        """
        max_version = 0
        max_class = None
        for subclass in cls.__subclasses__():
            if subclass.__schema_version__ > max_version:
                # BUG FIX: previously `max_version = max_version` (a no-op
                # self-assignment), so any positive-version subclass
                # overwrote `max_class` regardless of version ordering.
                max_version = subclass.__schema_version__
                max_class = subclass

        return max_class

    @validates_schema
    def validate_scheduled_tasks(self, data, **kwargs):
        """Dispatch full validation to the schema matching schemaVersion."""
        schema_version = data["schemaVersion"]
        TaskSchema = BaseScheduleSchema.get_by_version(schema_version)
        if TaskSchema is None:
            # Unknown version: surface a schema error instead of the
            # TypeError that calling None would produce.
            raise ValidationError(
                "Unknown schemaVersion: {}".format(schema_version)
            )
        schema = TaskSchema()
        schema.load(data)
110
+
111
+
112
class ScheduleSchemaV1(BaseScheduleSchema):
    """Version-1 schedule entry: narrows `config` to ScheduleConfigSchemaV1."""

    __schema_version__ = 1

    config = fields.Nested(
        ScheduleConfigSchemaV1,
        required=True,
        description="Configuration information for this schedule.",
    )

    def validate_scheduled_tasks(self, data, **kwargs):
        # We need to add this function to avoid infinite recursion since
        # the BaseScheduleSchema class above uses the same method for
        # validation
        pass
File without changes
@@ -0,0 +1,81 @@
1
+ import logging
2
+ from typing import Union
3
+
4
+ from pypeline.constants import WORKER_NAME
5
+ from pypeline.pypeline_yaml import load_pypeline_config
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
def retrieve_latest_pipeline_config(
    pipeline_id: Union[str, None] = None
) -> Union[dict, list]:
    """Retrieve the 'latest' pipeline configuration for a given pipeline.

    Arguments:
        pipeline_id: If given, return only that pipeline's config.

    Returns:
        The matching pipeline config dict when `pipeline_id` is given, a
        list of all pipeline configs when it is None, or None when no
        configuration / `pipelines` section is available.

    Raises:
        ValueError: If `pipeline_id` is given but not found.
    """
    pypeline_config = load_pypeline_config()
    # load_pypeline_config() returns None when no config can be located;
    # guard before the membership test (previously a TypeError) --
    # consistent with retrieve_executable_job_config below.
    if not pypeline_config or "pipelines" not in pypeline_config:
        return None

    pipelines = []
    for p_id, config in pypeline_config["pipelines"].items():
        if pipeline_id == p_id:
            return config
        pipelines.append(config)

    if pipeline_id:
        raise ValueError(f"Invalid pipeline {pipeline_id}")

    return pipelines
31
+
32
+
33
def retrieve_latest_schedule_config():
    """Retrieve the 'latest' scheduled tasks configuration.

    Returns:
        A list of schedule config dicts, or None when no configuration /
        `scheduledTasks` section is available.
    """
    pypeline_config = load_pypeline_config()
    # Guard against a missing config (None) before the membership test,
    # which previously raised TypeError; mirrors
    # retrieve_executable_job_config.
    if not pypeline_config or "scheduledTasks" not in pypeline_config:
        return None
    return list(pypeline_config["scheduledTasks"].values())
42
+
43
+
44
def retrieve_executable_job_config():
    """Return the configured executable jobs as a list, or None.

    None is returned when no configuration could be loaded or when it has
    no `executableJobs` section.
    """
    pypeline_config = load_pypeline_config()

    if not pypeline_config:
        return None
    if "executableJobs" not in pypeline_config:
        return None
    # Job ids (the dict keys) are not needed; collect only the configs.
    return list(pypeline_config["executableJobs"].values())
55
+
56
+
57
def get_service_config_for_worker(
    pypeline_config: dict, worker_name: str = None
) -> Union[dict, None]:
    """For the current WORKER_NAME (which must be present in the environment
    of this worker instance for a valid deployment), return the worker's
    serviceConfig object.

    Raises:
        ValueError: If `pypeline_config` is None, or no service entry
            matches the resolved worker name.
    """
    if pypeline_config is None:
        raise ValueError("Pypeline config was not provided")

    # Fall back to the deployment-provided WORKER_NAME constant; if neither
    # is set there is nothing to look up.
    resolved_name = worker_name if worker_name is not None else WORKER_NAME
    if resolved_name is None:
        return None

    for service in pypeline_config.get("serviceConfig", []):
        if service["name"] == resolved_name:
            return service

    raise ValueError(
        "Could not find a service config for worker "
        f"`{resolved_name}`. Make sure you have added the service in"
        f" your pypeline.yaml with `name: {resolved_name}` and "
        "`type: celery-worker`."
    )