scalable-pypeline 1.2.3__py2.py3-none-any.whl → 2.0.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. pypeline/__init__.py +1 -1
  2. pypeline/barrier.py +34 -0
  3. pypeline/composition.py +349 -0
  4. pypeline/constants.py +51 -84
  5. pypeline/dramatiq.py +470 -0
  6. pypeline/extensions.py +9 -8
  7. pypeline/flask/__init__.py +3 -5
  8. pypeline/flask/api/pipelines.py +109 -148
  9. pypeline/flask/api/schedules.py +14 -39
  10. pypeline/flask/decorators.py +18 -53
  11. pypeline/flask/flask_pypeline.py +156 -0
  12. pypeline/middleware.py +61 -0
  13. pypeline/pipeline_config_schema.py +105 -92
  14. pypeline/pypeline_yaml.py +458 -0
  15. pypeline/schedule_config_schema.py +35 -120
  16. pypeline/utils/config_utils.py +52 -310
  17. pypeline/utils/module_utils.py +35 -71
  18. pypeline/utils/pipeline_utils.py +161 -0
  19. scalable_pypeline-2.0.2.dist-info/METADATA +217 -0
  20. scalable_pypeline-2.0.2.dist-info/RECORD +27 -0
  21. scalable_pypeline-2.0.2.dist-info/entry_points.txt +3 -0
  22. tests/fixtures/__init__.py +0 -1
  23. pypeline/celery.py +0 -206
  24. pypeline/celery_beat.py +0 -254
  25. pypeline/flask/api/utils.py +0 -35
  26. pypeline/flask/flask_sermos.py +0 -156
  27. pypeline/generators.py +0 -196
  28. pypeline/logging_config.py +0 -171
  29. pypeline/pipeline/__init__.py +0 -0
  30. pypeline/pipeline/chained_task.py +0 -70
  31. pypeline/pipeline/generator.py +0 -254
  32. pypeline/sermos_yaml.py +0 -442
  33. pypeline/utils/graph_utils.py +0 -144
  34. pypeline/utils/task_utils.py +0 -552
  35. scalable_pypeline-1.2.3.dist-info/METADATA +0 -163
  36. scalable_pypeline-1.2.3.dist-info/RECORD +0 -33
  37. scalable_pypeline-1.2.3.dist-info/entry_points.txt +0 -2
  38. tests/fixtures/s3_fixtures.py +0 -52
  39. {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.2.dist-info}/LICENSE +0 -0
  40. {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.2.dist-info}/WHEEL +0 -0
  41. {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.2.dist-info}/top_level.txt +0 -0
pypeline/dramatiq.py ADDED
@@ -0,0 +1,470 @@
1
+ import importlib
2
+ import os.path
3
+ import sys
4
+ import typing
5
+ import logging
6
+ import click
7
+ from pypeline.extensions import pypeline_config
8
+ from warnings import warn
9
+ from functools import wraps
10
+ from apscheduler.schedulers.blocking import BlockingScheduler
11
+ from apscheduler.triggers.cron import CronTrigger
12
+ from typing import Awaitable, Callable, Optional, Union, TYPE_CHECKING, TypeVar
13
+ from dramatiq import Broker, Middleware, actor as register_actor, set_broker, get_broker
14
+ from dramatiq.brokers.rabbitmq import RabbitmqBroker
15
+ from dramatiq.cli import (
16
+ CPUS,
17
+ HAS_WATCHDOG,
18
+ main as dramatiq_worker,
19
+ make_argument_parser as dramatiq_argument_parser,
20
+ import_object,
21
+ )
22
+ from dramatiq.middleware import default_middleware
23
+ from dramatiq.results import Results
24
+ from dramatiq.results.backends.redis import RedisBackend
25
+ from flask import current_app, Flask
26
+ from flask.cli import with_appcontext
27
+
28
+ from pypeline.constants import (
29
+ REDIS_URL,
30
+ RABBIT_URL,
31
+ DEFAULT_BROKER_CALLABLE,
32
+ MS_IN_SECONDS,
33
+ DEFAULT_TASK_TTL,
34
+ DEFAULT_RESULT_TTL,
35
+ DEFAULT_TASK_MAX_RETRY,
36
+ DEFAULT_TASK_MIN_BACKOFF,
37
+ DEFAULT_TASK_MAX_BACKOFF,
38
+ )
39
+ from pypeline.middleware import ParallelPipeline
40
+ from pypeline.utils.config_utils import (
41
+ retrieve_latest_schedule_config,
42
+ get_service_config_for_worker,
43
+ )
44
+
45
# Generic return type used when annotating actor callables.
R = TypeVar("R")

if not TYPE_CHECKING:
    # At runtime a plain TypeVar stands in for the ParamSpec, so
    # typing_extensions is only required by static type checkers.
    P = TypeVar("P")
else:
    from typing_extensions import ParamSpec

    P = ParamSpec("P")

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
54
+
55
+
56
def configure_default_broker(broker: Broker = None):
    """Prepare a broker for pypeline and install it as the global broker.

    :param broker: Broker to configure. When ``None`` a ``RabbitmqBroker``
        is created from ``RABBIT_URL``.

    The broker receives a ``Results`` middleware backed by a
    ``RedisBackend`` built from ``REDIS_URL`` and a ``ParallelPipeline``
    middleware, then the worker actors are registered on it and it is
    handed to ``set_broker``.
    """
    result_store = RedisBackend(url=REDIS_URL)
    if broker is None:
        active_broker = RabbitmqBroker(url=RABBIT_URL)
    else:
        active_broker = broker

    active_broker.add_middleware(Results(backend=result_store))
    active_broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))

    # Actors are declared before the broker becomes the global one.
    register_actors_for_workers(active_broker)
    set_broker(active_broker)
63
+
64
+
65
def guess_code_directory(broker):
    """Return the directory of the top-level package of the first actor.

    The first actor found in ``broker.actors`` is inspected; the leading
    component of its function's ``__module__`` is looked up in
    ``sys.modules`` and the directory of that module's file is returned.
    """
    first_actor = next(iter(broker.actors.values()))
    top_level_name = first_actor.fn.__module__.split(".", 1)[0]
    package = sys.modules[top_level_name]
    return os.path.dirname(package.__file__)
70
+
71
+
72
def get_module(resource_dot_path: str):
    """Import and return the module part of a 'resource dot path'.

    Everything before the last dot is treated as the module path,
    e.g. ``package.subdir.feature_file.MyCallable`` imports
    ``package.subdir.feature_file``.
    """
    module_path, _, _ = resource_dot_path.rpartition(".")
    return importlib.import_module(module_path)
79
+
80
+
81
def get_callable_name(resource_dot_path: str) -> str:
    """Return the final component of a 'resource dot path'.

    e.g. ``package.subdir.feature_file.MyCallable`` yields ``MyCallable``.
    """
    return resource_dot_path.rpartition(".")[2]
87
+
88
+
89
def get_callable(resource_dot_path: str) -> Callable:
    """Resolve a 'resource dot path' to the object it names.

    The module part is imported and the attribute named by the last path
    component is returned, e.g. ``package.subdir.feature_file.MyCallable``.
    """
    return getattr(
        get_module(resource_dot_path),
        get_callable_name(resource_dot_path),
    )
96
+
97
+
98
def register_lazy_actor(
    broker: Broker,
    fn: Optional[Callable[P, Union[Awaitable[R], R]]] = None,
    pipeline_meta: Optional[typing.Dict] = None,
    **kwargs,
) -> "LazyActor":
    """Wrap ``fn`` in a :class:`LazyActor` and register it on ``broker``.

    :param broker: Broker the actor is registered on.
    :param fn: Callable to expose as an actor.
    :param pipeline_meta: Optional metadata mapping. The keys read are
        ``queue``, ``maxRetry``, ``retryBackoff``, ``retryBackoffMax``,
        ``maxTtl``, ``store_results`` and ``result_ttl``. ``None`` is treated
        as an empty mapping. The mapping is not modified.
    :param kwargs: Extra actor options. The options derived from
        ``pipeline_meta`` (``queue_name``, ``max_retries``, ``min_backoff``,
        ``max_backoff``, ``time_limit``, ``store_results``, ``result_ttl``)
        overwrite any value supplied here.
    :return: The registered :class:`LazyActor` instance.
    """
    # A fresh dict per call; a ``{}`` default would be shared between calls.
    meta = pipeline_meta or {}

    kwargs["queue_name"] = meta.get("queue", "default")
    kwargs["max_retries"] = meta.get("maxRetry", DEFAULT_TASK_MAX_RETRY)
    # Convert from seconds to milliseconds
    kwargs["min_backoff"] = (
        meta.get("retryBackoff", DEFAULT_TASK_MIN_BACKOFF) * MS_IN_SECONDS
    )
    kwargs["max_backoff"] = (
        meta.get("retryBackoffMax", DEFAULT_TASK_MAX_BACKOFF) * MS_IN_SECONDS
    )
    kwargs["time_limit"] = meta.get("maxTtl", DEFAULT_TASK_TTL) * MS_IN_SECONDS
    # Results are stored only when the metadata asks for it; the result TTL
    # option is passed along only in that case.
    kwargs["store_results"] = meta.get("store_results", False)
    if kwargs["store_results"]:
        kwargs["result_ttl"] = (
            meta.get("result_ttl", DEFAULT_RESULT_TTL) * MS_IN_SECONDS
        )

    lazy_actor = LazyActor(fn, kwargs)
    lazy_actor.register(broker)
    return lazy_actor
123
+
124
+
125
def register_actors_for_workers(broker: Broker):
    """Register every task of this worker's service as an actor on ``broker``.

    For each entry of the service's ``registeredTasks`` the matching
    metadata is looked up, first among the pipelines' ``taskDefinitions``
    and then among the scheduled jobs. The handler is then imported and
    registered through :func:`register_lazy_actor`.

    Import and registration failures are logged and the task is skipped, so
    one broken handler does not prevent the others from being registered.

    :param broker: Broker the actors are registered on.
    :raises ValueError: If a registered task is referenced by neither a
        pipeline nor a scheduled job.
    """
    service = get_service_config_for_worker(pypeline_config)
    scheduled_jobs_config = retrieve_latest_schedule_config()

    if not service:
        return

    # pypeline_config is None when the client configuration failed to load.
    pipelines = pypeline_config["pipelines"] if pypeline_config else {}

    for task in service.get("registeredTasks", []):
        handler_path = task["handler"]  # Required, no default
        logger.debug("Registering task %s", task)

        pipeline_meta = None
        for pipeline in pipelines.values():
            pipeline_config = pipeline["config"]
            pipeline_tasks = [
                t["handler"] for t in pipeline_config["taskDefinitions"].values()
            ]
            if handler_path in pipeline_tasks:
                metadata = pipeline_config["metadata"]
                # Work on a copy: store_results is set below and must not
                # leak into the shared pipeline configuration.
                pipeline_meta = None if metadata is None else dict(metadata)
                break

        if pipeline_meta is None:
            # Not part of a pipeline; fall back to the scheduled jobs. The
            # last matching job determines the queue.
            for job in scheduled_jobs_config:
                config = job["config"]
                if config["task"] == handler_path:
                    pipeline_meta = {"queue": config.get("queue", "default")}

        if pipeline_meta is None:
            raise ValueError(
                f"Registered task {task['handler']} is not defined in a pipeline or scheduled task"
            )

        try:
            tmp_handler = get_callable(handler_path)
            if pipeline_meta and pipeline_meta.get("maxRetry", 0) >= 0:
                pipeline_meta["store_results"] = True
            register_lazy_actor(broker, tmp_handler, pipeline_meta)
        except Exception:
            # Best effort: report the failure with its traceback and move on.
            logger.exception("Unable to add task %s to dramatiq", handler_path)
163
+
164
+
165
class Dramatiq:
    """Flask extension bridging Dramatiq broker and Flask app.

    Dramatiq API is eager: broker initialisation precedes actor declaration.
    This breaks the application factory pattern and other ways of
    initializing configuration after import.

    This class enables lazy initialization of Dramatiq. Actors declared with
    :meth:`actor` are collected and only registered on the broker once
    :meth:`init_app` has configured it.

    .. automethod:: actor
    .. automethod:: init_app
    """

    def __init__(
        self,
        app: Flask = None,
        name: str = "dramatiq",
        config_prefix: str = None,
        middleware: typing.List[Middleware] = None,
    ):
        """
        :param app: Flask application if already created. See
            :meth:`init_app`.

        :param name: Unique identifier for multi-broker app. It is also used
            to build the key under which the extension is stored in
            ``app.extensions``.

        :param config_prefix: Flask configuration option prefix for this
            broker. By default, it is derived from ``name`` parameter,
            upper-cased, with ``_BROKER`` appended.

        :param middleware: List of Dramatiq middleware instances to use
            instead of Dramatiq's defaults. When omitted, one instance of
            each default middleware class is created. The list is stored on
            the extension as ``self.middleware``.
        """
        self.actors = []
        self.app = None
        self.config_prefix = config_prefix or name.upper() + "_BROKER"
        self.name = name
        self.broker = None
        if middleware is None:
            middleware = [m() for m in default_middleware]
        self.middleware = middleware
        if app:
            self.init_app(app)

    def __repr__(self) -> str:
        return "<%s %s>" % (self.__class__.__name__, self.name)

    def init_app(self, app: Flask):
        """Initialize extension for one Flask application.

        The object referenced by ``DEFAULT_BROKER_CALLABLE`` is imported and
        called; it is expected to install the global broker. That broker is
        then fetched and every actor collected so far is registered on it.

        :raises TypeError: If ``DEFAULT_BROKER_CALLABLE`` does not resolve to
            a callable.
        """
        if self.app is not None:
            warn(
                "%s is used by more than one flask application. "
                "Actor's context may be set incorrectly." % (self,),
                stacklevel=2,
            )
        self.app = app
        app.extensions["dramatiq-" + self.name] = self

        _, broker_or_callable = import_object(DEFAULT_BROKER_CALLABLE)

        # The callable is expected to call set_broker() itself.
        if not callable(broker_or_callable):
            raise TypeError("DEFAULT_BROKER_CALLABLE must point to a callable function")
        logger.info("Configuring broker via %s", DEFAULT_BROKER_CALLABLE)
        broker_or_callable()

        self.broker = get_broker()
        for actor in self.actors:
            actor.register(broker=self.broker)

    def actor(self, fn=None, **kw):
        """Register a callable as Dramatiq actor.

        This decorator lazily registers a callable as a Dramatiq actor. It
        can be used bare (``@ext.actor``) or with options
        (``@ext.actor(queue_name=...)``). The actor can't be sent before
        :meth:`init_app` is called.

        :param fn: Callable to wrap when the decorator is used bare.
        :param kw: Keyword arguments passed to :func:`dramatiq.actor`.
        :return: A :class:`LazyActor`, or a decorator producing one.
        """
        # Substitute dramatiq.actor decorator to return a lazy wrapper. This
        # allows to register actors in extension before the broker is
        # effectively configured by init_app.

        def decorator(fn):
            # LazyActor's constructor takes exactly (fn, kw).
            lazy_actor = LazyActor(fn, kw)
            self.actors.append(lazy_actor)
            if self.app:
                # The broker already exists, so register right away.
                lazy_actor.register(self.broker)
            return lazy_actor

        if fn:
            return decorator(fn)
        return decorator
273
+
274
+
275
def format_actor(actor):
    """Return a display label for ``actor`` in the form ``name@queue``."""
    return f"{actor.actor_name}@{actor.queue_name}"
277
+
278
+
279
def ensure_return_value(default_value=None):
    """Build a decorator that substitutes ``default_value`` for ``None``.

    The decorated function is called unchanged; only a ``None`` result is
    replaced. Any other result, including falsy ones, is passed through.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            outcome = func(*args, **kwargs)
            return default_value if outcome is None else outcome

        return wrapper

    return decorator
294
+
295
+
296
class LazyActor(object):
    """Intermediate object that registers an actor on a broker on demand.

    Until :meth:`register` is called the wrapper only holds the function and
    its actor options. Calling the wrapper always invokes the wrapped
    function directly.
    """

    def __init__(self, fn, kw):
        """
        :param fn: Function to expose as an actor.
        :param kw: Keyword options forwarded to the actor decorator.
        """
        self.fn = fn
        self.kw = kw
        self.actor = None

    def __call__(self, *a, **kw):
        return self.fn(*a, **kw)

    def __repr__(self):
        return "<%s %s.%s>" % (
            self.__class__.__name__,
            self.fn.__module__,
            self.fn.__name__,
        )

    def __getattr__(self, name):
        # Only reached when normal lookup fails. Read the instance dict
        # directly: going through ``self.actor`` would re-enter __getattr__
        # forever when ``actor`` has not been set yet (e.g. an instance
        # created without running __init__).
        actor = self.__dict__.get("actor")
        if actor is None:
            raise AttributeError(name)
        return getattr(actor, name)

    def register(self, broker):
        """Create the real actor on ``broker``.

        The function is wrapped so that a ``None`` result is reported as
        ``True`` instead.
        """
        self.actor = register_actor(broker=broker, **self.kw)(
            ensure_return_value(default_value=True)(self.fn)
        )

    # Next is regular actor API.
    def send(self, *a, **kw):
        return self.actor.send(*a, **kw)

    def message(self, *a, **kw):
        return self.actor.message(*a, **kw)

    def send_with_options(self, *a, **kw):
        return self.actor.send_with_options(*a, **kw)
333
+
334
+
335
def list_managed_actors(broker, queues):
    """List the actors of ``broker`` that listen on the given queues.

    :param broker: Object exposing an ``actors`` mapping of actor objects.
    :param queues: Iterable of queue names to keep. ``None`` or an empty
        iterable selects every actor.
    :return: A list of actors, in the order of ``broker.actors``.
    """
    wanted = set(queues) if queues is not None else set()
    # Always hand back a list so both branches have the same return type.
    all_actors = list(broker.actors.values())
    if not wanted:
        return all_actors
    return [a for a in all_actors if a.queue_name in wanted]
342
+
343
+
344
@click.command("cron-scheduler")
def cron_scheduler():  # pragma: no cover
    """Schedule every enabled job of the schedule config and run forever.

    The default broker is configured, each enabled job's task is registered
    as an actor on it, and the actor's ``send`` is added to a blocking
    scheduler with a cron trigger built from the job's schedule.
    """
    scheduler = BlockingScheduler()

    # Configure the broker the scheduled tasks will be sent through.
    configure_default_broker()
    broker = get_broker()

    for job in retrieve_latest_schedule_config():
        if not job["enabled"]:
            continue

        config = job["config"]
        handler = get_callable(config["task"])
        actor = register_lazy_actor(
            broker, handler, {"queue": config.get("queue", "default")}
        )

        schedule = config["schedule"]
        crontab = f"{schedule['minute']} {schedule['hour']} {schedule['dayOfMonth']} {schedule['monthOfYear']} {schedule['dayOfWeek']}"
        scheduler.add_job(actor.send, CronTrigger.from_crontab(crontab))

    try:
        scheduler.start()
    except KeyboardInterrupt:
        scheduler.shutdown()
371
+
372
+
373
@click.command("pypeline-worker")
@click.argument("broker_name", default="dramatiq")
@click.option(
    "-v", "--verbose", default=0, count=True, help="turn on verbose log output"
)
@click.option(
    "-p",
    "--processes",
    default=CPUS,
    metavar="PROCESSES",
    show_default=True,
    help="the number of worker processes to run",
)
@click.option(
    "-t",
    "--threads",
    default=8,
    metavar="THREADS",
    show_default=True,
    help="the number of worker treads per processes",
)
@click.option(
    "-Q",
    "--queues",
    type=str,
    default=None,
    metavar="QUEUES",
    show_default=True,
    help="listen to a subset of queues, comma separated",
)
@click.option(
    "--use-spawn",
    type=bool,
    default=False,
    metavar="USE_SPAWN",
    show_default=True,
    help="start processes by spawning (default: fork on unix, spawn on windows)",
)
@with_appcontext
def pypeline_worker(
    verbose, processes, threads, queues, broker_name, use_spawn
):  # pragma: no cover
    """Run dramatiq workers.

    Setup Dramatiq with broker and task modules from Flask app.

    \b
    examples:
    # Run dramatiq with 1 thread per process.
    $ flask worker --threads 1

    \b
    # Listen only to the "foo" and "bar" queues.
    $ flask worker -Q foo,bar

    \b
    # Consuming from a specific broker
    $ flask worker mybroker
    """
    # This command wraps the dramatiq worker CLI: an argument list is
    # assembled here, parsed with dramatiq's own parser and handed to
    # dramatiq's main entry point.
    parser = dramatiq_argument_parser()

    # Make the broker of the selected extension the global one.
    extension = current_app.extensions["dramatiq-" + broker_name]
    broker = extension.broker
    set_broker(broker)

    # The trailing positional argument names the broker callable for the
    # dramatiq CLI.
    command = [
        "--processes",
        str(processes),
        "--threads",
        str(threads),
        DEFAULT_BROKER_CALLABLE,
    ]

    if use_spawn:
        command.append("--use-spawn")

    if current_app.config["DEBUG"]:
        # Debug mode forces at least one level of verbosity and, when
        # watchdog is available, watches the actors' code directory.
        verbose = max(1, verbose)
        if HAS_WATCHDOG:
            command.extend(["--watch", guess_code_directory(broker)])

    queues = queues.split(",") if queues else []
    if queues:
        command.append("--queues")
        command.extend(queues)

    command.extend(["-v"] * verbose)
    args = parser.parse_args(command)

    current_app.logger.info("Able to execute the following actors:")
    for managed in list_managed_actors(broker, queues):
        current_app.logger.info(" %s.", format_actor(managed))

    dramatiq_worker(args)
pypeline/extensions.py CHANGED
@@ -5,12 +5,13 @@ import logging
5
5
  logger = logging.getLogger(__name__)
6
6
 
7
7
  try:
8
- # Client packages *should* provide a `sermos.yaml` file. This
9
- # loads the configuration file with the provided name ofthe client
10
- # package (e.g. sermos_demo_client)
11
- from pypeline.sermos_yaml import load_client_config_and_version
12
- sermos_config, sermos_client_version = load_client_config_and_version()
8
+ # Client packages *should* provide a `pypeline.yaml` file. This
9
+ # loads the configuration file with the provided name of the client
10
+ # package (e.g. pypeline_demo)
11
+ from pypeline.pypeline_yaml import load_client_config_and_version
12
+
13
+ pypeline_config, pypeline_client_version = load_client_config_and_version()
13
14
  except Exception as e:
14
- sermos_config = None
15
- sermos_client_version = None
16
- logger.warning("Unable to load client Sermos config ... {}".format(e))
15
+ pypeline_config = None
16
+ pypeline_client_version = None
17
+ logger.warning("Unable to load client Pypeline config ... {}".format(e))
@@ -8,11 +8,9 @@ try:
8
8
  from flask_smorest import Blueprint, Api
9
9
  from flask import abort
10
10
  except Exception as e:
11
- logger.error("Unable to import Web services (Blueprint, API, abort)"
12
- f" ... {e}")
11
+ logger.error("Unable to import Web services (Blueprint, API, abort)" f" ... {e}")
13
12
 
14
13
  try:
15
- from pypeline.flask.flask_sermos import FlaskSermos
14
+ from pypeline.flask.flask_pypeline import FlaskPypeline
16
15
  except Exception as e:
17
- logger.exception("Unable to import Sermos services (FlaskSermos)"
18
- f" ... {e}")
16
+ logger.exception("Unable to import Sermos services (FlaskPypeline)" f" ... {e}")